### Attribute tagging for CoNLL-U and TEI output formats:

# Hashtags
# A bare "#" character token (U+0023) immediately followed by a word token is
# merged into a single <w> element. The "#" is kept in both the lemma and the
# surface text; the result is tagged xpos="Xh", upos="SYM".
<c>\u0023</c><w>([^<]+)</w> ==> <w lemma="\u0023\1" xpos="Xh" upos="SYM">\u0023\1</w>

# Mentions
# A bare "@" character token (U+0040) immediately followed by a word token is
# merged into a single <w> element. The "@" is kept in both the lemma and the
# surface text; the result is tagged xpos="Xa", upos="SYM".
<c>\u0040</c><w>([^<]+)</w> ==> <w lemma="\u0040\1" xpos="Xa" upos="SYM">\u0040\1</w>

# Emojis
# A word token consisting of exactly one character from the class below is
# tagged xpos="Xe", upos="SYM", with the character itself as lemma. The class
# mixes BMP symbols (\uXXXX) and supplementary-plane code points (\UXXXXXXXX);
# several ranges are listed more than once, which is harmless inside a
# character class.
# Keep the whole rule on one physical line, like every other rule in this
# file, so that no \U escape is split by a line break.
<w>([\u231A-\u231B\u2328\u23CF\u23E9-\u23F3\u23F8-\u23FA\u24C2\u25AA-\u25AB\u25B6\u25C0\u25FB-\u25FE\u2600-\u2604\u260E\u2611\u2614-\u2615\u2618\u261D\u2620\u2622-\u2623\u2626\u262A\u262E-\u262F\u2638-\u263A\u2648-\u2653\u2660\u2663\u2665-\u2666\u2668\u267B\u267F\u2692-\u2694\u2696-\u2697\u2699\u269B-\u269C\u26A0-\u26A1\u26AA-\u26AB\u26B0-\u26B1\u26BD-\u26BE\u26C4-\u26C5\u26C8\u26CE\u26CF\u26D1\u26D3-\u26D4\u26E9-\u26EA\u26F0-\u26F5\u26F7-\u26FA\u26FD\u2702\u2705\u2708-\u2709\u270A-\u270B\u270C-\u270D\u270F\u2712\u2714\u2716\u271D\u2721\u2728\u2733-\u2734\u2744\u2747\u274C\u274E\u2753-\u2755\u2757\u2763-\u2764\u2795-\u2797\u27A1\u27B0\u27BF\u2934-\u2935\u2B05-\u2B07\u2B1B-\u2B1C\u2B50\u2B55\u3030\u303D\u3297\u3299\U0001F004\U0001F0CF\U0001F170-\U0001F171\U0001F17E\U0001F17F\U0001F18E\U0001F191-\U0001F19A\U0001F1E6-\U0001F1FF\U0001F201-\U0001F202\U0001F21A\U0001F22F\U0001F232-\U0001F23A\U0001F250-\U0001F251\U0001F300-\U0001F320\U0001F321\U0001F324-\U0001F32C\U0001F32D-\U0001F32F\U0001F330-\U0001F335\U0001F336\U0001F337-\U0001F37C\U0001F37D\U0001F37E-\U0001F37F\U0001F380-\U0001F393\U0001F396-\U0001F397\U0001F399-\U0001F39B\U0001F39E-\U0001F39F\U0001F3A0-\U0001F3C4\U0001F3C5\U0001F3C6-\U0001F3CA\U0001F3CB-\U0001F3CE\U0001F3CF-\U0001F3D3\U0001F3D4-\U0001F3DF\U0001F3E0-\U0001F3F0\U0001F3F3-\U0001F3F5\U0001F3F7\U0001F3F8-\U0001F3FF\U0001F400-\U0001F43E\U0001F43F\U0001F440\U0001F441\U0001F442-\U0001F4F7\U0001F4F8\U0001F4F9-\U0001F4FC\U0001F4FD\U0001F4FF\U0001F500-\U0001F53D\U0001F549-\U0001F54A\U0001F54B-\U0001F54E\U0001F550-\U0001F567\U0001F56F-\U0001F570\U0001F573-\U0001F579\U0001F57A\U0001F587\U0001F58A-\U0001F58D\U0001F590\U0001F595-\U0001F596\U0001F5A4\U0001F5A5\U0001F5A8\U0001F5B1-\U0001F5B2\U0001F5BC\U0001F5C2-\U0001F5C4\U0001F5D1-\U0001F5D3\U0001F5DC-\U0001F5DE\U0001F5E1\U0001F5E3\U0001F5E8\U0001F5EF\U0001F5F3\U0001F5FA\U0001F5FB-\U0001F5FF\U0001F600\U0001F601-\U0001F610\U0001F611\U0001F612-\U0001F614\U0001F615\U0001F616\U0001F617\U0001F618\U0001F619\U0001F61A\U0001F61B\U0001F61C-\U0001F61E\U0001F61F\U0001F620-\U0001F625\U0001F626-\U0001F627\U0001F628-\U0001F62B\U0001F62C\U0001F62D\U0001F62E-\U0001F62F\U0001F630-\U0001F633\U0001F634\U0001F635-\U0001F640\U0001F641-\U0001F642\U0001F643-\U0001F644\U0001F645-\U0001F64F\U0001F680-\U0001F6C5\U0001F6CB-\U0001F6CF\U0001F6D0\U0001F6D1-\U0001F6D2\U0001F6E0-\U0001F6E5\U0001F6E9\U0001F6EB-\U0001F6EC\U0001F6F0\U0001F6F3\U0001F6F4-\U0001F6F6\U0001F910-\U0001F918\U0001F919-\U0001F91E\U0001F920-\U0001F927\U0001F930\U0001F933-\U0001F93A\U0001F93C-\U0001F93E\U0001F940-\U0001F945\U0001F947-\U0001F94B\U0001F950-\U0001F95E\U0001F980-\U0001F984\U0001F985-\U0001F991\U0001F9C0\u231A-\u231B\u23E9-\u23EC\u23F0\u23F3\u25FD-\u25FE\u2614-\u2615\u2648-\u2653\u267F\u2693\u26A1\u26AA-\u26AB\u26BD-\u26BE\u26C4-\u26C5\u26CE\u26D4\u26EA\u26F2-\u26F3\u26F5\u26FA\u26FD\u2705\u270A-\u270B\u2728\u274C\u274E\u2753-\u2755\u2757\u2795-\u2797\u27B0\u27BF\u2B1B-\u2B1C\u2B50\u2B55\U0001F004\U0001F0CF\U0001F18E\U0001F191-\U0001F19A\U0001F1E6-\U0001F1FF\U0001F201\U0001F21A\U0001F22F\U0001F232-\U0001F236\U0001F238-\U0001F23A\U0001F250-\U0001F251\U0001F300-\U0001F320\U0001F32D-\U0001F32F\U0001F330-\U0001F335\U0001F337-\U0001F37C\U0001F37E-\U0001F37F\U0001F380-\U0001F393\U0001F3A0-\U0001F3C4\U0001F3C5\U0001F3C6-\U0001F3CA\U0001F3CF-\U0001F3D3\U0001F3E0-\U0001F3F0\U0001F3F4\U0001F3F8-\U0001F3FF\U0001F400-\U0001F43E\U0001F440\U0001F442-\U0001F4F7\U0001F4F8\U0001F4F9-\U0001F4FC\U0001F4FF\U0001F500-\U0001F53D\U0001F54B-\U0001F54E\U0001F550-\U0001F567\U0001F57A\U0001F595-\U0001F596\U0001F5A4\U0001F5FB-\U0001F5FF\U0001F600\U0001F601-\U0001F610\U0001F611\U0001F612-\U0001F614\U0001F615\U0001F616\U0001F617\U0001F618\U0001F619\U0001F61A\U0001F61B\U0001F61C-\U0001F61E\U0001F61F\U0001F620-\U0001F625\U0001F626-\U0001F627\U0001F628-\U0001F62B\U0001F62C\U0001F62D\U0001F62E-\U0001F62F\U0001F630-\U0001F633\U0001F634\U0001F635-\U0001F640\U0001F641-\U0001F642\U0001F643-\U0001F644\U0001F645-\U0001F64F\U0001F680-\U0001F6C5\U0001F6CC\U0001F6D0\U0001F6D1-\U0001F6D2\U0001F6EB-\U0001F6EC\U0001F6F4-\U0001F6F6\U0001F910-\U0001F918\U0001F919-\U0001F91E\U0001F920-\U0001F927\U0001F930\U0001F933-\U0001F93A\U0001F93C-\U0001F93E\U0001F940-\U0001F945\U0001F947-\U0001F94B\U0001F950-\U0001F95E\U0001F980-\U0001F984\U0001F985-\U0001F991\U0001F9C0\U0001F3FB-\U0001F3FF\u261D\u26F9\u270A-\u270B\u270C-\u270D\U0001F385\U0001F3C3-\U0001F3C4\U0001F3CA\U0001F3CB\U0001F442-\U0001F443\U0001F446-\U0001F450\U0001F466-\U0001F469\U0001F46E\U0001F470-\U0001F478\U0001F47C\U0001F481-\U0001F483\U0001F485-\U0001F487\U0001F4AA\U0001F575\U0001F57A\U0001F590\U0001F595-\U0001F596\U0001F645-\U0001F647\U0001F64B-\U0001F64F\U0001F6A3\U0001F6B4-\U0001F6B6\U0001F6C0\U0001F918\U0001F919-\U0001F91E\U0001F926\U0001F930\U0001F933-\U0001F939\U0001F93C-\U0001F93E])</w> ==> <w lemma="\1" xpos="Xe" upos="SYM">\1</w>

# Emoticons
# Western-style emoticons assembled from adjacent tokens. Each rule replaces
# the matched token sequence with a single <w> tagged xpos="Xe", upos="SYM".
# NOTE(review): $txt is presumably expanded by the rule engine to the plain
# text of the matched tokens (markup stripped) - confirm against the engine.
#
# Eyes (one or more of : ; =), optional "-" nose(s), then one or more mouth
# character tokens out of ) ( $ * \ / |
(<c>[:;=]</c>)+(<c>-</c>)*(<c>[\)\(\$\*\\\/|]</c>)+ ==> <w lemma="$txt" xpos="Xe" upos="SYM">$txt</w>
# Same eyes and nose, but the mouth is one or more word tokens that each
# consist of a single letter p, P, D or O.
(<c>[:;=]</c>)+(<c>-</c>)*(<w>[pPDO]</w>)+ ==> <w lemma="$txt" xpos="Xe" upos="SYM">$txt</w>
# Hearts: one or more escaped less-than character tokens followed by a word
# token made of one or more "3" digits.
(<c>&lt;</c>)+<w>(3)+</w> ==> <w lemma="$txt" xpos="Xe" upos="SYM">$txt</w>


# Special punct
# Single-character tokens that are tagged as symbols (xpos="Z", upos="SYM")
# rather than punctuation: number sign, percent, ampersand, plus, equals,
# degree, division, dollar, at, micro, copyright, trademark, registered and
# section signs.
# NOTE(review): \u0078 is the Latin letter "x", presumably listed for its use
# as a multiplication sign - confirm this is intended.
<c>([\u0023\u0025\u0026\u002B\u003D\u00B0\u0078\u00F7\u0024\u0040\u00B5\u00A9\u2122\u00AE\u00A7])</c> ==> <c lemma="\1" xpos="Z" upos="SYM">\1</c>
# Escaped angle brackets (the lt and gt entities). The alternation is grouped
# so that the leading "&" and the trailing ";" apply to both alternatives;
# group 1 still captures the complete entity.
<c>(&(lt|gt);)</c> ==> <c lemma="\1" xpos="Z" upos="SYM">\1</c>

# Other punct
# Any bare <c> token whose single character is in the class below is tagged
# xpos="Z", upos="PUNCT". The class combines three wide ranges
# (U+0000-U+001F, U+0250-U+1DFF, U+1F00-U+FFFF) with individually listed
# code points.
# NOTE(review): the wide ranges also contain characters named in the
# "Special punct" rule (e.g. U+2122); this presumably relies on that rule
# being applied first, after which the token carries attributes and no longer
# matches a bare <c> - confirm rule ordering in the engine.
<c>([\u0000-\u001F\u0250-\u1DFF\u1F00-\uFFFF\u0021\u0022\u0027\u0028\u0029\u002A\u002C\u002D\u002E\u002F\u003A\u003B\u003C\u003E\u003F\u005B\u005C\u005D\u005E\u005F\u0060\u007B\u007C\u007D\u007E\u007F\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008A\u008B\u008C\u008D\u008E\u008F\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009A\u009B\u009C\u009D\u009E\u009F\u00A1\u00A2\u00A3\u00A4\u00A5\u00A6\u00A8\u00AB\u00AC\u00AD\u00AF\u00B1\u00B4\u00B6\u00B7\u00B8\u00BB\u00BF\u00D7\u1E9C\u1E9D\u1E9E\u1E9F\u1EFA\u1EFB\u1EFC\u1EFD\u1EFE\u1EFF])</c> ==> <c lemma="\1" xpos="Z" upos="PUNCT">\1</c>

# URLs
# A word token that looks like a URL: an optional ftp/http/https scheme,
# zero or more host labels separated by "." or "-", a final label, a dot and
# a top-level domain of 2-6 letters, then optional "/"-separated path segments
# and an optional trailing "/". The whole token is captured as group 1 and
# tagged xpos="Xw", upos="SYM".
# The replacement closes with </w> so that it matches its opening <w> tag.
<w>((((ftp)|(https?))://)?((([0-9\p{L}]+)|([~_!*'-]))+[\.-])*([0-9\p{L}][0-9\p{L}-]{0,61})?[0-9\p{L}]\.[\p{L}]{2,6}(/(([0-9\p{L}]+)|([~_!*'().;?:@=+$,%#-])|(&))*(([0-9\p{L}]+)|([~_*'@=+$%#-])|(&)))*(/)?)</w> ==> <w lemma="\1" xpos="Xw" upos="SYM">\1</w>

# E-mail
# A word token that looks like an e-mail address: a local part of ASCII
# alphanumerics, "_" and "-" with optional dot-separated sections, an "@",
# a dotted domain built from the same characters, and a top-level domain of
# 2-6 ASCII letters. The whole token is captured as group 1 and tagged
# xpos="Xw", upos="SYM".
# Uses the same rule separator as every other rule in this file, and the
# replacement closes with </w> so that it matches its opening <w> tag.
<w>(((([A-Za-z0-9]+)|([_-]))+\.)*(([A-Za-z0-9]+)|([_-]))+@((([A-Za-z0-9]+)|([_-]))+\.)*(([A-Za-z0-9]+)|([_-]))+\.[A-Za-z]{2,6})</w> ==> <w lemma="\1" xpos="Xw" upos="SYM">\1</w>

