Implement the confusable algorithm from Unicode more closely

See UTS#39 "Unicode Security Mechanisms":
http://www.unicode.org/reports/tr39/

This means that characters with accents or other marks around them are
now considered confusable with the base character.

Fixes #557. Fixes #575.
This commit is contained in:
heinrich5991 2016-10-30 12:25:49 +01:00
parent 795e68ec2b
commit cd3b0ae855
9 changed files with 23345 additions and 140 deletions

1
.gitignore vendored
View file

@ -9,6 +9,7 @@ SDL.dll
SDL2.dll SDL2.dll
freetype.dll freetype.dll
/confusables*
/crapnet* /crapnet*
/config_store* /config_store*
/config_retrieve* /config_retrieve*

View file

@ -0,0 +1,140 @@
# Needs UnicodeData.txt and confusables.txt in the current directory.
#
# Those can be obtained from unicode.org:
# - http://www.unicode.org/Public/security/<VERSION>/confusables.txt
# - http://www.unicode.org/Public/<VERSION>/ucd/UnicodeData.txt
#
# If executed as a script, it will generate the contents of the file
# `src/base/confusables_data.h`.
import csv
def confusables():
    """Parse `confusables.txt` from the current directory.

    Returns a list with one dict per data line, keyed by 'Value', 'Target'
    and 'Category' (the three `;`-separated columns). Everything from the
    first `#` on a line is dropped before parsing, so blank lines and lines
    that start with `#` produce no row. Field contents are returned exactly
    as found in the file, including surrounding whitespace.
    """
    # 'utf-8-sig' transparently drops a leading byte order mark.
    with open('confusables.txt', encoding='utf-8-sig') as f:
        # Filter comments
        uncommented = (line.split('#')[0] for line in f)
        reader = csv.DictReader(uncommented, fieldnames=['Value', 'Target', 'Category'], delimiter=';')
        # Materialize the rows while the file is still open.
        return list(reader)
# Column names for UnicodeData.txt, in file order. The file has 15
# `;`-separated columns; the numeric information occupies three of them
# (decimal digit value, digit value, numeric value), so each needs its own
# name to keep the following columns aligned.
UNICODEDATA_FIELDS = (
    "Value",
    "Name",
    "General_Category",
    "Canonical_Combining_Class",
    "Bidi_Class",
    "Decomposition",
    "Decimal",
    "Digit",
    "Numeric",
    "Bidi_Mirrored",
    "Unicode_1_Name",
    "ISO_Comment",
    "Simple_Uppercase_Mapping",
    "Simple_Lowercase_Mapping",
    "Simple_Titlecase_Mapping",
)
def unicodedata():
    """Parse `UnicodeData.txt` from the current directory.

    Returns a list with one dict per line of the file, keyed by the names
    in UNICODEDATA_FIELDS. All values are the raw strings from the file.
    """
    # Explicit encoding so the result does not depend on the locale.
    with open('UnicodeData.txt', encoding='utf-8') as f:
        return list(csv.DictReader(f, fieldnames=UNICODEDATA_FIELDS, delimiter=';'))
def unhex(s):
    """Return the integer value of the hexadecimal string `s`."""
    return int(s, 16)
def unhex_sequence(s):
    """Convert whitespace-separated hexadecimal numbers to a list of ints.

    Returns None when `s` contains a `<`, i.e. when the field carries a
    `<tag>` instead of being a plain sequence. An empty or blank string
    gives an empty list.
    """
    if '<' in s:
        return None
    return [unhex(x) for x in s.split()]
def generate_decompositions():
    """Compute the replacement sequence for every code point of interest.

    Reads the rows returned by unicodedata() and confusables() and returns
    a dict mapping a code point to the list of code points it reduces to.
    A code point is reduced by repeatedly replacing each character with its
    untagged decomposition and then with its confusables target, until the
    sequence stops changing; characters of general category C*, M* or Z*
    are dropped from the final sequence.

    The dict has an entry for every code point that is ignored, has an
    untagged decomposition, or has a confusables entry.

    Raises ValueError if the substitutions for a code point run into a
    cycle instead of converging.
    """
    ud = unicodedata()
    con = confusables()

    def category(prefix):
        # Code points whose General_Category starts with `prefix`.
        return {unhex(u["Value"]) for u in ud if u["General_Category"].startswith(prefix)}

    # unhex_sequence() yields None for tagged (`<...>`) mappings and an empty
    # list for an empty field; both are falsy and are dropped here.
    nfd = {unhex(u["Value"]): unhex_sequence(u["Decomposition"]) for u in ud}
    nfd = {k: v for k, v in nfd.items() if v}

    con = {unhex(c["Value"]): unhex_sequence(c["Target"]) for c in con}

    # C: Control
    # M: Combining
    # Z: Space
    ignore = category("C") | category("M") | category("Z")

    # Additionally map these two to nothing.
    con[0x2800] = []  # BRAILLE PATTERN BLANK
    con[0xFFFC] = []  # OBJECT REPLACEMENT CHARACTER

    interesting = ignore | set(nfd) | set(con)

    def apply(l, replacements):
        # Replace every element that has an entry; keep the others as is.
        return [d for c in l for d in replacements.get(c, [c])]

    def gen(c):
        result = [c]
        seen = {tuple(result)}
        while True:
            # Apply substitutions until convergence.
            second = apply(apply(result, nfd), con)
            if second == result:
                # Fixed point of the combined step: another round would
                # produce the same sequence again.
                break
            key = tuple(second)
            if key in seen:
                raise ValueError("Substitutions for U+{:04X} do not converge".format(c))
            seen.add(key)
            result = second
        return [x for x in result if x not in ignore]

    return {c: gen(c) for c in interesting}
def main():
    """Print the generated C tables to standard output.

    Emits, in this order: the DECOMP_SLICE struct, an enum with the table
    sizes, `decomp_lengths` (the distinct sequence lengths), `decomp_chars`
    (the sorted code points), `decomp_slices` (per code point: offset into
    `decomp_data` and index into `decomp_lengths`) and `decomp_data` (the
    deduplicated sequences, concatenated).

    Raises ValueError, before anything is printed, if the data cannot be
    packed into the 13-bit offset / 3-bit length index of DECOMP_SLICE.
    """
    decompositions = generate_decompositions()

    # Deduplicate
    decomposition_set = sorted(set(tuple(x) for x in decompositions.values()))
    len_set = sorted(set(len(x) for x in decomposition_set))

    if len(len_set) > 8:
        raise ValueError("Can't pack offset (13 bit) together with len (>3bit)")

    # Start offset of every distinct sequence within decomp_data.
    cur_offset = 0
    decomposition_offsets = {}
    for d in decomposition_set:
        decomposition_offsets[d] = cur_offset
        cur_offset += len(d)

    # The offset is stored in a 13-bit field; anything larger would be
    # silently truncated in the generated table.
    if max(decomposition_offsets.values(), default=0) >= 1 << 13:
        raise ValueError("Can't pack offset (>13 bit) together with len (3 bit)")

    # Position of every distinct length within decomp_lengths.
    len_index = {length: i for i, length in enumerate(len_set)}

    print("""\
#include <stdint.h>
struct DECOMP_SLICE
{
\tuint16_t offset : 13;
\tuint16_t length : 3;
};
""")
    print("enum")
    print("{")
    print("\tNUM_DECOMP_LENGTHS={},".format(len(len_set)))
    print("\tNUM_DECOMPS={},".format(len(decompositions)))
    print("};")
    print()

    print("static const uint8_t decomp_lengths[NUM_DECOMP_LENGTHS] = {")
    for l in len_set:
        print("\t{},".format(l))
    print("};")
    print()

    print("static const int32_t decomp_chars[NUM_DECOMPS] = {")
    for k in sorted(decompositions):
        print("\t0x{:x},".format(k))
    print("};")
    print()

    print("static const struct DECOMP_SLICE decomp_slices[NUM_DECOMPS] = {")
    for k in sorted(decompositions):
        d = tuple(decompositions[k])
        print("\t{{{}, {}}},".format(decomposition_offsets[d], len_index[len(d)]))
    print("};")
    print()

    print("static const int32_t decomp_data[] = {")
    for d in decomposition_set:
        for c in d:
            print("\t0x{:x},".format(c))
    print("};")
# Only generate output when executed as a script, not when imported.
if __name__ == '__main__':
    main()

View file

@ -1,95 +1,81 @@
int str_utf8_is_confusable(int smaller, int bigger) #include "confusables_data.h"
#include "system.h"
#include <stddef.h>
static int str_utf8_skeleton(int ch, const int **skeleton, int *skeleton_len)
{ {
switch(smaller) int i;
for(i = 0; i < NUM_DECOMPS; i++)
{ {
case 0x0020: return bigger == 0x00A0 || bigger == 0x1680 || bigger == 0x2000 || bigger == 0x2001 || bigger == 0x2002 || bigger == 0x2003 || bigger == 0x2004 || bigger == 0x2005 || bigger == 0x2006 || bigger == 0x2007 || bigger == 0x2008 || bigger == 0x2009 || bigger == 0x200A || bigger == 0x2028 || bigger == 0x2029 || bigger == 0x202F || bigger == 0x205F; if(ch == decomp_chars[i])
case 0x0021: return bigger == 0x01C3 || bigger == 0x2D51 || bigger == 0xFF01; {
case 0x0022: return bigger == 0x02BA || bigger == 0x02DD || bigger == 0x02EE || bigger == 0x02F6 || bigger == 0x05F2 || bigger == 0x05F4 || bigger == 0x1CD3 || bigger == 0x201C || bigger == 0x201D || bigger == 0x201F || bigger == 0x2033 || bigger == 0x2036 || bigger == 0x3003 || bigger == 0xFF02; int offset = decomp_slices[i].offset;
case 0x0025: return bigger == 0x066A || bigger == 0x2052; int length = decomp_lengths[decomp_slices[i].length];
case 0x0026: return bigger == 0xA778;
case 0x0027: return bigger == 0x0060 || bigger == 0x00B4 || bigger == 0x02B9 || bigger == 0x02BB || bigger == 0x02BC || bigger == 0x02BD || bigger == 0x02BE || bigger == 0x02C8 || bigger == 0x02CA || bigger == 0x02CB || bigger == 0x02F4 || bigger == 0x0374 || bigger == 0x0384 || bigger == 0x055A || bigger == 0x055D || bigger == 0x05D9 || bigger == 0x05F3 || bigger == 0x07F4 || bigger == 0x07F5 || bigger == 0x144A || bigger == 0x16CC || bigger == 0x1FBD || bigger == 0x1FBF || bigger == 0x1FEF || bigger == 0x1FFD || bigger == 0x1FFE || bigger == 0x2018 || bigger == 0x2019 || bigger == 0x201B || bigger == 0x2032 || bigger == 0x2035 || bigger == 0xA78C || bigger == 0xFF07 || bigger == 0xFF40; *skeleton = &decomp_data[offset];
case 0x0028: return bigger == 0x2768 || bigger == 0x2772 || bigger == 0x3014 || bigger == 0xFD3E || bigger == 0xFF3B; *skeleton_len = length;
case 0x0029: return bigger == 0x2769 || bigger == 0x2773 || bigger == 0x3015 || bigger == 0xFD3F || bigger == 0xFF3D; return 1;
case 0x002A: return bigger == 0x066D || bigger == 0x204E || bigger == 0x2217 || bigger == 0x1031F; }
case 0x002B: return bigger == 0x16ED || bigger == 0x2795 || bigger == 0x1029B; }
case 0x002C: return bigger == 0x00B8 || bigger == 0x060D || bigger == 0x066B || bigger == 0x201A || bigger == 0xA4F9; *skeleton = NULL;
case 0x002D: return bigger == 0x02D7 || bigger == 0x06D4 || bigger == 0x2010 || bigger == 0x2011 || bigger == 0x2012 || bigger == 0x2013 || bigger == 0x2043 || bigger == 0x2212 || bigger == 0x2796 || bigger == 0x2CBA || bigger == 0xFE58; *skeleton_len = 1;
case 0x002E: return bigger == 0x0660 || bigger == 0x06F0 || bigger == 0x0701 || bigger == 0x0702 || bigger == 0x2024 || bigger == 0xA4F8 || bigger == 0xA60E || bigger == 0x10A50 || bigger == 0x1D16D; return 0;
case 0x002F: return bigger == 0x1735 || bigger == 0x2041 || bigger == 0x2044 || bigger == 0x2215 || bigger == 0x2571 || bigger == 0x27CB || bigger == 0x29F8 || bigger == 0x2CC6 || bigger == 0x2F03 || bigger == 0x3033 || bigger == 0x31D3 || bigger == 0x4E3F; }
case 0x0030: return bigger == 0x004F;
case 0x0031: return bigger == 0x006C; struct SKELETON
case 0x0032: return bigger == 0x01A7 || bigger == 0x03E8 || bigger == 0x14BF || bigger == 0xA644 || bigger == 0xA75A || bigger == 0x1D7D0 || bigger == 0x1D7DA || bigger == 0x1D7E4 || bigger == 0x1D7EE || bigger == 0x1D7F8; {
case 0x0033: return bigger == 0x01B7 || bigger == 0x021C || bigger == 0x0417 || bigger == 0x04E0 || bigger == 0x2CCC || bigger == 0xA76A || bigger == 0xA7AB || bigger == 0x118CA || bigger == 0x1D7D1 || bigger == 0x1D7DB || bigger == 0x1D7E5 || bigger == 0x1D7EF || bigger == 0x1D7F9; const int *skeleton;
case 0x0034: return bigger == 0x13CE || bigger == 0x118AF || bigger == 0x1D7D2 || bigger == 0x1D7DC || bigger == 0x1D7E6 || bigger == 0x1D7F0 || bigger == 0x1D7FA; int skeleton_len;
case 0x0035: return bigger == 0x01BC || bigger == 0x118BB || bigger == 0x1D7D3 || bigger == 0x1D7DD || bigger == 0x1D7E7 || bigger == 0x1D7F1 || bigger == 0x1D7FB; const char *str;
case 0x0036: return bigger == 0x0431 || bigger == 0x13EE || bigger == 0x2CD2 || bigger == 0x118D5 || bigger == 0x1D7D4 || bigger == 0x1D7DE || bigger == 0x1D7E8 || bigger == 0x1D7F2 || bigger == 0x1D7FC; };
case 0x0037: return bigger == 0x118C6 || bigger == 0x1D7D5 || bigger == 0x1D7DF || bigger == 0x1D7E9 || bigger == 0x1D7F3 || bigger == 0x1D7FD;
case 0x0038: return bigger == 0x0222 || bigger == 0x0223 || bigger == 0x09EA || bigger == 0x0A6A || bigger == 0x0B03 || bigger == 0x1031A || bigger == 0x1D7D6 || bigger == 0x1D7E0 || bigger == 0x1D7EA || bigger == 0x1D7F4 || bigger == 0x1D7FE || bigger == 0x1E8CB; void str_utf8_skeleton_begin(struct SKELETON *skel, const char *str)
case 0x0039: return bigger == 0x09ED || bigger == 0x0A67 || bigger == 0x0B68 || bigger == 0x2CCA || bigger == 0xA76E || bigger == 0x118AC || bigger == 0x118CC || bigger == 0x118D6 || bigger == 0x1D7D7 || bigger == 0x1D7E1 || bigger == 0x1D7EB || bigger == 0x1D7F5 || bigger == 0x1D7FF; {
case 0x003A: return bigger == 0x02D0 || bigger == 0x02F8 || bigger == 0x0589 || bigger == 0x05C3 || bigger == 0x0703 || bigger == 0x0704 || bigger == 0x0903 || bigger == 0x0A83 || bigger == 0x16EC || bigger == 0x1803 || bigger == 0x1809 || bigger == 0x205A || bigger == 0x2236 || bigger == 0xA4FD || bigger == 0xA789 || bigger == 0xFE30 || bigger == 0xFF1A; skel->skeleton = NULL;
case 0x003B: return bigger == 0x037E; skel->skeleton_len = 0;
case 0x003C: return bigger == 0x02C2 || bigger == 0x1438 || bigger == 0x16B2 || bigger == 0x2039 || bigger == 0x276E; skel->str = str;
case 0x003D: return bigger == 0x1400 || bigger == 0x2E40 || bigger == 0x30A0 || bigger == 0xA4FF; }
case 0x003E: return bigger == 0x02C3 || bigger == 0x1433 || bigger == 0x203A || bigger == 0x276F;
case 0x003F: return bigger == 0x0241 || bigger == 0x0294 || bigger == 0x097D || bigger == 0x13AE; int str_utf8_skeleton_next(struct SKELETON *skel)
case 0x0041: return bigger == 0x0391 || bigger == 0x0410 || bigger == 0x13AA || bigger == 0x15C5 || bigger == 0x1D00 || bigger == 0xA4EE || bigger == 0xFF21 || bigger == 0x102A0 || bigger == 0x1D400 || bigger == 0x1D434 || bigger == 0x1D468 || bigger == 0x1D49C || bigger == 0x1D4D0 || bigger == 0x1D504 || bigger == 0x1D538 || bigger == 0x1D56C || bigger == 0x1D5A0 || bigger == 0x1D5D4 || bigger == 0x1D608 || bigger == 0x1D63C || bigger == 0x1D670 || bigger == 0x1D6A8 || bigger == 0x1D6E2 || bigger == 0x1D71C || bigger == 0x1D756 || bigger == 0x1D790; {
case 0x0042: return bigger == 0x0392 || bigger == 0x0412 || bigger == 0x13F4 || bigger == 0x15F7 || bigger == 0x212C || bigger == 0xA4D0 || bigger == 0xA7B4 || bigger == 0xFF22 || bigger == 0x10282 || bigger == 0x102A1 || bigger == 0x10301 || bigger == 0x1D401 || bigger == 0x1D435 || bigger == 0x1D469 || bigger == 0x1D4D1 || bigger == 0x1D505 || bigger == 0x1D539 || bigger == 0x1D56D || bigger == 0x1D5A1 || bigger == 0x1D5D5 || bigger == 0x1D609 || bigger == 0x1D63D || bigger == 0x1D671 || bigger == 0x1D6A9 || bigger == 0x1D6E3 || bigger == 0x1D71D || bigger == 0x1D757 || bigger == 0x1D791; int ch;
case 0x0043: return bigger == 0x03F9 || bigger == 0x0421 || bigger == 0x13DF || bigger == 0x2102 || bigger == 0x212D || bigger == 0x216D || bigger == 0x2CA4 || bigger == 0xA4DA || bigger == 0xFF23 || bigger == 0x102A2 || bigger == 0x10302 || bigger == 0x10415 || bigger == 0x1051C || bigger == 0x118E9 || bigger == 0x118F2 || bigger == 0x1D402 || bigger == 0x1D436 || bigger == 0x1D46A || bigger == 0x1D49E || bigger == 0x1D4D2 || bigger == 0x1D56E || bigger == 0x1D5A2 || bigger == 0x1D5D6 || bigger == 0x1D60A || bigger == 0x1D63E || bigger == 0x1D672 || bigger == 0x1F74C; while(skel->skeleton_len == 0)
case 0x0044: return bigger == 0x13A0 || bigger == 0x15DE || bigger == 0x15EA || bigger == 0x2145 || bigger == 0x216E || bigger == 0xA4D3 || bigger == 0x1D403 || bigger == 0x1D437 || bigger == 0x1D46B || bigger == 0x1D49F || bigger == 0x1D4D3 || bigger == 0x1D507 || bigger == 0x1D53B || bigger == 0x1D56F || bigger == 0x1D5A3 || bigger == 0x1D5D7 || bigger == 0x1D60B || bigger == 0x1D63F || bigger == 0x1D673; {
case 0x0045: return bigger == 0x0395 || bigger == 0x0415 || bigger == 0x13AC || bigger == 0x2130 || bigger == 0x22FF || bigger == 0x2D39 || bigger == 0xA4F0 || bigger == 0xFF25 || bigger == 0x10286 || bigger == 0x118A6 || bigger == 0x118AE || bigger == 0x1D404 || bigger == 0x1D438 || bigger == 0x1D46C || bigger == 0x1D4D4 || bigger == 0x1D508 || bigger == 0x1D53C || bigger == 0x1D570 || bigger == 0x1D5A4 || bigger == 0x1D5D8 || bigger == 0x1D60C || bigger == 0x1D640 || bigger == 0x1D674 || bigger == 0x1D6AC || bigger == 0x1D6E6 || bigger == 0x1D720 || bigger == 0x1D75A || bigger == 0x1D794; ch = str_utf8_decode(&skel->str);
case 0x0046: return bigger == 0x03DC || bigger == 0x15B4 || bigger == 0x2131 || bigger == 0xA4DD || bigger == 0xA798 || bigger == 0x10287 || bigger == 0x102A5 || bigger == 0x10525 || bigger == 0x118A2 || bigger == 0x118C2 || bigger == 0x1D405 || bigger == 0x1D439 || bigger == 0x1D46D || bigger == 0x1D4D5 || bigger == 0x1D509 || bigger == 0x1D53D || bigger == 0x1D571 || bigger == 0x1D5A5 || bigger == 0x1D5D9 || bigger == 0x1D60D || bigger == 0x1D641 || bigger == 0x1D675 || bigger == 0x1D7CA; if(ch == 0)
case 0x0047: return bigger == 0x050C || bigger == 0x13C0 || bigger == 0x13F3 || bigger == 0xA4D6 || bigger == 0x1D406 || bigger == 0x1D43A || bigger == 0x1D46E || bigger == 0x1D4A2 || bigger == 0x1D4D6 || bigger == 0x1D50A || bigger == 0x1D53E || bigger == 0x1D572 || bigger == 0x1D5A6 || bigger == 0x1D5DA || bigger == 0x1D60E || bigger == 0x1D642 || bigger == 0x1D676; {
case 0x0048: return bigger == 0x0397 || bigger == 0x041D || bigger == 0x13BB || bigger == 0x157C || bigger == 0x210B || bigger == 0x210C || bigger == 0x210D || bigger == 0x2C8E || bigger == 0xA4E7 || bigger == 0xFF28 || bigger == 0x102CF || bigger == 0x1D407 || bigger == 0x1D43B || bigger == 0x1D46F || bigger == 0x1D4D7 || bigger == 0x1D573 || bigger == 0x1D5A7 || bigger == 0x1D5DB || bigger == 0x1D60F || bigger == 0x1D643 || bigger == 0x1D677 || bigger == 0x1D6AE || bigger == 0x1D6E8 || bigger == 0x1D722 || bigger == 0x1D75C || bigger == 0x1D796; return 0;
case 0x0049: return bigger == 0x006C; }
case 0x004A: return bigger == 0x037F || bigger == 0x0408 || bigger == 0x13AB || bigger == 0x148D || bigger == 0xA4D9 || bigger == 0xA7B2 || bigger == 0xFF2A || bigger == 0x1D409 || bigger == 0x1D43D || bigger == 0x1D471 || bigger == 0x1D4A5 || bigger == 0x1D4D9 || bigger == 0x1D50D || bigger == 0x1D541 || bigger == 0x1D575 || bigger == 0x1D5A9 || bigger == 0x1D5DD || bigger == 0x1D611 || bigger == 0x1D645 || bigger == 0x1D679; str_utf8_skeleton(ch, &skel->skeleton, &skel->skeleton_len);
case 0x004B: return bigger == 0x039A || bigger == 0x041A || bigger == 0x13E6 || bigger == 0x16D5 || bigger == 0x212A || bigger == 0x2C94 || bigger == 0xA4D7 || bigger == 0xFF2B || bigger == 0x10518 || bigger == 0x1D40A || bigger == 0x1D43E || bigger == 0x1D472 || bigger == 0x1D4A6 || bigger == 0x1D4DA || bigger == 0x1D50E || bigger == 0x1D542 || bigger == 0x1D576 || bigger == 0x1D5AA || bigger == 0x1D5DE || bigger == 0x1D612 || bigger == 0x1D646 || bigger == 0x1D67A || bigger == 0x1D6B1 || bigger == 0x1D6EB || bigger == 0x1D725 || bigger == 0x1D75F || bigger == 0x1D799; }
case 0x004C: return bigger == 0x13DE || bigger == 0x14AA || bigger == 0x2112 || bigger == 0x216C || bigger == 0x2CD0 || bigger == 0xA4E1 || bigger == 0x1041B || bigger == 0x10526 || bigger == 0x118A3 || bigger == 0x118B2 || bigger == 0x1D40B || bigger == 0x1D43F || bigger == 0x1D473 || bigger == 0x1D4DB || bigger == 0x1D50F || bigger == 0x1D543 || bigger == 0x1D577 || bigger == 0x1D5AB || bigger == 0x1D5DF || bigger == 0x1D613 || bigger == 0x1D647 || bigger == 0x1D67B; skel->skeleton_len--;
case 0x004D: return bigger == 0x039C || bigger == 0x03FA || bigger == 0x041C || bigger == 0x13B7 || bigger == 0x15F0 || bigger == 0x16D6 || bigger == 0x2133 || bigger == 0x216F || bigger == 0x2C98 || bigger == 0xA4DF || bigger == 0xFF2D || bigger == 0x102B0 || bigger == 0x10311 || bigger == 0x1D40C || bigger == 0x1D440 || bigger == 0x1D474 || bigger == 0x1D4DC || bigger == 0x1D510 || bigger == 0x1D544 || bigger == 0x1D578 || bigger == 0x1D5AC || bigger == 0x1D5E0 || bigger == 0x1D614 || bigger == 0x1D648 || bigger == 0x1D67C || bigger == 0x1D6B3 || bigger == 0x1D6ED || bigger == 0x1D727 || bigger == 0x1D761 || bigger == 0x1D79B; if(skel->skeleton != NULL)
case 0x004E: return bigger == 0x039D || bigger == 0x2115 || bigger == 0x2C9A || bigger == 0xA4E0 || bigger == 0xFF2E || bigger == 0x10513 || bigger == 0x1D40D || bigger == 0x1D441 || bigger == 0x1D475 || bigger == 0x1D4A9 || bigger == 0x1D4DD || bigger == 0x1D511 || bigger == 0x1D579 || bigger == 0x1D5AD || bigger == 0x1D5E1 || bigger == 0x1D615 || bigger == 0x1D649 || bigger == 0x1D67D || bigger == 0x1D6B4 || bigger == 0x1D6EE || bigger == 0x1D728 || bigger == 0x1D762 || bigger == 0x1D79C; {
case 0x004F: return bigger == 0x039F || bigger == 0x041E || bigger == 0x0555 || bigger == 0x07C0 || bigger == 0x09E6 || bigger == 0x0B20 || bigger == 0x0B66 || bigger == 0x0D20 || bigger == 0x2C9E || bigger == 0x2D54 || bigger == 0x3007 || bigger == 0xA4F3 || bigger == 0xFF2F || bigger == 0x10292 || bigger == 0x102AB || bigger == 0x10404 || bigger == 0x10516 || bigger == 0x114D0 || bigger == 0x118B5 || bigger == 0x118E0 || bigger == 0x1D40E || bigger == 0x1D442 || bigger == 0x1D476 || bigger == 0x1D4AA || bigger == 0x1D4DE || bigger == 0x1D512 || bigger == 0x1D546 || bigger == 0x1D57A || bigger == 0x1D5AE || bigger == 0x1D5E2 || bigger == 0x1D616 || bigger == 0x1D64A || bigger == 0x1D67E || bigger == 0x1D6B6 || bigger == 0x1D6F0 || bigger == 0x1D72A || bigger == 0x1D764 || bigger == 0x1D79E || bigger == 0x1D7CE || bigger == 0x1D7D8 || bigger == 0x1D7E2 || bigger == 0x1D7EC || bigger == 0x1D7F6; ch = *skel->skeleton;
case 0x0050: return bigger == 0x03A1 || bigger == 0x0420 || bigger == 0x13E2 || bigger == 0x146D || bigger == 0x2119 || bigger == 0x2CA2 || bigger == 0xA4D1 || bigger == 0xFF30 || bigger == 0x10295 || bigger == 0x1D40F || bigger == 0x1D443 || bigger == 0x1D477 || bigger == 0x1D4AB || bigger == 0x1D4DF || bigger == 0x1D513 || bigger == 0x1D57B || bigger == 0x1D5AF || bigger == 0x1D5E3 || bigger == 0x1D617 || bigger == 0x1D64B || bigger == 0x1D67F || bigger == 0x1D6B8 || bigger == 0x1D6F2 || bigger == 0x1D72C || bigger == 0x1D766 || bigger == 0x1D7A0; skel->skeleton++;
case 0x0051: return bigger == 0x211A || bigger == 0x2D55 || bigger == 0x1D410 || bigger == 0x1D444 || bigger == 0x1D478 || bigger == 0x1D4AC || bigger == 0x1D4E0 || bigger == 0x1D514 || bigger == 0x1D57C || bigger == 0x1D5B0 || bigger == 0x1D5E4 || bigger == 0x1D618 || bigger == 0x1D64C || bigger == 0x1D680; }
case 0x0052: return bigger == 0x01A6 || bigger == 0x13A1 || bigger == 0x13D2 || bigger == 0x1587 || bigger == 0x211B || bigger == 0x211C || bigger == 0x211D || bigger == 0xA4E3 || bigger == 0x1D411 || bigger == 0x1D445 || bigger == 0x1D479 || bigger == 0x1D4E1 || bigger == 0x1D57D || bigger == 0x1D5B1 || bigger == 0x1D5E5 || bigger == 0x1D619 || bigger == 0x1D64D || bigger == 0x1D681; return ch;
case 0x0053: return bigger == 0x0405 || bigger == 0x054F || bigger == 0x13D5 || bigger == 0x13DA || bigger == 0xA4E2 || bigger == 0xFF33 || bigger == 0x10296 || bigger == 0x10420 || bigger == 0x1D412 || bigger == 0x1D446 || bigger == 0x1D47A || bigger == 0x1D4AE || bigger == 0x1D4E2 || bigger == 0x1D516 || bigger == 0x1D54A || bigger == 0x1D57E || bigger == 0x1D5B2 || bigger == 0x1D5E6 || bigger == 0x1D61A || bigger == 0x1D64E || bigger == 0x1D682; }
case 0x0054: return bigger == 0x03A4 || bigger == 0x0422 || bigger == 0x13A2 || bigger == 0x22A4 || bigger == 0x27D9 || bigger == 0x2CA6 || bigger == 0xA4D4 || bigger == 0xFF34 || bigger == 0x10297 || bigger == 0x102B1 || bigger == 0x10315 || bigger == 0x118BC || bigger == 0x1D413 || bigger == 0x1D447 || bigger == 0x1D47B || bigger == 0x1D4AF || bigger == 0x1D4E3 || bigger == 0x1D517 || bigger == 0x1D54B || bigger == 0x1D57F || bigger == 0x1D5B3 || bigger == 0x1D5E7 || bigger == 0x1D61B || bigger == 0x1D64F || bigger == 0x1D683 || bigger == 0x1D6BB || bigger == 0x1D6F5 || bigger == 0x1D72F || bigger == 0x1D769 || bigger == 0x1D7A3 || bigger == 0x1F768;
case 0x0055: return bigger == 0x054D || bigger == 0x144C || bigger == 0x222A || bigger == 0x22C3 || bigger == 0xA4F4 || bigger == 0x118B8 || bigger == 0x1D414 || bigger == 0x1D448 || bigger == 0x1D47C || bigger == 0x1D4B0 || bigger == 0x1D4E4 || bigger == 0x1D518 || bigger == 0x1D54C || bigger == 0x1D580 || bigger == 0x1D5B4 || bigger == 0x1D5E8 || bigger == 0x1D61C || bigger == 0x1D650 || bigger == 0x1D684; int str_utf8_comp_confusable(const char *str1, const char *str2)
case 0x0056: return bigger == 0x0474 || bigger == 0x0667 || bigger == 0x06F7 || bigger == 0x13D9 || bigger == 0x142F || bigger == 0x2164 || bigger == 0x2D38 || bigger == 0xA4E6 || bigger == 0x1051D || bigger == 0x118A0 || bigger == 0x1D415 || bigger == 0x1D449 || bigger == 0x1D47D || bigger == 0x1D4B1 || bigger == 0x1D4E5 || bigger == 0x1D519 || bigger == 0x1D54D || bigger == 0x1D581 || bigger == 0x1D5B5 || bigger == 0x1D5E9 || bigger == 0x1D61D || bigger == 0x1D651 || bigger == 0x1D685; {
case 0x0057: return bigger == 0x051C || bigger == 0x13B3 || bigger == 0x13D4 || bigger == 0xA4EA || bigger == 0x118E6 || bigger == 0x118EF || bigger == 0x1D416 || bigger == 0x1D44A || bigger == 0x1D47E || bigger == 0x1D4B2 || bigger == 0x1D4E6 || bigger == 0x1D51A || bigger == 0x1D54E || bigger == 0x1D582 || bigger == 0x1D5B6 || bigger == 0x1D5EA || bigger == 0x1D61E || bigger == 0x1D652 || bigger == 0x1D686; struct SKELETON skel1;
case 0x0058: return bigger == 0x03A7 || bigger == 0x0425 || bigger == 0x166D || bigger == 0x16B7 || bigger == 0x2169 || bigger == 0x2573 || bigger == 0x2CAC || bigger == 0x2D5D || bigger == 0xA4EB || bigger == 0xA7B3 || bigger == 0xFF38 || bigger == 0x10290 || bigger == 0x102B4 || bigger == 0x10317 || bigger == 0x10322 || bigger == 0x10527 || bigger == 0x118EC || bigger == 0x1D417 || bigger == 0x1D44B || bigger == 0x1D47F || bigger == 0x1D4B3 || bigger == 0x1D4E7 || bigger == 0x1D51B || bigger == 0x1D54F || bigger == 0x1D583 || bigger == 0x1D5B7 || bigger == 0x1D5EB || bigger == 0x1D61F || bigger == 0x1D653 || bigger == 0x1D687 || bigger == 0x1D6BE || bigger == 0x1D6F8 || bigger == 0x1D732 || bigger == 0x1D76C || bigger == 0x1D7A6; struct SKELETON skel2;
case 0x0059: return bigger == 0x03A5 || bigger == 0x03D2 || bigger == 0x04AE || bigger == 0x13A9 || bigger == 0x13BD || bigger == 0x2CA8 || bigger == 0xA4EC || bigger == 0xFF39 || bigger == 0x102B2 || bigger == 0x118A4 || bigger == 0x1D418 || bigger == 0x1D44C || bigger == 0x1D480 || bigger == 0x1D4B4 || bigger == 0x1D4E8 || bigger == 0x1D51C || bigger == 0x1D550 || bigger == 0x1D584 || bigger == 0x1D5B8 || bigger == 0x1D5EC || bigger == 0x1D620 || bigger == 0x1D654 || bigger == 0x1D688 || bigger == 0x1D6BC || bigger == 0x1D6F6 || bigger == 0x1D730 || bigger == 0x1D76A || bigger == 0x1D7A4;
case 0x005A: return bigger == 0x0396 || bigger == 0x13C3 || bigger == 0x2124 || bigger == 0x2128 || bigger == 0xA4DC || bigger == 0xFF3A || bigger == 0x102F5 || bigger == 0x118A9 || bigger == 0x118E5 || bigger == 0x1D419 || bigger == 0x1D44D || bigger == 0x1D481 || bigger == 0x1D4B5 || bigger == 0x1D4E9 || bigger == 0x1D585 || bigger == 0x1D5B9 || bigger == 0x1D5ED || bigger == 0x1D621 || bigger == 0x1D655 || bigger == 0x1D689 || bigger == 0x1D6AD || bigger == 0x1D6E7 || bigger == 0x1D721 || bigger == 0x1D75B || bigger == 0x1D795; str_utf8_skeleton_begin(&skel1, str1);
case 0x005C: return bigger == 0x2216 || bigger == 0x27CD || bigger == 0x29F5 || bigger == 0x29F9 || bigger == 0x2F02 || bigger == 0x31D4 || bigger == 0x4E36 || bigger == 0xFE68 || bigger == 0xFF3C; str_utf8_skeleton_begin(&skel2, str2);
case 0x005E: return bigger == 0x02C4 || bigger == 0x02C6;
case 0x005F: return bigger == 0x07FA || bigger == 0xFE4D || bigger == 0xFE4E || bigger == 0xFE4F; while(1)
case 0x0061: return bigger == 0x0251 || bigger == 0x03B1 || bigger == 0x0430 || bigger == 0x237A || bigger == 0xFF41 || bigger == 0x1D41A || bigger == 0x1D44E || bigger == 0x1D482 || bigger == 0x1D4B6 || bigger == 0x1D4EA || bigger == 0x1D51E || bigger == 0x1D552 || bigger == 0x1D586 || bigger == 0x1D5BA || bigger == 0x1D5EE || bigger == 0x1D622 || bigger == 0x1D656 || bigger == 0x1D68A || bigger == 0x1D6C2 || bigger == 0x1D6FC || bigger == 0x1D736 || bigger == 0x1D770 || bigger == 0x1D7AA; {
case 0x0062: return bigger == 0x0184 || bigger == 0x042C || bigger == 0x13CF || bigger == 0x15AF || bigger == 0x1D41B || bigger == 0x1D44F || bigger == 0x1D483 || bigger == 0x1D4B7 || bigger == 0x1D4EB || bigger == 0x1D51F || bigger == 0x1D553 || bigger == 0x1D587 || bigger == 0x1D5BB || bigger == 0x1D5EF || bigger == 0x1D623 || bigger == 0x1D657 || bigger == 0x1D68B; int ch1 = str_utf8_skeleton_next(&skel1);
case 0x0063: return bigger == 0x03F2 || bigger == 0x0441 || bigger == 0x1D04 || bigger == 0x217D || bigger == 0x2CA5 || bigger == 0xFF43 || bigger == 0x1043D || bigger == 0x1D41C || bigger == 0x1D450 || bigger == 0x1D484 || bigger == 0x1D4B8 || bigger == 0x1D4EC || bigger == 0x1D520 || bigger == 0x1D554 || bigger == 0x1D588 || bigger == 0x1D5BC || bigger == 0x1D5F0 || bigger == 0x1D624 || bigger == 0x1D658 || bigger == 0x1D68C; int ch2 = str_utf8_skeleton_next(&skel2);
case 0x0064: return bigger == 0x0501 || bigger == 0x13E7 || bigger == 0x146F || bigger == 0x2146 || bigger == 0x217E || bigger == 0xA4D2 || bigger == 0x1D41D || bigger == 0x1D451 || bigger == 0x1D485 || bigger == 0x1D4B9 || bigger == 0x1D4ED || bigger == 0x1D521 || bigger == 0x1D555 || bigger == 0x1D589 || bigger == 0x1D5BD || bigger == 0x1D5F1 || bigger == 0x1D625 || bigger == 0x1D659 || bigger == 0x1D68D;
case 0x0065: return bigger == 0x0435 || bigger == 0x04BD || bigger == 0x212E || bigger == 0x212F || bigger == 0x2147 || bigger == 0xAB32 || bigger == 0xFF45 || bigger == 0x1D41E || bigger == 0x1D452 || bigger == 0x1D486 || bigger == 0x1D4EE || bigger == 0x1D522 || bigger == 0x1D556 || bigger == 0x1D58A || bigger == 0x1D5BE || bigger == 0x1D5F2 || bigger == 0x1D626 || bigger == 0x1D65A || bigger == 0x1D68E; if(ch1 == 0 || ch2 == 0)
case 0x0066: return bigger == 0x017F || bigger == 0x0584 || bigger == 0x1E9D || bigger == 0xA799 || bigger == 0xAB35 || bigger == 0x1D41F || bigger == 0x1D453 || bigger == 0x1D487 || bigger == 0x1D4BB || bigger == 0x1D4EF || bigger == 0x1D523 || bigger == 0x1D557 || bigger == 0x1D58B || bigger == 0x1D5BF || bigger == 0x1D5F3 || bigger == 0x1D627 || bigger == 0x1D65B || bigger == 0x1D68F; return ch1 != ch2;
case 0x0067: return bigger == 0x018D || bigger == 0x0261 || bigger == 0x0581 || bigger == 0x1D83 || bigger == 0x210A || bigger == 0xFF47 || bigger == 0x1D420 || bigger == 0x1D454 || bigger == 0x1D488 || bigger == 0x1D4F0 || bigger == 0x1D524 || bigger == 0x1D558 || bigger == 0x1D58C || bigger == 0x1D5C0 || bigger == 0x1D5F4 || bigger == 0x1D628 || bigger == 0x1D65C || bigger == 0x1D690;
case 0x0068: return bigger == 0x04BB || bigger == 0x0570 || bigger == 0x13C2 || bigger == 0x210E || bigger == 0xFF48 || bigger == 0x1D421 || bigger == 0x1D489 || bigger == 0x1D4BD || bigger == 0x1D4F1 || bigger == 0x1D525 || bigger == 0x1D559 || bigger == 0x1D58D || bigger == 0x1D5C1 || bigger == 0x1D5F5 || bigger == 0x1D629 || bigger == 0x1D65D || bigger == 0x1D691; if(ch1 != ch2)
case 0x0069: return bigger == 0x0131 || bigger == 0x0269 || bigger == 0x026A || bigger == 0x02DB || bigger == 0x037A || bigger == 0x03B9 || bigger == 0x0456 || bigger == 0x04CF || bigger == 0x13A5 || bigger == 0x1FBE || bigger == 0x2139 || bigger == 0x2148 || bigger == 0x2170 || bigger == 0x2373 || bigger == 0xA647 || bigger == 0xFF49 || bigger == 0x118C3 || bigger == 0x1D422 || bigger == 0x1D456 || bigger == 0x1D48A || bigger == 0x1D4BE || bigger == 0x1D4F2 || bigger == 0x1D526 || bigger == 0x1D55A || bigger == 0x1D58E || bigger == 0x1D5C2 || bigger == 0x1D5F6 || bigger == 0x1D62A || bigger == 0x1D65E || bigger == 0x1D692 || bigger == 0x1D6A4 || bigger == 0x1D6CA || bigger == 0x1D704 || bigger == 0x1D73E || bigger == 0x1D778 || bigger == 0x1D7B2; return 1;
case 0x006A: return bigger == 0x03F3 || bigger == 0x0458 || bigger == 0x2149 || bigger == 0xFF4A || bigger == 0x1D423 || bigger == 0x1D457 || bigger == 0x1D48B || bigger == 0x1D4BF || bigger == 0x1D4F3 || bigger == 0x1D527 || bigger == 0x1D55B || bigger == 0x1D58F || bigger == 0x1D5C3 || bigger == 0x1D5F7 || bigger == 0x1D62B || bigger == 0x1D65F || bigger == 0x1D693;
case 0x006B: return bigger == 0x0138 || bigger == 0x03BA || bigger == 0x03F0 || bigger == 0x043A || bigger == 0x1D0B || bigger == 0x2C95 || bigger == 0x1D424 || bigger == 0x1D458 || bigger == 0x1D48C || bigger == 0x1D4C0 || bigger == 0x1D4F4 || bigger == 0x1D528 || bigger == 0x1D55C || bigger == 0x1D590 || bigger == 0x1D5C4 || bigger == 0x1D5F8 || bigger == 0x1D62C || bigger == 0x1D660 || bigger == 0x1D694 || bigger == 0x1D6CB || bigger == 0x1D6DE || bigger == 0x1D705 || bigger == 0x1D718 || bigger == 0x1D73F || bigger == 0x1D752 || bigger == 0x1D779 || bigger == 0x1D78C || bigger == 0x1D7B3 || bigger == 0x1D7C6;
case 0x006C: return bigger == 0x007C || bigger == 0x0196 || bigger == 0x01C0 || bigger == 0x0399 || bigger == 0x0406 || bigger == 0x04C0 || bigger == 0x05C0 || bigger == 0x05D5 || bigger == 0x05DF || bigger == 0x0627 || bigger == 0x0661 || bigger == 0x06F1 || bigger == 0x07CA || bigger == 0x16C1 || bigger == 0x2110 || bigger == 0x2111 || bigger == 0x2113 || bigger == 0x2160 || bigger == 0x217C || bigger == 0x2223 || bigger == 0x2C92 || bigger == 0x2D4F || bigger == 0xA4F2 || bigger == 0xFE8D || bigger == 0xFE8E || bigger == 0xFF29 || bigger == 0xFF4C || bigger == 0xFFE8 || bigger == 0x1028A || bigger == 0x10309 || bigger == 0x10320 || bigger == 0x1D408 || bigger == 0x1D425 || bigger == 0x1D43C || bigger == 0x1D459 || bigger == 0x1D470 || bigger == 0x1D48D || bigger == 0x1D4C1 || bigger == 0x1D4D8 || bigger == 0x1D4F5 || bigger == 0x1D529 || bigger == 0x1D540 || bigger == 0x1D55D || bigger == 0x1D574 || bigger == 0x1D591 || bigger == 0x1D5A8 || bigger == 0x1D5C5 || bigger == 0x1D5DC || bigger == 0x1D5F9 || bigger == 0x1D610 || bigger == 0x1D62D || bigger == 0x1D644 || bigger == 0x1D661 || bigger == 0x1D678 || bigger == 0x1D695 || bigger == 0x1D6B0 || bigger == 0x1D6EA || bigger == 0x1D724 || bigger == 0x1D75E || bigger == 0x1D798 || bigger == 0x1D7CF || bigger == 0x1D7D9 || bigger == 0x1D7E3 || bigger == 0x1D7ED || bigger == 0x1D7F7 || bigger == 0x1E8C7 || bigger == 0x1EE00 || bigger == 0x1EE80;
case 0x006D: return bigger == 0x028D || bigger == 0x043C || bigger == 0x1D0D || bigger == 0x217F || bigger == 0xAB51 || bigger == 0x11700 || bigger == 0x118E3 || bigger == 0x1D426 || bigger == 0x1D45A || bigger == 0x1D48E || bigger == 0x1D4C2 || bigger == 0x1D4F6 || bigger == 0x1D52A || bigger == 0x1D55E || bigger == 0x1D592 || bigger == 0x1D5C6 || bigger == 0x1D5FA || bigger == 0x1D62E || bigger == 0x1D662 || bigger == 0x1D696;
case 0x006E: return bigger == 0x03C0 || bigger == 0x03D6 || bigger == 0x043F || bigger == 0x0578 || bigger == 0x057C || bigger == 0x1D28 || bigger == 0x213C || bigger == 0x1D427 || bigger == 0x1D45B || bigger == 0x1D48F || bigger == 0x1D4C3 || bigger == 0x1D4F7 || bigger == 0x1D52B || bigger == 0x1D55F || bigger == 0x1D593 || bigger == 0x1D5C7 || bigger == 0x1D5FB || bigger == 0x1D62F || bigger == 0x1D663 || bigger == 0x1D697 || bigger == 0x1D6D1 || bigger == 0x1D6E1 || bigger == 0x1D70B || bigger == 0x1D71B || bigger == 0x1D745 || bigger == 0x1D755 || bigger == 0x1D77F || bigger == 0x1D78F || bigger == 0x1D7B9 || bigger == 0x1D7C9;
case 0x006F: return bigger == 0x03BF || bigger == 0x03C3 || bigger == 0x043E || bigger == 0x0585 || bigger == 0x05E1 || bigger == 0x0647 || bigger == 0x0665 || bigger == 0x06BE || bigger == 0x06C1 || bigger == 0x06D5 || bigger == 0x06F5 || bigger == 0x0966 || bigger == 0x0A66 || bigger == 0x0AE6 || bigger == 0x0BE6 || bigger == 0x0C02 || bigger == 0x0C66 || bigger == 0x0C82 || bigger == 0x0CE6 || bigger == 0x0D02 || bigger == 0x0D66 || bigger == 0x0D82 || bigger == 0x0E50 || bigger == 0x0ED0 || bigger == 0x101D || bigger == 0x1040 || bigger == 0x10FF || bigger == 0x1D0F || bigger == 0x1D11 || bigger == 0x2134 || bigger == 0x2C9F || bigger == 0xAB3D || bigger == 0xFBA6 || bigger == 0xFBA7 || bigger == 0xFBA8 || bigger == 0xFBA9 || bigger == 0xFBAA || bigger == 0xFBAB || bigger == 0xFBAC || bigger == 0xFBAD || bigger == 0xFEE9 || bigger == 0xFEEA || bigger == 0xFEEB || bigger == 0xFEEC || bigger == 0xFF4F || bigger == 0x1042C || bigger == 0x118C8 || bigger == 0x118D7 || bigger == 0x1D428 || bigger == 0x1D45C || bigger == 0x1D490 || bigger == 0x1D4F8 || bigger == 0x1D52C || bigger == 0x1D560 || bigger == 0x1D594 || bigger == 0x1D5C8 || bigger == 0x1D5FC || bigger == 0x1D630 || bigger == 0x1D664 || bigger == 0x1D698 || bigger == 0x1D6D0 || bigger == 0x1D6D4 || bigger == 0x1D70A || bigger == 0x1D70E || bigger == 0x1D744 || bigger == 0x1D748 || bigger == 0x1D77E || bigger == 0x1D782 || bigger == 0x1D7B8 || bigger == 0x1D7BC || bigger == 0x1EE24 || bigger == 0x1EE64 || bigger == 0x1EE84;
case 0x0070: return bigger == 0x03C1 || bigger == 0x03F1 || bigger == 0x0440 || bigger == 0x2374 || bigger == 0x2CA3 || bigger == 0xFF50 || bigger == 0x1D429 || bigger == 0x1D45D || bigger == 0x1D491 || bigger == 0x1D4C5 || bigger == 0x1D4F9 || bigger == 0x1D52D || bigger == 0x1D561 || bigger == 0x1D595 || bigger == 0x1D5C9 || bigger == 0x1D5FD || bigger == 0x1D631 || bigger == 0x1D665 || bigger == 0x1D699 || bigger == 0x1D6D2 || bigger == 0x1D6E0 || bigger == 0x1D70C || bigger == 0x1D71A || bigger == 0x1D746 || bigger == 0x1D754 || bigger == 0x1D780 || bigger == 0x1D78E || bigger == 0x1D7BA || bigger == 0x1D7C8;
case 0x0071: return bigger == 0x051B || bigger == 0x0563 || bigger == 0x0566 || bigger == 0x1D42A || bigger == 0x1D45E || bigger == 0x1D492 || bigger == 0x1D4C6 || bigger == 0x1D4FA || bigger == 0x1D52E || bigger == 0x1D562 || bigger == 0x1D596 || bigger == 0x1D5CA || bigger == 0x1D5FE || bigger == 0x1D632 || bigger == 0x1D666 || bigger == 0x1D69A;
case 0x0072: return bigger == 0x0433 || bigger == 0x1D26 || bigger == 0x2C85 || bigger == 0xAB47 || bigger == 0xAB48 || bigger == 0x1D42B || bigger == 0x1D45F || bigger == 0x1D493 || bigger == 0x1D4C7 || bigger == 0x1D4FB || bigger == 0x1D52F || bigger == 0x1D563 || bigger == 0x1D597 || bigger == 0x1D5CB || bigger == 0x1D5FF || bigger == 0x1D633 || bigger == 0x1D667 || bigger == 0x1D69B;
case 0x0073: return bigger == 0x01BD || bigger == 0x0455 || bigger == 0xA731 || bigger == 0xFF53 || bigger == 0x10448 || bigger == 0x118C1 || bigger == 0x1D42C || bigger == 0x1D460 || bigger == 0x1D494 || bigger == 0x1D4C8 || bigger == 0x1D4FC || bigger == 0x1D530 || bigger == 0x1D564 || bigger == 0x1D598 || bigger == 0x1D5CC || bigger == 0x1D600 || bigger == 0x1D634 || bigger == 0x1D668 || bigger == 0x1D69C;
case 0x0074: return bigger == 0x03C4 || bigger == 0x0442 || bigger == 0x1D1B || bigger == 0x1D42D || bigger == 0x1D461 || bigger == 0x1D495 || bigger == 0x1D4C9 || bigger == 0x1D4FD || bigger == 0x1D531 || bigger == 0x1D565 || bigger == 0x1D599 || bigger == 0x1D5CD || bigger == 0x1D601 || bigger == 0x1D635 || bigger == 0x1D669 || bigger == 0x1D69D || bigger == 0x1D6D5 || bigger == 0x1D70F || bigger == 0x1D749 || bigger == 0x1D783 || bigger == 0x1D7BD;
case 0x0075: return bigger == 0x028B || bigger == 0x03C5 || bigger == 0x0446 || bigger == 0x057D || bigger == 0x1D1C || bigger == 0xA79F || bigger == 0xAB4E || bigger == 0xAB52 || bigger == 0x118D8 || bigger == 0x1D42E || bigger == 0x1D462 || bigger == 0x1D496 || bigger == 0x1D4CA || bigger == 0x1D4FE || bigger == 0x1D532 || bigger == 0x1D566 || bigger == 0x1D59A || bigger == 0x1D5CE || bigger == 0x1D602 || bigger == 0x1D636 || bigger == 0x1D66A || bigger == 0x1D69E || bigger == 0x1D6D6 || bigger == 0x1D710 || bigger == 0x1D74A || bigger == 0x1D784 || bigger == 0x1D7BE;
case 0x0076: return bigger == 0x03BD || bigger == 0x0475 || bigger == 0x05D8 || bigger == 0x1D20 || bigger == 0x2174 || bigger == 0x2228 || bigger == 0x22C1 || bigger == 0xFF56 || bigger == 0x118C0 || bigger == 0x1D42F || bigger == 0x1D463 || bigger == 0x1D497 || bigger == 0x1D4CB || bigger == 0x1D4FF || bigger == 0x1D533 || bigger == 0x1D567 || bigger == 0x1D59B || bigger == 0x1D5CF || bigger == 0x1D603 || bigger == 0x1D637 || bigger == 0x1D66B || bigger == 0x1D69F || bigger == 0x1D6CE || bigger == 0x1D708 || bigger == 0x1D742 || bigger == 0x1D77C || bigger == 0x1D7B6;
case 0x0077: return bigger == 0x026F || bigger == 0x0461 || bigger == 0x051D || bigger == 0x0561 || bigger == 0x1D21 || bigger == 0x1170E || bigger == 0x1170F || bigger == 0x1D430 || bigger == 0x1D464 || bigger == 0x1D498 || bigger == 0x1D4CC || bigger == 0x1D500 || bigger == 0x1D534 || bigger == 0x1D568 || bigger == 0x1D59C || bigger == 0x1D5D0 || bigger == 0x1D604 || bigger == 0x1D638 || bigger == 0x1D66C || bigger == 0x1D6A0;
case 0x0078: return bigger == 0x00D7 || bigger == 0x0445 || bigger == 0x1541 || bigger == 0x157D || bigger == 0x166E || bigger == 0x2179 || bigger == 0x292B || bigger == 0x292C || bigger == 0x2A2F || bigger == 0xFF58 || bigger == 0x1D431 || bigger == 0x1D465 || bigger == 0x1D499 || bigger == 0x1D4CD || bigger == 0x1D501 || bigger == 0x1D535 || bigger == 0x1D569 || bigger == 0x1D59D || bigger == 0x1D5D1 || bigger == 0x1D605 || bigger == 0x1D639 || bigger == 0x1D66D || bigger == 0x1D6A1;
case 0x0079: return bigger == 0x0263 || bigger == 0x028F || bigger == 0x03B3 || bigger == 0x0443 || bigger == 0x04AF || bigger == 0x10E7 || bigger == 0x1D8C || bigger == 0x1EFF || bigger == 0x213D || bigger == 0xAB5A || bigger == 0xFF59 || bigger == 0x118DC || bigger == 0x1D432 || bigger == 0x1D466 || bigger == 0x1D49A || bigger == 0x1D4CE || bigger == 0x1D502 || bigger == 0x1D536 || bigger == 0x1D56A || bigger == 0x1D59E || bigger == 0x1D5D2 || bigger == 0x1D606 || bigger == 0x1D63A || bigger == 0x1D66E || bigger == 0x1D6A2 || bigger == 0x1D6C4 || bigger == 0x1D6FE || bigger == 0x1D738 || bigger == 0x1D772 || bigger == 0x1D7AC;
case 0x007A: return bigger == 0x1D22 || bigger == 0x118C4 || bigger == 0x1D433 || bigger == 0x1D467 || bigger == 0x1D49B || bigger == 0x1D4CF || bigger == 0x1D503 || bigger == 0x1D537 || bigger == 0x1D56B || bigger == 0x1D59F || bigger == 0x1D5D3 || bigger == 0x1D607 || bigger == 0x1D63B || bigger == 0x1D66F || bigger == 0x1D6A3;
case 0x007B: return bigger == 0x2774 || bigger == 0x1D114;
case 0x007D: return bigger == 0x2775;
case 0x007E: return bigger == 0x02DC || bigger == 0x1FC0 || bigger == 0x2053 || bigger == 0x223C;
default: return 0;
} }
} }

View file

@ -1,16 +0,0 @@
#ifndef BASE_CONFUSABLES_H
#define BASE_CONFUSABLES_H
#include "detect.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
	Function: str_utf8_is_confusable
		Looks up whether two code points are listed as confusable
		with each other.

	Parameters:
		smaller - Code point with the lower numeric value of the pair.
		bigger - Code point with the higher numeric value of the pair.

	Returns:
		Non-zero if the pair is listed as confusable.
		0 otherwise.

	Remarks:
		- The caller str_utf8_comp_names always passes the numerically
		  lower code point first.
		- NOTE(review): presumably the lookup is keyed on `smaller`, so
		  swapping the arguments would miss matches - confirm against
		  the implementation.
*/
int str_utf8_is_confusable(int smaller, int bigger);
#ifdef __cplusplus
}
#endif
#endif

23100
src/base/confusables_data.h Normal file

File diff suppressed because it is too large Load diff

View file

@ -8,7 +8,6 @@
#include <time.h> #include <time.h>
#include "system.h" #include "system.h"
#include "confusables.h"
#include <sys/types.h> #include <sys/types.h>
#include <sys/stat.h> #include <sys/stat.h>
@ -2234,36 +2233,6 @@ int str_toint_base(const char *str, int base) { return strtol(str, NULL, base);
float str_tofloat(const char *str) { return atof(str); } float str_tofloat(const char *str) { return atof(str); }
/*
	Function: str_utf8_comp_names
		Walks two names code point by code point and reports the first
		pair that differs and is not listed as confusable.

	Parameters:
		a - First name to compare.
		b - Second name to compare.

	Returns:
		The difference of the first pair of code points that is not
		confusable, or `*a - *b` once either string has been consumed.
*/
int str_utf8_comp_names(const char *a, const char *b)
{
	while(*a && *b)
	{
		int left;
		int right;
		int delta;

		/*
			Fetch the next code point of each name.
			NOTE(review): despite its name, str_utf8_isspace() appears to
			return non-zero for NON-blank code points, so these loops keep
			decoding while the code point is blank - confirm.
		*/
		do
			left = str_utf8_decode(&a);
		while(*a && !str_utf8_isspace(left));

		do
			right = str_utf8_decode(&b);
		while(*b && !str_utf8_isspace(right));

		delta = left - right;
		if(delta == 0)
			continue;

		/* the confusable lookup expects the numerically lower code point first */
		if(delta < 0)
		{
			if(!str_utf8_is_confusable(left, right))
				return delta;
		}
		else if(!str_utf8_is_confusable(right, left))
			return delta;
	}

	return *a - *b;
}
int str_utf8_isspace(int code) int str_utf8_isspace(int code)
{ {
return code > 0x20 && code != 0xA0 && code != 0x034F && code != 0x2800 && return code > 0x20 && code != 0xA0 && code != 0x034F && code != 0x2800 &&

View file

@ -1265,7 +1265,19 @@ unsigned str_quickhash(const char *str);
*/ */
void gui_messagebox(const char *title, const char *message); void gui_messagebox(const char *title, const char *message);
int str_utf8_comp_names(const char *a, const char *b); /*
	Function: str_utf8_comp_confusable
		Checks whether two strings could be visually mistaken for
		each other.

	Parameters:
		a - First string to compare.
		b - Second string to compare.

	Returns:
		0 if the strings are confusable.
		!=0 otherwise.

	Remarks:
		- NOTE(review): presumably both strings must be zero-terminated
		  UTF-8 - confirm against the implementation.
*/
int str_utf8_comp_confusable(const char *a, const char *b);
int str_utf8_isspace(int code); int str_utf8_isspace(int code);

View file

@ -358,7 +358,7 @@ int CServer::TrySetClientName(int ClientID, const char *pName)
{ {
if(i != ClientID && m_aClients[i].m_State >= CClient::STATE_READY) if(i != ClientID && m_aClients[i].m_State >= CClient::STATE_READY)
{ {
if(str_utf8_comp_names(aTrimmedName, m_aClients[i].m_aName) == 0) if(str_utf8_comp_confusable(aTrimmedName, m_aClients[i].m_aName) == 0)
return -1; return -1;
} }
} }

13
src/tools/confusables.cpp Normal file
View file

@ -0,0 +1,13 @@
#include <base/system.h>
// Command-line helper: logs the result of str_utf8_comp_confusable() for the
// two strings given as arguments (0 means the strings are confusable).
// Returns 0 after a comparison, -1 if fewer than two strings were supplied.
int main(int argc, const char **argv) // ignore_convention
{
	const int NumStrings = 2;

	dbg_logger_stdout();

	if(argc >= 1 + NumStrings)
	{
		const int NotConfusable = str_utf8_comp_confusable(argv[1], argv[2]);
		dbg_msg("conf", "not_confusable=%d", NotConfusable);
		return 0;
	}

	// argv[0] may be a null pointer when no program name is available.
	const char *pProgramName = argv[0] ? argv[0] : "confusables";
	dbg_msg("usage", "%s STR1 STR2", pProgramName);
	return -1;
}