mirror of
https://github.com/ddnet/ddnet.git
synced 2024-11-10 10:08:18 +00:00
Merge pull request #1423 from ddnet/utf8_nocase
UTF8 nocase compare & use for chat TAB completion
This commit is contained in:
commit
71ec8cc294
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -40,7 +40,6 @@ DDNet-Server
|
||||||
DDNet-Server-Launcher
|
DDNet-Server-Launcher
|
||||||
config_retrieve
|
config_retrieve
|
||||||
config_store
|
config_store
|
||||||
confusables
|
|
||||||
crapnet
|
crapnet
|
||||||
dilate
|
dilate
|
||||||
dummy_map
|
dummy_map
|
||||||
|
@ -58,6 +57,7 @@ tileset_borderfix
|
||||||
tileset_borderrem
|
tileset_borderrem
|
||||||
tileset_borderset
|
tileset_borderset
|
||||||
twping
|
twping
|
||||||
|
unicode_confusables
|
||||||
uuid
|
uuid
|
||||||
versionsrv
|
versionsrv
|
||||||
|
|
||||||
|
|
|
@ -565,8 +565,6 @@ generate_source("src/game/generated/server_data.h" "server_content_header")
|
||||||
# Sources
|
# Sources
|
||||||
set_glob(BASE GLOB_RECURSE src/base
|
set_glob(BASE GLOB_RECURSE src/base
|
||||||
color.h
|
color.h
|
||||||
confusables.c
|
|
||||||
confusables_data.h
|
|
||||||
detect.h
|
detect.h
|
||||||
hash.c
|
hash.c
|
||||||
hash.h
|
hash.h
|
||||||
|
@ -584,6 +582,10 @@ set_glob(BASE GLOB_RECURSE src/base
|
||||||
tl/sorted_array.h
|
tl/sorted_array.h
|
||||||
tl/string.h
|
tl/string.h
|
||||||
tl/threading.h
|
tl/threading.h
|
||||||
|
unicode/confusables.c
|
||||||
|
unicode/confusables_data.h
|
||||||
|
unicode/tolower.c
|
||||||
|
unicode/tolower_data.h
|
||||||
vmath.h
|
vmath.h
|
||||||
)
|
)
|
||||||
set_glob(ENGINE_INTERFACE GLOB src/engine
|
set_glob(ENGINE_INTERFACE GLOB src/engine
|
||||||
|
@ -1094,7 +1096,6 @@ set_glob(TOOLS GLOB src/tools
|
||||||
config_common.h
|
config_common.h
|
||||||
config_retrieve.cpp
|
config_retrieve.cpp
|
||||||
config_store.cpp
|
config_store.cpp
|
||||||
confusables.cpp
|
|
||||||
crapnet.cpp
|
crapnet.cpp
|
||||||
dilate.cpp
|
dilate.cpp
|
||||||
dummy_map.cpp
|
dummy_map.cpp
|
||||||
|
@ -1108,6 +1109,7 @@ set_glob(TOOLS GLOB src/tools
|
||||||
tileset_borderfix.cpp
|
tileset_borderfix.cpp
|
||||||
tileset_borderrem.cpp
|
tileset_borderrem.cpp
|
||||||
tileset_borderset.cpp
|
tileset_borderset.cpp
|
||||||
|
unicode_confusables.cpp
|
||||||
uuid.cpp
|
uuid.cpp
|
||||||
)
|
)
|
||||||
foreach(ABS_T ${TOOLS})
|
foreach(ABS_T ${TOOLS})
|
||||||
|
|
|
@ -6,7 +6,8 @@ os.chdir(os.path.dirname(__file__) + "/..")
|
||||||
|
|
||||||
PATH = "src/"
|
PATH = "src/"
|
||||||
EXCEPTIONS = [
|
EXCEPTIONS = [
|
||||||
"src/base/confusables_data.h",
|
"src/base/unicode/confusables_data.h",
|
||||||
|
"src/base/unicode/tolower_data.h",
|
||||||
"src/tools/config_common.h"
|
"src/tools/config_common.h"
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
@ -5,51 +5,20 @@
|
||||||
# - http://www.unicode.org/Public/<VERSION>/ucd/UnicodeData.txt
|
# - http://www.unicode.org/Public/<VERSION>/ucd/UnicodeData.txt
|
||||||
#
|
#
|
||||||
# If executed as a script, it will generate the contents of the file
|
# If executed as a script, it will generate the contents of the file
|
||||||
# `src/base/confusables_data.h`.
|
# `src/base/unicode/confusables_data.h`.
|
||||||
|
|
||||||
import csv
|
import unicode
|
||||||
|
|
||||||
def confusables():
|
|
||||||
with open('confusables.txt', encoding='utf-8-sig') as f:
|
|
||||||
# Filter comments
|
|
||||||
f = map(lambda line: line.split('#')[0], f)
|
|
||||||
return list(csv.DictReader(f, fieldnames=['Value', 'Target', 'Category'], delimiter=';'))
|
|
||||||
|
|
||||||
UNICODEDATA_FIELDS = (
|
|
||||||
"Value",
|
|
||||||
"Name",
|
|
||||||
"General_Category",
|
|
||||||
"Canonical_Combining_Class",
|
|
||||||
"Bidi_Class",
|
|
||||||
"Decomposition",
|
|
||||||
"Numeric",
|
|
||||||
"Bidi_Mirrored",
|
|
||||||
"Unicode_1_Name",
|
|
||||||
"ISO_Comment",
|
|
||||||
"Simple_Uppercase_Mapping",
|
|
||||||
"Simple_Lowercase_Mapping",
|
|
||||||
"Simple_Titlecase_Mapping",
|
|
||||||
)
|
|
||||||
|
|
||||||
def unicodedata():
|
|
||||||
with open('UnicodeData.txt') as f:
|
|
||||||
return list(csv.DictReader(f, fieldnames=UNICODEDATA_FIELDS, delimiter=';'))
|
|
||||||
|
|
||||||
def unhex(s):
|
|
||||||
return int(s, 16)
|
|
||||||
|
|
||||||
def unhex_sequence(s):
|
|
||||||
return [unhex(x) for x in s.split()] if '<' not in s else None
|
|
||||||
|
|
||||||
def generate_decompositions():
|
def generate_decompositions():
|
||||||
ud = unicodedata()
|
ud = unicode.data()
|
||||||
con = confusables()
|
con = unicode.confusables()
|
||||||
|
|
||||||
category = lambda x: {unhex(u["Value"]) for u in ud if u["General_Category"].startswith(x)}
|
category = lambda x: {unicode.unhex(u["Value"]) for u in ud if u["General_Category"].startswith(x)}
|
||||||
|
|
||||||
nfd = {unhex(u["Value"]): unhex_sequence(u["Decomposition"]) for u in ud}
|
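# NOTE: UnicodeData.txt keeps the optional <tag> and the decomposition mapping together in one column, which unicode.py labels "Decomposition_Type"; entries carrying a <tag> are dropped by unhex_sequence.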
|
||||||
|
nfd = {unicode.unhex(u["Value"]): unicode.unhex_sequence(u["Decomposition_Type"]) for u in ud}
|
||||||
nfd = {k: v for k, v in nfd.items() if v}
|
nfd = {k: v for k, v in nfd.items() if v}
|
||||||
con = {unhex(c["Value"]): unhex_sequence(c["Target"]) for c in con}
|
con = {unicode.unhex(c["Value"]): unicode.unhex_sequence(c["Target"]) for c in con}
|
||||||
|
|
||||||
# C: Control
|
# C: Control
|
||||||
# M: Combining
|
# M: Combining
|
38
scripts/generate_unicode_tolower.py
Normal file
38
scripts/generate_unicode_tolower.py
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
# Needs UnicodeData.txt in the current directory.
|
||||||
|
#
|
||||||
|
# It can be obtained from unicode.org:
|
||||||
|
# - http://www.unicode.org/Public/<VERSION>/ucd/UnicodeData.txt
|
||||||
|
#
|
||||||
|
# If executed as a script, it will generate the contents of the file
|
||||||
|
# `src/base/unicode/tolower_data.h`.
|
||||||
|
|
||||||
|
import unicode
|
||||||
|
|
||||||
|
def generate_cases():
    """Return the simple lowercase mappings found in UnicodeData.txt.

    Each item is an ``(upper, lower)`` pair of integer code points, built from
    every record whose ``Simple_Lowercase_Mapping`` column is non-empty.

    The pairs are returned in ascending order of ``upper``: the generated
    table is searched with ``bsearch`` keyed on that member, which only works
    on sorted data, so the order must not depend on the input file's layout.
    """
    ud = unicode.data()
    cases = [
        (unicode.unhex(u["Value"]), unicode.unhex(u["Simple_Lowercase_Mapping"]))
        for u in ud
        if u["Simple_Lowercase_Mapping"]
    ]
    cases.sort()
    return cases
|
||||||
|
|
||||||
|
def main():
    """Write the generated contents of `tolower_data.h` to standard output."""
    cases = generate_cases()

    # Doubled braces are literal C braces; the lone `{}` receives the number
    # of table entries.
    prologue = """\
#include <stdint.h>

struct UPPER_LOWER
{{
\tint32_t upper;
\tint32_t lower;
}};

enum
{{
\tNUM_TOLOWER={},
}};

static const struct UPPER_LOWER tolower[NUM_TOLOWER] = {{"""
    print(prologue.format(len(cases)))

    # One initializer line per (upper, lower) pair.
    entry = "\t{{{}, {}}},"
    for pair in cases:
        print(entry.format(*pair))

    print("};")
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
35
scripts/unicode.py
Normal file
35
scripts/unicode.py
Normal file
|
@ -0,0 +1,35 @@
|
||||||
|
import csv
|
||||||
|
|
||||||
|
def confusables():
    """Parse `confusables.txt` from the current directory.

    Returns a list with one mapping per row, using the field names 'Value',
    'Target' and 'Category'. Everything from the first '#' on a line is
    removed before the line reaches the CSV parser.
    """
    columns = ['Value', 'Target', 'Category']
    with open('confusables.txt', encoding='utf-8-sig') as f:
        # Strip comments
        uncommented = (line.split('#')[0] for line in f)
        reader = csv.DictReader(uncommented, fieldnames=columns, delimiter=';')
        return list(reader)
|
||||||
|
|
||||||
|
# Labels for the 15 semicolon-separated columns of UnicodeData.txt, in file
# order. They are used as dictionary keys by the generator scripts, so the
# spelling must stay as it is.
#
# NOTE: the labels of columns 5-8 do not describe those columns literally.
# Per UAX #44, column 5 holds the decomposition (optional <tag> followed by
# the mapping) as a single field, and columns 6-8 hold the decimal, digit and
# numeric values respectively.
UNICODEDATA_FIELDS = (
    "Value",                      # 0: code point, hexadecimal
    "Name",                       # 1
    "General_Category",           # 2
    "Canonical_Combining_Class",  # 3
    "Bidi_Class",                 # 4
    "Decomposition_Type",         # 5: whole decomposition field (tag + mapping)
    "Decomposition_Mapping",      # 6: decimal digit value in the file
    "Numeric_Type",               # 7: digit value in the file
    "Numeric_Mapping",            # 8: numeric value in the file
    "Bidi_Mirrored",              # 9
    "Unicode_1_Name",             # 10
    "ISO_Comment",                # 11
    "Simple_Uppercase_Mapping",   # 12
    "Simple_Lowercase_Mapping",   # 13
    "Simple_Titlecase_Mapping",   # 14
)
|
||||||
|
|
||||||
|
def data():
    """Parse `UnicodeData.txt` from the current directory.

    Returns a list with one mapping per record, keyed by the names in
    `UNICODEDATA_FIELDS`.
    """
    # newline='' is what the csv module asks for on file objects; the explicit
    # encoding keeps parsing independent of the platform's default locale.
    with open('UnicodeData.txt', encoding='utf-8', newline='') as f:
        return list(csv.DictReader(f, fieldnames=UNICODEDATA_FIELDS, delimiter=';'))
|
||||||
|
|
||||||
|
def unhex(s):
    """Return the integer value of the hexadecimal string `s`."""
    value = int(s, 16)
    return value
|
||||||
|
|
||||||
|
def unhex_sequence(s):
    """Decode a whitespace-separated run of hexadecimal numbers.

    Returns the numbers as a list of ints, or None when `s` contains a '<'
    anywhere.
    """
    if '<' in s:
        return None
    return [int(token, 16) for token in s.split()]
|
|
@ -2367,7 +2367,7 @@ int str_comp_nocase(const char *a, const char *b)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
int str_comp_nocase_num(const char *a, const char *b, const int num)
|
int str_comp_nocase_num(const char *a, const char *b, int num)
|
||||||
{
|
{
|
||||||
#if defined(CONF_FAMILY_WINDOWS)
|
#if defined(CONF_FAMILY_WINDOWS)
|
||||||
return _strnicmp(a, b, num);
|
return _strnicmp(a, b, num);
|
||||||
|
@ -2381,7 +2381,7 @@ int str_comp(const char *a, const char *b)
|
||||||
return strcmp(a, b);
|
return strcmp(a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
int str_comp_num(const char *a, const char *b, const int num)
|
int str_comp_num(const char *a, const char *b, int num)
|
||||||
{
|
{
|
||||||
return strncmp(a, b, num);
|
return strncmp(a, b, num);
|
||||||
}
|
}
|
||||||
|
@ -2721,6 +2721,63 @@ int str_toint(const char *str) { return atoi(str); }
|
||||||
int str_toint_base(const char *str, int base) { return strtol(str, NULL, base); }
|
int str_toint_base(const char *str, int base) { return strtol(str, NULL, base); }
|
||||||
float str_tofloat(const char *str) { return atof(str); }
|
float str_tofloat(const char *str) { return atof(str); }
|
||||||
|
|
||||||
|
/*
	Case-insensitive comparison of two zero-terminated UTF-8 strings.
	Code points are decoded pairwise and lowercased; the first pair that
	differs decides the result. If one string ends first, the result is the
	difference of the bytes both cursors currently point at.
*/
int str_utf8_comp_nocase(const char *a, const char *b)
{
	while(*a != '\0' && *b != '\0')
	{
		const int lower_a = str_utf8_tolower(str_utf8_decode(&a));
		const int lower_b = str_utf8_tolower(str_utf8_decode(&b));
		const int diff = lower_a - lower_b;

		if(diff != 0)
			return diff;
	}

	return (unsigned char)*a - (unsigned char)*b;
}
|
||||||
|
|
||||||
|
/*
	Case-insensitive comparison of at most num bytes of two zero-terminated
	UTF-8 strings. The byte count is measured along a.

	Returns 0 once num bytes have been consumed without finding a
	difference (and immediately when num <= 0). Otherwise returns the
	difference of the first pair of lowercased code points that differ, or
	the difference of the current bytes when one string ends before the
	limit is reached.
*/
int str_utf8_comp_nocase_num(const char *a, const char *b, int num)
{
	int code_a;
	int code_b;
	const char *old_a = a;

	/* Nothing to compare: equal by definition, even if a string is empty. */
	if(num <= 0)
		return 0;

	while(*a && *b)
	{
		code_a = str_utf8_tolower(str_utf8_decode(&a));
		code_b = str_utf8_tolower(str_utf8_decode(&b));

		if(code_a != code_b)
			return code_a - code_b;

		/* Check the limit right after a successful comparison. Checking it
		   only before decoding would miss the case where a string ends
		   exactly at the limit (e.g. b is a num-byte prefix of a): the loop
		   would exit on the terminator and report a difference. */
		if(a - old_a >= num)
			return 0;
	}

	return (unsigned char)*a - (unsigned char)*b;
}
|
||||||
|
|
||||||
|
/*
	Case-insensitive search for needle inside haystack, both zero-terminated
	UTF-8 strings. Every code point boundary of haystack is tried as a
	starting position. Returns the first position where the whole needle
	matches, or 0 if there is none.
*/
const char *str_utf8_find_nocase(const char *haystack, const char *needle)
{
	/* Step to the next starting position one code point at a time. */
	for(; *haystack; str_utf8_decode(&haystack))
	{
		/* *_pos mark what has matched so far, *_ahead are the decode cursors. */
		const char *hay_pos = haystack;
		const char *needle_pos = needle;
		const char *hay_ahead = haystack;
		const char *needle_ahead = needle;

		while(*hay_pos && *needle_pos)
		{
			int hay_code = str_utf8_tolower(str_utf8_decode(&hay_ahead));
			int needle_code = str_utf8_tolower(str_utf8_decode(&needle_ahead));

			if(hay_code != needle_code)
				break;

			hay_pos = hay_ahead;
			needle_pos = needle_ahead;
		}

		/* The needle was consumed completely: match at this position. */
		if(*needle_pos == '\0')
			return haystack;
	}

	return 0;
}
|
||||||
|
|
||||||
int str_utf8_isspace(int code)
|
int str_utf8_isspace(int code)
|
||||||
{
|
{
|
||||||
|
|
|
@ -159,7 +159,7 @@ void mem_zero(void *block, unsigned size);
|
||||||
size - Size of the data to compare
|
size - Size of the data to compare
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
<0 - Block a is lesser than block b
|
<0 - Block a is less than block b
|
||||||
0 - Block a is equal to block b
|
0 - Block a is equal to block b
|
||||||
>0 - Block a is greater than block b
|
>0 - Block a is greater than block b
|
||||||
*/
|
*/
|
||||||
|
@ -676,7 +676,7 @@ int net_host_lookup(const char *hostname, NETADDR *addr, int types);
|
||||||
b - Address to compare to.
|
b - Address to compare to.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
<0 - Address a is lesser than address b
|
<0 - Address a is less than address b
|
||||||
0 - Address a is equal to address b
|
0 - Address a is equal to address b
|
||||||
>0 - Address a is greater than address b
|
>0 - Address a is greater than address b
|
||||||
*/
|
*/
|
||||||
|
@ -691,7 +691,7 @@ int net_addr_comp(const NETADDR *a, const NETADDR *b);
|
||||||
b - Address to compare to.
|
b - Address to compare to.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
<0 - Address a is lesser than address b
|
<0 - Address a is less than address b
|
||||||
0 - Address a is equal to address b
|
0 - Address a is equal to address b
|
||||||
>0 - Address a is greater than address b
|
>0 - Address a is greater than address b
|
||||||
*/
|
*/
|
||||||
|
@ -1114,26 +1114,26 @@ char *str_skip_whitespaces(char *str);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Function: str_comp_nocase
|
Function: str_comp_nocase
|
||||||
Compares to strings case insensitive.
|
Compares two strings case insensitively.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
a - String to compare.
|
a - String to compare.
|
||||||
b - String to compare.
|
b - String to compare.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
<0 - String a is lesser than string b
|
<0 - String a is less than string b
|
||||||
0 - String a is equal to string b
|
0 - String a is equal to string b
|
||||||
>0 - String a is greater than string b
|
>0 - String a is greater than string b
|
||||||
|
|
||||||
Remarks:
|
Remarks:
|
||||||
- Only garanted to work with a-z/A-Z.
|
- Only guaranteed to work with a-z/A-Z.
|
||||||
- The strings are treated as zero-terminated strings.
|
- The strings are treated as zero-terminated strings.
|
||||||
*/
|
*/
|
||||||
int str_comp_nocase(const char *a, const char *b);
|
int str_comp_nocase(const char *a, const char *b);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Function: str_comp_nocase_num
|
Function: str_comp_nocase_num
|
||||||
Compares up to num characters of two strings case insensitive.
|
Compares up to num characters of two strings case insensitively.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
a - String to compare.
|
a - String to compare.
|
||||||
|
@ -1141,15 +1141,16 @@ int str_comp_nocase(const char *a, const char *b);
|
||||||
num - Maximum characters to compare
|
num - Maximum characters to compare
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
<0 - String a is lesser than string b
|
<0 - String a is less than string b
|
||||||
0 - String a is equal to string b
|
0 - String a is equal to string b
|
||||||
>0 - String a is greater than string b
|
>0 - String a is greater than string b
|
||||||
|
|
||||||
Remarks:
|
Remarks:
|
||||||
- Only garanted to work with a-z/A-Z.
|
- Only guaranteed to work with a-z/A-Z.
|
||||||
|
(use str_utf8_comp_nocase_num for unicode support)
|
||||||
- The strings are treated as zero-terminated strings.
|
- The strings are treated as zero-terminated strings.
|
||||||
*/
|
*/
|
||||||
int str_comp_nocase_num(const char *a, const char *b, const int num);
|
int str_comp_nocase_num(const char *a, const char *b, int num);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Function: str_comp
|
Function: str_comp
|
||||||
|
@ -1160,7 +1161,7 @@ int str_comp_nocase_num(const char *a, const char *b, const int num);
|
||||||
b - String to compare.
|
b - String to compare.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
<0 - String a is lesser than string b
|
<0 - String a is less than string b
|
||||||
0 - String a is equal to string b
|
0 - String a is equal to string b
|
||||||
>0 - String a is greater than string b
|
>0 - String a is greater than string b
|
||||||
|
|
||||||
|
@ -1179,14 +1180,14 @@ int str_comp(const char *a, const char *b);
|
||||||
num - Maximum characters to compare
|
num - Maximum characters to compare
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
<0 - String a is lesser than string b
|
<0 - String a is less than string b
|
||||||
0 - String a is equal to string b
|
0 - String a is equal to string b
|
||||||
>0 - String a is greater than string b
|
>0 - String a is greater than string b
|
||||||
|
|
||||||
Remarks:
|
Remarks:
|
||||||
- The strings are treated as zero-terminated strings.
|
- The strings are treated as zero-terminated strings.
|
||||||
*/
|
*/
|
||||||
int str_comp_num(const char *a, const char *b, const int num);
|
int str_comp_num(const char *a, const char *b, int num);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Function: str_comp_filenames
|
Function: str_comp_filenames
|
||||||
|
@ -1197,7 +1198,7 @@ int str_comp_num(const char *a, const char *b, const int num);
|
||||||
b - String to compare.
|
b - String to compare.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
<0 - String a is lesser than string b
|
<0 - String a is less than string b
|
||||||
0 - String a is equal to string b
|
0 - String a is equal to string b
|
||||||
>0 - String a is greater than string b
|
>0 - String a is greater than string b
|
||||||
|
|
||||||
|
@ -1300,7 +1301,7 @@ int str_utf32_dist_buffer(const int *a, int a_len, const int *b, int b_len, int
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Function: str_find_nocase
|
Function: str_find_nocase
|
||||||
Finds a string inside another string case insensitive.
|
Finds a string inside another string case insensitively.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
haystack - String to search in
|
haystack - String to search in
|
||||||
|
@ -1311,7 +1312,8 @@ int str_utf32_dist_buffer(const int *a, int a_len, const int *b, int b_len, int
|
||||||
Returns NULL of needle could not be found.
|
Returns NULL if needle could not be found.
|
||||||
|
|
||||||
Remarks:
|
Remarks:
|
||||||
- Only garanted to work with a-z/A-Z.
|
- Only guaranteed to work with a-z/A-Z.
|
||||||
|
(use str_utf8_find_nocase for unicode support)
|
||||||
- The strings are treated as zero-terminated strings.
|
- The strings are treated as zero-terminated strings.
|
||||||
*/
|
*/
|
||||||
const char *str_find_nocase(const char *haystack, const char *needle);
|
const char *str_find_nocase(const char *haystack, const char *needle);
|
||||||
|
@ -1631,6 +1633,66 @@ int str_utf8_to_skeleton(const char *str, int *buf, int buf_len);
|
||||||
*/
|
*/
|
||||||
int str_utf8_comp_confusable(const char *a, const char *b);
|
int str_utf8_comp_confusable(const char *a, const char *b);
|
||||||
|
|
||||||
|
/*
|
||||||
|
Function: str_utf8_tolower
|
||||||
|
Converts the given Unicode codepoint to lowercase (locale insensitive).
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
code - Unicode codepoint to convert.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Lowercase codepoint
|
||||||
|
*/
|
||||||
|
int str_utf8_tolower(int code);
|
||||||
|
|
||||||
|
/*
|
||||||
|
Function: str_utf8_comp_nocase
|
||||||
|
Compares two utf8 strings case insensitively.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
a - String to compare.
|
||||||
|
b - String to compare.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
<0 - String a is less than string b
|
||||||
|
0 - String a is equal to string b
|
||||||
|
>0 - String a is greater than string b
|
||||||
|
*/
|
||||||
|
int str_utf8_comp_nocase(const char *a, const char *b);
|
||||||
|
|
||||||
|
/*
|
||||||
|
Function: str_utf8_comp_nocase_num
|
||||||
|
Compares up to num bytes of two utf8 strings case insensitively.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
a - String to compare.
|
||||||
|
b - String to compare.
|
||||||
|
num - Maximum number of bytes to compare
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
<0 - String a is less than string b
|
||||||
|
0 - String a is equal to string b
|
||||||
|
>0 - String a is greater than string b
|
||||||
|
*/
|
||||||
|
int str_utf8_comp_nocase_num(const char *a, const char *b, int num);
|
||||||
|
|
||||||
|
/*
|
||||||
|
Function: str_utf8_find_nocase
|
||||||
|
Finds a utf8 string inside another utf8 string case insensitively.
|
||||||
|
|
||||||
|
Parameters:
|
||||||
|
haystack - String to search in
|
||||||
|
needle - String to search for
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A pointer into haystack where the needle was found.
|
||||||
|
Returns NULL if needle could not be found.
|
||||||
|
|
||||||
|
Remarks:
|
||||||
|
- The strings are treated as zero-terminated strings.
|
||||||
|
*/
|
||||||
|
const char *str_utf8_find_nocase(const char *haystack, const char *needle);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Function: str_utf8_isspace
|
Function: str_utf8_isspace
|
||||||
Checks whether the given Unicode codepoint renders as space.
|
Checks whether the given Unicode codepoint renders as space.
|
||||||
|
|
1
src/base/unicode/VERSION
Normal file
1
src/base/unicode/VERSION
Normal file
|
@ -0,0 +1 @@
|
||||||
|
12.0.0
|
|
@ -1,6 +1,6 @@
|
||||||
#include "confusables_data.h"
|
#include "confusables_data.h"
|
||||||
|
|
||||||
#include "system.h"
|
#include "../system.h"
|
||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
|
File diff suppressed because it is too large
Load diff
22
src/base/unicode/tolower.c
Normal file
22
src/base/unicode/tolower.c
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#include "tolower_data.h"
|
||||||
|
|
||||||
|
static int compul(const void *a, const void *b)
|
||||||
|
{
|
||||||
|
struct UPPER_LOWER *ul_a = (struct UPPER_LOWER *) a;
|
||||||
|
struct UPPER_LOWER *ul_b = (struct UPPER_LOWER *) b;
|
||||||
|
return ul_a->upper - ul_b->upper;
|
||||||
|
}
|
||||||
|
|
||||||
|
int str_utf8_tolower(int code)
|
||||||
|
{
|
||||||
|
struct UPPER_LOWER key;
|
||||||
|
struct UPPER_LOWER *res;
|
||||||
|
key.upper = code;
|
||||||
|
res = bsearch(&key, tolower, NUM_TOLOWER, sizeof(struct UPPER_LOWER), compul);
|
||||||
|
|
||||||
|
if(res == NULL)
|
||||||
|
return code;
|
||||||
|
return res->lower;
|
||||||
|
}
|
1405
src/base/unicode/tolower_data.h
Normal file
1405
src/base/unicode/tolower_data.h
Normal file
File diff suppressed because it is too large
Load diff
|
@ -327,11 +327,11 @@ bool CChat::OnInput(IInput::CEvent Event)
|
||||||
bool Found = false;
|
bool Found = false;
|
||||||
if(SearchType == 1)
|
if(SearchType == 1)
|
||||||
{
|
{
|
||||||
if(str_comp_nocase_num(m_pClient->m_aClients[Index2].m_aName, m_aCompletionBuffer, str_length(m_aCompletionBuffer)) &&
|
if(str_utf8_comp_nocase_num(m_pClient->m_aClients[Index2].m_aName, m_aCompletionBuffer, str_length(m_aCompletionBuffer)) &&
|
||||||
str_find_nocase(m_pClient->m_aClients[Index2].m_aName, m_aCompletionBuffer))
|
str_utf8_find_nocase(m_pClient->m_aClients[Index2].m_aName, m_aCompletionBuffer))
|
||||||
Found = true;
|
Found = true;
|
||||||
}
|
}
|
||||||
else if(!str_comp_nocase_num(m_pClient->m_aClients[Index2].m_aName, m_aCompletionBuffer, str_length(m_aCompletionBuffer)))
|
else if(!str_utf8_comp_nocase_num(m_pClient->m_aClients[Index2].m_aName, m_aCompletionBuffer, str_length(m_aCompletionBuffer)))
|
||||||
Found = true;
|
Found = true;
|
||||||
|
|
||||||
if(Found)
|
if(Found)
|
||||||
|
|
|
@ -55,6 +55,35 @@ TEST(Str, Utf8CompConfusables)
|
||||||
EXPECT_TRUE(str_utf8_comp_confusable("aceiou", "ąçęįǫų") == 0);
|
EXPECT_TRUE(str_utf8_comp_confusable("aceiou", "ąçęįǫų") == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Covers str_utf8_tolower and the case-insensitive UTF-8 helpers built on it.
TEST(Str, Utf8ToLower)
{
	// Single code points.
	EXPECT_TRUE(str_utf8_tolower('A') == 'a');
	EXPECT_TRUE(str_utf8_tolower('z') == 'z');
	EXPECT_TRUE(str_utf8_tolower(192) == 224); // À -> à
	EXPECT_TRUE(str_utf8_tolower(7882) == 7883); // Ị -> ị

	// Whole-string comparison.
	EXPECT_TRUE(str_utf8_comp_nocase("ÖlÜ", "ölü") == 0);
	EXPECT_TRUE(str_utf8_comp_nocase("ÜlÖ", "ölü") > 0); // ü > ö
	EXPECT_TRUE(str_utf8_comp_nocase("ÖlÜ", "ölüa") < 0); // end of string sorts before 'a'
	EXPECT_TRUE(str_utf8_comp_nocase("ölüa", "ÖlÜ") > 0); // 'a' sorts after end of string

	// A byte >= 0x80 must compare as greater than the terminator. A string
	// literal is used because `{-128, 0}` is a narrowing initializer on
	// platforms where char is unsigned.
	const char a[2] = "\x80";
	const char b[2] = "";
	EXPECT_TRUE(str_utf8_comp_nocase(a, b) > 0);
	EXPECT_TRUE(str_utf8_comp_nocase(b, a) < 0);

	// num counts bytes, not characters: "Öl" is 3 bytes, "ÖlÜ" is 5 bytes,
	// so a limit of 6 is needed to reach the trailing 'a'.
	EXPECT_TRUE(str_utf8_comp_nocase_num("ÖlÜ", "ölüa", 3) == 0);
	EXPECT_TRUE(str_utf8_comp_nocase_num("ÖlÜ", "ölüa", 6) != 0);
	EXPECT_TRUE(str_utf8_comp_nocase_num("a", "z", 0) == 0);
	EXPECT_TRUE(str_utf8_comp_nocase_num("a", "z", 1) != 0);

	// Search: every character of "ÄÖÜ" is 2 bytes long.
	const char str[] = "ÄÖÜ";
	EXPECT_TRUE(str_utf8_find_nocase(str, "ä") == str);
	EXPECT_TRUE(str_utf8_find_nocase(str, "ö") == str+2);
	EXPECT_TRUE(str_utf8_find_nocase(str, "ü") == str+4);
	EXPECT_TRUE(str_utf8_find_nocase(str, "z") == NULL);
}
|
||||||
|
|
||||||
TEST(Str, Startswith)
|
TEST(Str, Startswith)
|
||||||
{
|
{
|
||||||
EXPECT_TRUE(str_startswith("abcdef", "abc"));
|
EXPECT_TRUE(str_startswith("abcdef", "abc"));
|
||||||
|
|
|
@ -5,7 +5,7 @@ int main(int argc, const char **argv) // ignore_convention
|
||||||
dbg_logger_stdout();
|
dbg_logger_stdout();
|
||||||
if(argc < 1 + 2)
|
if(argc < 1 + 2)
|
||||||
{
|
{
|
||||||
dbg_msg("usage", "%s STR1 STR2", argv[0] ? argv[0] : "confusables");
|
dbg_msg("usage", "%s STR1 STR2", argv[0] ? argv[0] : "unicode_confusables");
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
dbg_msg("conf", "not_confusable=%d", str_utf8_comp_confusable(argv[1], argv[2]));
|
dbg_msg("conf", "not_confusable=%d", str_utf8_comp_confusable(argv[1], argv[2]));
|
Loading…
Reference in a new issue