mirror of
https://github.com/ddnet/ddnet.git
synced 2024-09-20 01:24:18 +00:00
Merge #5072
5072: Updated unicode script generation (fixes https://github.com/ddnet/ddnet/issues/5017) r=def- a=Chairn

I also updated the Unicode version to 15.0.0. However, the confusables data is still taken from version 14, because https://www.unicode.org/Public/security/15.0.0/ is empty.

CC `@heinrich5991`

## Checklist

- [ ] Tested the change ingame
- [ ] Provided screenshots if it is a visual change
- [ ] Tested in combination with possibly related configuration options
- [ ] Written a unit test if it works standalone, system.c especially
- [ ] Considered possible null pointers and out of bounds array indexing
- [ ] Changed no physics that affect existing maps
- [ ] Tested the change with [ASan+UBSan or valgrind's memcheck](https://github.com/ddnet/ddnet/#using-addresssanitizer--undefinedbehavioursanitizer-or-valgrinds-memcheck) (optional)

Co-authored-by: Chairn <chairn.nq@hotmail.fr>
This commit is contained in:
commit
e346e3e186
|
@ -1608,8 +1608,10 @@ set_src(BASE GLOB_RECURSE src/base
|
|||
tl/sorted_array.h
|
||||
tl/threading.h
|
||||
unicode/confusables.cpp
|
||||
unicode/confusables.h
|
||||
unicode/confusables_data.h
|
||||
unicode/tolower.cpp
|
||||
unicode/tolower.h
|
||||
unicode/tolower_data.h
|
||||
vmath.h
|
||||
)
|
||||
|
|
|
@ -6,7 +6,9 @@ os.chdir(os.path.dirname(__file__) + "/..")
|
|||
|
||||
PATH = "src/"
|
||||
EXCEPTIONS = [
|
||||
"src/base/unicode/confusables.h",
|
||||
"src/base/unicode/confusables_data.h",
|
||||
"src/base/unicode/tolower.h",
|
||||
"src/base/unicode/tolower_data.h",
|
||||
"src/tools/config_common.h"
|
||||
]
|
||||
|
|
|
@ -4,9 +4,11 @@
|
|||
# - http://www.unicode.org/Public/security/<VERSION>/confusables.txt
|
||||
# - http://www.unicode.org/Public/<VERSION>/ucd/UnicodeData.txt
|
||||
#
|
||||
# If executed as a script, it will generate the contents of the file
|
||||
# `src/base/unicode/confusables_data.h`.
|
||||
# If executed as a script, it will generate the contents of the files
|
||||
# python3 scripts/generate_unicode_confusables_data.py header > `src/base/unicode/confusables.h`,
|
||||
# python3 scripts/generate_unicode_confusables_data.py data > `src/base/unicode/confusables_data.h`.
|
||||
|
||||
import sys
|
||||
import unicode
|
||||
|
||||
def generate_decompositions():
|
||||
|
@ -48,22 +50,7 @@ def generate_decompositions():
|
|||
|
||||
return {c: gen(c) for c in interesting}
|
||||
|
||||
def main():
|
||||
decompositions = generate_decompositions()
|
||||
|
||||
# Deduplicate
|
||||
decomposition_set = sorted(set(tuple(x) for x in decompositions.values()))
|
||||
len_set = sorted(set(len(x) for x in decomposition_set))
|
||||
|
||||
if len(len_set) > 8:
|
||||
raise ValueError("Can't pack offset (13 bit) together with len (>3bit)")
|
||||
|
||||
cur_offset = 0
|
||||
decomposition_offsets = []
|
||||
for d in decomposition_set:
|
||||
decomposition_offsets.append(cur_offset)
|
||||
cur_offset += len(d)
|
||||
|
||||
def gen_header(decompositions, len_set):
|
||||
print("""\
|
||||
#include <stdint.h>
|
||||
|
||||
|
@ -80,19 +67,31 @@ struct DECOMP_SLICE
|
|||
print("};")
|
||||
print()
|
||||
|
||||
print("static const uint8_t decomp_lengths[NUM_DECOMP_LENGTHS] = {")
|
||||
print("extern const uint8_t decomp_lengths[NUM_DECOMP_LENGTHS];")
|
||||
print("extern const int32_t decomp_chars[NUM_DECOMPS];")
|
||||
print("extern const struct DECOMP_SLICE decomp_slices[NUM_DECOMPS];")
|
||||
print("extern const int32_t decomp_data[];")
|
||||
|
||||
def gen_data(decompositions, decomposition_set, decomposition_offsets, len_set):
|
||||
print("""\
|
||||
#ifndef CONFUSABLES_DATA
|
||||
#error "This file should only be included in `confusables.cpp`"
|
||||
#endif
|
||||
""")
|
||||
|
||||
print("const uint8_t decomp_lengths[NUM_DECOMP_LENGTHS] = {")
|
||||
for l in len_set:
|
||||
print("\t{},".format(l))
|
||||
print("};")
|
||||
print()
|
||||
|
||||
print("static const int32_t decomp_chars[NUM_DECOMPS] = {")
|
||||
print("const int32_t decomp_chars[NUM_DECOMPS] = {")
|
||||
for k in sorted(decompositions):
|
||||
print("\t0x{:x},".format(k))
|
||||
print("};")
|
||||
print()
|
||||
|
||||
print("static const struct DECOMP_SLICE decomp_slices[NUM_DECOMPS] = {")
|
||||
print("const struct DECOMP_SLICE decomp_slices[NUM_DECOMPS] = {")
|
||||
for k in sorted(decompositions):
|
||||
d = decompositions[k]
|
||||
i = decomposition_set.index(tuple(d))
|
||||
|
@ -101,11 +100,35 @@ struct DECOMP_SLICE
|
|||
print("};")
|
||||
print()
|
||||
|
||||
print("static const int32_t decomp_data[] = {")
|
||||
print("const int32_t decomp_data[] = {")
|
||||
for d in decomposition_set:
|
||||
for c in d:
|
||||
print("\t0x{:x},".format(c))
|
||||
print("};")
|
||||
|
||||
def main():
    """Print the confusables header or data file contents to stdout.

    The output is selected by a word on the command line: ``header`` calls
    ``gen_header`` and ``data`` calls ``gen_data``. When both words are
    present, ``header`` takes precedence.

    Raises:
        SystemExit: if neither ``header`` nor ``data`` was passed.
        ValueError: if there are more than 8 distinct decomposition lengths,
            or if an offset does not fit into 13 bits (the widths of the
            ``length`` and ``offset`` bit-fields of ``DECOMP_SLICE``).
    """
    header = "header" in sys.argv
    data = "data" in sys.argv
    if not header and not data:
        # Fail loudly and before doing any work: the output is meant to be
        # redirected into a source file, so printing nothing would silently
        # leave that file empty.
        sys.exit("expected a mode argument: 'header' or 'data'")

    decompositions = generate_decompositions()

    # Deduplicate
    decomposition_set = sorted({tuple(x) for x in decompositions.values()})
    len_set = sorted({len(x) for x in decomposition_set})

    if len(len_set) > 8:
        raise ValueError("Can't pack offset (13 bit) together with len (>3bit)")

    # Each distinct decomposition starts where the previous one ended.
    cur_offset = 0
    decomposition_offsets = []
    for d in decomposition_set:
        decomposition_offsets.append(cur_offset)
        cur_offset += len(d)

    # Offsets only grow, so checking the last one covers all of them.
    if decomposition_offsets and decomposition_offsets[-1] >= 1 << 13:
        raise ValueError(
            "Can't pack offset {} into 13 bit".format(decomposition_offsets[-1])
        )

    if header:
        gen_header(decompositions, len_set)
    else:
        gen_data(decompositions, decomposition_set, decomposition_offsets, len_set)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
|
@ -4,17 +4,17 @@
|
|||
# - http://www.unicode.org/Public/<VERSION>/ucd/UnicodeData.txt
|
||||
#
|
||||
# If executed as a script, it will generate the contents of the file
|
||||
# `src/base/unicode/tolower_data.h`.
|
||||
# python3 scripts/generate_unicode_tolower.py header > `src/base/unicode/tolower.h`,
|
||||
# python3 scripts/generate_unicode_tolower.py data > `src/base/unicode/tolower_data.h`.
|
||||
|
||||
import sys
|
||||
import unicode
|
||||
|
||||
def generate_cases():
    """Return ``(upper, lower)`` code point pairs from the Unicode data.

    Only records whose ``Simple_Lowercase_Mapping`` field is non-empty are
    kept; both fields are converted with ``unicode.unhex``.
    """
    pairs = []
    for record in unicode.data():
        lowercase = record["Simple_Lowercase_Mapping"]
        if not lowercase:
            continue
        pairs.append((unicode.unhex(record["Value"]), unicode.unhex(lowercase)))
    return pairs
|
||||
|
||||
def main():
|
||||
cases = generate_cases()
|
||||
|
||||
def gen_header(cases):
|
||||
print("""\
|
||||
#include <stdint.h>
|
||||
|
||||
|
@ -26,13 +26,32 @@ struct UPPER_LOWER
|
|||
|
||||
enum
|
||||
{{
|
||||
\tNUM_TOLOWER={},
|
||||
\tNUM_TOLOWER = {},
|
||||
}};
|
||||
|
||||
static const struct UPPER_LOWER tolower[NUM_TOLOWER] = {{""".format(len(cases)))
|
||||
extern const struct UPPER_LOWER tolowermap[];""".format(len(cases)))
|
||||
|
||||
def gen_data(cases):
    """Print the definition of the ``tolowermap`` table to stdout.

    Args:
        cases: iterable of ``(upper_code, lower_code)`` pairs; each pair
            becomes one ``{upper, lower},`` initializer row, in the order
            given.

    The emitted text starts with a guard that makes the compiler reject the
    file unless ``TOLOWER_DATA`` is defined by the including file.
    """
    lines = [
        "#ifndef TOLOWER_DATA",
        "#error \"This file must only be included in `tolower.cpp`\"",
        "#endif",
        "",
        "const struct UPPER_LOWER tolowermap[] = {",
    ]
    lines.extend(
        "\t{{{}, {}}},".format(upper_code, lower_code)
        for upper_code, lower_code in cases
    )
    lines.append("};")
    # One write for the whole table instead of one print call per row.
    print("\n".join(lines))
|
||||
|
||||
def main():
    """Print the tolower header or data file contents to stdout.

    The output is selected by a word on the command line: ``header`` calls
    ``gen_header`` and ``data`` calls ``gen_data``. When both words are
    present, ``header`` takes precedence.

    Raises:
        SystemExit: if neither ``header`` nor ``data`` was passed.
    """
    header = "header" in sys.argv
    data = "data" in sys.argv
    if not header and not data:
        # Fail loudly and before doing any work: the output is meant to be
        # redirected into a source file, so printing nothing would silently
        # leave that file empty.
        sys.exit("expected a mode argument: 'header' or 'data'")

    cases = generate_cases()

    if header:
        gen_header(cases)
    else:
        gen_data(cases)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
|
@ -1 +1 @@
|
|||
12.0.0
|
||||
15.0.0
|
||||
|
|
File diff suppressed because it is too large
Load diff
18
src/base/unicode/confusables.h
Normal file
18
src/base/unicode/confusables.h
Normal file
|
@ -0,0 +1,18 @@
|
|||
#include <stdint.h>
|
||||
|
||||
struct DECOMP_SLICE
|
||||
{
|
||||
uint16_t offset : 13;
|
||||
uint16_t length : 3;
|
||||
};
|
||||
|
||||
enum
|
||||
{
|
||||
NUM_DECOMP_LENGTHS = 8,
|
||||
NUM_DECOMPS = 9770,
|
||||
};
|
||||
|
||||
extern const uint8_t decomp_lengths[NUM_DECOMP_LENGTHS];
|
||||
extern const int32_t decomp_chars[NUM_DECOMPS];
|
||||
extern const struct DECOMP_SLICE decomp_slices[NUM_DECOMPS];
|
||||
extern const int32_t decomp_data[];
|
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
14
src/base/unicode/tolower.h
Normal file
14
src/base/unicode/tolower.h
Normal file
|
@ -0,0 +1,14 @@
|
|||
#include <stdint.h>
|
||||
|
||||
struct UPPER_LOWER
|
||||
{
|
||||
int32_t upper;
|
||||
int32_t lower;
|
||||
};
|
||||
|
||||
enum
|
||||
{
|
||||
NUM_TOLOWER = 1433,
|
||||
};
|
||||
|
||||
extern const struct UPPER_LOWER tolowermap[];
|
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue