mirror of
https://github.com/ddnet/ddnet.git
synced 2024-09-20 01:24:18 +00:00
Updated unicode script generation
This commit is contained in:
parent
e14fc102a6
commit
5925181acc
|
@ -4,10 +4,12 @@
|
|||
# - http://www.unicode.org/Public/security/<VERSION>/confusables.txt
|
||||
# - http://www.unicode.org/Public/<VERSION>/ucd/UnicodeData.txt
|
||||
#
|
||||
# If executed as a script, it will generate the contents of the file
|
||||
# `src/base/unicode/confusables_data.h`.
|
||||
# If executed as a script, it will generate the contents of the files
|
||||
# python3 scripts/generate_unicode_confusables_data.py header > `src/base/unicode/confusables_data.h`,
|
||||
# python3 scripts/generate_unicode_confusables_data.py source > `src/base/unicode/confusables_data.c`.
|
||||
|
||||
import unicode
|
||||
import sys
|
||||
|
||||
def generate_decompositions():
|
||||
ud = unicode.data()
|
||||
|
@ -48,22 +50,7 @@ def generate_decompositions():
|
|||
|
||||
return {c: gen(c) for c in interesting}
|
||||
|
||||
def main():
|
||||
decompositions = generate_decompositions()
|
||||
|
||||
# Deduplicate
|
||||
decomposition_set = sorted(set(tuple(x) for x in decompositions.values()))
|
||||
len_set = sorted(set(len(x) for x in decomposition_set))
|
||||
|
||||
if len(len_set) > 8:
|
||||
raise ValueError("Can't pack offset (13 bit) together with len (>3bit)")
|
||||
|
||||
cur_offset = 0
|
||||
decomposition_offsets = []
|
||||
for d in decomposition_set:
|
||||
decomposition_offsets.append(cur_offset)
|
||||
cur_offset += len(d)
|
||||
|
||||
def gen_header(decompositions, len_set):
|
||||
print("""\
|
||||
#include <stdint.h>
|
||||
|
||||
|
@ -80,19 +67,31 @@ struct DECOMP_SLICE
|
|||
print("};")
|
||||
print()
|
||||
|
||||
print("static const uint8_t decomp_lengths[NUM_DECOMP_LENGTHS] = {")
|
||||
print("extern const uint8_t decomp_lengths[NUM_DECOMP_LENGTHS];")
|
||||
print("extern const int32_t decomp_chars[NUM_DECOMPS];")
|
||||
print("extern const struct DECOMP_SLICE decomp_slices[NUM_DECOMPS];")
|
||||
print("extern const int32_t decomp_data[];")
|
||||
|
||||
def gen_source(decompositions, decomposition_set, decomposition_offsets, len_set):
|
||||
print("""\
|
||||
#ifndef CONFUSABLES_DATA
|
||||
#error "This file should only be included in `confusables.cpp`"
|
||||
#endif
|
||||
""")
|
||||
|
||||
print("const uint8_t decomp_lengths[NUM_DECOMP_LENGTHS] = {")
|
||||
for l in len_set:
|
||||
print("\t{},".format(l))
|
||||
print("};")
|
||||
print()
|
||||
|
||||
print("static const int32_t decomp_chars[NUM_DECOMPS] = {")
|
||||
print("const int32_t decomp_chars[NUM_DECOMPS] = {")
|
||||
for k in sorted(decompositions):
|
||||
print("\t0x{:x},".format(k))
|
||||
print("};")
|
||||
print()
|
||||
|
||||
print("static const struct DECOMP_SLICE decomp_slices[NUM_DECOMPS] = {")
|
||||
print("const struct DECOMP_SLICE decomp_slices[NUM_DECOMPS] = {")
|
||||
for k in sorted(decompositions):
|
||||
d = decompositions[k]
|
||||
i = decomposition_set.index(tuple(d))
|
||||
|
@ -101,11 +100,36 @@ struct DECOMP_SLICE
|
|||
print("};")
|
||||
print()
|
||||
|
||||
print("static const int32_t decomp_data[] = {")
|
||||
print("const int32_t decomp_data[] = {")
|
||||
for d in decomposition_set:
|
||||
for c in d:
|
||||
print("\t0x{:x},".format(c))
|
||||
print("};")
|
||||
|
||||
def main():
|
||||
decompositions = generate_decompositions()
|
||||
|
||||
# Deduplicate
|
||||
decomposition_set = sorted(set(tuple(x) for x in decompositions.values()))
|
||||
len_set = sorted(set(len(x) for x in decomposition_set))
|
||||
|
||||
if len(len_set) > 8:
|
||||
raise ValueError("Can't pack offset (13 bit) together with len (>3bit)")
|
||||
|
||||
cur_offset = 0
|
||||
decomposition_offsets = []
|
||||
for d in decomposition_set:
|
||||
decomposition_offsets.append(cur_offset)
|
||||
cur_offset += len(d)
|
||||
|
||||
header = "header" in sys.argv
|
||||
source = "source" in sys.argv
|
||||
|
||||
if header:
|
||||
gen_header(decompositions, len_set)
|
||||
elif source:
|
||||
gen_source(decompositions, decomposition_set, decomposition_offsets, len_set)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
||||
|
|
|
@ -4,17 +4,17 @@
|
|||
# - http://www.unicode.org/Public/<VERSION>/ucd/UnicodeData.txt
|
||||
#
|
||||
# If executed as a script, it will generate the contents of the files
|
||||
# `src/base/unicode/tolower_data.h`.
|
||||
# python3 scripts/generate_unicode_tolower.py header > `src/base/unicode/tolower_data.h`,
|
||||
# python3 scripts/generate_unicode_tolower.py source > `src/base/unicode/tolower_data.c`.
|
||||
|
||||
import unicode
|
||||
import sys
|
||||
|
||||
def generate_cases():
|
||||
ud = unicode.data()
|
||||
return [(unicode.unhex(u["Value"]), unicode.unhex(u["Simple_Lowercase_Mapping"])) for u in ud if u["Simple_Lowercase_Mapping"]]
|
||||
|
||||
def main():
|
||||
cases = generate_cases()
|
||||
|
||||
def gen_header(cases):
|
||||
print("""\
|
||||
#include <stdint.h>
|
||||
|
||||
|
@ -26,13 +26,32 @@ struct UPPER_LOWER
|
|||
|
||||
enum
|
||||
{{
|
||||
\tNUM_TOLOWER={},
|
||||
\tNUM_TOLOWER = {},
|
||||
}};
|
||||
|
||||
static const struct UPPER_LOWER tolower[NUM_TOLOWER] = {{""".format(len(cases)))
|
||||
extern const struct UPPER_LOWER tolowermap[];""".format(len(cases)))
|
||||
|
||||
def gen_source(cases):
|
||||
print("""\
|
||||
#ifndef TOLOWER_DATA
|
||||
#error "This file must only be included in `tolower.cpp`"
|
||||
#endif
|
||||
|
||||
const struct UPPER_LOWER tolowermap[] = {""")
|
||||
for upper_code, lower_code in cases:
|
||||
print("\t{{{}, {}}},".format(upper_code, lower_code))
|
||||
print("};")
|
||||
|
||||
def main():
|
||||
cases = generate_cases()
|
||||
|
||||
header = "header" in sys.argv
|
||||
source = "source" in sys.argv
|
||||
|
||||
if header:
|
||||
gen_header(cases)
|
||||
elif source:
|
||||
gen_source(cases)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
|
@ -1 +1 @@
|
|||
12.0.0
|
||||
15.0.0
|
||||
|
|
File diff suppressed because it is too large
Load diff
24183
src/base/unicode/confusables_data.c
Normal file
24183
src/base/unicode/confusables_data.c
Normal file
File diff suppressed because it is too large
Load diff
|
@ -9,7 +9,7 @@ struct DECOMP_SLICE
|
|||
enum
|
||||
{
|
||||
NUM_DECOMP_LENGTHS = 8,
|
||||
NUM_DECOMPS = 9606,
|
||||
NUM_DECOMPS = 9770,
|
||||
};
|
||||
|
||||
extern const uint8_t decomp_lengths[NUM_DECOMP_LENGTHS];
|
||||
|
|
File diff suppressed because it is too large
Load diff
1439
src/base/unicode/tolower_data.c
Normal file
1439
src/base/unicode/tolower_data.c
Normal file
File diff suppressed because it is too large
Load diff
|
@ -8,7 +8,7 @@ struct UPPER_LOWER
|
|||
|
||||
enum
|
||||
{
|
||||
NUM_TOLOWER = 1390,
|
||||
NUM_TOLOWER = 1433,
|
||||
};
|
||||
|
||||
extern const struct UPPER_LOWER tolowermap[];
|
||||
|
|
Loading…
Reference in a new issue