2019-01-07 22:49:20 +00:00
|
|
|
# Needs UnicodeData.txt in the current directory.
|
|
|
|
#
|
|
|
|
# It can be obtained from unicode.org:
|
|
|
|
# - http://www.unicode.org/Public/<VERSION>/ucd/UnicodeData.txt
|
|
|
|
#
|
|
|
|
# If executed as a script, it prints the contents of a generated file to
# stdout; pass `header` or `source` to choose which one and redirect the
# output to the matching file:
#
#   python3 scripts/generate_unicode_tolower.py header > src/base/unicode/tolower_data.h
#   python3 scripts/generate_unicode_tolower.py source > src/base/unicode/tolower_data.c
|
2019-01-07 22:49:20 +00:00
|
|
|
|
|
|
|
import unicode
|
2022-05-06 18:31:24 +00:00
|
|
|
import sys
|
2019-01-07 22:49:20 +00:00
|
|
|
|
|
|
|
def generate_cases():
    """Collect (code point, lowercase code point) pairs from the Unicode data.

    Iterates over the records returned by ``unicode.data()`` and keeps only
    those whose ``Simple_Lowercase_Mapping`` field is non-empty.

    Returns:
        A list of 2-tuples ``(value, lowercase)`` where both elements are the
        results of ``unicode.unhex`` applied to the record's ``Value`` and
        ``Simple_Lowercase_Mapping`` fields (presumably hex strings decoded
        to integers -- confirm against the ``unicode`` helper module).
    """
    pairs = []
    for record in unicode.data():
        # Records without a simple lowercase mapping are skipped.
        if not record["Simple_Lowercase_Mapping"]:
            continue
        code_point = unicode.unhex(record["Value"])
        lowered = unicode.unhex(record["Simple_Lowercase_Mapping"])
        pairs.append((code_point, lowered))
    return pairs
|
2019-01-07 22:49:20 +00:00
|
|
|
|
2022-05-06 18:31:24 +00:00
|
|
|
def gen_header(cases):
    """Print the C header that declares the lowercase mapping table.

    The output declares ``struct UPPER_LOWER``, an anonymous enum whose
    ``NUM_TOLOWER`` constant equals ``len(cases)``, and an ``extern``
    declaration of the ``tolowermap`` array.

    Args:
        cases: Sized collection of mapping entries; only its length is used.
    """
    # Doubled braces are literal braces for str.format; the single `{}` is
    # the placeholder for the number of entries.
    template = """\
#include <stdint.h>

struct UPPER_LOWER
{{
\tint32_t upper;
\tint32_t lower;
}};

enum
{{
\tNUM_TOLOWER = {},
}};

extern const struct UPPER_LOWER tolowermap[];"""
    entry_count = len(cases)
    print(template.format(entry_count))
|
|
|
|
|
|
|
|
def gen_source(cases):
    """Print the C source that defines the ``tolowermap`` table.

    The output starts with a preprocessor guard that raises an ``#error``
    unless ``TOLOWER_DATA`` is defined, followed by one ``{upper, lower},``
    initializer line per entry and the closing ``};``.

    Args:
        cases: Iterable of 2-element ``(upper_code, lower_code)`` entries,
            emitted in iteration order.
    """
    preamble = """\
#ifndef TOLOWER_DATA
#error "This file must only be included in `tolower.cpp`"
#endif

const struct UPPER_LOWER tolowermap[] = {"""
    output_lines = [preamble]
    for upper, lower in cases:
        # Doubled braces yield literal braces around the pair.
        output_lines.append("\t{{{}, {}}},".format(upper, lower))
    output_lines.append("};")
    print("\n".join(output_lines))
|
2019-01-07 22:49:20 +00:00
|
|
|
|
2022-05-06 18:31:24 +00:00
|
|
|
def main():
    """Print the generated header or source, chosen by command-line argument.

    Passing ``header`` prints the header via ``gen_header``; otherwise
    passing ``source`` prints the table definition via ``gen_source``.
    If both are given, ``header`` wins.

    Raises:
        SystemExit: With a usage message (non-zero exit status) when neither
            ``header`` nor ``source`` is passed.
    """
    # Only real arguments select the mode; argv[0] is the script path.
    args = sys.argv[1:]
    header = "header" in args
    source = "source" in args

    if not (header or source):
        # The output is normally redirected into a generated file, so
        # printing nothing and exiting successfully would silently leave
        # that file empty. Fail loudly instead (message goes to stderr).
        sys.exit("usage: {} header|source".format(sys.argv[0]))

    # Load the Unicode data only once the arguments are known to be valid.
    cases = generate_cases()

    if header:
        gen_header(cases)
    else:
        gen_source(cases)
|
|
|
|
|
2019-01-07 22:49:20 +00:00
|
|
|
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|