# Needs UnicodeData.txt in the current directory.
#
# It can be obtained from unicode.org:
# - http://www.unicode.org/Public/<VERSION>/ucd/UnicodeData.txt
#   (replace <VERSION> with the Unicode version to generate the table for)
#
# If executed as a script, it will generate the contents of the files
# `src/base/unicode/tolower_data.h` and `src/base/unicode/tolower_data.c`:
#   python3 scripts/generate_unicode_tolower.py header > src/base/unicode/tolower_data.h
#   python3 scripts/generate_unicode_tolower.py source > src/base/unicode/tolower_data.c

import sys

import unicode


def generate_cases():
    """Collect the (upper, lower) code point pairs of the lowercase table.

    Every record returned by ``unicode.data()`` that has a non-empty
    ``Simple_Lowercase_Mapping`` field contributes one pair; records without
    a lowercase mapping are skipped.

    Returns:
        A list of ``(upper, lower)`` tuples, in the order the records are
        yielded by ``unicode.data()``. Both members are the result of
        ``unicode.unhex`` (presumably the hex field parsed to an int --
        confirm against the ``unicode`` helper module).
    """
    return [
        (
            unicode.unhex(record["Value"]),
            unicode.unhex(record["Simple_Lowercase_Mapping"]),
        )
        for record in unicode.data()
        if record["Simple_Lowercase_Mapping"]
    ]


def gen_header(cases):
    """Print the C header declaring the lowercase mapping table.

    Args:
        cases: Sized collection of mapping pairs; only its length is used,
            to emit the ``NUM_TOLOWER`` constant.
    """
    # Doubled braces are str.format escapes for literal C braces; the single
    # `{}` is replaced by the number of table entries.
    # <stdint.h> is required because the struct members are int32_t.
    print("""\
#include <stdint.h>

struct UPPER_LOWER
{{
\tint32_t upper;
\tint32_t lower;
}};

enum
{{
\tNUM_TOLOWER = {},
}};

extern const struct UPPER_LOWER tolowermap[];""".format(len(cases)))


def gen_source(cases):
    """Print the C source defining the lowercase mapping table.

    Args:
        cases: Iterable of ``(upper, lower)`` pairs; one table row is
            emitted per pair, in iteration order.
    """
    # This literal is not passed through str.format, so its braces are
    # written singly.
    print("""\
#ifndef TOLOWER_DATA
#error "This file must only be included in `tolower.cpp`"
#endif

const struct UPPER_LOWER tolowermap[] = {""")
    for upper_code, lower_code in cases:
        print("\t{{{}, {}}},".format(upper_code, lower_code))
    print("};")


def main():
    """Write the header or the source to stdout, as selected on the command line.

    ``header`` takes precedence when both ``header`` and ``source`` are
    present in ``sys.argv``.

    Raises:
        SystemExit: If neither ``header`` nor ``source`` was given. Exiting
            with a usage message avoids silently producing an empty file
            when the output is redirected.
    """
    if "header" in sys.argv:
        emit = gen_header
    elif "source" in sys.argv:
        emit = gen_source
    else:
        # Validate the mode before parsing UnicodeData.txt so a bad
        # invocation fails fast; sys.exit(str) prints to stderr, exit code 1.
        sys.exit("usage: {} header|source".format(sys.argv[0]))

    emit(generate_cases())


if __name__ == '__main__':
    main()