ddnet/scripts/languages/twlang.py
Robert Müller cddbc78592 Add RFC 3066 language tags to language index
Add RFC 3066 language tags to language index in order to identify the language to select when first starting the game based on the user's locale.

Multiple language tags can be specified per entry separated with a semicolon, as some languages have different tags that do not share a unique prefix (e.g., Simplified and Traditional Chinese).
2023-03-21 21:17:40 +01:00

107 lines
3.5 KiB
Python

import os
import re
from collections import OrderedDict
class LanguageDecodeError(Exception):
    """Error raised when a language file cannot be decoded.

    The exception text has the form ``File "<filename>", line <n>: <message>``
    where ``<n>`` is the 1-based line number.
    """

    def __init__(self, message, filename, line):
        """Build the error text.

        message  -- description of the problem
        filename -- name of the file being decoded
        line     -- 0-based line index; reported as ``line + 1``
        """
        # Callers pass the enumerate() index, humans expect 1-based lines.
        line_number = line + 1
        super().__init__(f"File \"{filename}\", line {line_number}: {message}")
# Verbose-mode (re.X) regular expression matching C printf-style format
# specifiers such as "%d", "%5.2f" or "%lld", as well as the escaped "%%".
# Capture group 1 holds the complete specifier.
# Taken from https://stackoverflow.com/questions/30011379/how-can-i-parse-a-c-format-string-in-python
#
# NOTE: the raw string must not begin with a backslash before the first
# newline. In verbose mode a backslash-escaped newline is a *literal* newline,
# so the pattern would only match specifiers directly preceded by "\n" and
# would never match anything in a single-line string.
cfmt = r'''
(                        # start of capture group 1
%                        # literal "%"
(?:                      # first option
(?:[-+0 #]{0,5})         # optional flags
(?:\d+|\*)?              # width
(?:\.(?:\d+|\*))?        # precision
(?:h|l|ll|w|I|I32|I64)?  # size
[cCdiouxXeEfgGaAnpsSZ]   # type
) |                      # OR
%%)                      # literal "%%"
'''
def _format_specifiers(text):
    """Return the format specifiers matched by ``cfmt`` in text, in order."""
    return re.findall(cfmt, text, flags=re.X)


def decode(fileobj, elements_per_key):
    """Parse a language file into a dictionary.

    The input is read line by line:

    * empty lines and lines starting with ``#`` reset the current context,
    * ``[context]`` sets the context for the next key,
    * ``== value`` adds an element to the current key,
    * any other line starts a new key.

    fileobj          -- iterable of text lines; its ``name`` attribute (if
                        any) is used in error messages
    elements_per_key -- exact number of ``== `` elements every key must have

    Returns a dict mapping ``(key, context)`` to
    ``[start_index, element_1, ..., element_n, end_index]`` where the indices
    are 0-based line indices. An input without any key yields ``{}``.

    Raises LanguageDecodeError for an unterminated context, an element given
    before any key, a wrong number of elements, a key that is defined twice
    in the same context, or a non-empty element whose format specifiers
    differ from those of its key.
    """
    # Not every iterable of lines has a name (e.g. io.StringIO, lists).
    filename = getattr(fileobj, "name", "<unknown>")
    expected_length = 1 + elements_per_key  # start index + elements
    data = {}
    current_context = ""
    current_key = None
    index = -1
    for index, line in enumerate(fileobj):
        # Drop a byte order mark if present.
        line = line.encode("utf-8").decode("utf-8-sig")
        # Only strip real line terminators: the last line of a file may not
        # end with a newline and must not lose its final character.
        if line.endswith("\n"):
            line = line[:-1]
        if line.endswith("\r"):
            line = line[:-1]
        if not line or line.startswith("#"):
            current_context = ""
            continue

        if line.startswith("["):
            if not line.endswith("]"):
                raise LanguageDecodeError("Invalid context string", filename, index)
            current_context = line[1:-1]
        elif line.startswith("== "):
            # Check for a key first so that a stray element is reported as a
            # decode error instead of failing with a KeyError.
            if current_key is None:
                raise LanguageDecodeError("Element before key given", filename, index)
            if len(data[current_key]) >= expected_length:
                raise LanguageDecodeError("Wrong number of elements per key", filename, index)
            original = current_key[0] # pylint: disable=unsubscriptable-object
            translation = line[3:]
            # Empty elements are exempt from the format check.
            if translation and _format_specifiers(original) != _format_specifiers(translation):
                raise LanguageDecodeError("Non-matching formatting string", filename, index)
            data[current_key].append(translation)
        else:
            if current_key is not None:
                if len(data[current_key]) != expected_length:
                    raise LanguageDecodeError("Wrong number of elements per key", filename, index)
                # Close the previous entry at the line where the new key starts.
                data[current_key].append(index)
            new_key = (line, current_context)
            # Entries are stored under (key, context), so that is what has to
            # be looked up to detect duplicates.
            if new_key in data:
                raise LanguageDecodeError("Key defined multiple times: " + line, filename, index)
            # An entry with a context starts at the preceding line.
            data[new_key] = [index - 1 if current_context else index]
            current_key = new_key

    if current_key is None:
        # No key at all (e.g. empty file): nothing to finalize.
        return {}
    if len(data[current_key]) != expected_length:
        raise LanguageDecodeError("Wrong number of elements per key", filename, index)
    data[current_key].append(index + 1)
    return {key: value for key, value in data.items() if key[0]}
def check_file(path):
    """Find all ``Localize(...)`` / ``Localizable(...)`` calls in a file.

    path -- path of the UTF-8 encoded file to scan

    Returns a list of ``(function, text, context)`` tuples in order of
    appearance; ``context`` is the empty string when the call has no second
    string literal argument.
    """
    pattern = r"(Localize|Localizable)\s*\(\s*\"([^\"]+)\"(?:\s*,\s*\"([^\"]+)\")?\s*\)"
    with open(path, encoding="utf-8") as source:
        contents = source.read()
    return re.findall(pattern, contents)
def check_folder(path):
    """Collect the localizable strings of all C/C++ files below a folder.

    path -- root folder that is walked recursively

    Files ending in ``.cpp``, ``.c`` or ``.h`` are scanned with check_file.
    Folders and files are visited in sorted order.

    Returns an OrderedDict whose keys are ``(text, context)`` tuples in order
    of first appearance; all values are None.
    """
    source_suffixes = (".cpp", ".c", ".h")
    found = OrderedDict()
    for folder, subfolders, filenames in os.walk(path):
        # Sorting in place makes os.walk descend in sorted order as well.
        subfolders.sort()
        for filename in sorted(filenames):
            if filename.endswith(source_suffixes):
                for match in check_file(os.path.join(folder, filename)):
                    # Drop the function name, keep (text, context).
                    found[match[1:]] = None
    return found
def languages():
    """Read the language index from ``data/languages/index.txt``.

    The index is decoded with three elements per key.

    Returns a dict mapping the path of each language file
    (``data/languages/<name>.txt``) to a list consisting of the language name
    followed by the list decode produced for that entry.
    """
    with open("data/languages/index.txt", encoding="utf-8") as index_file:
        entries = decode(index_file, 3)
    language_files = {}
    for key, elements in entries.items():
        name = key[0]
        language_files["data/languages/" + name + ".txt"] = [name] + elements
    return language_files
def translations(filename):
    """Decode a language file that has exactly one element per key.

    filename -- path of the UTF-8 encoded language file

    Returns the dictionary produced by decode.
    """
    with open(filename, encoding="utf-8") as language_file:
        entries = decode(language_file, 1)
    return entries
def localizes():
    """Return the keys check_folder collects from the ``src`` folder as a list."""
    return list(check_folder("src").keys())