ddnet/scripts/languages/twlang.py

import os
import re
from collections import OrderedDict

class LanguageDecodeError(Exception):
	"""Error raised when a language file cannot be decoded.

	The exception text names the offending file and line so it can be
	located directly.
	"""

	def __init__(self, message, filename, line):
		# `line` is a 0-based index; it is reported 1-based in the message.
		super().__init__(f"File \"{filename}\", line {line+1}: {message}")


# Verbose (re.X) regular expression matching a single printf-style conversion
# specification or a literal "%%". Capture group 1 holds the matched text.
# Taken from https://stackoverflow.com/questions/30011379/how-can-i-parse-a-c-format-string-in-python
#
# The literal must NOT begin with a backslash followed by a newline: inside a
# raw string that is not a line continuation, and under re.X an escaped newline
# is a literal newline, so the pattern would only match specifiers that are
# directly preceded by a line break. A plain leading newline is ignored by re.X.
cfmt = r'''
(                                  # start of capture group 1
%                                  # literal "%"
(?:                                # first option
(?:[-+0 #]{0,5})                   # optional flags
(?:\d+|\*)?                        # width
(?:\.(?:\d+|\*))?                  # precision
(?:h|l|ll|w|I|I32|I64)?            # size
[cCdiouxXeEfgGaAnpsSZ]             # type
) |                                # OR
%%)                                # literal "%%"
'''


def decode(fileobj, elements_per_key):
	"""Parse a language file into a dict keyed by (text, context).

	Line kinds, as handled below:
	- empty lines and lines starting with "#" are skipped and reset the
	  current context to "";
	- "[context]" sets the context used for the keys that follow;
	- "== value" adds one element to the most recently started key;
	- any other line starts a new key.

	Each value in the result is a list
	[start, element_1, ..., element_n, end] with n == elements_per_key.
	start is the 0-based line index of the key line (one less when a context
	is active, so that it points at the context line). end is the line index
	of the next key line, or the number of lines read for the last key.

	fileobj is iterated line by line in text mode; its name attribute is only
	read when building an error message. A file without any key yields {}.

	Raises LanguageDecodeError for a malformed context line, an element
	given before any key, a wrong number of elements for a key, a non-empty
	element whose format specifiers differ from those of its key, or a
	(text, context) pair that is defined more than once.
	"""
	data = {}
	current_context = ""
	current_key = None
	index = -1
	for index, line in enumerate(fileobj):
		# Round-trip through utf-8-sig to drop a leading byte order mark.
		line = line.encode("utf-8").decode("utf-8-sig")
		# Only strip a terminator that is really there: the last line of a
		# file may lack one and must not lose its final character.
		if line.endswith("\n"):
			line = line[:-1]
		if line.endswith("\r"):
			line = line[:-1]
		if not line or line.startswith("#"):
			current_context = ""
			continue

		if line.startswith("["):
			if not line.endswith("]"):
				raise LanguageDecodeError("Invalid context string", fileobj.name, index)
			current_context = line[1:-1]
		elif line.startswith("== "):
			# Check for a missing key first; looking up data[None] would
			# otherwise fail with a bare KeyError.
			if current_key is None:
				raise LanguageDecodeError("Element before key given", fileobj.name, index)
			if len(data[current_key]) >= 1+elements_per_key:
				raise LanguageDecodeError("Wrong number of elements per key", fileobj.name, index)
			original = current_key[0] # pylint: disable=unsubscriptable-object
			translation = line[3:]
			# An empty element is allowed; otherwise the format specifiers
			# must match those of the key, in the same order.
			if translation and [m.group(1) for m in re.finditer(cfmt, original, flags=re.X)] != [m.group(1) for m in re.finditer(cfmt, translation, flags=re.X)]:
				raise LanguageDecodeError("Non-matching formatting string", fileobj.name, index)
			data[current_key].append(translation)
		else:
			if current_key is not None:
				# Close the previous entry: it must be complete, and it ends
				# where this key line begins.
				if len(data[current_key]) != 1+elements_per_key:
					raise LanguageDecodeError("Wrong number of elements per key", fileobj.name, index)
				data[current_key].append(index)
			key = (line, current_context)
			# Entries are stored under (text, context), so the duplicate test
			# has to use that tuple; the bare text never matches a stored key.
			if key in data:
				raise LanguageDecodeError("Key defined multiple times: " + line, fileobj.name, index)
			# The context line sits directly above the key line.
			data[key] = [index - 1 if current_context else index]
			current_key = key
	if current_key is None:
		# Empty file, or only comments / blank lines / context lines.
		return {}
	if len(data[current_key]) != 1+elements_per_key:
		raise LanguageDecodeError("Wrong number of elements per key", fileobj.name, index)
	data[current_key].append(index+1)
	return {key: value for key, value in data.items() if key[0]}


def check_file(path):
	"""Return all Localize(...) / Localizable(...) calls found in the file at path.

	The result is a list of 3-tuples (function name, string, context), in
	order of appearance. context is "" when the call has no second string
	argument.
	"""
	with open(path, encoding="utf-8") as fileobj:
		contents = fileobj.read()
	return re.findall(r"(Localize|Localizable)\s*\(\s*\"([^\"]+)\"(?:\s*,\s*\"([^\"]+)\")?\s*\)", contents)


def check_folder(path):
	"""Collect the localizable strings of every C/C++ source file below path.

	Files ending in .cpp, .c or .h are scanned with check_file. The result is
	an OrderedDict whose keys are (string, context) tuples in first-seen
	order and whose values are all None. Directories and files are visited
	in sorted order so the result does not depend on the order in which the
	file system lists them.
	"""
	source_suffixes = (".cpp", ".c", ".h")
	found = OrderedDict()
	for root, subdirs, filenames in os.walk(path):
		# Sorting in place makes os.walk descend in alphabetical order.
		subdirs.sort()
		for filename in sorted(filenames):
			if filename.endswith(source_suffixes):
				for match in check_file(os.path.join(root, filename)):
					# Drop the function name, keep (string, context).
					found[match[1:]] = None
	return found


def languages():
	"""Read data/languages/index.txt and map language file paths to their index entries.

	The index is decoded with three elements per key. Each result key is
	"data/languages/<name>.txt" and each value is [<name>] followed by the
	list that decode produced for that index entry.
	"""
	with open("data/languages/index.txt", encoding="utf-8") as index_file:
		entries = decode(index_file, 3)
	result = {}
	for key, elements in entries.items():
		name = key[0]
		result["data/languages/"+name+".txt"] = [name]+elements
	return result


def translations(filename):
	"""Decode the language file filename, expecting one element per key."""
	with open(filename, encoding="utf-8") as translation_file:
		entries = decode(translation_file, 1)
	return entries


def localizes():
	"""Return the (string, context) keys found by check_folder under "src" as a list."""
	return list(check_folder("src"))
added language scripts 2015-06-30 16:57:25 +00:00			`import os`
			`import re`
Always recreate the language files in same order Alphabetically as the files are located in directory structure. Keeps the language files more stable 2020-06-28 08:26:02 +00:00			`from collections import OrderedDict`
added language scripts 2015-06-30 16:57:25 +00:00
			`class LanguageDecodeError(Exception):`
Add pylint and fix occurences 2020-12-02 14:22:26 +00:00			`def __init__(self, message, filename, line):`
[WIP] Require Python 3.6 (f-strings) and fix pylints So far only done scripts directory, will do the rest if this is considered good 2022-06-12 11:15:02 +00:00			`error = f"File \"{filename}\", line {line+1}: {message}"`
Add pylint and fix occurences 2020-12-02 14:22:26 +00:00			`super().__init__(error)`
added language scripts 2015-06-30 16:57:25 +00:00

Check translations in github, check formatting strings, fix 2020-11-11 21:48:23 +00:00			`# Taken from https://stackoverflow.com/questions/30011379/how-can-i-parse-a-c-format-string-in-python`
Add pylint and fix occurences 2020-12-02 14:22:26 +00:00			`cfmt = r'''\`
Check translations in github, check formatting strings, fix 2020-11-11 21:48:23 +00:00			`( # start of capture group 1`
			`% # literal "%"`
			`(?: # first option`
			`(?:[-+0 #]{0,5}) # optional flags`
			`(?:\d+\|\*)? # width`
			`(?:\.(?:\d+\|\*))? # precision`
			`(?:h\|l\|ll\|w\|I\|I32\|I64)? # size`
			`[cCdiouxXeEfgGaAnpsSZ] # type`
			`) \| # OR`
			`%%) # literal "%%"`
			`'''`


added language scripts 2015-06-30 16:57:25 +00:00			`def decode(fileobj, elements_per_key):`
Add pylint and fix occurences 2020-12-02 14:22:26 +00:00			`data = {}`
			`current_context = ""`
			`current_key = None`
			`index = -1`
			`for index, line in enumerate(fileobj):`
			`line = line.encode("utf-8").decode("utf-8-sig")`
			`line = line[:-1]`
			`if line and line[-1] == "\r":`
			`line = line[:-1]`
			`if not line or line[:1] == "#":`
			`current_context = ""`
			`continue`

			`if line[0] == "[":`
			`if line[-1] != "]":`
			`raise LanguageDecodeError("Invalid context string", fileobj.name, index)`
			`current_context = line[1:-1]`
			`elif line[:3] == "== ":`
			`if len(data[current_key]) >= 1+elements_per_key:`
			`raise LanguageDecodeError("Wrong number of elements per key", fileobj.name, index)`
			`if current_key:`
[WIP] Require Python 3.6 (f-strings) and fix pylints So far only done scripts directory, will do the rest if this is considered good 2022-06-12 11:15:02 +00:00			`original = current_key[0] # pylint: disable=unsubscriptable-object`
Add pylint and fix occurences 2020-12-02 14:22:26 +00:00			`translation = line[3:]`
			`if translation and [m.group(1) for m in re.finditer(cfmt, original, flags=re.X)] != [m.group(1) for m in re.finditer(cfmt, translation, flags=re.X)]:`
			`raise LanguageDecodeError("Non-matching formatting string", fileobj.name, index)`
			`data[current_key].extend([translation])`
			`else:`
			`raise LanguageDecodeError("Element before key given", fileobj.name, index)`
			`else:`
			`if current_key:`
			`if len(data[current_key]) != 1+elements_per_key:`
			`raise LanguageDecodeError("Wrong number of elements per key", fileobj.name, index)`
			`data[current_key].append(index)`
			`if line in data:`
			`raise LanguageDecodeError("Key defined multiple times: " + line, fileobj.name, index)`
Fix twlang decode to handle context lines properly Context line is always one line above. If we don't do this we end up with empty [Graphics] context lines in the translation files. 2023-02-25 09:35:29 +00:00			`data[(line, current_context)] = [index - 1 if current_context else index]`
Add pylint and fix occurences 2020-12-02 14:22:26 +00:00			`current_key = (line, current_context)`
			`if len(data[current_key]) != 1+elements_per_key:`
			`raise LanguageDecodeError("Wrong number of elements per key", fileobj.name, index)`
			`data[current_key].append(index+1)`
Fix twlang decode to handle context lines properly Context line is always one line above. If we don't do this we end up with empty [Graphics] context lines in the translation files. 2023-02-25 09:35:29 +00:00			`new_data = {}`
			`for key, value in data.items():`
			`if key[0]:`
			`new_data[key] = value`
			`return new_data`
added language scripts 2015-06-30 16:57:25 +00:00

			`def check_file(path):`
[WIP] Require Python 3.6 (f-strings) and fix pylints So far only done scripts directory, will do the rest if this is considered good 2022-06-12 11:15:02 +00:00			`with open(path, encoding="utf-8") as fileobj:`
Add Localizable hint 2022-12-09 15:15:19 +00:00			`matches = re.findall(r"(Localize\|Localizable)\s\(\s\"([^\"]+)\"(?:\s,\s\"([^\"]+)\")?\s*\)", fileobj.read())`
Add pylint and fix occurences 2020-12-02 14:22:26 +00:00			`return matches`
added language scripts 2015-06-30 16:57:25 +00:00

			`def check_folder(path):`
Add pylint and fix occurences 2020-12-02 14:22:26 +00:00			`englishlist = OrderedDict()`
			`for path2, dirs, files in os.walk(path):`
			`dirs.sort()`
			`for f in sorted(files):`
Fix twlang decode to handle context lines properly Context line is always one line above. If we don't do this we end up with empty [Graphics] context lines in the translation files. 2023-02-25 09:35:29 +00:00			`if not any(f.endswith(x) for x in [".cpp", ".c", ".h"]):`
Add pylint and fix occurences 2020-12-02 14:22:26 +00:00			`continue`
			`for sentence in check_file(os.path.join(path2, f)):`
Add Localizable hint 2022-12-09 15:15:19 +00:00			`englishlist[sentence[1:]] = None`
Add pylint and fix occurences 2020-12-02 14:22:26 +00:00			`return englishlist`
added language scripts 2015-06-30 16:57:25 +00:00

			`def languages():`
[WIP] Require Python 3.6 (f-strings) and fix pylints So far only done scripts directory, will do the rest if this is considered good 2022-06-12 11:15:02 +00:00			`with open("data/languages/index.txt", encoding="utf-8") as f:`
Add RFC 3066 language tags to language index Add RFC 3066 language tags to language index in order to identify the language to select when first starting the game based on the user's locale. Multiple language tags can be specified per entry separated with a semicolon, as some languages have different tags that do not share a unique prefix (e.g., Simplified and Traditional Chinese). 2023-03-20 20:45:43 +00:00			`index = decode(f, 3)`
Add pylint and fix occurences 2020-12-02 14:22:26 +00:00			`langs = {"data/languages/"+key[0]+".txt" : [key[0]]+elements for key, elements in index.items()}`
			`return langs`
added language scripts 2015-06-30 16:57:25 +00:00

			`def translations(filename):`
[WIP] Require Python 3.6 (f-strings) and fix pylints So far only done scripts directory, will do the rest if this is considered good 2022-06-12 11:15:02 +00:00			`with open(filename, encoding="utf-8") as f:`
			`return decode(f, 1)`
added language scripts 2015-06-30 16:57:25 +00:00

			`def localizes():`
Add pylint and fix occurences 2020-12-02 14:22:26 +00:00			`englishlist = list(check_folder("src"))`
			`return englishlist`