ddnet/scripts/languages/twlang.py

import os
import re
from collections import OrderedDict

class LanguageDecodeError(Exception):
	def __init__(self, message, filename, line):
		error = f"File \"{filename}\", line {line+1}: {message}"
		super().__init__(error)


# Taken from https://stackoverflow.com/questions/30011379/how-can-i-parse-a-c-format-string-in-python
cfmt = r'''\
(                                  # start of capture group 1
%                                  # literal "%"
(?:                                # first option
(?:[-+0 #]{0,5})                   # optional flags
(?:\d+|\*)?                        # width
(?:\.(?:\d+|\*))?                  # precision
(?:h|l|ll|w|I|I32|I64)?            # size
[cCdiouxXeEfgGaAnpsSZ]             # type
) |                                # OR
%%)                                # literal "%%"
'''


def decode(fileobj, elements_per_key):
	data = {}
	current_context = ""
	current_key = None
	index = -1
	for index, line in enumerate(fileobj):
		line = line.encode("utf-8").decode("utf-8-sig")
		line = line[:-1]
		if line and line[-1] == "\r":
			line = line[:-1]
		if not line or line[:1] == "#":
			current_context = ""
			continue

		if line[0] == "[":
			if line[-1] != "]":
				raise LanguageDecodeError("Invalid context string", fileobj.name, index)
			current_context = line[1:-1]
		elif line[:3] == "== ":
			if len(data[current_key]) >= 1+elements_per_key:
				raise LanguageDecodeError("Wrong number of elements per key", fileobj.name, index)
			if current_key:
				original = current_key[0] # pylint: disable=unsubscriptable-object
				translation = line[3:]
				if translation and [m.group(1) for m in re.finditer(cfmt, original, flags=re.X)] != [m.group(1) for m in re.finditer(cfmt, translation, flags=re.X)]:
					raise LanguageDecodeError("Non-matching formatting string", fileobj.name, index)
				data[current_key].extend([translation])
			else:
				raise LanguageDecodeError("Element before key given", fileobj.name, index)
		else:
			if current_key:
				if len(data[current_key]) != 1+elements_per_key:
					raise LanguageDecodeError("Wrong number of elements per key", fileobj.name, index)
				data[current_key].append(index)
			if line in data:
				raise LanguageDecodeError("Key defined multiple times: " + line, fileobj.name, index)
			data[(line, current_context)] = [index - 1 if current_context else index]
			current_key = (line, current_context)
	if len(data[current_key]) != 1+elements_per_key:
		raise LanguageDecodeError("Wrong number of elements per key", fileobj.name, index)
	data[current_key].append(index+1)
	new_data = {}
	for key, value in data.items():
		if key[0]:
			new_data[key] = value
	return new_data


def check_file(path):
	with open(path, encoding="utf-8") as fileobj:
		matches = re.findall(r"(Localize|Localizable)\s*\(\s*\"([^\"]+)\"(?:\s*,\s*\"([^\"]+)\")?\s*\)", fileobj.read())
	return matches


def check_folder(path):
	englishlist = OrderedDict()
	for path2, dirs, files in os.walk(path):
		dirs.sort()
		for f in sorted(files):
			if not any(f.endswith(x) for x in [".cpp", ".c", ".h"]):
				continue
			for sentence in check_file(os.path.join(path2, f)):
				englishlist[sentence[1:]] = None
	return englishlist


def languages():
	with open("data/languages/index.txt", encoding="utf-8") as f:
		index = decode(f, 3)
	langs = {"data/languages/"+key[0]+".txt" : [key[0]]+elements for key, elements in index.items()}
	return langs


def translations(filename):
	with open(filename, encoding="utf-8") as f:
		return decode(f, 1)


def localizes():
	englishlist = list(check_folder("src"))
	return englishlist