Validate language files for … and non-matching formatters

This commit is contained in:
furo 2024-08-25 00:40:12 +02:00
parent be2e49e1f6
commit 728bb9777f
3 changed files with 56 additions and 19 deletions

View file

@ -40,6 +40,8 @@ jobs:
run: scripts/fix_style.py --dry-run
- name: Check header guards
run: scripts/check_header_guards.py
- name: Validate Languages
run: scripts/languages/validate.py
- name: Check languages
run: scripts/languages/update_all.py
- name: Check dilated images

View file

@ -7,22 +7,6 @@ class LanguageDecodeError(Exception):
error = f"File \"{filename}\", line {line+1}: {message}"
super().__init__(error)
# Taken from https://stackoverflow.com/questions/30011379/how-can-i-parse-a-c-format-string-in-python
# Verbose-mode (re.X) pattern for printf-style conversion specifiers; capture
# group 1 holds the whole specifier, or "%%" for an escaped percent sign.
# NOTE(review): because this is a raw string, the backslash right after the
# opening quotes is kept, so the pattern begins with an escaped newline. Under
# re.X an escaped whitespace character is matched literally instead of being
# ignored -- confirm that requiring a leading newline is intended.
cfmt = r'''\
( # start of capture group 1
% # literal "%"
(?: # first option
(?:[-+0 #]{0,5}) # optional flags
(?:\d+|\*)? # width
(?:\.(?:\d+|\*))? # precision
(?:h|l|ll|w|I|I32|I64)? # size
[cCdiouxXeEfgGaAnpsSZ] # type
) | # OR
%%) # literal "%%"
'''
def decode(fileobj, elements_per_key):
data = {}
current_context = ""
@ -45,10 +29,7 @@ def decode(fileobj, elements_per_key):
if len(data[current_key]) >= 1+elements_per_key:
raise LanguageDecodeError("Wrong number of elements per key", fileobj.name, index)
if current_key:
original = current_key[0] # pylint: disable=unsubscriptable-object
translation = line[3:]
if translation and [m.group(1) for m in re.finditer(cfmt, original, flags=re.X)] != [m.group(1) for m in re.finditer(cfmt, translation, flags=re.X)]:
raise LanguageDecodeError("Non-matching formatting string", fileobj.name, index)
data[current_key].extend([translation])
else:
raise LanguageDecodeError("Element before key given", fileobj.name, index)

54
scripts/languages/validate.py Executable file
View file

@ -0,0 +1,54 @@
#!/usr/bin/env python3
import os
import sys
import re
import twlang
# Work from the directory two levels above this script's own directory.
# abspath() guards against a relative or bare __file__ (e.g. "validate.py"),
# whose dirname would be "" and would turn the target into "/../..".
os.chdir(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", ".."))
# Verbose-mode (re.X) pattern for printf-style conversion specifiers.
# Capture group 1 is the whole specifier, or "%%" for an escaped percent sign.
# Taken from https://stackoverflow.com/questions/30011379/how-can-i-parse-a-c-format-string-in-python
cfmt = r'''
( # start of capture group 1
% # literal "%"
(?: # first option
(?:[-+0 #]{0,5}) # optional flags
(?:\d+|\*)? # width
(?:\.(?:\d+|\*))? # precision
(?:h|l|ll|w|I|I32|I64)? # size
[cCdiouxXeEfgGaAnpsSZ] # type
) | # OR
%%) # literal "%%"
'''
# Running count of validation failures reported so far.
total_errors = 0


def print_validation_error(error, filename, error_line):
    """Print one validation failure and add it to the running total.

    The offending text is taken from the module-level name ``translated``,
    which must be bound before this function is called.

    Args:
        error: Short description of the problem.
        filename: Name printed as the location of the problem.
        error_line: Zero-based line index; printed as ``error_line + 1``.
    """
    global total_errors
    print(f"Invalid: {translated}")
    print(f"- {error} in {filename}:{error_line + 1}\n")
    total_errors = total_errors + 1
# Languages to validate: the ones named on the command line, or everything
# twlang.languages() returns when no arguments are given.
if len(sys.argv) > 1:
    languages = sys.argv[1:]
else:
    languages = twlang.languages()
# NOTE(review): `local` is never read below; the call is kept so that
# twlang.localizes() still runs exactly as before -- confirm whether it is needed.
local = twlang.localizes()

for language in languages:
    translations = twlang.translations(language)

    # The loop variable must stay named `translated` and live at module level:
    # print_validation_error() reads it as a global to show the offending text.
    for (english, _), (line, translated, _) in translations.items():
        # Entries without a translation have nothing to validate.
        if not translated:
            continue

        # Validate c format strings. Strings that move the formatters are not validated.
        if re.findall(cfmt, english, flags=re.X) != re.findall(cfmt, translated, flags=re.X) and "1$" not in translated:
            print_validation_error("Non-matching formatting", language, line)

        # Check for ellipsis: the English text uses the single "…" character
        # (U+2026) but the translation spells it as three dots. The escape is
        # used in the test so the check survives encoding mishaps; an empty
        # string here would match every entry.
        if "\u2026" in english and "..." in translated:
            print_validation_error("Usage of ... instead of the … character", language, line)

# Summarise and signal failure through the exit status.
if total_errors:
    print(f"Found {total_errors} {'error' if total_errors == 1 else 'errors'} ")
    sys.exit(1)