diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml
index e0bd0fde7..5b5b811c3 100644
--- a/.github/workflows/style.yml
+++ b/.github/workflows/style.yml
@@ -40,6 +40,8 @@ jobs:
       run: scripts/fix_style.py --dry-run
     - name: Check header guards
       run: scripts/check_header_guards.py
+    - name: Validate Languages
+      run: scripts/languages/validate.py
     - name: Check languages
       run: scripts/languages/update_all.py
     - name: Check dilated images
diff --git a/scripts/languages/twlang.py b/scripts/languages/twlang.py
index 899e75b35..eba1c9116 100644
--- a/scripts/languages/twlang.py
+++ b/scripts/languages/twlang.py
@@ -7,22 +7,6 @@ class LanguageDecodeError(Exception):
         error = f"File \"{filename}\", line {line+1}: {message}"
         super().__init__(error)
 
-
-# Taken from https://stackoverflow.com/questions/30011379/how-can-i-parse-a-c-format-string-in-python
-cfmt = r'''\
-(                                  # start of capture group 1
-%                                  # literal "%"
-(?:                                # first option
-(?:[-+0 #]{0,5})                   # optional flags
-(?:\d+|\*)?                        # width
-(?:\.(?:\d+|\*))?                  # precision
-(?:h|l|ll|w|I|I32|I64)?            # size
-[cCdiouxXeEfgGaAnpsSZ]             # type
-) |                                # OR
-%%)                                # literal "%%"
-'''
-
-
 def decode(fileobj, elements_per_key):
     data = {}
     current_context = ""
@@ -45,10 +29,7 @@ def decode(fileobj, elements_per_key):
             if len(data[current_key]) >= 1+elements_per_key:
                 raise LanguageDecodeError("Wrong number of elements per key", fileobj.name, index)
             if current_key:
-                original = current_key[0] # pylint: disable=unsubscriptable-object
                 translation = line[3:]
-                if translation and [m.group(1) for m in re.finditer(cfmt, original, flags=re.X)] != [m.group(1) for m in re.finditer(cfmt, translation, flags=re.X)]:
-                    raise LanguageDecodeError("Non-matching formatting string", fileobj.name, index)
                 data[current_key].extend([translation])
             else:
                 raise LanguageDecodeError("Element before key given", fileobj.name, index)
diff --git a/scripts/languages/validate.py b/scripts/languages/validate.py
new file mode 100755
index 000000000..fe214bdb1
--- /dev/null
+++ b/scripts/languages/validate.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+"""Validate translation files: matching C format specifiers and ellipsis usage."""
+import os
+import sys
+import re
+import twlang
+
+# Work from the repository root (two levels above this script's directory).
+# abspath() first: for a bare relative __file__ such as "validate.py", dirname()
+# is "" and appending "/../.." would point at the filesystem root instead.
+os.chdir(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", ".."))
+
+# Taken from https://stackoverflow.com/questions/30011379/how-can-i-parse-a-c-format-string-in-python
+cfmt = re.compile(r'''
+(                                  # start of capture group 1
+%                                  # literal "%"
+(?:                                # first option
+(?:[-+0 #]{0,5})                   # optional flags
+(?:\d+|\*)?                        # width
+(?:\.(?:\d+|\*))?                  # precision
+(?:h|l|ll|w|I|I32|I64)?            # size
+[cCdiouxXeEfgGaAnpsSZ]             # type
+) |                                # OR
+%%)                                # literal "%%"
+''', flags=re.X)
+
+total_errors = 0
+
+
+def print_validation_error(error, filename, error_line, translated):
+    """Print one validation failure and add it to the global error count.
+
+    error: short description of what is wrong.
+    filename: name printed as the location prefix (callers pass the language).
+    error_line: line index of the translation; printed as error_line + 1.
+    translated: the offending translated string, echoed for context.
+    """
+    global total_errors
+    print(f"Invalid: {translated}")
+    print(f"- {error} in {filename}:{error_line + 1}\n")
+    total_errors += 1
+
+
+# Languages named on the command line take precedence over the full list.
+languages = sys.argv[1:] or twlang.languages()
+# NOTE(review): `local` is never read in this script - confirm whether this call is still needed.
+local = twlang.localizes()
+
+for language in languages:
+    translations = twlang.translations(language)
+
+    for (english, _), (line, translated, _) in translations.items():
+        if not translated:
+            continue
+
+        # Validate C format strings. Strings that move the formatters
+        # (they contain "1$") are not validated.
+        if "1$" not in translated and cfmt.findall(english) != cfmt.findall(translated):
+            print_validation_error("Non-matching formatting", language, line, translated)
+
+        # Check for ellipsis
+        if "…" in english and "..." in translated:
+            print_validation_error("Usage of ... instead of the … character", language, line, translated)
+
+if total_errors:
+    print(f"Found {total_errors} {'error' if total_errors == 1 else 'errors'} ")
+    sys.exit(1)