diff --git a/app/tasks/minetestcheck/translation.py b/app/tasks/minetestcheck/translation.py index 8be4bb24..e1100594 100644 --- a/app/tasks/minetestcheck/translation.py +++ b/app/tasks/minetestcheck/translation.py @@ -1,7 +1,18 @@ -# Adapted from: https://github.com/minetest/minetest/blob/master/util/mod_translation_updater.py +# ContentDB +# Copyright (C) 2024 rubenwardy # -# Copyright (C) 2019 Joachim Stolberg, 2020 FaceDeer, 2020 Louis Royer, 2023 Wuzzy, 2024 rubenwardy -# License: LGPLv2.1 or later +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. import os import re @@ -18,93 +29,81 @@ class Translation: self.entries = entries -# Handles a translation line in *.tr file. -# Group 1 is the source string left of the equals sign. -# Group 2 is the translated string, right of the equals sign. -pattern_tr = re.compile( - r'(.*)' # Source string - # the separating equals sign, if NOT preceded by @, unless - # that @ is preceded by another @ - r'(?:(? 
Translation: - dOut = {} - in_header = True - header_comments = None - textdomain = None - + entries = {} filename = os.path.basename(filepath) filename_parts = filename.split(".") assert len(filename_parts) >= 3 assert filename_parts[-1] == "tr" language = filename_parts[-2] + textdomain = ".".join(filename_parts[0:-2]) with open(filepath, "r", encoding='utf-8') as existing_file: - # save the full text to allow for comparison - # of the old version with the new output - existing_file.seek(0) - # a running record of the current comment block - # we're inside, to allow preceeding multi-line comments - # to be retained for a translation line - latest_comment_block = None - for line in existing_file.readlines(): - line = line.rstrip('\n') - # "##### not used anymore #####" comment - if line == comment_unused: - # Always delete the 'not used anymore' comment. - # It will be re-added to the file if neccessary. - latest_comment_block = None - if header_comments is not None: - in_header = False - continue + lines = existing_file.readlines() + line_index = 0 + while line_index < len(lines): + line = lines[line_index].rstrip('\n') + + if line == "": + pass + # Comment lines elif line.startswith("#"): - # Source file comments: ##[ file.lua ]## - if line.startswith(symbol_source_prefix) and line.endswith(symbol_source_suffix): - continue - - # Store first occurance of textdomain + # Store first occurrence of textdomain # discard all subsequent textdomain lines if line.startswith("# textdomain:"): - if textdomain is None: - textdomain = line[13:].strip() - continue - elif in_header: - # Save header comments (normal comments at top of file) - if not header_comments: - header_comments = line + line_textdomain = line[13:].strip() + if line_textdomain != textdomain: + raise SyntaxError( + f"Line {line_index + 1}: The filename's textdomain ({textdomain}) should match the comment ({line_textdomain})") + else: + i = 0 + had_equals = False + source = "" + current_part = "" + while i < 
len(line): + if line[i] == "@": + if i + 1 < len(line): + i += 1 + code = line[i] + if code == "=": + current_part += "=" + elif code == "@": + current_part += "@" + elif code == "n": + current_part += "\n" + elif code.isdigit(): + current_part += "@" + code + else: + raise SyntaxError(f"Line {line_index + 1}: Unknown escape character: {code}") + + else: + # @\n -> add new line + line_index += 1 + if line_index >= len(lines): + raise SyntaxError(f"Line {line_index + 1}: Unexpected end of file") + line = lines[line_index] + current_part += "\n" + i = 0 + continue + elif not had_equals and line[i] == "=": + had_equals = True + source = current_part + current_part = "" + else: - header_comments = header_comments + "\n" + line - else: - # Save normal comments - if line.startswith("# textdomain:") and textdomain is None: - textdomain = line - elif not latest_comment_block: - latest_comment_block = line - else: - latest_comment_block = latest_comment_block + "\n" + line + current_part += line[i] - continue + i += 1 - match = pattern_tr.match(line) - if match: - latest_comment_block = None - in_header = False - dOut[match.group(1).strip()] = match.group(2).strip() + translation = current_part + if not had_equals: + raise SyntaxError(f"Line {line_index + 1}: Missing = in line") - return Translation(language, textdomain, dOut) + entries[source.strip()] = translation.strip() + + line_index += 1 + + return Translation(language, textdomain, entries) diff --git a/app/tasks/minetestcheck/tree.py b/app/tasks/minetestcheck/tree.py index 144a923c..a14b7719 100644 --- a/app/tasks/minetestcheck/tree.py +++ b/app/tasks/minetestcheck/tree.py @@ -305,5 +305,10 @@ class PackageTreeNode: ret = [] for name in glob.glob(f"{self.baseDir}/**/locale/{textdomain}.*.tr", recursive=True): - ret.append(parse_tr(name)) + try: + ret.append(parse_tr(name)) + except SyntaxError as e: + relative_path = os.path.join(self.relative, os.path.relpath(name, self.baseDir)) + raise MinetestCheckError(f"Syntax 
error whilst reading {relative_path}: {e}") + return ret diff --git a/app/tests/unit/bad_escape.fr.tr b/app/tests/unit/bad_escape.fr.tr new file mode 100644 index 00000000..4fcedc07 --- /dev/null +++ b/app/tests/unit/bad_escape.fr.tr @@ -0,0 +1 @@ +Bad @x escape = Bad @x escape diff --git a/app/tests/unit/err_missing_eq.fr.tr b/app/tests/unit/err_missing_eq.fr.tr new file mode 100644 index 00000000..16db9569 --- /dev/null +++ b/app/tests/unit/err_missing_eq.fr.tr @@ -0,0 +1,4 @@ +# textdomain: err_missing_eq + +Hello, World! = Bonjour, Monde! +Invalid line diff --git a/app/tests/unit/foo.bar.fr.tr b/app/tests/unit/foo.bar.fr.tr new file mode 100644 index 00000000..765e8fdf --- /dev/null +++ b/app/tests/unit/foo.bar.fr.tr @@ -0,0 +1,9 @@ +# textdomain: foo.bar + +Hello, World! = Bonjour, Monde! +Hello @1!=@1, salut! +Cats @= cool = Chats = cool +# a comment +A @n newline = Une @ +nouvelle ligne +Maybe @@@n@@@=@@= Peut être @@@n@@@=@@ diff --git a/app/tests/unit/no_textdomain_comment.fr.tr b/app/tests/unit/no_textdomain_comment.fr.tr new file mode 100644 index 00000000..38635705 --- /dev/null +++ b/app/tests/unit/no_textdomain_comment.fr.tr @@ -0,0 +1 @@ +Hello, World! = Bonjour, Monde! diff --git a/app/tests/unit/test_translation.py b/app/tests/unit/test_translation.py index b8f6d3bf..bcb9211f 100644 --- a/app/tests/unit/test_translation.py +++ b/app/tests/unit/test_translation.py @@ -1,13 +1,61 @@ import os + +import pytest + from app.tasks.minetestcheck.translation import parse_tr def test_parses_tr(): dirname = os.path.dirname(__file__) - filepath = os.path.join(dirname, "test_file.fr.tr") + filepath = os.path.join(dirname, "foo.bar.fr.tr") out = parse_tr(filepath) assert out.language == "fr" - assert out.textdomain == "foobar" + assert out.textdomain == "foo.bar" + assert len(out.entries) == 5 + assert out.entries["Hello, World!"] == "Bonjour, Monde!" + assert out.entries["Hello @1!"] == "@1, salut!" 
+ assert out.entries["Cats = cool"] == "Chats = cool" + assert out.entries["A \n newline"] == "Une \nnouvelle ligne" + assert out.entries["Maybe @\n@=@"] == "Peut être @\n@=@" + + +def test_parses_tr_infers_textdomain(): + dirname = os.path.dirname(__file__) + filepath = os.path.join(dirname, "no_textdomain_comment.fr.tr") + out = parse_tr(filepath) + + assert out.language == "fr" + assert out.textdomain == "no_textdomain_comment" assert len(out.entries) == 1 assert out.entries["Hello, World!"] == "Bonjour, Monde!" + + +def test_parses_tr_error_on_textdomain_mismatch(): + dirname = os.path.dirname(__file__) + filepath = os.path.join(dirname, "textdomain_mismatch.fr.tr") + + with pytest.raises(SyntaxError) as e: + parse_tr(filepath) + + assert str(e.value) == "Line 1: The filename's textdomain (textdomain_mismatch) should match the comment (foobar)" + + +def test_parses_tr_error_on_missing_eq(): + dirname = os.path.dirname(__file__) + filepath = os.path.join(dirname, "err_missing_eq.fr.tr") + + with pytest.raises(SyntaxError) as e: + parse_tr(filepath) + + assert str(e.value) == "Line 4: Missing = in line" + + +def test_parses_tr_error_on_bad_escape(): + dirname = os.path.dirname(__file__) + filepath = os.path.join(dirname, "bad_escape.fr.tr") + + with pytest.raises(SyntaxError) as e: + parse_tr(filepath) + + assert str(e.value) == "Line 1: Unknown escape character: x" diff --git a/app/tests/unit/test_file.fr.tr b/app/tests/unit/textdomain_mismatch.fr.tr similarity index 100% rename from app/tests/unit/test_file.fr.tr rename to app/tests/unit/textdomain_mismatch.fr.tr