mirror of
https://github.com/minetest/contentdb.git
synced 2025-01-20 21:11:26 +01:00
Rewrite .tr parser
This commit is contained in:
parent
1b5791a358
commit
65dc8c0891
@ -1,7 +1,18 @@
|
|||||||
# Adapted from: https://github.com/minetest/minetest/blob/master/util/mod_translation_updater.py
|
# ContentDB
|
||||||
|
# Copyright (C) 2024 rubenwardy
|
||||||
#
|
#
|
||||||
# Copyright (C) 2019 Joachim Stolberg, 2020 FaceDeer, 2020 Louis Royer, 2023 Wuzzy, 2024 rubenwardy
|
# This program is free software: you can redistribute it and/or modify
|
||||||
# License: LGPLv2.1 or later
|
# it under the terms of the GNU Affero General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU Affero General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU Affero General Public License
|
||||||
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
@ -18,93 +29,81 @@ class Translation:
|
|||||||
self.entries = entries
|
self.entries = entries
|
||||||
|
|
||||||
|
|
||||||
# Handles a translation line in *.tr file.
|
|
||||||
# Group 1 is the source string left of the equals sign.
|
|
||||||
# Group 2 is the translated string, right of the equals sign.
|
|
||||||
pattern_tr = re.compile(
|
|
||||||
r'(.*)' # Source string
|
|
||||||
# the separating equals sign, if NOT preceded by @, unless
|
|
||||||
# that @ is preceded by another @
|
|
||||||
r'(?:(?<!(?<!@)@)=)'
|
|
||||||
r'(.*)' # Translation string
|
|
||||||
)
|
|
||||||
|
|
||||||
# Strings longer than this will have extra space added between
|
|
||||||
# them in the translation files to make it easier to distinguish their
|
|
||||||
# beginnings and endings at a glance
|
|
||||||
doublespace_threshold = 80
|
|
||||||
|
|
||||||
# These symbols mark comment lines showing the source file name.
|
|
||||||
# A comment may look like "##[ init.lua ]##".
|
|
||||||
symbol_source_prefix = "##["
|
|
||||||
symbol_source_suffix = "]##"
|
|
||||||
comment_unused = "##### not used anymore #####"
|
|
||||||
|
|
||||||
|
|
||||||
def parse_tr(filepath: str) -> Translation:
|
def parse_tr(filepath: str) -> Translation:
|
||||||
dOut = {}
|
entries = {}
|
||||||
in_header = True
|
|
||||||
header_comments = None
|
|
||||||
textdomain = None
|
|
||||||
|
|
||||||
filename = os.path.basename(filepath)
|
filename = os.path.basename(filepath)
|
||||||
filename_parts = filename.split(".")
|
filename_parts = filename.split(".")
|
||||||
|
|
||||||
assert len(filename_parts) >= 3
|
assert len(filename_parts) >= 3
|
||||||
assert filename_parts[-1] == "tr"
|
assert filename_parts[-1] == "tr"
|
||||||
language = filename_parts[-2]
|
language = filename_parts[-2]
|
||||||
|
textdomain = ".".join(filename_parts[0:-2])
|
||||||
|
|
||||||
with open(filepath, "r", encoding='utf-8') as existing_file:
|
with open(filepath, "r", encoding='utf-8') as existing_file:
|
||||||
# save the full text to allow for comparison
|
lines = existing_file.readlines()
|
||||||
# of the old version with the new output
|
line_index = 0
|
||||||
existing_file.seek(0)
|
while line_index < len(lines):
|
||||||
# a running record of the current comment block
|
line = lines[line_index].rstrip('\n')
|
||||||
# we're inside, to allow preceding multi-line comments
|
|
||||||
# to be retained for a translation line
|
if line == "":
|
||||||
latest_comment_block = None
|
pass
|
||||||
for line in existing_file.readlines():
|
|
||||||
line = line.rstrip('\n')
|
|
||||||
# "##### not used anymore #####" comment
|
|
||||||
if line == comment_unused:
|
|
||||||
# Always delete the 'not used anymore' comment.
|
|
||||||
# It will be re-added to the file if necessary.
|
|
||||||
latest_comment_block = None
|
|
||||||
if header_comments is not None:
|
|
||||||
in_header = False
|
|
||||||
continue
|
|
||||||
# Comment lines
|
# Comment lines
|
||||||
elif line.startswith("#"):
|
elif line.startswith("#"):
|
||||||
# Source file comments: ##[ file.lua ]##
|
# Store first occurrence of textdomain
|
||||||
if line.startswith(symbol_source_prefix) and line.endswith(symbol_source_suffix):
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Store first occurrence of textdomain
|
|
||||||
# discard all subsequent textdomain lines
|
# discard all subsequent textdomain lines
|
||||||
if line.startswith("# textdomain:"):
|
if line.startswith("# textdomain:"):
|
||||||
if textdomain is None:
|
line_textdomain = line[13:].strip()
|
||||||
textdomain = line[13:].strip()
|
if line_textdomain != textdomain:
|
||||||
continue
|
raise SyntaxError(
|
||||||
elif in_header:
|
f"Line {line_index + 1}: The filename's textdomain ({textdomain}) should match the comment ({line_textdomain})")
|
||||||
# Save header comments (normal comments at top of file)
|
else:
|
||||||
if not header_comments:
|
i = 0
|
||||||
header_comments = line
|
had_equals = False
|
||||||
|
source = ""
|
||||||
|
current_part = ""
|
||||||
|
while i < len(line):
|
||||||
|
if line[i] == "@":
|
||||||
|
if i + 1 < len(line):
|
||||||
|
i += 1
|
||||||
|
code = line[i]
|
||||||
|
if code == "=":
|
||||||
|
current_part += "="
|
||||||
|
elif code == "@":
|
||||||
|
current_part += "@"
|
||||||
|
elif code == "n":
|
||||||
|
current_part += "\n"
|
||||||
|
elif code.isdigit():
|
||||||
|
current_part += "@" + code
|
||||||
|
else:
|
||||||
|
raise SyntaxError(f"Line {line_index + 1}: Unknown escape character: {code}")
|
||||||
|
|
||||||
|
else:
|
||||||
|
# @\n -> add new line
|
||||||
|
line_index += 1
|
||||||
|
if line_index >= len(lines):
|
||||||
|
raise SyntaxError(f"Line {line_index + 1}: Unexpected end of file")
|
||||||
|
line = lines[line_index]
|
||||||
|
current_part += "\n"
|
||||||
|
i = 0
|
||||||
|
continue
|
||||||
|
elif not had_equals and line[i] == "=":
|
||||||
|
had_equals = True
|
||||||
|
source = current_part
|
||||||
|
current_part = ""
|
||||||
|
|
||||||
else:
|
else:
|
||||||
header_comments = header_comments + "\n" + line
|
current_part += line[i]
|
||||||
else:
|
|
||||||
# Save normal comments
|
|
||||||
if line.startswith("# textdomain:") and textdomain is None:
|
|
||||||
textdomain = line
|
|
||||||
elif not latest_comment_block:
|
|
||||||
latest_comment_block = line
|
|
||||||
else:
|
|
||||||
latest_comment_block = latest_comment_block + "\n" + line
|
|
||||||
|
|
||||||
continue
|
i += 1
|
||||||
|
|
||||||
match = pattern_tr.match(line)
|
translation = current_part
|
||||||
if match:
|
if not had_equals:
|
||||||
latest_comment_block = None
|
raise SyntaxError(f"Line {line_index + 1}: Missing = in line")
|
||||||
in_header = False
|
|
||||||
dOut[match.group(1).strip()] = match.group(2).strip()
|
|
||||||
|
|
||||||
return Translation(language, textdomain, dOut)
|
entries[source.strip()] = translation.strip()
|
||||||
|
|
||||||
|
line_index += 1
|
||||||
|
|
||||||
|
return Translation(language, textdomain, entries)
|
||||||
|
@ -305,5 +305,10 @@ class PackageTreeNode:
|
|||||||
ret = []
|
ret = []
|
||||||
|
|
||||||
for name in glob.glob(f"{self.baseDir}/**/locale/{textdomain}.*.tr", recursive=True):
|
for name in glob.glob(f"{self.baseDir}/**/locale/{textdomain}.*.tr", recursive=True):
|
||||||
ret.append(parse_tr(name))
|
try:
|
||||||
|
ret.append(parse_tr(name))
|
||||||
|
except SyntaxError as e:
|
||||||
|
relative_path = os.path.join(self.relative, os.path.relpath(name, self.baseDir))
|
||||||
|
raise MinetestCheckError(f"Syntax error whilst reading {relative_path}: {e}")
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
|
1
app/tests/unit/bad_escape.fr.tr
Normal file
1
app/tests/unit/bad_escape.fr.tr
Normal file
@ -0,0 +1 @@
|
|||||||
|
Bad @x escape = Bad @x escape
|
4
app/tests/unit/err_missing_eq.fr.tr
Normal file
4
app/tests/unit/err_missing_eq.fr.tr
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
# textdomain: err_missing_eq
|
||||||
|
|
||||||
|
Hello, World! = Bonjour, Monde!
|
||||||
|
Invalid line
|
9
app/tests/unit/foo.bar.fr.tr
Normal file
9
app/tests/unit/foo.bar.fr.tr
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
# textdomain: foo.bar
|
||||||
|
|
||||||
|
Hello, World! = Bonjour, Monde!
|
||||||
|
Hello @1!=@1, salut!
|
||||||
|
Cats @= cool = Chats = cool
|
||||||
|
# a comment
|
||||||
|
A @n newline = Une @
|
||||||
|
nouvelle ligne
|
||||||
|
Maybe @@@n@@@=@@= Peut être @@@n@@@=@@
|
1
app/tests/unit/no_textdomain_comment.fr.tr
Normal file
1
app/tests/unit/no_textdomain_comment.fr.tr
Normal file
@ -0,0 +1 @@
|
|||||||
|
Hello, World! = Bonjour, Monde!
|
@ -1,13 +1,61 @@
|
|||||||
import os
|
import os
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
from app.tasks.minetestcheck.translation import parse_tr
|
from app.tasks.minetestcheck.translation import parse_tr
|
||||||
|
|
||||||
|
|
||||||
def test_parses_tr():
|
def test_parses_tr():
|
||||||
dirname = os.path.dirname(__file__)
|
dirname = os.path.dirname(__file__)
|
||||||
filepath = os.path.join(dirname, "test_file.fr.tr")
|
filepath = os.path.join(dirname, "foo.bar.fr.tr")
|
||||||
out = parse_tr(filepath)
|
out = parse_tr(filepath)
|
||||||
|
|
||||||
assert out.language == "fr"
|
assert out.language == "fr"
|
||||||
assert out.textdomain == "foobar"
|
assert out.textdomain == "foo.bar"
|
||||||
|
assert len(out.entries) == 5
|
||||||
|
assert out.entries["Hello, World!"] == "Bonjour, Monde!"
|
||||||
|
assert out.entries["Hello @1!"] == "@1, salut!"
|
||||||
|
assert out.entries["Cats = cool"] == "Chats = cool"
|
||||||
|
assert out.entries["A \n newline"] == "Une \nnouvelle ligne"
|
||||||
|
assert out.entries["Maybe @\n@=@"] == "Peut être @\n@=@"
|
||||||
|
|
||||||
|
|
||||||
|
def test_parses_tr_infers_textdomain():
|
||||||
|
dirname = os.path.dirname(__file__)
|
||||||
|
filepath = os.path.join(dirname, "no_textdomain_comment.fr.tr")
|
||||||
|
out = parse_tr(filepath)
|
||||||
|
|
||||||
|
assert out.language == "fr"
|
||||||
|
assert out.textdomain == "no_textdomain_comment"
|
||||||
assert len(out.entries) == 1
|
assert len(out.entries) == 1
|
||||||
assert out.entries["Hello, World!"] == "Bonjour, Monde!"
|
assert out.entries["Hello, World!"] == "Bonjour, Monde!"
|
||||||
|
|
||||||
|
|
||||||
|
def test_parses_tr_error_on_textdomain_mismatch():
|
||||||
|
dirname = os.path.dirname(__file__)
|
||||||
|
filepath = os.path.join(dirname, "textdomain_mismatch.fr.tr")
|
||||||
|
|
||||||
|
with pytest.raises(SyntaxError) as e:
|
||||||
|
parse_tr(filepath)
|
||||||
|
|
||||||
|
assert str(e.value) == "Line 1: The filename's textdomain (textdomain_mismatch) should match the comment (foobar)"
|
||||||
|
|
||||||
|
|
||||||
|
def test_parses_tr_error_on_missing_eq():
|
||||||
|
dirname = os.path.dirname(__file__)
|
||||||
|
filepath = os.path.join(dirname, "err_missing_eq.fr.tr")
|
||||||
|
|
||||||
|
with pytest.raises(SyntaxError) as e:
|
||||||
|
parse_tr(filepath)
|
||||||
|
|
||||||
|
assert str(e.value) == "Line 4: Missing = in line"
|
||||||
|
|
||||||
|
|
||||||
|
def test_parses_tr_error_on_bad_escape():
|
||||||
|
dirname = os.path.dirname(__file__)
|
||||||
|
filepath = os.path.join(dirname, "bad_escape.fr.tr")
|
||||||
|
|
||||||
|
with pytest.raises(SyntaxError) as e:
|
||||||
|
parse_tr(filepath)
|
||||||
|
|
||||||
|
assert str(e.value) == "Line 1: Unknown escape character: x"
|
||||||
|
Loading…
Reference in New Issue
Block a user