Gettext and plural support for client-side translations (#14726)

---------

Co-authored-by: Ekdohibs <nathanael.courant@laposte.net>
Co-authored-by: y5nw <y5nw@protonmail.com>
Co-authored-by: rubenwardy <rw@rubenwardy.com>
This commit is contained in:
y5nw 2024-10-13 11:29:08 +02:00 committed by GitHub
parent dbbe0ca065
commit e3aa79cffb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
28 changed files with 1360 additions and 74 deletions

@ -574,12 +574,14 @@ function core.strip_colors(str)
return (str:gsub(ESCAPE_CHAR .. "%([bc]@[^)]+%)", ""))
end
function core.translate(textdomain, str, ...)
local function translate(textdomain, str, num, ...)
local start_seq
if textdomain == "" then
if textdomain == "" and num == "" then
start_seq = ESCAPE_CHAR .. "T"
else
elseif num == "" then
start_seq = ESCAPE_CHAR .. "(T@" .. textdomain .. ")"
else
start_seq = ESCAPE_CHAR .. "(T@" .. textdomain .. "@" .. num .. ")"
end
local arg = {n=select('#', ...), ...}
local end_seq = ESCAPE_CHAR .. "E"
@ -610,8 +612,31 @@ function core.translate(textdomain, str, ...)
return start_seq .. translated .. end_seq
end
-- Marks `str` for client-side translation in `textdomain`, without plural
-- handling. Forwards to the local `translate` helper with an empty plural
-- count (""); extra arguments are passed through unchanged.
function core.translate(textdomain, str, ...)
return translate(textdomain, str, "", ...)
end
-- Plural-aware counterpart of core.translate.
-- `n` must be a non-negative integer number. When n == 1 the singular `str`
-- is passed on with the count "1"; otherwise `str_plural` is passed on with
-- tostring(n). Extra arguments are forwarded unchanged.
function core.translate_n(textdomain, str, str_plural, n, ...)
	assert (type(n) == "number")
	assert (n >= 0)
	assert (math.floor(n) == n)

	-- Fold very large counts into [limit, 2 * limit): the value keeps its
	-- remainder modulo `limit` but stays "large".
	local limit = 1000000
	if n >= 2 * limit then
		n = limit + n % limit
	end

	if n ~= 1 then
		return translate(textdomain, str_plural, tostring(n), ...)
	end
	return translate(textdomain, str, "1", ...)
end
function core.get_translator(textdomain)
return function(str, ...) return core.translate(textdomain or "", str, ...) end
return
(function(str, ...) return core.translate(textdomain or "", str, ...) end),
(function(str, str_plural, n, ...) return core.translate_n(textdomain or "", str, str_plural, n, ...) end)
end
--------------------------------------------------------------------------------

@ -118,7 +118,7 @@ local function get_formspec(tabview, name, tabdata)
local title_and_name
if selected_pkg.type == "game" then
title_and_name = selected_pkg.name
title_and_name = selected_pkg.title or selected_pkg.name
else
title_and_name = (selected_pkg.title or selected_pkg.name) .. "\n" ..
core.colorize("#BFBFBF", selected_pkg.name)

@ -4178,10 +4178,6 @@ Translations
Texts can be translated client-side with the help of `minetest.translate` and
translation files.
Consider using the script `mod_translation_updater.py` in the Minetest
[modtools](https://github.com/minetest/modtools) repository to generate and
update translation files automatically from the Lua sources.
Translating a string
--------------------
@ -4189,13 +4185,15 @@ Two functions are provided to translate strings: `minetest.translate` and
`minetest.get_translator`.
* `minetest.get_translator(textdomain)` is a simple wrapper around
`minetest.translate`, and `minetest.get_translator(textdomain)(str, ...)` is
equivalent to `minetest.translate(textdomain, str, ...)`.
`minetest.translate` and `minetest.translate_n`.
After `local S, NS = minetest.get_translator(textdomain)`, we have
`S(str, ...)` equivalent to `minetest.translate(textdomain, str, ...)`, and
`NS(str, str_plural, n, ...)` to `minetest.translate_n(textdomain, str, str_plural, n, ...)`.
It is intended to be used in the following way, so that it avoids verbose
repetitions of `minetest.translate`:
```lua
local S = minetest.get_translator(textdomain)
local S, NS = minetest.get_translator(textdomain)
S(str, ...)
```
@ -4212,29 +4210,102 @@ Two functions are provided to translate strings: `minetest.translate` and
arguments the translated string expects.
Arguments are literal strings -- they will not be translated.
For instance, suppose we want to greet players when they join. We can do the
* `minetest.translate_n(textdomain, str, str_plural, n, ...)` translates the
string `str` with the given `textdomain` for disambiguation. The value of
`n`, which must be a nonnegative integer, is used to decide whether to use
the singular or the plural version of the string. Depending on the locale of
the client, the choice between singular and plural might be more complicated,
but the choice will be done automatically using the value of `n`.
You can read https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html
for more details on the differences of plurals between languages.
Also note that plurals are only handled in .po or .mo files, and not in .tr files.
For instance, suppose we want to greet players when they join and provide a
command that shows the amount of time since the player joined. We can do the
following:
```lua
local S = minetest.get_translator("hello")
local S, NS = minetest.get_translator("hello")
minetest.register_on_joinplayer(function(player)
local name = player:get_player_name()
minetest.chat_send_player(name, S("Hello @1, how are you today?", name))
end)
minetest.register_chatcommand("playtime", {
    func = function(name)
        local last_login = core.get_auth_handler().get_auth(name).last_login
        -- Whole minutes elapsed since the last login. Clamped at 0 because
        -- the plural translator requires a non-negative integer count.
        local minutes = math.max(0, math.floor((os.time() - last_login) / 60))
        return true, NS(
            "You have been playing for @1 minute.",
            "You have been playing for @1 minutes.",
            minutes, tostring(minutes))
    end,
})
```
When someone called "CoolGuy" joins the game with an old client or a client
that does not have localization enabled, they will see `Hello CoolGuy, how are
you today?`
you today?`. If they use the `/playtime` command, they will see `You have been
playing for 1 minute.` or (for example) `You have been playing for 4 minutes.`
However, if we have for instance a translation file named `hello.de.tr`
However, if we have for instance a translation file named `hello.de.po`
containing the following:
# textdomain: hello
Hello @1, how are you today?=Hallo @1, wie geht es dir heute?
```po
msgid ""
msgstr ""
"Plural-Forms: nplurals=2; plural=(n != 1);\n"
msgid "Hello @1, how are you today?"
msgstr "Hallo @1, wie geht es dir heute?"
msgid "You have been playing for @1 minute."
msgid_plural "You have been playing for @1 minutes."
msgstr[0] "Du spielst seit @1 Minute."
msgstr[1] "Du spielst seit @1 Minuten."
```
and CoolGuy has set a German locale, they will see `Hallo CoolGuy, wie geht es
dir heute?`
dir heute?` when they join, and the `/playtime` command will show them `Du
spielst seit 1 Minute.` or (for example) `Du spielst seit 4 Minuten.`
Creating and updating translation files
---------------------------------------
As an alternative to writing translation files by hand (as shown in the above
example), it is also possible to generate translation files based on the source
code.
It is recommended to first generate a translation template. The translation
template includes translatable strings that translators can directly work on.
After creating the `locale` directory, a translation template for the above
example can be generated using the following command:
```sh
xgettext -L lua -kS -kNS:1,2 -kminetest.translate:1c,2 -kminetest.translate_n:1c,2,3 \
-d hello -o locale/hello.pot *.lua
```
The above command can also be used to update the translation template when new
translatable strings are added.
The German translator can then create the translation file with
```sh
msginit -l de -i locale/hello.pot -o locale/hello.de.po
```
and provide the translations by editing `locale/hello.de.po`.
The translation file can be updated using
```sh
msgmerge -U locale/hello.de.po locale/hello.pot
```
Refer to the [Gettext manual](https://www.gnu.org/software/gettext/manual/) for
further information on creating and updating translation files.
Operations on translated strings
--------------------------------
@ -4248,8 +4319,8 @@ expected manner. However, string concatenation will still work as expected
sentences by breaking them into parts; arguments should be used instead), and
operations such as `minetest.colorize` which are also concatenation.
Translation file format
-----------------------
Old translation file format
---------------------------
A translation file has the suffix `.[lang].tr`, where `[lang]` is the language
it corresponds to. It must be put into the `locale` subdirectory of the mod.
@ -4264,6 +4335,34 @@ The file should be a text file, with the following format:
There must be no extraneous whitespace around the `=` or at the beginning or
the end of the line.
Using the earlier example of greeting the player, the translation file would be
```
# textdomain: hello
Hello @1, how are you today?=Hallo @1, wie geht es dir heute?
```
For old translation files, consider using the script `mod_translation_updater.py`
in the Minetest [modtools](https://github.com/minetest/modtools) repository to
generate and update translation files automatically from the Lua sources.
Gettext translation file format
-------------------------------
Gettext files can also be used as translations. A translation file has the suffix
`.[lang].po` or `.[lang].mo`, depending on whether it is compiled or not, and must
also be placed in the `locale` subdirectory of the mod. The value of `textdomain`
is `msgctxt` in the gettext files. If `msgctxt` is not provided, the name of the
translation file is used instead.
A typical entry in a `.po` file would look like:
```po
msgctxt "textdomain"
msgid "Hello world!"
msgstr "Bonjour le monde!"
```
Escapes
-------

@ -0,0 +1,26 @@
local S, NS = minetest.get_translator("testtranslations")
-- Sends one chat line to player `name` holding three renderings of `text`,
-- separated by " | ": the result of core.get_translated_string with an empty
-- language code, `text` itself, and the result for language code "fr".
local function send_compare(name, text)
	local untranslated = core.get_translated_string("", text)
	local server_side = core.get_translated_string("fr", text)
	local line = string.format("%s | %s | %s", untranslated, text, server_side)
	core.chat_send_player(name, line)
end
-- /testtranslations: prints a comparison row for every translation fixture
-- (.tr, .po, .mo, fuzzy entry, context-less entries), then the plural
-- fixtures for the counts 0 through 4.
minetest.register_chatcommand("testtranslations", {
	params = "",
	description = "Test translations",
	privs = {},
	func = function(name, param)
		core.chat_send_player(name, "Please ensure your locale is set to \"fr\"")
		core.chat_send_player(name, "Untranslated | Client-side translation | Server-side translation (fr)")

		-- Non-plural fixtures, in the order they are reported.
		local singles = {
			S("Testing .tr files: untranslated"),
			S("Testing .po files: untranslated"),
			S("Testing .mo files: untranslated"),
			S("Testing fuzzy .po entry: untranslated (expected)"),
			core.translate("translation_po", "Testing .po without context: untranslated"),
			core.translate("translation_mo", "Testing .mo without context: untranslated"),
		}
		for _, text in ipairs(singles) do
			send_compare(name, text)
		end

		-- Plural fixtures: one .po row and one .mo row per count.
		for count = 0, 4 do
			local shown = tostring(count)
			send_compare(name, NS("@1: .po singular", "@1: .po plural", count, shown))
			send_compare(name, NS("@1: .mo singular", "@1: .mo plural", count, shown))
		end
	end
})

@ -0,0 +1,9 @@
# Dummy entry. This is a test to make sure that a parser error is not thrown
# if the following line is msgctxt.
msgctxt "testtranslations"
msgid "Dummy entry"
msgstr "Dummy result"
# Used for translating the mod title
msgid "Test translations"
msgstr "Test translations (French)"

@ -0,0 +1,22 @@
# Test Plural-Forms parsing
msgid ""
msgstr ""
"Plural-Forms: nplurals=2; plural= (n-1+1)<=1 ? n||1?0:1 : 1?(!!2):2;"
msgctxt "testtranslations"
msgid "Testing .po files: untranslated"
msgstr "Testing .po files: translated"
msgctxt "testtranslations"
msgid "@1: .po singular"
msgid_plural "@1: .po plural"
msgstr[0] "@1: .po 0 and 1 (French singular)"
msgstr[1] "@1: .po >1 (French plural)"
#, foo bar fuzzy
msgctxt "testtranslations"
msgid "Testing fuzzy .po entry: untranslated (expected)"
msgstr "Testing fuzzy .po entry: translated (wrong)"
msgid "Testing .po without context: untranslated"
msgstr "Testing .po without context: translated"

@ -0,0 +1,2 @@
# textdomain: testtranslations
Testing .tr files: untranslated=Testing .tr files: translated

@ -0,0 +1,3 @@
name = testtranslations
title = Test translations
description = Test mod to test translations.

@ -0,0 +1,4 @@
The translation files in this directory intentionally include errors (which
would be reported when someone starts the devtest game in the de locale). This
allows the unittest to check that the translation file reader also handles
files that include errors.

@ -0,0 +1,42 @@
# This file is used by the C++ unittest for testing the parser
msgid ""
msgstr "\n\n\n"
"Plural-Forms: nplurals=2; plural=n!=1;"
"\n\n\n"
msgid "foo"
msgstr "bar"
msgid "Untranslated"
msgstr ""
#, fuzzy
msgid "Fuzzy entry"
msgstr "Wrong"
msgid "Multi\\""line\n"
"string"
msgstr "Multi\\\"" "li\\ne\nresult"
msgctxt "Something" in "between"
msgctxt "String does not end
msgstr "Lost string"
msgid "Wrong order"
msgid "Singular form"
msgid_plural "Plural form"
msgstr[0] "Singular result"
msgstr[1] "Plural result"
msgid "Not enough value"
msgid_plural "Not enough values"
msgstr[0] "Result"
msgid "Partial translation"
msgid_plural "Partial translations"
msgstr[0] "Partially translated"
msgstr[1] ""
msgctxt "context"
msgid "With context"
msgstr "Has context"

@ -0,0 +1,26 @@
msgid ""
msgstr "Plural-Forms: nplurals=2; plural= n != 1;"
msgctxt "context"
msgid "With context"
msgstr "Has context"
msgctxt "context"
msgid "Singular form"
msgid_plural "Plural form"
msgstr[0] "Singular result"
msgstr[1] "Plural result"
# Replace plural form delimiter in the msgstr
msgid "Corrupt singular"
msgid_plural "Corrupt plural"
msgstr[0] "Corrupt singular result"
msgstr[1] "Corrupt plural result"
# Replace terminating NUL in the MO file
msgid "Corrupt entry"
msgstr "Corrupted result"
# Change the address of this entry to something invalid
msgid "Removed entry"
msgstr "Removed result"

@ -0,0 +1,18 @@
msgid ""
msgstr ""
"Content-Type: text/plain; charset=UTF-8\n"
"Content-Transfer-Encoding: 8bit\n"
"Plural-Forms: nplurals=2; plural=n>1;"
msgctxt "testtranslations"
msgid "Testing .mo files: untranslated"
msgstr "Testing .mo files: translated"
msgid "Testing .mo without context: untranslated"
msgstr "Testing .mo without context: translated"
msgctxt "testtranslations"
msgid "@1: .mo singular"
msgid_plural "@1: .mo plural"
msgstr[0] "@1: .mo 0 and 1 (French singular)"
msgstr[1] "@1: .mo >1 (French plural)"

@ -408,6 +408,7 @@ set(common_SRCS
face_position_cache.cpp
filesys.cpp
gettext.cpp
gettext_plural_form.cpp
httpfetch.cpp
hud.cpp
inventory.cpp

@ -841,16 +841,12 @@ bool Client::loadMedia(const std::string &data, const std::string &filename,
return true;
}
const char *translate_ext[] = {
".tr", NULL
};
name = removeStringEnd(filename, translate_ext);
if (!name.empty()) {
if (Translations::isTranslationFile(filename)) {
if (from_media_push)
return false;
TRACESTREAM(<< "Client: Loading translation: "
<< "\"" << filename << "\"" << std::endl);
g_client_translations->loadTranslation(data);
g_client_translations->loadTranslation(filename, data);
return true;
}

@ -36,7 +36,8 @@ with this program; if not, write to the Free Software Foundation, Inc.,
// the USE_GETTEXT=0 case and can't assume that gettext is installed.
#include <locale>
#define gettext(String) String
#define gettext(String) (String)
#define ngettext(String1, String2, n) ((n) == 1 ? (String1) : (String2))
#endif
#define _(String) gettext(String)

256
src/gettext_plural_form.cpp Normal file

@ -0,0 +1,256 @@
// Minetest
// SPDX-License-Identifier: LGPL-2.1-or-later
#include "gettext_plural_form.h"
#include "util/string.h"
// Number of plural forms declared by `form`, or 0 when the pointer is null.
static size_t minsize(const GettextPluralForm::Ptr &form)
{
	return form ? form->size() : 0;
}

// Smaller plural-form count of the two operands; 0 as soon as either is null.
// (The former `sizeof(g) > 0` test was always true, so the fallback return
// behind it could never run and has been removed.)
static size_t minsize(const GettextPluralForm::Ptr &f, const GettextPluralForm::Ptr &g)
{
	return std::min(minsize(f), minsize(g));
}
// Leaf node for the variable `n`: evaluates to its argument unchanged.
class Identity: public GettextPluralForm
{
public:
	Identity(size_t nplurals): GettextPluralForm(nplurals) {}

	NumT operator()(const NumT n) const override { return n; }
};
class ConstValue: public GettextPluralForm
{
public:
ConstValue(size_t nplurals, NumT val): GettextPluralForm(nplurals), value(val) {};
NumT operator()(const NumT n) const override
{
return value;
}
private:
NumT value;
};
// Expression node applying the unary functor F<NumT> to the value of one
// operand.
template<template<typename> typename F>
class UnaryOperation: public GettextPluralForm
{
public:
// The node's plural count is the operand's, or 0 if the operand is null.
UnaryOperation(const Ptr &op):
GettextPluralForm(minsize(op)), op(op) {}
NumT operator()(const NumT n) const override
{
// operator bool() is false when the size is 0 (e.g. null operand); the
// operand is then never dereferenced and the result is 0.
if (operator bool())
return func((*op)(n));
return 0;
}
private:
Ptr op;
static constexpr F<NumT> func = {};
};
// Expression node applying the binary functor F<NumT> to the values of two
// operands.
template<template<typename> typename F>
class BinaryOperation: public GettextPluralForm
{
public:
// The node's plural count is the smaller operand count, 0 if either is null.
BinaryOperation(const Ptr &lhs, const Ptr &rhs):
GettextPluralForm(minsize(lhs, rhs)),
lhs(lhs), rhs(rhs) {}
NumT operator()(const NumT n) const override
{
// Both operands are evaluated before func is applied, so logical
// functors do not short-circuit here.
// NOTE(review): nothing in this class guards a zero right operand for
// division-like functors.
if (operator bool())
return func((*lhs)(n), (*rhs)(n));
return 0;
}
private:
Ptr lhs, rhs;
static constexpr F<NumT> func = {};
};
// Expression node for `cond ? val : alt`.
class TernaryOperation: public GettextPluralForm
{
public:
// The node's plural count is the smallest of the three operand counts, so it
// is 0 when any operand is null.
TernaryOperation(const Ptr &cond, const Ptr &val, const Ptr &alt):
GettextPluralForm(std::min(minsize(cond), minsize(val, alt))),
cond(cond), val(val), alt(alt) {}
NumT operator()(const NumT n) const override
{
// Only the selected branch is evaluated. Yields 0 when the node is
// invalid (size 0).
if (operator bool())
return (*cond)(n) ? (*val)(n) : (*alt)(n);
return 0;
}
private:
Ptr cond, val, alt;
};
typedef std::pair<GettextPluralForm::Ptr, std::wstring_view> ParserResult;
typedef ParserResult (*Parser)(const size_t, const std::wstring_view &);
static ParserResult parse_expr(const size_t nplurals, const std::wstring_view &str);
// Tries to extend the already parsed left operand `res` with one binary
// operator spelled `pattern`.
//
// Returns:
//  - (nullptr, res.second) when the remaining text does not start with
//    `pattern` (the remainder is returned unchanged);
//  - the failed sub-parser result when the operator matched but its right
//    operand could not be parsed;
//  - otherwise a BinaryOperation<Operator> node and the trimmed remainder.
template<Parser Parser, template<typename> typename Operator>
static ParserResult reduce_ltr(const size_t nplurals, const ParserResult &res, const wchar_t* pattern)
{
	if (!str_starts_with(res.second, pattern))
		return ParserResult(nullptr, res.second);
	const size_t pattern_len = std::char_traits<wchar_t>::length(pattern);
	// Trim before handing over the right operand: the operand parsers look at
	// the first character directly, so "a && n", "a && (b)" or "a == !b"
	// would otherwise be rejected because of the space after the operator.
	auto next = Parser(nplurals, trim(res.second.substr(pattern_len)));
	if (!next.first)
		return next;
	next.first = GettextPluralForm::Ptr(new BinaryOperation<Operator>(res.first, next.first));
	next.second = trim(next.second);
	return next;
}
// Recursion terminator: no operator patterns are left to try, so report
// "no match" with the remaining text unchanged.
template<Parser Parser>
static ParserResult reduce_ltr(const size_t nplurals, const ParserResult &res, const wchar_t**)
{
return ParserResult(nullptr, res.second);
}
// Tries each operator in turn; patterns[k] is the spelling of the k-th
// operator in the template argument list.
template<Parser Parser, template<typename> typename Operator, template<typename> typename... Operators>
static ParserResult reduce_ltr(const size_t nplurals, const ParserResult &res, const wchar_t** patterns)
{
auto next = reduce_ltr<Parser, Operator>(nplurals, res, patterns[0]);
// Stop at the first operator that either succeeded or consumed input (a
// changed remainder with a null node means the operator matched but its
// operand failed to parse).
if (next.first || next.second != res.second)
return next;
return reduce_ltr<Parser, Operators...>(nplurals, res, patterns+1);
}
// Parses a left-associative chain `operand (op operand)*` for one precedence
// level. `Parser` parses the operands and `patterns` holds the operator
// spellings, in the same order as the operator functors.
// Returns the node built so far together with the text that could not be
// consumed at this level; a null node means the first operand failed.
template<Parser Parser, template<typename> typename Operator, template<typename> typename... Operators>
static ParserResult parse_ltr(const size_t nplurals, const std::wstring_view &str, const wchar_t** patterns)
{
	ParserResult acc = Parser(nplurals, str);
	if (!acc.first)
		return acc;
	acc.second = trim(acc.second);

	for (;;) {
		if (acc.second.empty())
			break;
		ParserResult step = reduce_ltr<Parser, Operator, Operators...>(nplurals, acc, patterns);
		// No further operator applies here: keep what has been built so far.
		if (!step.first)
			break;
		step.second = trim(step.second);
		acc = step;
	}
	return acc;
}
// Parses an atom: the variable `n` or an unsigned decimal literal.
// Returns (nullptr, str) when the input holds neither, instead of silently
// treating unparsable text as the constant 0.
static ParserResult parse_atomic(const size_t nplurals, const std::wstring_view &str)
{
	// Leading whitespace is skipped explicitly so both kinds of atom are
	// treated alike.
	const std::wstring_view s = trim(str);
	if (s.empty())
		return ParserResult(nullptr, str);
	if (s[0] == 'n')
		return ParserResult(new Identity(nplurals), trim(s.substr(1)));

	// Insist on a leading digit: wcstoul alone would accept a sign, and would
	// report "0" without consuming anything for text such as ")".
	if (s[0] < L'0' || s[0] > L'9')
		return ParserResult(nullptr, str);

	// NOTE(review): as before, this relies on the viewed buffer containing a
	// non-digit (or terminator) after the literal.
	wchar_t* endp;
	auto val = wcstoul(s.data(), &endp, 10);
	return ParserResult(new ConstValue(nplurals, val), trim(s.substr(endp-s.data())));
}
// Parses `( expr )`; anything not starting with '(' is delegated to
// parse_atomic. A missing closing parenthesis yields a null node.
static ParserResult parse_parenthesized(const size_t nplurals, const std::wstring_view &str)
{
	if (str.empty())
		return ParserResult(nullptr, str);
	if (str[0] != '(')
		return parse_atomic(nplurals, str);

	ParserResult inner = parse_expr(nplurals, str.substr(1));
	if (!inner.first)
		return inner;

	const bool closed = !inner.second.empty() && inner.second[0] == ')';
	if (closed)
		inner.second = trim(inner.second.substr(1));
	else
		inner.first = nullptr;
	return inner;
}
// Parses zero or more leading '!' followed by a parenthesized or atomic
// operand; each '!' wraps the operand in a logical-not node.
static ParserResult parse_negation(const size_t nplurals, const std::wstring_view &str)
{
	if (str.empty())
		return ParserResult(nullptr, str);

	const bool negated = str[0] == '!';
	if (!negated)
		return parse_parenthesized(nplurals, str);

	ParserResult operand = parse_negation(nplurals, trim(str.substr(1)));
	if (!operand.first)
		return operand;
	operand.first = GettextPluralForm::Ptr(new UnaryOperation<std::logical_not>(operand.first));
	return operand;
}
// Division that yields 0 for a zero divisor instead of invoking undefined
// behaviour; plural expressions are read from translation files and must
// not be able to crash the process.
template<typename T>
struct checked_divides
{
	constexpr T operator()(const T &lhs, const T &rhs) const
	{
		return rhs == 0 ? T(0) : lhs / rhs;
	}
};

// Remainder that yields 0 for a zero divisor (see checked_divides).
template<typename T>
struct checked_modulus
{
	constexpr T operator()(const T &lhs, const T &rhs) const
	{
		return rhs == 0 ? T(0) : lhs % rhs;
	}
};

// Precedence level for the left-associative operators `*`, `/` and `%`;
// operands are parsed by parse_negation.
static ParserResult parse_multiplicative(const size_t nplurals, const std::wstring_view &str)
{
	static const wchar_t *patterns[] = { L"*", L"/", L"%" };
	return parse_ltr<parse_negation, std::multiplies, checked_divides, checked_modulus>(nplurals, str, patterns);
}
// The following functions form the binary-operator precedence ladder, from
// tightest to loosest binding. Each parses a left-associative chain whose
// operands come from the next tighter level; the pattern array lists the
// operator spellings in the same order as the functors passed to parse_ltr.

// `+` and `-`.
static ParserResult parse_additive(const size_t nplurals, const std::wstring_view &str)
{
static const wchar_t *patterns[] = { L"+", L"-" };
return parse_ltr<parse_multiplicative, std::plus, std::minus>(nplurals, str, patterns);
}
// `<=`, `>=`, `<`, `>`. The two-character spellings come first so they are
// tried before their one-character prefixes.
static ParserResult parse_comparison(const size_t nplurals, const std::wstring_view &str)
{
static const wchar_t *patterns[] = { L"<=", L">=", L"<", L">" };
return parse_ltr<parse_additive, std::less_equal, std::greater_equal, std::less, std::greater>(nplurals, str, patterns);
}
// `==` and `!=`.
static ParserResult parse_equality(const size_t nplurals, const std::wstring_view &str)
{
static const wchar_t *patterns[] = { L"==", L"!=" };
return parse_ltr<parse_comparison, std::equal_to, std::not_equal_to>(nplurals, str, patterns);
}
// `&&`.
static ParserResult parse_conjunction(const size_t nplurals, const std::wstring_view &str)
{
static const wchar_t *and_pattern[] = { L"&&" };
return parse_ltr<parse_equality, std::logical_and>(nplurals, str, and_pattern);
}
// `||`.
static ParserResult parse_disjunction(const size_t nplurals, const std::wstring_view &str)
{
static const wchar_t *or_pattern[] = { L"||" };
return parse_ltr<parse_conjunction, std::logical_or>(nplurals, str, or_pattern);
}
// Parses `cond ? val : alt` (right-associative through recursion), or just a
// disjunction when no '?' follows it.
// Returns a null node when the ':' is missing or when any of the three
// sub-expressions failed to parse, rather than building a node around null
// operands.
static ParserResult parse_ternary(const size_t nplurals, const std::wstring_view &str)
{
	auto pres = parse_disjunction(nplurals, str);
	if (pres.second.empty() || pres.second[0] != '?') // no ? :
		return pres;
	auto cond = pres.first;

	pres = parse_ternary(nplurals, trim(pres.second.substr(1)));
	if (pres.second.empty() || pres.second[0] != ':')
		return ParserResult(nullptr, pres.second);
	auto val = pres.first;

	pres = parse_ternary(nplurals, trim(pres.second.substr(1)));
	if (!cond || !val || !pres.first)
		return ParserResult(nullptr, pres.second);
	return ParserResult(new TernaryOperation(cond, val, pres.first), pres.second);
}
// Entry point of the expression grammar: trims the input and parses it at
// the loosest level (the ternary operator).
static ParserResult parse_expr(const size_t nplurals, const std::wstring_view &str)
{
return parse_ternary(nplurals, trim(str));
}
// Parses a plural expression for a catalogue with `nplurals` forms.
// Returns nullptr when nplurals is 0 or when the expression leaves
// unconsumed text behind.
GettextPluralForm::Ptr GettextPluralForm::parse(const size_t nplurals, const std::wstring_view &str)
{
	if (nplurals == 0)
		return nullptr;

	const ParserResult parsed = parse_expr(nplurals, str);
	return parsed.second.empty() ? parsed.first : Ptr();
}
// Parses a header line of the form
//   Plural-Forms: nplurals=<count>; plural=<expression>;
// Returns nullptr when the line does not start with the expected prefix,
// does not end with ';', or has no "plural=" part.
GettextPluralForm::Ptr GettextPluralForm::parseHeaderLine(const std::wstring_view &str)
{
	const bool has_prefix = str_starts_with(str, L"Plural-Forms: nplurals=");
	const bool has_terminator = str_ends_with(str, L";");
	if (!has_prefix || !has_terminator)
		return nullptr;

	// The count starts right after the prefix.
	const size_t prefix_len = std::char_traits<wchar_t>::length(L"Plural-Forms: nplurals=");
	auto nplurals = wcstoul(str.data() + prefix_len, nullptr, 10);

	const size_t key_len = std::char_traits<wchar_t>::length(L"plural=");
	auto pos = str.find(L"plural=");
	if (pos == str.npos)
		return nullptr;

	// The expression runs from just after "plural=" up to, but excluding,
	// the final ';'.
	const size_t expr_begin = pos + key_len;
	const size_t expr_len = str.size() - expr_begin - 1;
	return parse(nplurals, str.substr(expr_begin, expr_len));
}

33
src/gettext_plural_form.h Normal file

@ -0,0 +1,33 @@
// Minetest
// SPDX-License-Identifier: LGPL-2.1-or-later
#pragma once
#include <string_view>
#include <memory>
// Note that this only implements a subset of C expressions. See:
// https://git.savannah.gnu.org/gitweb/?p=gettext.git;a=blob;f=gettext-runtime/intl/plural.y
// Abstract node of a parsed gettext plural expression. Calling a node with
// the count `n` evaluates the expression; size() is the number of plural
// forms the node was created for.
class GettextPluralForm
{
public:
using NumT = unsigned long;
using Ptr = std::shared_ptr<GettextPluralForm>;
// Number of plural forms (`nplurals`) this node was constructed with.
size_t size() const
{
return nplurals;
};
// Evaluates the expression for the given count.
virtual NumT operator()(const NumT) const = 0;
// True when the node has at least one plural form. Note this is the
// object's own conversion, distinct from testing a Ptr for null.
virtual operator bool() const
{
return size() > 0;
}
virtual ~GettextPluralForm() {};
// Parses the plural expression `str` for a catalogue with `nplurals` forms.
static GettextPluralForm::Ptr parse(const size_t nplurals, const std::wstring_view &str);
// Parses a whole "Plural-Forms: ..." header line.
static GettextPluralForm::Ptr parseHeaderLine(const std::wstring_view &str);
protected:
GettextPluralForm(size_t nplurals): nplurals(nplurals) {};
private:
const size_t nplurals;
};

@ -214,15 +214,28 @@ GUIEngine::GUIEngine(JoystickController *joystick,
/******************************************************************************/
std::string findLocaleFileInMods(const std::string &path, const std::string &filename)
// Appends each known translation extension to `path` and returns the first
// candidate that exists on disk, or "" when none does.
std::string findLocaleFileWithExtension(const std::string &path)
{
	// Listed in priority order: .mo first, then .po, then .tr.
	static const char *const extensions[] = { ".mo", ".po", ".tr" };
	for (const char *ext : extensions) {
		std::string candidate = path + ext;
		if (fs::PathExists(candidate))
			return candidate;
	}
	return "";
}
/******************************************************************************/
std::string findLocaleFileInMods(const std::string &path, const std::string &filename_no_ext)
{
std::vector<ModSpec> mods = flattenMods(getModsInPath(path, "root", true));
for (const auto &mod : mods) {
std::string ret = mod.path + DIR_DELIM "locale" DIR_DELIM + filename;
if (fs::PathExists(ret)) {
std::string ret = findLocaleFileWithExtension(
mod.path + DIR_DELIM "locale" DIR_DELIM + filename_no_ext);
if (!ret.empty())
return ret;
}
}
return "";
@ -235,19 +248,26 @@ Translations *GUIEngine::getContentTranslations(const std::string &path,
if (domain.empty() || lang_code.empty())
return nullptr;
std::string filename = domain + "." + lang_code + ".tr";
std::string key = path + DIR_DELIM "locale" DIR_DELIM + filename;
std::string filename_no_ext = domain + "." + lang_code;
std::string key = path + DIR_DELIM "locale" DIR_DELIM + filename_no_ext;
if (key == m_last_translations_key)
return &m_last_translations;
std::string trans_path = key;
ContentType type = getContentType(path);
if (type == ContentType::GAME)
trans_path = findLocaleFileInMods(path + DIR_DELIM "mods" DIR_DELIM, filename);
else if (type == ContentType::MODPACK)
trans_path = findLocaleFileInMods(path, filename);
// We don't need to search for locale files in a mod, as there's only one `locale` folder.
switch (getContentType(path)) {
case ContentType::GAME:
trans_path = findLocaleFileInMods(path + DIR_DELIM "mods" DIR_DELIM,
filename_no_ext);
break;
case ContentType::MODPACK:
trans_path = findLocaleFileInMods(path, filename_no_ext);
break;
default:
trans_path = findLocaleFileWithExtension(trans_path);
break;
}
if (trans_path.empty())
return nullptr;
@ -257,7 +277,7 @@ Translations *GUIEngine::getContentTranslations(const std::string &path,
std::string data;
if (fs::ReadFile(trans_path, data)) {
m_last_translations.loadTranslation(data);
m_last_translations.loadTranslation(fs::GetFilenameFromPath(trans_path.c_str()), data);
}
return &m_last_translations;

@ -2537,8 +2537,8 @@ bool Server::addMediaFile(const std::string &filename,
".png", ".jpg", ".bmp", ".tga",
".ogg",
".x", ".b3d", ".obj", ".gltf", ".glb",
// Custom translation file format
".tr",
// Translation file formats
".tr", ".po", ".mo",
NULL
};
if (removeStringEnd(filename, supported_ext).empty()) {
@ -2621,14 +2621,20 @@ void Server::fillMediaCache()
void Server::sendMediaAnnouncement(session_t peer_id, const std::string &lang_code)
{
std::string lang_suffix = ".";
lang_suffix.append(lang_code).append(".tr");
std::string translation_formats[3] = { ".tr", ".po", ".mo" };
std::string lang_suffixes[3];
for (size_t i = 0; i < 3; i++) {
lang_suffixes[i].append(".").append(lang_code).append(translation_formats[i]);
}
auto include = [&] (const std::string &name, const MediaInfo &info) -> bool {
auto include = [&] (const std::string &name, const MediaInfo &info) -> bool {
if (info.no_announce)
return false;
if (str_ends_with(name, ".tr") && !str_ends_with(name, lang_suffix))
return false;
for (size_t j = 0; j < 3; j++) {
if (str_ends_with(name, translation_formats[j]) && !str_ends_with(name, lang_suffixes[j])) {
return false;
}
}
return true;
};
@ -4167,12 +4173,11 @@ Translations *Server::getTranslationLanguage(const std::string &lang_code)
// [] will create an entry
auto *translations = &server_translations[lang_code];
std::string suffix = "." + lang_code + ".tr";
for (const auto &i : m_media) {
if (str_ends_with(i.first, suffix)) {
if (Translations::getFileLanguage(i.first) == lang_code) {
std::string data;
if (fs::ReadFile(i.second.path, data, true)) {
translations->loadTranslation(data);
translations->loadTranslation(i.first, data);
}
}
}

@ -19,7 +19,9 @@ with this program; if not, write to the Free Software Foundation, Inc.,
#include "translation.h"
#include "log.h"
#include "util/hex.h"
#include "util/string.h"
#include "gettext.h"
#include <unordered_map>
@ -29,10 +31,22 @@ static Translations client_translations;
Translations *g_client_translations = &client_translations;
#endif
// Returns the part of a translation file name between the last '.' of the
// extension-stripped name and the extension, i.e. the language code of a
// name shaped like "<domain>.<lang>.<ext>". Returns "" when the stripped
// name contains no '.'.
// NOTE(review): presumably removeStringEnd yields an empty result for names
// without one of the listed extensions, and a view into `filename` (so the
// returned view stays valid as long as the caller's string) -- confirm
// against util/string.h.
const std::string_view Translations::getFileLanguage(const std::string &filename)
{
const char *translate_ext[] = {
".tr", ".po", ".mo", NULL
};
auto basename = removeStringEnd(filename, translate_ext);
auto pos = basename.rfind('.');
if (pos == basename.npos)
return "";
return basename.substr(pos+1);
}
void Translations::clear()
{
m_translations.clear();
m_plural_translations.clear();
}
const std::wstring &Translations::getTranslation(
@ -45,7 +59,52 @@ const std::wstring &Translations::getTranslation(
return s;
}
void Translations::loadTranslation(const std::string &data)
// Looks up the plural translation of `s` in `textdomain` for the count
// `number`. Falls back to `s` itself when there is no entry, when the
// plural expression selects an index outside the stored variants, or when
// the selected variant is empty.
const std::wstring &Translations::getPluralTranslation(
		const std::wstring &textdomain, const std::wstring &s, unsigned long int number) const
{
	const auto found = m_plural_translations.find(textdomain + L"|" + s);
	if (found == m_plural_translations.end())
		return s;

	const auto &[form, variants] = found->second;
	// The plural expression maps the count to a variant index.
	const auto index = (*form)(number);
	if (index >= variants.size() || variants[index].empty())
		return s;
	return variants[index];
}
// Registers `translated` for `original` under the key
// "<textdomain>|<original>". Empty translations are ignored, and emplace
// leaves an already stored key untouched.
void Translations::addTranslation(
		const std::wstring &textdomain, const std::wstring &original, const std::wstring &translated)
{
	if (translated.empty())
		return;
	m_translations.emplace(textdomain + L"|" + original, translated);
}
// Registers the plural variants `translated` for `original` under the key
// "<textdomain>|<original>", paired with the plural expression `plural`.
// The entry is rejected (and logged) when no plural expression is available
// or when the number of variants differs from plural->size().
void Translations::addPluralTranslation(
		const std::wstring &textdomain, const GettextPluralForm::Ptr &plural, const std::wstring &original, std::vector<std::wstring> &translated)
{
	// The missing-header problem affects every plural entry of a file, so it
	// is reported once per process only.
	static bool warned = false;
	if (!plural) {
		if (!warned) {
			// Set the flag only after checking it; setting it first made the
			// message unreachable.
			warned = true;
			errorstream << "Translations: plural translation entry defined without Plural-Forms" << std::endl;
		}
		return;
	} else if (translated.size() != plural->size()) {
		errorstream << "Translations: incorrect number of plural translations (expected " << plural->size() << ", got " << translated.size() << ")" << std::endl;
		return;
	}
	std::wstring key = textdomain + L"|" + original;
	m_plural_translations.emplace(std::move(key), std::pair(plural, translated));
}
void Translations::loadTrTranslation(const std::string &data)
{
std::istringstream is(data);
std::string textdomain_narrow;
@ -145,11 +204,455 @@ void Translations::loadTranslation(const std::string &data)
}
}
std::wstring oword1 = word1.str(), oword2 = word2.str();
if (!oword2.empty()) {
std::wstring translation_index = textdomain + L"|";
translation_index.append(oword1);
m_translations.emplace(std::move(translation_index), std::move(oword2));
}
addTranslation(textdomain, word1.str(), word2.str());
}
}
// Returns `str` with C-style escape sequences replaced by the characters
// they denote. Malformed or unknown sequences are logged to errorstream and
// dropped; the remaining text is still processed where possible.
std::wstring Translations::unescapeC(const std::wstring &str)
{
	// Process escape sequences in str as if it were a C string
	std::wstring result;
	size_t i = 0;
	while (i < str.length()) {
		if (str[i] != L'\\') {
			result.push_back(str[i]);
			i++;
			continue;
		}
		i++;
		if (i == str.length()) {
			errorstream << "Unfinished escape sequence at the end of \"" << wide_to_utf8(str) << "\"" << std::endl;
			break;
		}
		switch (str[i]) {
			// From https://en.wikipedia.org/wiki/Escape_sequences_in_C#Table_of_escape_sequences
			case L'a': result.push_back(L'\a'); break;
			case L'b': result.push_back(L'\b'); break;
			case L'e': result.push_back(L'\x1b'); break;
			case L'f': result.push_back(L'\f'); break;
			case L'n': result.push_back(L'\n'); break;
			case L'r': result.push_back(L'\r'); break;
			case L't': result.push_back(L'\t'); break;
			case L'v': result.push_back(L'\v'); break;
			case L'\\': result.push_back(L'\\'); break;
			case L'\'': result.push_back(L'\''); break;
			case L'"': result.push_back(L'"'); break;
			case L'?': result.push_back(L'?'); break;
			case L'0': case L'1': case L'2': case L'3': case L'4': case L'5': case L'6': case L'7': {
				// Octal escape: up to three octal digits; values above 0xff
				// are dropped.
				size_t j = 0;
				wchar_t c = 0;
				for (; j < 3 && i+j < str.length() && L'0' <= str[i+j] && str[i+j] <= L'7'; j++) {
					c = c * 8 + (str[i+j] - L'0');
				}
				if (c <= 0xff) {
					result.push_back(c);
				}
				i += j;
				continue;
			}
			case L'x': {
				i++;
				if (i >= str.length()) {
					errorstream << "Unfinished escape sequence at the end of \"" << wide_to_utf8(str) << "\"" << std::endl;
					// Nothing is left to read; the loop condition ends the scan.
					continue;
				}
				char32_t c = 0;
				size_t j = 0;
				unsigned char v;
				// Only characters that fit into a byte may be narrowed to
				// char; otherwise a wide character such as U+0130 would be
				// misread as the digit '0'.
				for (; i+j < str.length() && str[i+j] <= 0xff && hex_digit_decode((char)str[i+j], v); j++) {
					c = c * 16 + v;
				}
				if (j == 0) {
					errorstream << "Invalid escape sequence \\x, ignoring" << std::endl;
					continue;
				}
				// If character fits in 16 bits and is not part of surrogate pair, insert it.
				// Otherwise, silently drop it: this is valid since \x escape sequences with
				// values above 0xff are implementation-defined
				if ((c < 0xd800) || (0xe000 <= c && c <= 0xffff)) {
					result.push_back(c);
				}
				i += j;
				continue;
			}
			case L'u': {
				i++;
				if (i + 4 > str.length()) {
					errorstream << "Unfinished escape sequence at the end of \"" << wide_to_utf8(str) << "\"" << std::endl;
					// The truncated sequence reaches the end of the string:
					// drop it and stop, without a second "invalid" report.
					i = str.length();
					continue;
				}
				char16_t c = 0;
				bool ok = true;
				for (size_t j = 0; j < 4; j++) {
					unsigned char v;
					if (str[i+j] <= 0xff && hex_digit_decode((char)str[i+j], v)) {
						c = c * 16 + v;
					} else {
						errorstream << "Invalid unicode escape sequence \"\\u" << wide_to_utf8(str.substr(i, 4)) << "\", ignoring" << std::endl;
						ok = false;
						break;
					}
				}
				if (ok) {
					wide_add_codepoint(result, c);
				}
				i += 4;
				continue;
			}
			case L'U': {
				i++;
				if (i + 8 > str.length()) {
					errorstream << "Unfinished escape sequence at the end of \"" << wide_to_utf8(str) << "\"" << std::endl;
					// Same handling as for a truncated \u sequence.
					i = str.length();
					continue;
				}
				char32_t c = 0;
				bool ok = true;
				for (size_t j = 0; j < 8; j++) {
					unsigned char v;
					if (str[i+j] <= 0xff && hex_digit_decode((char)str[i+j], v)) {
						c = c * 16 + v;
					} else {
						errorstream << "Invalid unicode escape sequence \"\\U" << wide_to_utf8(str.substr(i, 8)) << "\", ignoring" << std::endl;
						ok = false;
						break;
					}
				}
				if (ok) {
					wide_add_codepoint(result, c);
				}
				i += 8;
				continue;
			}
			default: {
				errorstream << "Unknown escape sequence \"\\" << str[i] << "\", ignoring" << std::endl;
				break;
			}
		}
		// Single-character escapes (and unknown ones) consume one character.
		i++;
	}
	return result;
}
void Translations::loadPoEntry(const std::wstring &basefilename, const GettextPluralForm::Ptr &plural_form, const std::map<std::wstring, std::wstring> &entry)
{
	// Add one parsed PO entry to the translation tables.
	// The caller guarantees that entry contains a "msgid" key.

	// An explicit msgctxt takes precedence over the file's base name as text domain
	const auto ctx_it = entry.find(L"msgctxt");
	std::wstring textdomain = (ctx_it != entry.end()) ? ctx_it->second : basefilename;

	std::wstring original = entry.at(L"msgid");
	const auto plural_it = entry.find(L"msgid_plural");

	if (plural_it != entry.end()) {
		// Plural entry: gather msgstr[0], msgstr[1], ... until the first gap
		std::vector<std::wstring> translations;
		int index = 0;
		while (true) {
			const auto form_it = entry.find(L"msgstr[" + std::to_wstring(index) + L"]");
			if (form_it == entry.end())
				break;
			translations.push_back(form_it->second);
			++index;
		}
		// Register the forms under both the singular and the plural source string
		addPluralTranslation(textdomain, plural_form, original, translations);
		addPluralTranslation(textdomain, plural_form, plural_it->second, translations);
		return;
	}

	// Singular entry: a msgstr field is mandatory
	const auto msgstr_it = entry.find(L"msgstr");
	if (msgstr_it == entry.end()) {
		errorstream << "Could not load translation: entry for msgid \"" << wide_to_utf8(original) << "\" does not contain a msgstr field" << std::endl;
		return;
	}
	addTranslation(textdomain, original, msgstr_it->second);
}
bool Translations::inEscape(const std::wstring &line, size_t pos)
{
	// Tells whether the character at pos is escaped, i.e. whether it is
	// preceded by an odd number of consecutive backslashes.
	if (pos == std::wstring::npos || pos == 0)
		return false;

	// Walk backwards over the run of backslashes that ends just before pos
	size_t backslashes = 0;
	size_t cursor = pos;
	while (cursor > 0 && line[cursor - 1] == L'\\') {
		++backslashes;
		--cursor;
	}
	return (backslashes & 1) != 0;
}
std::optional<std::pair<std::wstring, std::wstring>> Translations::parsePoLine(const std::string &line)
{
	// Split one PO file line into (keyword, unescaped text).
	// Returns std::nullopt for empty or unparsable lines. Comment lines are
	// returned with the keyword "#" and the rest of the line as text.
	if (line.empty())
		return std::nullopt;

	if (line.front() == '#')
		return std::make_pair(std::wstring(L"#"), utf8_to_wide(line.substr(1)));

	const std::wstring wline = utf8_to_wide(line);
	// Defend against some possibly malformed utf8 string, which
	// is empty after converting to wide string
	if (wline.empty())
		return std::nullopt;

	const size_t first_quote = wline.find(L'"');
	if (first_quote == std::wstring::npos) {
		errorstream << "Unable to parse po file line: " << line << std::endl;
		return std::nullopt;
	}

	// Everything before the first quote is the keyword (may be empty)
	std::wstring keyword = trim(wline.substr(0, first_quote));
	std::wstring text;

	// The rest is a sequence of adjacent quoted strings that get concatenated
	size_t cursor = first_quote;
	while (cursor < wline.size()) {
		const size_t open = wline.find(L'"', cursor);
		if (open == std::wstring::npos) {
			// No further string: only whitespace may follow
			if (!trim(wline.substr(cursor)).empty()) {
				errorstream << "Excessive content at the end of po file line: " << line << std::endl;
				return std::nullopt;
			}
			break;
		}
		// Only whitespace is allowed between two quoted strings
		if (!trim(wline.substr(cursor, open - cursor)).empty()) {
			errorstream << "Excessive content within string concatenation in po file line: " << line << std::endl;
			return std::nullopt;
		}
		// Find the closing quote, skipping quotes that are escaped
		size_t close = wline.find(L'"', open + 1);
		while (inEscape(wline, close))
			close = wline.find(L'"', close + 1);
		if (close == std::wstring::npos) {
			errorstream << "String extends beyond po file line: " << line << std::endl;
			return std::nullopt;
		}
		text.append(unescapeC(wline.substr(open + 1, close - open - 1)));
		cursor = close + 1;
	}
	return std::make_pair(keyword, text);
}
void Translations::loadPoTranslation(const std::string &basefilename, const std::string &data)
{
std::istringstream is(data);
std::string line;
std::map<std::wstring, std::wstring> last_entry;
std::wstring last_key;
std::wstring wbasefilename = utf8_to_wide(basefilename);
GettextPluralForm::Ptr plural;
bool skip = false;
bool skip_last = false;
while (is.good()) {
std::getline(is, line);
// Trim last character if file was using a \r\n line ending
if (line.length () > 0 && line[line.length() - 1] == '\r')
line.resize(line.length() - 1);
auto parsed = parsePoLine(line);
if (!parsed)
continue;
auto prefix = parsed->first;
auto s = parsed->second;
if (prefix == L"#") {
if (s[0] == L',') {
// Skip fuzzy entries
if ((s + L' ').find(L" fuzzy ") != line.npos) {
if (last_entry.empty())
skip_last = true;
else
skip = true;
}
}
continue;
}
if (prefix.empty()) {
// Continuation of previous line
if (last_key == L"") {
errorstream << "Unable to parse po file: continuation of non-existant previous line" << std::endl;
continue;
}
last_entry[last_key].append(s);
continue;
}
if (prefix == L"msgctxt" || (prefix == L"msgid" && last_entry.find(L"msgid") != last_entry.end())) {
if (last_entry.find(L"msgid") != last_entry.end()) {
if (!skip_last) {
if (last_entry[L"msgid"].empty()) {
if (last_entry.find(L"msgstr") == last_entry.end()) {
errorstream << "Header entry has no \"msgstr\" field" << std::endl;
} else if (plural) {
errorstream << "Attempt to override existing po header entry" << std::endl;
} else {
for (auto &line: str_split(last_entry[L"msgstr"], L'\n')) {
if (str_starts_with(line, L"Plural-Forms:")) {
plural = GettextPluralForm::parseHeaderLine(line);
if (!(plural && *plural)) {
errorstream << "Invalid Plural-Forms line: " << wide_to_utf8(line) << std::endl;
}
}
}
}
} else {
loadPoEntry(wbasefilename, plural, last_entry);
}
}
last_entry.clear();
skip_last = skip;
} else if (!last_entry.empty()) {
errorstream << "Unable to parse po file: previous entry has no \"msgid\" field but is not empty" << std::endl;
last_entry.clear();
skip_last = skip;
}
} else {
// prevent malpositioned fuzzy flag from influencing the following entry
skip = false;
}
if (last_entry.find(prefix) != last_entry.end()) {
errorstream << "Unable to parse po file: Key \"" << wide_to_utf8(prefix) << "\" was already present in previous entry" << std::endl;
continue;
}
last_key = prefix;
last_entry[prefix] = s;
}
if (last_entry.find(L"msgid") != last_entry.end()) {
if (!skip_last && !last_entry[L"msgid"].empty())
loadPoEntry(wbasefilename, plural, last_entry);
} else if (!last_entry.empty()) {
errorstream << "Unable to parse po file: Last entry has no \"msgid\" field" << std::endl;
}
}
void Translations::loadMoEntry(const std::wstring &basefilename, const GettextPluralForm::Ptr &plural_form, const std::string &original, const std::string &translated)
{
	// Add one (original, translated) pair from an MO file to the tables.
	// The original may carry a context prefix terminated by EOT (0x04), and
	// plural entries separate their forms with NUL bytes.
	std::wstring textdomain;
	std::string noriginal;

	const size_t ctx_end = original.find('\x04'); // EOT character
	if (ctx_end == std::string::npos) {
		// No context: the file's base name is the text domain
		textdomain = basefilename;
		noriginal = original;
	} else {
		textdomain = utf8_to_wide(original.substr(0, ctx_end));
		noriginal = original.substr(ctx_end + 1);
	}

	const size_t plural_sep = noriginal.find('\0');
	if (plural_sep == std::string::npos) {
		addTranslation(textdomain, utf8_to_wide(noriginal), utf8_to_wide(translated));
		return;
	}

	// Plural entry: register the forms under both the singular and plural msgid
	std::vector<std::wstring> translations = str_split(utf8_to_wide(translated), L'\0');
	addPluralTranslation(textdomain, plural_form, utf8_to_wide(noriginal.substr(0, plural_sep)), translations);
	addPluralTranslation(textdomain, plural_form, utf8_to_wide(noriginal.substr(plural_sep + 1)), translations);
}
// Read a 32-bit unsigned integer from data at byte offset pos.
//
// is_be: true to interpret the four bytes as big-endian, false for little-endian
// data:  buffer to read from
// pos:   byte offset of the first of the four bytes
//
// Returns 0 if the four bytes do not lie completely inside data.
inline u32 readVarEndian(bool is_be, std::string_view data, size_t pos = 0)
{
	// Written without "pos + 4" so that a huge pos cannot wrap around and
	// slip past the bounds check.
	if (data.size() < 4 || pos > data.size() - 4)
		return 0;

	u32 value = 0;
	for (size_t k = 0; k < 4; k++) {
		const u32 byte = (unsigned char)data[pos + k];
		// Big-endian: first byte is most significant; little-endian: least
		const unsigned int shift = 8 * (unsigned int)(is_be ? 3 - k : k);
		value |= byte << shift;
	}
	return value;
}
void Translations::loadMoTranslation(const std::string &basefilename, const std::string &data)
{
size_t length = data.length();
std::wstring wbasefilename = utf8_to_wide(basefilename);
GettextPluralForm::Ptr plural_form;
if (length < 20) {
errorstream << "Ignoring too short mo file" << std::endl;
return;
}
u32 magic = readVarEndian(false, data);
bool is_be;
if (magic == 0x950412de) {
is_be = false;
} else if (magic == 0xde120495) {
is_be = true;
} else {
errorstream << "Bad magic number for mo file: 0x" << hex_encode(data.substr(0, 4)) << std::endl;
return;
}
u32 revision = readVarEndian(is_be, data, 4);
if (revision != 0) {
errorstream << "Unknown revision " << revision << " for mo file" << std::endl;
return;
}
u32 nstring = readVarEndian(is_be, data, 8);
u32 original_offset = readVarEndian(is_be, data, 12);
u32 translated_offset = readVarEndian(is_be, data, 16);
if (length < original_offset + 8 * (u64)nstring || length < translated_offset + 8 * (u64)nstring) {
errorstream << "Ignoring truncated mo file" << std::endl;
return;
}
for (u32 i = 0; i < nstring; i++) {
u32 original_len = readVarEndian(is_be, data, original_offset + 8 * i);
u32 original_off = readVarEndian(is_be, data, original_offset + 8 * i + 4);
u32 translated_len = readVarEndian(is_be, data, translated_offset + 8 * i);
u32 translated_off = readVarEndian(is_be, data, translated_offset + 8 * i + 4);
if (length < original_off + (u64)original_len || length < translated_off + (u64)translated_len) {
errorstream << "Ignoring translation out of mo file" << std::endl;
continue;
}
if (data[original_off+original_len] != '\0' || data[translated_off+translated_len] != '\0') {
errorstream << "String in mo entry does not have a trailing NUL" << std::endl;
continue;
}
auto original = data.substr(original_off, original_len);
auto translated = data.substr(translated_off, translated_len);
if (original.empty()) {
if (plural_form) {
errorstream << "Attempt to override existing mo header entry" << std::endl;
} else {
for (auto &line: str_split(translated, '\n')) {
if (str_starts_with(line, "Plural-Forms:")) {
plural_form = GettextPluralForm::parseHeaderLine(utf8_to_wide(line));
if (!(plural_form && *plural_form)) {
errorstream << "Invalid Plural-Forms line: " << line << std::endl;
}
}
}
}
} else {
loadMoEntry(wbasefilename, plural_form, original, translated);
}
}
return;
}
void Translations::loadTranslation(const std::string &filename, const std::string &data)
{
	// Dispatch to the parser matching the file extension (.tr, .po or .mo).
	// For PO and MO files, the part of the filename before the first '.' is
	// passed on as the base name.
	const auto has_extension = [&filename](const char *extension) {
		const char *candidates[] = { extension, NULL };
		return !removeStringEnd(filename, candidates).empty();
	};

	if (has_extension(".tr")) {
		loadTrTranslation(data);
		return;
	}

	const bool is_po = has_extension(".po");
	if (is_po || has_extension(".mo")) {
		std::string basefilename = str_split(filename, '.')[0];
		if (is_po)
			loadPoTranslation(basefilename, data);
		else
			loadMoTranslation(basefilename, data);
		return;
	}

	errorstream << "loadTranslation called with invalid filename: \"" << filename << "\"" << std::endl;
}

@ -19,8 +19,12 @@ with this program; if not, write to the Free Software Foundation, Inc.,
#pragma once
#include "gettext_plural_form.h"
#include <unordered_map>
#include <map>
#include <optional>
#include <string>
#include <vector>
class Translations;
#ifndef SERVER
@ -30,11 +34,39 @@ extern Translations *g_client_translations;
// Stores translations loaded from translation files and looks them up by
// text domain and source string.
class Translations
{
public:
// Load translation data. The two-argument overload selects the parser from
// the extension of filename (.tr, .po or .mo).
void loadTranslation(const std::string &data);
void loadTranslation(const std::string &filename, const std::string &data);
void clear();
// Look up the translation of s in textdomain.
// NOTE(review): the unit tests expect s itself to come back when no
// translation is available -- confirm against the implementation.
const std::wstring &getTranslation(const std::wstring &textdomain,
const std::wstring &s) const;
const std::wstring &getTranslation(
const std::wstring &textdomain, const std::wstring &s) const;
// Like getTranslation(), but number is used to select the plural form.
const std::wstring &getPluralTranslation(const std::wstring &textdomain,
const std::wstring &s, unsigned long int number) const;
static const std::string_view getFileLanguage(const std::string &filename);
// A file counts as a translation file if getFileLanguage() returns a
// non-empty language for it.
static inline bool isTranslationFile(const std::string &filename)
{
return getFileLanguage(filename) != "";
}
// for testing
// The plural table is counted at half its size: the PO and MO loaders
// register each plural entry under both its singular and its plural msgid.
inline size_t size()
{
return m_translations.size() + m_plural_translations.size()/2;
}
private:
// Singular translations.
std::unordered_map<std::wstring, std::wstring> m_translations;
// Plural translations: the plural rule together with the list of forms.
std::unordered_map<std::wstring, std::pair<GettextPluralForm::Ptr, std::vector<std::wstring>>> m_plural_translations;
void addTranslation(const std::wstring &textdomain, const std::wstring &original,
const std::wstring &translated);
void addPluralTranslation(const std::wstring &textdomain,
const GettextPluralForm::Ptr &plural,
const std::wstring &original,
std::vector<std::wstring> &translated);
// Helpers for the PO parser
std::wstring unescapeC(const std::wstring &str);
std::optional<std::pair<std::wstring, std::wstring>> parsePoLine(const std::string &line);
bool inEscape(const std::wstring &str, size_t pos);
// Per-entry and per-format loaders
void loadPoEntry(const std::wstring &basefilename, const GettextPluralForm::Ptr &plural_form, const std::map<std::wstring, std::wstring> &entry);
void loadMoEntry(const std::wstring &basefilename, const GettextPluralForm::Ptr &plural_form, const std::string &original, const std::string &translated);
void loadTrTranslation(const std::string &data);
void loadPoTranslation(const std::string &basefilename, const std::string &data);
void loadMoTranslation(const std::string &basefilename, const std::string &data);
};

@ -37,6 +37,7 @@ set (UNITTEST_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/test_socket.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_servermodmanager.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_threading.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_translations.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_utilities.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_voxelarea.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_voxelalgorithms.cpp

@ -122,7 +122,7 @@ void TestServerModManager::testGetMods()
ServerModManager sm(m_worlddir);
const auto &mods = sm.getMods();
// `ls ./games/devtest/mods | wc -l` + 1 (test mod)
UASSERTEQ(std::size_t, mods.size(), 33 + 1);
UASSERTEQ(std::size_t, mods.size(), 34 + 1);
// Ensure we found basenodes mod (part of devtest)
// and test_mod (for testing MINETEST_MOD_PATH).

@ -0,0 +1,64 @@
// Minetest
// SPDX-License-Identifier: LGPL-2.1-or-later
#include "translation.h"
#include "filesys.h"
#include "content/subgames.h"
#include "catch.h"
#define CONTEXT L"context"
#define TEXTDOMAIN_PO L"translation_po"
#define TEST_PO_NAME "translation_po.de.po"
#define TEST_MO_NAME "translation_mo.de.mo"
// Reads a file from the test_locale directory of devtest's "testtranslations"
// mod and returns its contents. Fails the current test if the devtest game
// or the file cannot be found.
static std::string read_translation_file(const std::string &filename)
{
	auto gamespec = findSubgame("devtest");
	REQUIRE(gamespec.isValid());

	// <gamemods>/testtranslations/test_locale/<filename>
	auto path = gamespec.gamemods_path;
	path += DIR_DELIM "testtranslations" DIR_DELIM "test_locale" DIR_DELIM;
	path += filename;

	std::string content;
	REQUIRE(fs::ReadFile(path, content));
	return content;
}
// Tests for the Plural-Forms expression parser and for the PO and MO loaders,
// using the translation files shipped with devtest.
TEST_CASE("test translations")
{
SECTION("Plural-Forms function for translations")
{
// The expression mixes arithmetic, comparison, logical and nested
// ternary operators.
auto form = GettextPluralForm::parseHeaderLine(L"Plural-Forms: nplurals=3; plural= (n-1+1)<=1 ? n||1?0:1 : 1?(!!2):2;");
REQUIRE(form);
REQUIRE(form->size() == 3);
CHECK((*form)(0) == 0);
CHECK((*form)(1) == 0);
CHECK((*form)(2) == 1);
}
SECTION("PO file parser")
{
Translations translations;
translations.loadTranslation(TEST_PO_NAME, read_translation_file(TEST_PO_NAME));
CHECK(translations.size() == 5);
CHECK(translations.getTranslation(TEXTDOMAIN_PO, L"foo") == L"bar");
// Strings expected to come back unchanged
CHECK(translations.getTranslation(TEXTDOMAIN_PO, L"Untranslated") == L"Untranslated");
CHECK(translations.getTranslation(TEXTDOMAIN_PO, L"Fuzzy") == L"Fuzzy");
// Escape sequences and multi-line strings
CHECK(translations.getTranslation(TEXTDOMAIN_PO, L"Multi\\line\nstring") == L"Multi\\\"li\\ne\nresult");
CHECK(translations.getTranslation(TEXTDOMAIN_PO, L"Wrong order") == L"Wrong order");
// Plural entries can be looked up through either of their source strings
CHECK(translations.getPluralTranslation(TEXTDOMAIN_PO, L"Plural form", 1) == L"Singular result");
CHECK(translations.getPluralTranslation(TEXTDOMAIN_PO, L"Singular form", 0) == L"Plural result");
CHECK(translations.getPluralTranslation(TEXTDOMAIN_PO, L"Partial translation", 1) == L"Partially translated");
CHECK(translations.getPluralTranslation(TEXTDOMAIN_PO, L"Partial translations", 2) == L"Partial translations");
// Lookup in the CONTEXT text domain instead of the file's base name
CHECK(translations.getTranslation(CONTEXT, L"With context") == L"Has context");
}
SECTION("MO file parser")
{
Translations translations;
translations.loadTranslation(TEST_MO_NAME, read_translation_file(TEST_MO_NAME));
CHECK(translations.size() == 2);
CHECK(translations.getTranslation(CONTEXT, L"With context") == L"Has context");
CHECK(translations.getPluralTranslation(CONTEXT, L"Plural form", 1) == L"Singular result");
CHECK(translations.getPluralTranslation(CONTEXT, L"Singular form", 0) == L"Plural result");
}
}

@ -154,6 +154,16 @@ std::string wide_to_utf8(std::wstring_view input)
return out;
}
// Appends codepoint to result as a single wide character.
// Surrogate code points and values above U+10FFFF are not valid; they are
// replaced with the unicode replacement character U+FFFD.
void wide_add_codepoint(std::wstring &result, char32_t codepoint)
{
	const bool is_surrogate = codepoint >= 0xD800 && codepoint <= 0xDFFF;
	const bool out_of_range = codepoint > 0x10FFFF;

	const char32_t emitted = (is_surrogate || out_of_range) ? 0xFFFD : codepoint;
	result.push_back(static_cast<wchar_t>(emitted));
}
#else // _WIN32
std::wstring utf8_to_wide(std::string_view input)
@ -180,6 +190,29 @@ std::string wide_to_utf8(std::wstring_view input)
return out;
}
// Appends codepoint to result encoded as UTF-16: one unit for the basic
// multilingual plane, a surrogate pair above it.
// Surrogate code points and values above U+10FFFF are not valid; they are
// replaced with the unicode replacement character U+FFFD.
void wide_add_codepoint(std::wstring &result, char32_t codepoint)
{
	const bool is_surrogate = codepoint >= 0xD800 && codepoint <= 0xDFFF;
	if (is_surrogate || codepoint > 0x10FFFF) {
		result.push_back(0xFFFD);
		return;
	}

	if (codepoint < 0x10000) {
		// Fits into a single UTF-16 unit
		result.push_back((wchar_t) codepoint);
		return;
	}

	// Split the 20-bit offset above U+10000 into high and low surrogate
	const char32_t offset = codepoint - 0x10000;
	const char32_t high = 0xD800 | (offset >> 10);
	const char32_t low = 0xDC00 | (offset & 0x3FF);
	result.push_back((wchar_t) high);
	result.push_back((wchar_t) low);
}
#endif // _WIN32
@ -668,13 +701,20 @@ std::string wrap_rows(std::string_view from, unsigned row_len, bool has_color_co
* We get the argument "White", translated, and create a template string with "@1" instead of it.
* We finally get the template "@1 Wool" that was used in the beginning, which we translate
* before filling it again.
*
* The \x1bT marking the beginning of a translated string allows two '@'-separated arguments:
* - The first one is the textdomain/context in which the string is to be translated. Most often,
* this is the name of the mod which asked for the translation.
* - The second argument, if present, should be an integer; it is used to decide which plural form
* to use, for languages containing several plural forms.
*/
static void translate_all(std::wstring_view s, size_t &i,
Translations *translations, std::wstring &res);
static void translate_string(std::wstring_view s, Translations *translations,
const std::wstring &textdomain, size_t &i, std::wstring &res)
const std::wstring &textdomain, size_t &i, std::wstring &res,
bool use_plural, unsigned long int number)
{
std::vector<std::wstring> args;
int arg_number = 1;
@ -751,8 +791,17 @@ static void translate_string(std::wstring_view s, Translations *translations,
}
// Translate the template.
const std::wstring &toutput = translations ?
translations->getTranslation(textdomain, output) : output;
std::wstring toutput;
if (translations != nullptr) {
if (use_plural)
toutput = translations->getPluralTranslation(
textdomain, output, number);
else
toutput = translations->getTranslation(
textdomain, output);
} else {
toutput = output;
}
// Put back the arguments in the translated template.
size_t j = 0;
@ -835,10 +884,37 @@ static void translate_all(std::wstring_view s, size_t &i,
} else if (parts[0] == L"T") {
// Beginning of translated string.
std::wstring textdomain;
bool use_plural = false;
unsigned long int number = 0;
if (parts.size() > 1)
textdomain = parts[1];
if (parts.size() > 2 && parts[2] != L"") {
// parts[2] should contain a number used for selecting
// the plural form.
// However, we can't blindly cast it to an unsigned long int,
// as it might be too large for that.
//
// We follow the advice of gettext and reduce integers larger than 1000000
// to something in the range [1000000, 2000000), with the same last 6 digits.
//
// https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html
constexpr unsigned long int max = 1000000;
use_plural = true;
number = 0;
for (char c : parts[2]) {
if (L'0' <= c && c <= L'9') {
number = (10 * number + (unsigned long int)(c - L'0'));
if (number >= 2 * max) number = (number % max) + max;
} else {
// Invalid number
use_plural = false;
break;
}
}
}
std::wstring translated;
translate_string(s, translations, textdomain, i, translated);
translate_string(s, translations, textdomain, i, translated, use_plural, number);
res.append(translated);
} else {
// Another escape sequence, such as colors. Preserve it.

@ -32,6 +32,7 @@ with this program; if not, write to the Free Software Foundation, Inc.,
#include <sstream>
#include <iomanip>
#include <cctype>
#include <cwctype>
#include <unordered_map>
class Translations;
@ -87,6 +88,8 @@ struct FlagDesc {
std::wstring utf8_to_wide(std::string_view input);
std::string wide_to_utf8(std::wstring_view input);
void wide_add_codepoint(std::wstring &result, char32_t codepoint);
std::string urlencode(std::string_view str);
std::string urldecode(std::string_view str);
@ -325,19 +328,30 @@ inline std::string lowercase(std::string_view str)
}
// Returns true if c is a whitespace character according to std::isspace.
inline bool my_isspace(const char c)
{
	// std::isspace requires a value representable as unsigned char (or EOF).
	// char may be signed, so convert first to avoid undefined behaviour for
	// bytes >= 0x80.
	return std::isspace(static_cast<unsigned char>(c)) != 0;
}
// Wide-character counterpart: true if c is whitespace according to std::iswspace.
inline bool my_isspace(const wchar_t c)
{
	const int is_space = std::iswspace(static_cast<std::wint_t>(c));
	return is_space != 0;
}
/**
* @param str
* @return A view of \p str with leading and trailing whitespace removed.
*/
inline std::string_view trim(std::string_view str)
template<typename T>
inline std::basic_string_view<T> trim(const std::basic_string_view<T> &str)
{
size_t front = 0;
size_t back = str.size();
while (front < back && std::isspace(str[front]))
while (front < back && my_isspace(str[front]))
++front;
while (back > front && std::isspace(str[back - 1]))
while (back > front && my_isspace(str[back - 1]))
--back;
return str.substr(front, back - front);
@ -351,16 +365,24 @@ inline std::string_view trim(std::string_view str)
* @param str
* @return A copy of \p str with leading and trailing whitespace removed.
*/
inline std::string trim(std::string &&str)
template<typename T>
inline std::basic_string<T> trim(std::basic_string<T> &&str)
{
std::string ret(trim(std::string_view(str)));
std::basic_string<T> ret(trim(std::basic_string_view<T>(str)));
return ret;
}
// The above declaration causes ambiguity with char pointers so we have to fix that:
inline std::string_view trim(const char *str)
template<typename T>
inline std::basic_string_view<T> trim(const std::basic_string<T> &str)
{
return trim(std::string_view(str));
return trim(std::basic_string_view<T>(str));
}
// The above declaration causes ambiguity with char pointers so we have to fix that:
template<typename T>
inline std::basic_string_view<T> trim(const T *str)
{
return trim(std::basic_string_view<T>(str));
}