diff --git a/builtin/common/misc_helpers.lua b/builtin/common/misc_helpers.lua index 2ad9b10af..d0942b2d2 100644 --- a/builtin/common/misc_helpers.lua +++ b/builtin/common/misc_helpers.lua @@ -574,12 +574,14 @@ function core.strip_colors(str) return (str:gsub(ESCAPE_CHAR .. "%([bc]@[^)]+%)", "")) end -function core.translate(textdomain, str, ...) +local function translate(textdomain, str, num, ...) local start_seq - if textdomain == "" then + if textdomain == "" and num == "" then start_seq = ESCAPE_CHAR .. "T" - else + elseif num == "" then start_seq = ESCAPE_CHAR .. "(T@" .. textdomain .. ")" + else + start_seq = ESCAPE_CHAR .. "(T@" .. textdomain .. "@" .. num .. ")" end local arg = {n=select('#', ...), ...} local end_seq = ESCAPE_CHAR .. "E" @@ -610,8 +612,31 @@ function core.translate(textdomain, str, ...) return start_seq .. translated .. end_seq end +function core.translate(textdomain, str, ...) + return translate(textdomain, str, "", ...) +end + +function core.translate_n(textdomain, str, str_plural, n, ...) + assert (type(n) == "number") + assert (n >= 0) + assert (math.floor(n) == n) + + -- Truncate n if too large + local max = 1000000 + if n >= 2 * max then + n = n % max + max + end + if n == 1 then + return translate(textdomain, str, "1", ...) + else + return translate(textdomain, str_plural, tostring(n), ...) + end +end + function core.get_translator(textdomain) - return function(str, ...) return core.translate(textdomain or "", str, ...) end + return + (function(str, ...) return core.translate(textdomain or "", str, ...) end), + (function(str, str_plural, n, ...) return core.translate_n(textdomain or "", str, str_plural, n, ...) 
end) end -------------------------------------------------------------------------------- diff --git a/builtin/mainmenu/tab_content.lua b/builtin/mainmenu/tab_content.lua index b38f12884..9cfb96d54 100644 --- a/builtin/mainmenu/tab_content.lua +++ b/builtin/mainmenu/tab_content.lua @@ -118,7 +118,7 @@ local function get_formspec(tabview, name, tabdata) local title_and_name if selected_pkg.type == "game" then - title_and_name = selected_pkg.name + title_and_name = selected_pkg.title or selected_pkg.name else title_and_name = (selected_pkg.title or selected_pkg.name) .. "\n" .. core.colorize("#BFBFBF", selected_pkg.name) diff --git a/doc/lua_api.md b/doc/lua_api.md index d9e683da3..2c827d7ad 100644 --- a/doc/lua_api.md +++ b/doc/lua_api.md @@ -4178,10 +4178,6 @@ Translations Texts can be translated client-side with the help of `minetest.translate` and translation files. -Consider using the script `mod_translation_updater.py` in the Minetest -[modtools](https://github.com/minetest/modtools) repository to generate and -update translation files automatically from the Lua sources. - Translating a string -------------------- @@ -4189,13 +4185,15 @@ Two functions are provided to translate strings: `minetest.translate` and `minetest.get_translator`. * `minetest.get_translator(textdomain)` is a simple wrapper around - `minetest.translate`, and `minetest.get_translator(textdomain)(str, ...)` is - equivalent to `minetest.translate(textdomain, str, ...)`. + `minetest.translate` and `minetest.translate_n`. + After `local S, NS = minetest.get_translator(textdomain)`, we have + `S(str, ...)` equivalent to `minetest.translate(textdomain, str, ...)`, and + `NS(str, str_plural, n, ...)` to `minetest.translate_n(textdomain, str, str_plural, n, ...)`. It is intended to be used in the following way, so that it avoids verbose repetitions of `minetest.translate`: ```lua - local S = minetest.get_translator(textdomain) + local S, NS = minetest.get_translator(textdomain) S(str, ...) 
``` @@ -4212,29 +4210,102 @@ Two functions are provided to translate strings: `minetest.translate` and arguments the translated string expects. Arguments are literal strings -- they will not be translated. -For instance, suppose we want to greet players when they join. We can do the +* `minetest.translate_n(textdomain, str, str_plural, n, ...)` translates the + string `str` with the given `textdomain` for disambiguation. The value of + `n`, which must be a nonnegative integer, is used to decide whether to use + the singular or the plural version of the string. Depending on the locale of + the client, the choice between singular and plural might be more complicated, + but the choice will be done automatically using the value of `n`. + + You can read https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html + for more details on the differences of plurals between languages. + + Also note that plurals are only handled in .po or .mo files, and not in .tr files. + +For instance, suppose we want to greet players when they join and provide a +command that shows the amount of time since the player joined. We can do the following: ```lua -local S = minetest.get_translator("hello") +local S, NS = minetest.get_translator("hello") minetest.register_on_joinplayer(function(player) local name = player:get_player_name() minetest.chat_send_player(name, S("Hello @1, how are you today?", name)) end) +minetest.register_chatcommand("playtime", { + func = function(name) + local last_login = core.get_auth_handler().get_auth(name).last_login + local playtime = math.floor((os.time()-last_login)/60) + return true, NS( + "You have been playing for @1 minute.", + "You have been playing for @1 minutes.", + playtime, tostring(playtime)) + end, +}) ``` When someone called "CoolGuy" joins the game with an old client or a client that does not have localization enabled, they will see `Hello CoolGuy, how are -you today?` +you today?`. 
If they use the `/playtime` command, they will see `You have been +playing for 1 minute.` or (for example) `You have been playing for 4 minutes.` -However, if we have for instance a translation file named `hello.de.tr` +However, if we have for instance a translation file named `hello.de.po` containing the following: - # textdomain: hello - Hello @1, how are you today?=Hallo @1, wie geht es dir heute? +```po +msgid "" +msgstr "" +"Plural-Forms: nplurals=2; plural=(n != 1);\n" + +msgid "Hello @1, how are you today?" +msgstr "Hallo @1, wie geht es dir heute?" + +msgid "You have been playing for @1 minute." +msgid_plural "You have been playing for @1 minutes." +msgstr[0] "Du spielst seit @1 Minute." +msgstr[1] "Du spielst seit @1 Minuten." +``` and CoolGuy has set a German locale, they will see `Hallo CoolGuy, wie geht es -dir heute?` +dir heute?` when they join, and the `/playtime` command will show them `Du +spielst seit 1 Minute.` or (for example) `Du spielst seit 4 Minuten.` + +Creating and updating translation files +--------------------------------------- + +As an alternative to writing translation files by hand (as shown in the above +example), it is also possible to generate translation files based on the source +code. + +It is recommended to first generate a translation template. The translation +template includes translatable strings that translators can directly work on. +After creating the `locale` directory, a translation template for the above +example can be generated using the following command: + +```sh +xgettext -L lua -kS -kNS:1,2 -kminetest.translate:1c,2 -kminetest.translate_n:1c,2,3 \ + -d hello -o locale/hello.pot *.lua +``` + +The above command can also be used to update the translation template when new +translatable strings are added. + +The German translator can then create the translation file with + +```sh +msginit -l de -i locale/hello.pot -o locale/hello.de.po +``` + +and provide the translations by editing `locale/hello.de.po`. 
+ +The translation file can be updated using + +```sh +msgmerge -U locale/hello.de.po locale/hello.pot +``` + +Refer to the [Gettext manual](https://www.gnu.org/software/gettext/manual/) for +further information on creating and updating translation files. Operations on translated strings -------------------------------- @@ -4248,8 +4319,8 @@ expected manner. However, string concatenation will still work as expected sentences by breaking them into parts; arguments should be used instead), and operations such as `minetest.colorize` which are also concatenation. -Translation file format ------------------------ +Old translation file format +--------------------------- A translation file has the suffix `.[lang].tr`, where `[lang]` is the language it corresponds to. It must be put into the `locale` subdirectory of the mod. @@ -4264,6 +4335,34 @@ The file should be a text file, with the following format: There must be no extraneous whitespace around the `=` or at the beginning or the end of the line. +Using the earlier example of greeting the player, the translation file would be + +``` +# textdomain: hello +Hello @1, how are you today?=Hallo @1, wie geht es dir heute? +``` + +For old translation files, consider using the script `mod_translation_updater.py` +in the Minetest [modtools](https://github.com/minetest/modtools) repository to +generate and update translation files automatically from the Lua sources. + +Gettext translation file format +------------------------------- + +Gettext files can also be used as translations. A translation file has the suffix +`.[lang].po` or `.[lang].mo`, depending on whether it is compiled or not, and must +also be placed in the `locale` subdirectory of the mod. The value of `textdomain` +is `msgctxt` in the gettext files. If `msgctxt` is not provided, the name of the +translation file is used instead. + +A typical entry in a `.po` file would look like: + +```po +msgctxt "textdomain" +msgid "Hello world!" +msgstr "Bonjour le monde!" 
+``` + Escapes ------- diff --git a/games/devtest/mods/testtranslations/init.lua b/games/devtest/mods/testtranslations/init.lua new file mode 100644 index 000000000..bb3696e7e --- /dev/null +++ b/games/devtest/mods/testtranslations/init.lua @@ -0,0 +1,26 @@ +local S, NS = minetest.get_translator("testtranslations") + +local function send_compare(name, text) + core.chat_send_player(name, ("%s | %s | %s"):format( + core.get_translated_string("", text), text, core.get_translated_string("fr", text))) +end + +minetest.register_chatcommand("testtranslations", { + params = "", + description = "Test translations", + privs = {}, + func = function(name, param) + core.chat_send_player(name, "Please ensure your locale is set to \"fr\"") + core.chat_send_player(name, "Untranslated | Client-side translation | Server-side translation (fr)") + send_compare(name, S("Testing .tr files: untranslated")) + send_compare(name, S("Testing .po files: untranslated")) + send_compare(name, S("Testing .mo files: untranslated")) + send_compare(name, S("Testing fuzzy .po entry: untranslated (expected)")) + send_compare(name, core.translate("translation_po", "Testing .po without context: untranslated")) + send_compare(name, core.translate("translation_mo", "Testing .mo without context: untranslated")) + for i = 0,4 do + send_compare(name, NS("@1: .po singular", "@1: .po plural", i, tostring(i))) + send_compare(name, NS("@1: .mo singular", "@1: .mo plural", i, tostring(i))) + end + end +}) diff --git a/games/devtest/mods/testtranslations/locale/testtranslations.fr.po b/games/devtest/mods/testtranslations/locale/testtranslations.fr.po new file mode 100644 index 000000000..2bcc6c7d4 --- /dev/null +++ b/games/devtest/mods/testtranslations/locale/testtranslations.fr.po @@ -0,0 +1,9 @@ +# Dummy entry. This is a test to make sure that a parser error is not thrown +# if the following line is msgctxt. 
+msgctxt "testtranslations" +msgid "Dummy entry" +msgstr "Dummy result" + +# Used for translating the mod title +msgid "Test translations" +msgstr "Test translations (French)" diff --git a/games/devtest/mods/testtranslations/locale/translation_mo.fr.mo b/games/devtest/mods/testtranslations/locale/translation_mo.fr.mo new file mode 100644 index 000000000..0e7190de9 Binary files /dev/null and b/games/devtest/mods/testtranslations/locale/translation_mo.fr.mo differ diff --git a/games/devtest/mods/testtranslations/locale/translation_po.fr.po b/games/devtest/mods/testtranslations/locale/translation_po.fr.po new file mode 100644 index 000000000..5aefc0f41 --- /dev/null +++ b/games/devtest/mods/testtranslations/locale/translation_po.fr.po @@ -0,0 +1,22 @@ +# Test Plural-Forms parsing +msgid "" +msgstr "" +"Plural-Forms: nplurals=2; plural= (n-1+1)<=1 ? n||1?0:1 : 1?(!!2):2;" + +msgctxt "testtranslations" +msgid "Testing .po files: untranslated" +msgstr "Testing .po files: translated" + +msgctxt "testtranslations" +msgid "@1: .po singular" +msgid_plural "@1: .po plural" +msgstr[0] "@1: .po 0 and 1 (French singular)" +msgstr[1] "@1: .po >1 (French plural)" + +#, foo bar fuzzy +msgctxt "testtranslations" +msgid "Testing fuzzy .po entry: untranslated (expected)" +msgstr "Testing fuzzy .po entry: translated (wrong)" + +msgid "Testing .po without context: untranslated" +msgstr "Testing .po without context: translated" diff --git a/games/devtest/mods/testtranslations/locale/translation_tr.fr.tr b/games/devtest/mods/testtranslations/locale/translation_tr.fr.tr new file mode 100644 index 000000000..b9ac66af5 --- /dev/null +++ b/games/devtest/mods/testtranslations/locale/translation_tr.fr.tr @@ -0,0 +1,2 @@ +# textdomain: testtranslations +Testing .tr files: untranslated=Testing .tr files: translated diff --git a/games/devtest/mods/testtranslations/mod.conf b/games/devtest/mods/testtranslations/mod.conf new file mode 100644 index 000000000..1fc09cf6b --- /dev/null +++ 
b/games/devtest/mods/testtranslations/mod.conf @@ -0,0 +1,3 @@ +name = testtranslations +title = Test translations +description = Test mod to test translations. diff --git a/games/devtest/mods/testtranslations/test_locale/readme.txt b/games/devtest/mods/testtranslations/test_locale/readme.txt new file mode 100644 index 000000000..7a2ed4329 --- /dev/null +++ b/games/devtest/mods/testtranslations/test_locale/readme.txt @@ -0,0 +1,4 @@ +The translation files in this directory intentionally include errors (which +would be reported when someone starts the devtest game in the de locale). This +allows the unittest to check that the translation file reader also handles +files that include errors. diff --git a/games/devtest/mods/testtranslations/test_locale/translation_mo.de.mo b/games/devtest/mods/testtranslations/test_locale/translation_mo.de.mo new file mode 100644 index 000000000..ffe05cd71 Binary files /dev/null and b/games/devtest/mods/testtranslations/test_locale/translation_mo.de.mo differ diff --git a/games/devtest/mods/testtranslations/test_locale/translation_po.de.po b/games/devtest/mods/testtranslations/test_locale/translation_po.de.po new file mode 100644 index 000000000..9a64805a6 --- /dev/null +++ b/games/devtest/mods/testtranslations/test_locale/translation_po.de.po @@ -0,0 +1,42 @@ +# This file is used by the C++ unittest for testing the parser +msgid "" +msgstr "\n\n\n" +"Plural-Forms: nplurals=2; plural=n!=1;" +"\n\n\n" + +msgid "foo" + msgstr "bar" + +msgid "Untranslated" +msgstr "" + +#, fuzzy +msgid "Fuzzy entry" +msgstr "Wrong" + +msgid "Multi\\""line\n" +"string" +msgstr "Multi\\\"" "li\\ne\nresult" + +msgctxt "Something" in "between" +msgctxt "String does not end +msgstr "Lost string" +msgid "Wrong order" + +msgid "Singular form" +msgid_plural "Plural form" +msgstr[0] "Singular result" +msgstr[1] "Plural result" + +msgid "Not enough value" +msgid_plural "Not enough values" +msgstr[0] "Result" + +msgid "Partial translation" +msgid_plural "Partial 
translations" +msgstr[0] "Partially translated" +msgstr[1] "" + +msgctxt "context" +msgid "With context" +msgstr "Has context" diff --git a/games/devtest/mods/testtranslations/translation_mo.de.po b/games/devtest/mods/testtranslations/translation_mo.de.po new file mode 100644 index 000000000..c3f22c4ed --- /dev/null +++ b/games/devtest/mods/testtranslations/translation_mo.de.po @@ -0,0 +1,26 @@ +msgid "" +msgstr "Plural-Forms: nplurals=2; plural= n != 1;" + +msgctxt "context" +msgid "With context" +msgstr "Has context" + +msgctxt "context" +msgid "Singular form" +msgid_plural "Plural form" +msgstr[0] "Singular result" +msgstr[1] "Plural result" + +# Replace plural form delimiter in the msgstr +msgid "Corrupt singular" +msgid_plural "Corrupt plural" +msgstr[0] "Corrupt singular result" +msgstr[1] "Corrupt plural result" + +# Replace terminating NUL in the MO file +msgid "Corrupt entry" +msgstr "Corrupted result" + +# Change the address of this entry to something invalid +msgid "Removed entry" +msgstr "Removed result" diff --git a/games/devtest/mods/testtranslations/translation_mo.fr.po b/games/devtest/mods/testtranslations/translation_mo.fr.po new file mode 100644 index 000000000..e6cf6d6ea --- /dev/null +++ b/games/devtest/mods/testtranslations/translation_mo.fr.po @@ -0,0 +1,18 @@ +msgid "" +msgstr "" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Plural-Forms: nplurals=2; plural=n>1;" + +msgctxt "testtranslations" +msgid "Testing .mo files: untranslated" +msgstr "Testing .mo files: translated" + +msgid "Testing .mo without context: untranslated" +msgstr "Testing .mo without context: translated" + +msgctxt "testtranslations" +msgid "@1: .mo singular" +msgid_plural "@1: .mo plural" +msgstr[0] "@1: .mo 0 and 1 (French singular)" +msgstr[1] "@1: .mo >1 (French plural)" diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cad22ca6f..6dd4c05d2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -408,6 +408,7 @@ 
set(common_SRCS face_position_cache.cpp filesys.cpp gettext.cpp + gettext_plural_form.cpp httpfetch.cpp hud.cpp inventory.cpp diff --git a/src/client/client.cpp b/src/client/client.cpp index 0f90bca97..7feb2212d 100644 --- a/src/client/client.cpp +++ b/src/client/client.cpp @@ -841,16 +841,12 @@ bool Client::loadMedia(const std::string &data, const std::string &filename, return true; } - const char *translate_ext[] = { - ".tr", NULL - }; - name = removeStringEnd(filename, translate_ext); - if (!name.empty()) { + if (Translations::isTranslationFile(filename)) { if (from_media_push) return false; TRACESTREAM(<< "Client: Loading translation: " << "\"" << filename << "\"" << std::endl); - g_client_translations->loadTranslation(data); + g_client_translations->loadTranslation(filename, data); return true; } diff --git a/src/gettext.h b/src/gettext.h index 042729c1a..507d27e64 100644 --- a/src/gettext.h +++ b/src/gettext.h @@ -36,7 +36,8 @@ with this program; if not, write to the Free Software Foundation, Inc., // the USE_GETTEXT=0 case and can't assume that gettext is installed. #include - #define gettext(String) String + #define gettext(String) (String) + #define ngettext(String1, String2, n) ((n) == 1 ? (String1) : (String2)) #endif #define _(String) gettext(String) diff --git a/src/gettext_plural_form.cpp b/src/gettext_plural_form.cpp new file mode 100644 index 000000000..6a5322421 --- /dev/null +++ b/src/gettext_plural_form.cpp @@ -0,0 +1,256 @@ +// Minetest +// SPDX-License-Identifier: LGPL-2.1-or-later + +#include "gettext_plural_form.h" +#include "util/string.h" + +static size_t minsize(const GettextPluralForm::Ptr &form) +{ + return form ? form->size() : 0; +} + +static size_t minsize(const GettextPluralForm::Ptr &f, const GettextPluralForm::Ptr &g) +{ + if (sizeof(g) > 0) + return std::min(minsize(f), minsize(g)); + return f ? 
f->size() : 0; +} + +class Identity: public GettextPluralForm +{ + public: + Identity(size_t nplurals): GettextPluralForm(nplurals) {}; + NumT operator()(const NumT n) const override + { + return n; + } +}; + +class ConstValue: public GettextPluralForm +{ + public: + ConstValue(size_t nplurals, NumT val): GettextPluralForm(nplurals), value(val) {}; + NumT operator()(const NumT n) const override + { + return value; + } + private: + NumT value; +}; + +template typename F> +class UnaryOperation: public GettextPluralForm +{ + public: + UnaryOperation(const Ptr &op): + GettextPluralForm(minsize(op)), op(op) {} + NumT operator()(const NumT n) const override + { + if (operator bool()) + return func((*op)(n)); + return 0; + } + private: + Ptr op; + static constexpr F func = {}; +}; + +template typename F> +class BinaryOperation: public GettextPluralForm +{ + public: + BinaryOperation(const Ptr &lhs, const Ptr &rhs): + GettextPluralForm(minsize(lhs, rhs)), + lhs(lhs), rhs(rhs) {} + NumT operator()(const NumT n) const override + { + if (operator bool()) + return func((*lhs)(n), (*rhs)(n)); + return 0; + } + private: + Ptr lhs, rhs; + static constexpr F func = {}; +}; + +class TernaryOperation: public GettextPluralForm +{ + public: + TernaryOperation(const Ptr &cond, const Ptr &val, const Ptr &alt): + GettextPluralForm(std::min(minsize(cond), minsize(val, alt))), + cond(cond), val(val), alt(alt) {} + NumT operator()(const NumT n) const override + { + if (operator bool()) + return (*cond)(n) ? 
(*val)(n) : (*alt)(n); + return 0; + } + private: + Ptr cond, val, alt; +}; + +typedef std::pair ParserResult; +typedef ParserResult (*Parser)(const size_t, const std::wstring_view &); + +static ParserResult parse_expr(const size_t nplurals, const std::wstring_view &str); + +template typename Operator> +static ParserResult reduce_ltr(const size_t nplurals, const ParserResult &res, const wchar_t* pattern) +{ + if (!str_starts_with(res.second, pattern)) + return ParserResult(nullptr, res.second); + auto next = Parser(nplurals, res.second.substr(std::char_traits::length(pattern))); + if (!next.first) + return next; + next.first = GettextPluralForm::Ptr(new BinaryOperation(res.first, next.first)); + next.second = trim(next.second); + return next; +} + +template +static ParserResult reduce_ltr(const size_t nplurals, const ParserResult &res, const wchar_t**) +{ + return ParserResult(nullptr, res.second); +} + +template typename Operator, template typename... Operators> +static ParserResult reduce_ltr(const size_t nplurals, const ParserResult &res, const wchar_t** patterns) +{ + auto next = reduce_ltr(nplurals, res, patterns[0]); + if (next.first || next.second != res.second) + return next; + return reduce_ltr(nplurals, res, patterns+1); +} + +template typename Operator, template typename... 
Operators> +static ParserResult parse_ltr(const size_t nplurals, const std::wstring_view &str, const wchar_t** patterns) +{ + auto &&pres = Parser(nplurals, str); + if (!pres.first) + return pres; + pres.second = trim(pres.second); + while (!pres.second.empty()) { + auto next = reduce_ltr(nplurals, pres, patterns); + if (!next.first) + return pres; + next.second = trim(next.second); + pres = next; + } + return pres; +} + +static ParserResult parse_atomic(const size_t nplurals, const std::wstring_view &str) +{ + if (str.empty()) + return ParserResult(nullptr, str); + if (str[0] == 'n') + return ParserResult(new Identity(nplurals), trim(str.substr(1))); + + wchar_t* endp; + auto val = wcstoul(str.data(), &endp, 10); + return ParserResult(new ConstValue(nplurals, val), trim(str.substr(endp-str.data()))); +} + +static ParserResult parse_parenthesized(const size_t nplurals, const std::wstring_view &str) +{ + if (str.empty()) + return ParserResult(nullptr, str); + if (str[0] != '(') + return parse_atomic(nplurals, str); + auto result = parse_expr(nplurals, str.substr(1)); + if (result.first) { + if (result.second.empty() || result.second[0] != ')') + result.first = nullptr; + else + result.second = trim(result.second.substr(1)); + } + return result; +} + +static ParserResult parse_negation(const size_t nplurals, const std::wstring_view &str) +{ + if (str.empty()) + return ParserResult(nullptr, str); + if (str[0] != '!') + return parse_parenthesized(nplurals, str); + auto result = parse_negation(nplurals, trim(str.substr(1))); + if (result.first) + result.first = GettextPluralForm::Ptr(new UnaryOperation(result.first)); + return result; +} + +static ParserResult parse_multiplicative(const size_t nplurals, const std::wstring_view &str) +{ + static const wchar_t *patterns[] = { L"*", L"/", L"%" }; + return parse_ltr(nplurals, str, patterns); +} + +static ParserResult parse_additive(const size_t nplurals, const std::wstring_view &str) +{ + static const wchar_t *patterns[] = 
{ L"+", L"-" }; + return parse_ltr(nplurals, str, patterns); +} + +static ParserResult parse_comparison(const size_t nplurals, const std::wstring_view &str) +{ + static const wchar_t *patterns[] = { L"<=", L">=", L"<", L">" }; + return parse_ltr(nplurals, str, patterns); +} + +static ParserResult parse_equality(const size_t nplurals, const std::wstring_view &str) +{ + static const wchar_t *patterns[] = { L"==", L"!=" }; + return parse_ltr(nplurals, str, patterns); +} + +static ParserResult parse_conjunction(const size_t nplurals, const std::wstring_view &str) +{ + static const wchar_t *and_pattern[] = { L"&&" }; + return parse_ltr(nplurals, str, and_pattern); +} + +static ParserResult parse_disjunction(const size_t nplurals, const std::wstring_view &str) +{ + static const wchar_t *or_pattern[] = { L"||" }; + return parse_ltr(nplurals, str, or_pattern); +} + +static ParserResult parse_ternary(const size_t nplurals, const std::wstring_view &str) +{ + auto pres = parse_disjunction(nplurals, str); + if (pres.second.empty() || pres.second[0] != '?') // no ? 
: + return pres; + auto cond = pres.first; + pres = parse_ternary(nplurals, trim(pres.second.substr(1))); + if (pres.second.empty() || pres.second[0] != ':') + return ParserResult(nullptr, pres.second); + auto val = pres.first; + pres = parse_ternary(nplurals, trim(pres.second.substr(1))); + return ParserResult(new TernaryOperation(cond, val, pres.first), pres.second); +} + +static ParserResult parse_expr(const size_t nplurals, const std::wstring_view &str) +{ + return parse_ternary(nplurals, trim(str)); +} + +GettextPluralForm::Ptr GettextPluralForm::parse(const size_t nplurals, const std::wstring_view &str) +{ + if (nplurals == 0) + return nullptr; + auto result = parse_expr(nplurals, str); + if (!result.second.empty()) + return nullptr; + return result.first; +} + +GettextPluralForm::Ptr GettextPluralForm::parseHeaderLine(const std::wstring_view &str) +{ + if (!str_starts_with(str, L"Plural-Forms: nplurals=") || !str_ends_with(str, L";")) + return nullptr; + auto nplurals = wcstoul(str.data()+23, nullptr, 10); + auto pos = str.find(L"plural="); + if (pos == str.npos) + return nullptr; + return parse(nplurals, str.substr(pos+7, str.size()-pos-8)); +} diff --git a/src/gettext_plural_form.h b/src/gettext_plural_form.h new file mode 100644 index 000000000..d73718965 --- /dev/null +++ b/src/gettext_plural_form.h @@ -0,0 +1,33 @@ +// Minetest +// SPDX-License-Identifier: LGPL-2.1-or-later + +#pragma once +#include +#include + +// Note that this only implements a subset of C expressions. 
See: +// https://git.savannah.gnu.org/gitweb/?p=gettext.git;a=blob;f=gettext-runtime/intl/plural.y +class GettextPluralForm +{ +public: + using NumT = unsigned long; + using Ptr = std::shared_ptr; + + size_t size() const + { + return nplurals; + }; + virtual NumT operator()(const NumT) const = 0; + virtual operator bool() const + { + return size() > 0; + } + virtual ~GettextPluralForm() {}; + + static GettextPluralForm::Ptr parse(const size_t nplurals, const std::wstring_view &str); + static GettextPluralForm::Ptr parseHeaderLine(const std::wstring_view &str); +protected: + GettextPluralForm(size_t nplurals): nplurals(nplurals) {}; +private: + const size_t nplurals; +}; diff --git a/src/gui/guiEngine.cpp b/src/gui/guiEngine.cpp index 8a4e22b1d..200c26fa0 100644 --- a/src/gui/guiEngine.cpp +++ b/src/gui/guiEngine.cpp @@ -214,15 +214,28 @@ GUIEngine::GUIEngine(JoystickController *joystick, /******************************************************************************/ -std::string findLocaleFileInMods(const std::string &path, const std::string &filename) +std::string findLocaleFileWithExtension(const std::string &path) +{ + if (fs::PathExists(path + ".mo")) + return path + ".mo"; + if (fs::PathExists(path + ".po")) + return path + ".po"; + if (fs::PathExists(path + ".tr")) + return path + ".tr"; + return ""; +} + + +/******************************************************************************/ +std::string findLocaleFileInMods(const std::string &path, const std::string &filename_no_ext) { std::vector mods = flattenMods(getModsInPath(path, "root", true)); for (const auto &mod : mods) { - std::string ret = mod.path + DIR_DELIM "locale" DIR_DELIM + filename; - if (fs::PathExists(ret)) { + std::string ret = findLocaleFileWithExtension( + mod.path + DIR_DELIM "locale" DIR_DELIM + filename_no_ext); + if (!ret.empty()) return ret; - } } return ""; @@ -235,19 +248,26 @@ Translations *GUIEngine::getContentTranslations(const std::string &path, if (domain.empty() || 
lang_code.empty()) return nullptr; - std::string filename = domain + "." + lang_code + ".tr"; - std::string key = path + DIR_DELIM "locale" DIR_DELIM + filename; + std::string filename_no_ext = domain + "." + lang_code; + std::string key = path + DIR_DELIM "locale" DIR_DELIM + filename_no_ext; if (key == m_last_translations_key) return &m_last_translations; std::string trans_path = key; - ContentType type = getContentType(path); - if (type == ContentType::GAME) - trans_path = findLocaleFileInMods(path + DIR_DELIM "mods" DIR_DELIM, filename); - else if (type == ContentType::MODPACK) - trans_path = findLocaleFileInMods(path, filename); - // We don't need to search for locale files in a mod, as there's only one `locale` folder. + + switch (getContentType(path)) { + case ContentType::GAME: + trans_path = findLocaleFileInMods(path + DIR_DELIM "mods" DIR_DELIM, + filename_no_ext); + break; + case ContentType::MODPACK: + trans_path = findLocaleFileInMods(path, filename_no_ext); + break; + default: + trans_path = findLocaleFileWithExtension(trans_path); + break; + } if (trans_path.empty()) return nullptr; @@ -257,7 +277,7 @@ Translations *GUIEngine::getContentTranslations(const std::string &path, std::string data; if (fs::ReadFile(trans_path, data)) { - m_last_translations.loadTranslation(data); + m_last_translations.loadTranslation(fs::GetFilenameFromPath(trans_path.c_str()), data); } return &m_last_translations; diff --git a/src/server.cpp b/src/server.cpp index df2d14a1d..ab219043e 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -2537,8 +2537,8 @@ bool Server::addMediaFile(const std::string &filename, ".png", ".jpg", ".bmp", ".tga", ".ogg", ".x", ".b3d", ".obj", ".gltf", ".glb", - // Custom translation file format - ".tr", + // Translation file formats + ".tr", ".po", ".mo", NULL }; if (removeStringEnd(filename, supported_ext).empty()) { @@ -2621,14 +2621,20 @@ void Server::fillMediaCache() void Server::sendMediaAnnouncement(session_t peer_id, const std::string 
&lang_code) { - std::string lang_suffix = "."; - lang_suffix.append(lang_code).append(".tr"); + std::string translation_formats[3] = { ".tr", ".po", ".mo" }; + std::string lang_suffixes[3]; + for (size_t i = 0; i < 3; i++) { + lang_suffixes[i].append(".").append(lang_code).append(translation_formats[i]); + } - auto include = [&] (const std::string &name, const MediaInfo &info) -> bool { + auto include = [&] (const std::string &name, const MediaInfo &info) -> bool { if (info.no_announce) return false; - if (str_ends_with(name, ".tr") && !str_ends_with(name, lang_suffix)) - return false; + for (size_t j = 0; j < 3; j++) { + if (str_ends_with(name, translation_formats[j]) && !str_ends_with(name, lang_suffixes[j])) { + return false; + } + } return true; }; @@ -4167,12 +4173,11 @@ Translations *Server::getTranslationLanguage(const std::string &lang_code) // [] will create an entry auto *translations = &server_translations[lang_code]; - std::string suffix = "." + lang_code + ".tr"; for (const auto &i : m_media) { - if (str_ends_with(i.first, suffix)) { + if (Translations::getFileLanguage(i.first) == lang_code) { std::string data; if (fs::ReadFile(i.second.path, data, true)) { - translations->loadTranslation(data); + translations->loadTranslation(i.first, data); } } } diff --git a/src/translation.cpp b/src/translation.cpp index 5d5491e56..728789acc 100644 --- a/src/translation.cpp +++ b/src/translation.cpp @@ -19,7 +19,9 @@ with this program; if not, write to the Free Software Foundation, Inc., #include "translation.h" #include "log.h" +#include "util/hex.h" #include "util/string.h" +#include "gettext.h" #include @@ -29,10 +31,22 @@ static Translations client_translations; Translations *g_client_translations = &client_translations; #endif +const std::string_view Translations::getFileLanguage(const std::string &filename) +{ + const char *translate_ext[] = { + ".tr", ".po", ".mo", NULL + }; + auto basename = removeStringEnd(filename, translate_ext); + auto pos = 
basename.rfind('.'); + if (pos == basename.npos) + return ""; + return basename.substr(pos+1); +} void Translations::clear() { m_translations.clear(); + m_plural_translations.clear(); } const std::wstring &Translations::getTranslation( @@ -45,7 +59,52 @@ const std::wstring &Translations::getTranslation( return s; } -void Translations::loadTranslation(const std::string &data) +const std::wstring &Translations::getPluralTranslation( + const std::wstring &textdomain, const std::wstring &s, unsigned long int number) const +{ + std::wstring key = textdomain + L"|" + s; + auto it = m_plural_translations.find(key); + if (it != m_plural_translations.end()) { + auto n = (*(it->second.first))(number); + const std::vector &v = it->second.second; + if (n < v.size()) { + if (v[n].empty()) + return s; + return v[n]; + } + } + return s; +} + + +void Translations::addTranslation( + const std::wstring &textdomain, const std::wstring &original, const std::wstring &translated) +{ + std::wstring key = textdomain + L"|" + original; + if (!translated.empty()) { + m_translations.emplace(std::move(key), std::move(translated)); + } +} + +void Translations::addPluralTranslation( + const std::wstring &textdomain, const GettextPluralForm::Ptr &plural, const std::wstring &original, std::vector &translated) +{ + static bool warned = false; + if (!plural) { + if (!warned) + errorstream << "Translations: plural translation entry defined without Plural-Forms" << std::endl; + warned = true; // set after reporting so the warning is printed once, not never + return; + } else if (translated.size() != plural->size()) { + errorstream << "Translations: incorrect number of plural translations (expected " << plural->size() << ", got " << translated.size() << ")" << std::endl; + return; + } + std::wstring key = textdomain + L"|" + original; + m_plural_translations.emplace(std::move(key), std::pair(plural, translated)); +} + + +void Translations::loadTrTranslation(const std::string &data) { std::istringstream is(data); std::string textdomain_narrow; @@ -145,11 +204,455 @@ 
void Translations::loadTranslation(const std::string &data) } } - std::wstring oword1 = word1.str(), oword2 = word2.str(); - if (!oword2.empty()) { - std::wstring translation_index = textdomain + L"|"; - translation_index.append(oword1); - m_translations.emplace(std::move(translation_index), std::move(oword2)); - } + addTranslation(textdomain, word1.str(), word2.str()); + } +} + + +std::wstring Translations::unescapeC(const std::wstring &str) +{ + // Process escape sequences in str as if it were a C string + std::wstring result; + size_t i = 0; + while (i < str.length()) { + if (str[i] != L'\\') { + result.push_back(str[i]); + i++; + continue; + } + i++; + if (i == str.length()) { + errorstream << "Unfinished escape sequence at the end of \"" << wide_to_utf8(str) << "\"" << std::endl; + break; + } + switch (str[i]) { + // From https://en.wikipedia.org/wiki/Escape_sequences_in_C#Table_of_escape_sequences + case L'a': result.push_back(L'\a'); break; + case L'b': result.push_back(L'\b'); break; + case L'e': result.push_back(L'\x1b'); break; + case L'f': result.push_back(L'\f'); break; + case L'n': result.push_back(L'\n'); break; + case L'r': result.push_back(L'\r'); break; + case L't': result.push_back(L'\t'); break; + case L'v': result.push_back(L'\v'); break; + case L'\\': result.push_back(L'\\'); break; + case L'\'': result.push_back(L'\''); break; + case L'"': result.push_back(L'"'); break; + case L'?': result.push_back(L'?'); break; + case L'0': case L'1': case L'2': case L'3': case L'4': case L'5': case L'6': case L'7': { + size_t j = 0; + wchar_t c = 0; + for (; j < 3 && i+j < str.length() && L'0' <= str[i+j] && str[i+j] <= L'7'; j++) { + c = c * 8 + (str[i+j] - L'0'); + } + if (c <= 0xff) { + result.push_back(c); + } + i += j; + continue; + } + case L'x': { + i++; + if (i >= str.length()) { + errorstream << "Unfinished escape sequence at the end of \"" << wide_to_utf8(str) << "\"" << std::endl; + } + char32_t c = 0; + size_t j = 0; + unsigned char v; + for (; 
i+j < str.length() && str[i+j] <= 0xff && hex_digit_decode((char)str[i+j], v); j++) { /* the 0xff guard matches the u/U cases below: without it the (char) cast would let wide characters whose low byte is an ASCII hex digit be consumed as hex digits */ + c = c * 16 + v; + } + if (j == 0) { + errorstream << "Invalid escape sequence \\x, ignoring" << std::endl; + continue; + } + // If character fits in 16 bits and is not part of surrogate pair, insert it. + // Otherwise, silently drop it: this is valid since \x escape sequences with + // values above 0xff are implementation-defined + if ((c < 0xd800) || (0xe000 <= c && c <= 0xffff)) { + result.push_back(c); + } + i += j; + continue; + } + case L'u': { + i++; + if (i + 4 > str.length()) { + errorstream << "Unfinished escape sequence at the end of \"" << wide_to_utf8(str) << "\"" << std::endl; + } + char16_t c = 0; + bool ok = true; + for (size_t j = 0; j < 4; j++) { + unsigned char v; + if (str[i+j] <= 0xff && hex_digit_decode((char)str[i+j], v)) { + c = c * 16 + v; + } else { + errorstream << "Invalid unicode escape sequence \"\\u" << wide_to_utf8(str.substr(i, 4)) << "\", ignoring" << std::endl; + ok = false; + break; + } + } + if (ok) { + wide_add_codepoint(result, c); + } + i += 4; + continue; + } + case L'U': { + i++; + if (i + 8 > str.length()) { + errorstream << "Unfinished escape sequence at the end of \"" << wide_to_utf8(str) << "\"" << std::endl; + } + char32_t c = 0; + bool ok = true; + for (size_t j = 0; j < 8; j++) { + unsigned char v; + if (str[i+j] <= 0xff && hex_digit_decode((char)str[i+j], v)) { + c = c * 16 + v; + } else { + errorstream << "Invalid unicode escape sequence \"\\U" << wide_to_utf8(str.substr(i, 8)) << "\", ignoring" << std::endl; + ok = false; + break; + } + } + if (ok) { + wide_add_codepoint(result, c); + } + i += 8; + continue; + } + default: { + errorstream << "Unknown escape sequence \"\\" << str[i] << "\", ignoring" << std::endl; + break; + } + } + i++; + } + return result; +} + +void Translations::loadPoEntry(const std::wstring &basefilename, const GettextPluralForm::Ptr &plural_form, const std::map &entry) +{ + // Process an entry from a PO file and add it to the
translation table + // Assumes that entry[L"msgid"] is always defined + std::wstring textdomain; + auto ctx = entry.find(L"msgctxt"); + if (ctx != entry.end()) { + textdomain = ctx->second; + } else { + textdomain = basefilename; + } + std::wstring original = entry.at(L"msgid"); + + auto plural = entry.find(L"msgid_plural"); + if (plural == entry.end()) { + auto translated = entry.find(L"msgstr"); + if (translated == entry.end()) { + errorstream << "Could not load translation: entry for msgid \"" << wide_to_utf8(original) << "\" does not contain a msgstr field" << std::endl; + return; + } + addTranslation(textdomain, original, translated->second); + } else { + std::vector translations; + for (int i = 0; ; i++) { + auto translated = entry.find(L"msgstr[" + std::to_wstring(i) + L"]"); + if (translated == entry.end()) + break; + translations.push_back(translated->second); + } + addPluralTranslation(textdomain, plural_form, original, translations); + addPluralTranslation(textdomain, plural_form, plural->second, translations); + } +} + +bool Translations::inEscape(const std::wstring &line, size_t pos) +{ + if (pos == std::wstring::npos || pos == 0) + return false; + pos--; + size_t count = 0; + for (; line[pos] == L'\\'; pos--) { + count++; + if (pos == 0) + break; + } + return count % 2 == 1; +} + +std::optional> Translations::parsePoLine(const std::string &line) +{ + if (line.empty()) + return std::nullopt; + if (line[0] == '#') + return std::pair(L"#", utf8_to_wide(line.substr(1))); + + std::wstring wline = utf8_to_wide(line); + // Defend against some possibly malformed utf8 string, which + // is empty after converting to wide string + if (wline.empty()) + return std::nullopt; + + std::size_t pos = wline.find(L'"'); + std::wstring s; + if (pos == std::wstring::npos) { + errorstream << "Unable to parse po file line: " << line << std::endl; + return std::nullopt; + } + auto prefix = trim(wline.substr(0, pos)); + auto begin = pos; + while (pos < wline.size()) { + begin 
= wline.find(L'"', pos); + if (begin == std::wstring::npos) { + if (trim(wline.substr(pos)).empty()) { + break; + } else { + errorstream << "Excessive content at the end of po file line: " << line << std::endl; + return std::nullopt; + } + } + if (!trim(wline.substr(pos, begin-pos)).empty()) { + errorstream << "Excessive content within string concatenation in po file line: " << line << std::endl; + return std::nullopt; + } + auto end = wline.find(L'"', begin+1); + while (inEscape(wline, end)) { + end = wline.find(L'"', end+1); + } + if (end == std::wstring::npos) { + errorstream << "String extends beyond po file line: " << line << std::endl; + return std::nullopt; + } + s.append(unescapeC(wline.substr(begin+1, end-begin-1))); + pos = end+1; + } + return std::pair(prefix, s); +} + +void Translations::loadPoTranslation(const std::string &basefilename, const std::string &data) +{ + std::istringstream is(data); + std::string line; + std::map last_entry; + std::wstring last_key; + std::wstring wbasefilename = utf8_to_wide(basefilename); + GettextPluralForm::Ptr plural; + bool skip = false; + bool skip_last = false; + + while (is.good()) { + std::getline(is, line); + // Trim last character if file was using a \r\n line ending + if (line.length () > 0 && line[line.length() - 1] == '\r') + line.resize(line.length() - 1); + + auto parsed = parsePoLine(line); + if (!parsed) + continue; + auto prefix = parsed->first; + auto s = parsed->second; + + if (prefix == L"#") { + if (s[0] == L',') { + // Skip fuzzy entries + if ((s + L' ').find(L" fuzzy ") != line.npos) { + if (last_entry.empty()) + skip_last = true; + else + skip = true; + } + } + continue; + } + + if (prefix.empty()) { + // Continuation of previous line + if (last_key == L"") { + errorstream << "Unable to parse po file: continuation of non-existant previous line" << std::endl; + continue; + } + + last_entry[last_key].append(s); + continue; + } + + if (prefix == L"msgctxt" || (prefix == L"msgid" && 
last_entry.find(L"msgid") != last_entry.end())) { + if (last_entry.find(L"msgid") != last_entry.end()) { + if (!skip_last) { + if (last_entry[L"msgid"].empty()) { + if (last_entry.find(L"msgstr") == last_entry.end()) { + errorstream << "Header entry has no \"msgstr\" field" << std::endl; + } else if (plural) { + errorstream << "Attempt to override existing po header entry" << std::endl; + } else { + for (auto &line: str_split(last_entry[L"msgstr"], L'\n')) { + if (str_starts_with(line, L"Plural-Forms:")) { + plural = GettextPluralForm::parseHeaderLine(line); + if (!(plural && *plural)) { + errorstream << "Invalid Plural-Forms line: " << wide_to_utf8(line) << std::endl; + } + } + } + } + } else { + loadPoEntry(wbasefilename, plural, last_entry); + } + } + last_entry.clear(); + skip_last = skip; + } else if (!last_entry.empty()) { + errorstream << "Unable to parse po file: previous entry has no \"msgid\" field but is not empty" << std::endl; + last_entry.clear(); + skip_last = skip; + } + } else { + // prevent malpositioned fuzzy flag from influencing the following entry + skip = false; + } + if (last_entry.find(prefix) != last_entry.end()) { + errorstream << "Unable to parse po file: Key \"" << wide_to_utf8(prefix) << "\" was already present in previous entry" << std::endl; + continue; + } + last_key = prefix; + last_entry[prefix] = s; + } + + if (last_entry.find(L"msgid") != last_entry.end()) { + if (!skip_last && !last_entry[L"msgid"].empty()) + loadPoEntry(wbasefilename, plural, last_entry); + } else if (!last_entry.empty()) { + errorstream << "Unable to parse po file: Last entry has no \"msgid\" field" << std::endl; + } +} + +void Translations::loadMoEntry(const std::wstring &basefilename, const GettextPluralForm::Ptr &plural_form, const std::string &original, const std::string &translated) +{ + std::wstring textdomain = L""; + size_t found; + std::string noriginal = original; + found = original.find('\x04'); // EOT character + if (found != std::string::npos) { 
+ textdomain = utf8_to_wide(original.substr(0, found)); + noriginal = original.substr(found + 1); + } else { + textdomain = basefilename; + } + + found = noriginal.find('\0'); + if (found != std::string::npos) { + std::vector translations = str_split(utf8_to_wide(translated), L'\0'); + addPluralTranslation(textdomain, plural_form, utf8_to_wide(noriginal.substr(0, found)), translations); + addPluralTranslation(textdomain, plural_form, utf8_to_wide(noriginal.substr(found + 1)), translations); + } else { + addTranslation(textdomain, utf8_to_wide(noriginal), utf8_to_wide(translated)); + } +} + +inline u32 readVarEndian(bool is_be, std::string_view data, size_t pos = 0) +{ + if (pos + 4 > data.size()) + return 0; + if (is_be) { + return + ((u32)(unsigned char)data[pos+0] << 24) | ((u32)(unsigned char)data[pos+1] << 16) | + ((u32)(unsigned char)data[pos+2] << 8) | ((u32)(unsigned char)data[pos+3] << 0); + } else { + return + ((u32)(unsigned char)data[pos+0] << 0) | ((u32)(unsigned char)data[pos+1] << 8) | + ((u32)(unsigned char)data[pos+2] << 16) | ((u32)(unsigned char)data[pos+3] << 24); + } +} + +void Translations::loadMoTranslation(const std::string &basefilename, const std::string &data) +{ + size_t length = data.length(); + std::wstring wbasefilename = utf8_to_wide(basefilename); + GettextPluralForm::Ptr plural_form; + + if (length < 20) { + errorstream << "Ignoring too short mo file" << std::endl; + return; + } + + u32 magic = readVarEndian(false, data); + bool is_be; + if (magic == 0x950412de) { + is_be = false; + } else if (magic == 0xde120495) { + is_be = true; + } else { + errorstream << "Bad magic number for mo file: 0x" << hex_encode(data.substr(0, 4)) << std::endl; + return; + } + + u32 revision = readVarEndian(is_be, data, 4); + if (revision != 0) { + errorstream << "Unknown revision " << revision << " for mo file" << std::endl; + return; + } + + u32 nstring = readVarEndian(is_be, data, 8); + u32 original_offset = readVarEndian(is_be, data, 12); + u32 
translated_offset = readVarEndian(is_be, data, 16); + + if (length < original_offset + 8 * (u64)nstring || length < translated_offset + 8 * (u64)nstring) { + errorstream << "Ignoring truncated mo file" << std::endl; + return; + } + + for (u32 i = 0; i < nstring; i++) { + u32 original_len = readVarEndian(is_be, data, original_offset + 8 * i); + u32 original_off = readVarEndian(is_be, data, original_offset + 8 * i + 4); + u32 translated_len = readVarEndian(is_be, data, translated_offset + 8 * i); + u32 translated_off = readVarEndian(is_be, data, translated_offset + 8 * i + 4); + + if (length < original_off + (u64)original_len || length < translated_off + (u64)translated_len) { + errorstream << "Ignoring translation out of mo file" << std::endl; + continue; + } + + if (data[original_off+original_len] != '\0' || data[translated_off+translated_len] != '\0') { + errorstream << "String in mo entry does not have a trailing NUL" << std::endl; + continue; + } + + auto original = data.substr(original_off, original_len); + auto translated = data.substr(translated_off, translated_len); + + if (original.empty()) { + if (plural_form) { + errorstream << "Attempt to override existing mo header entry" << std::endl; + } else { + for (auto &line: str_split(translated, '\n')) { + if (str_starts_with(line, "Plural-Forms:")) { + plural_form = GettextPluralForm::parseHeaderLine(utf8_to_wide(line)); + if (!(plural_form && *plural_form)) { + errorstream << "Invalid Plural-Forms line: " << line << std::endl; + } + } + } + } + } else { + loadMoEntry(wbasefilename, plural_form, original, translated); + } + } + + return; +} + +void Translations::loadTranslation(const std::string &filename, const std::string &data) +{ + const char *trExtension[] = { ".tr", NULL }; + const char *poExtension[] = { ".po", NULL }; + const char *moExtension[] = { ".mo", NULL }; + if (!removeStringEnd(filename, trExtension).empty()) { + loadTrTranslation(data); + } else if (!removeStringEnd(filename, 
poExtension).empty()) { + std::string basefilename = str_split(filename, '.')[0]; + loadPoTranslation(basefilename, data); + } else if (!removeStringEnd(filename, moExtension).empty()) { + std::string basefilename = str_split(filename, '.')[0]; + loadMoTranslation(basefilename, data); + } else { + errorstream << "loadTranslation called with invalid filename: \"" << filename << "\"" << std::endl; } } diff --git a/src/translation.h b/src/translation.h index d7ed15505..972cdafef 100644 --- a/src/translation.h +++ b/src/translation.h @@ -19,8 +19,12 @@ with this program; if not, write to the Free Software Foundation, Inc., #pragma once +#include "gettext_plural_form.h" #include +#include +#include #include +#include class Translations; #ifndef SERVER @@ -30,11 +34,39 @@ extern Translations *g_client_translations; class Translations { public: - void loadTranslation(const std::string &data); + void loadTranslation(const std::string &filename, const std::string &data); void clear(); - const std::wstring &getTranslation(const std::wstring &textdomain, - const std::wstring &s) const; + const std::wstring &getTranslation( + const std::wstring &textdomain, const std::wstring &s) const; + const std::wstring &getPluralTranslation(const std::wstring &textdomain, + const std::wstring &s, unsigned long int number) const; + static const std::string_view getFileLanguage(const std::string &filename); + static inline bool isTranslationFile(const std::string &filename) + { + return getFileLanguage(filename) != ""; + } + // for testing + inline size_t size() + { + return m_translations.size() + m_plural_translations.size()/2; + } private: std::unordered_map m_translations; + std::unordered_map>> m_plural_translations; + + void addTranslation(const std::wstring &textdomain, const std::wstring &original, + const std::wstring &translated); + void addPluralTranslation(const std::wstring &textdomain, + const GettextPluralForm::Ptr &plural, + const std::wstring &original, + std::vector 
&translated); + std::wstring unescapeC(const std::wstring &str); + std::optional> parsePoLine(const std::string &line); + bool inEscape(const std::wstring &str, size_t pos); + void loadPoEntry(const std::wstring &basefilename, const GettextPluralForm::Ptr &plural_form, const std::map &entry); + void loadMoEntry(const std::wstring &basefilename, const GettextPluralForm::Ptr &plural_form, const std::string &original, const std::string &translated); + void loadTrTranslation(const std::string &data); + void loadPoTranslation(const std::string &basefilename, const std::string &data); + void loadMoTranslation(const std::string &basefilename, const std::string &data); }; diff --git a/src/unittest/CMakeLists.txt b/src/unittest/CMakeLists.txt index f546d150e..46e4f9a18 100644 --- a/src/unittest/CMakeLists.txt +++ b/src/unittest/CMakeLists.txt @@ -37,6 +37,7 @@ set (UNITTEST_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/test_socket.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_servermodmanager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_threading.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_translations.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_utilities.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_voxelarea.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_voxelalgorithms.cpp diff --git a/src/unittest/test_servermodmanager.cpp b/src/unittest/test_servermodmanager.cpp index 03fdc7042..033cbbc49 100644 --- a/src/unittest/test_servermodmanager.cpp +++ b/src/unittest/test_servermodmanager.cpp @@ -122,7 +122,7 @@ void TestServerModManager::testGetMods() ServerModManager sm(m_worlddir); const auto &mods = sm.getMods(); // `ls ./games/devtest/mods | wc -l` + 1 (test mod) - UASSERTEQ(std::size_t, mods.size(), 33 + 1); + UASSERTEQ(std::size_t, mods.size(), 34 + 1); // Ensure we found basenodes mod (part of devtest) // and test_mod (for testing MINETEST_MOD_PATH). 
diff --git a/src/unittest/test_translations.cpp b/src/unittest/test_translations.cpp new file mode 100644 index 000000000..37fc78ee4 --- /dev/null +++ b/src/unittest/test_translations.cpp @@ -0,0 +1,64 @@ +// Minetest +// SPDX-License-Identifier: LGPL-2.1-or-later + +#include "translation.h" +#include "filesys.h" +#include "content/subgames.h" +#include "catch.h" + +#define CONTEXT L"context" +#define TEXTDOMAIN_PO L"translation_po" +#define TEST_PO_NAME "translation_po.de.po" +#define TEST_MO_NAME "translation_mo.de.mo" + +static std::string read_translation_file(const std::string &filename) +{ + auto gamespec = findSubgame("devtest"); + REQUIRE(gamespec.isValid()); + auto path = gamespec.gamemods_path + (DIR_DELIM "testtranslations" DIR_DELIM "test_locale" DIR_DELIM) + filename; + std::string content; + REQUIRE(fs::ReadFile(path, content)); + return content; +} + +TEST_CASE("test translations") +{ + SECTION("Plural-Forms function for translations") + { + auto form = GettextPluralForm::parseHeaderLine(L"Plural-Forms: nplurals=3; plural= (n-1+1)<=1 ? 
n||1?0:1 : 1?(!!2):2;"); + REQUIRE(form); + REQUIRE(form->size() == 3); + CHECK((*form)(0) == 0); + CHECK((*form)(1) == 0); + CHECK((*form)(2) == 1); + } + + SECTION("PO file parser") + { + Translations translations; + translations.loadTranslation(TEST_PO_NAME, read_translation_file(TEST_PO_NAME)); + + CHECK(translations.size() == 5); + CHECK(translations.getTranslation(TEXTDOMAIN_PO, L"foo") == L"bar"); + CHECK(translations.getTranslation(TEXTDOMAIN_PO, L"Untranslated") == L"Untranslated"); + CHECK(translations.getTranslation(TEXTDOMAIN_PO, L"Fuzzy") == L"Fuzzy"); + CHECK(translations.getTranslation(TEXTDOMAIN_PO, L"Multi\\line\nstring") == L"Multi\\\"li\\ne\nresult"); + CHECK(translations.getTranslation(TEXTDOMAIN_PO, L"Wrong order") == L"Wrong order"); + CHECK(translations.getPluralTranslation(TEXTDOMAIN_PO, L"Plural form", 1) == L"Singular result"); + CHECK(translations.getPluralTranslation(TEXTDOMAIN_PO, L"Singular form", 0) == L"Plural result"); + CHECK(translations.getPluralTranslation(TEXTDOMAIN_PO, L"Partial translation", 1) == L"Partially translated"); + CHECK(translations.getPluralTranslation(TEXTDOMAIN_PO, L"Partial translations", 2) == L"Partial translations"); + CHECK(translations.getTranslation(CONTEXT, L"With context") == L"Has context"); + } + + SECTION("MO file parser") + { + Translations translations; + translations.loadTranslation(TEST_MO_NAME, read_translation_file(TEST_MO_NAME)); + + CHECK(translations.size() == 2); + CHECK(translations.getTranslation(CONTEXT, L"With context") == L"Has context"); + CHECK(translations.getPluralTranslation(CONTEXT, L"Plural form", 1) == L"Singular result"); + CHECK(translations.getPluralTranslation(CONTEXT, L"Singular form", 0) == L"Plural result"); + } +} diff --git a/src/util/string.cpp b/src/util/string.cpp index 74a360266..b05d993a5 100644 --- a/src/util/string.cpp +++ b/src/util/string.cpp @@ -154,6 +154,16 @@ std::string wide_to_utf8(std::wstring_view input) return out; } +void 
wide_add_codepoint(std::wstring &result, char32_t codepoint) +{ + if ((0xD800 <= codepoint && codepoint <= 0xDFFF) || (0x10FFFF < codepoint)) { + // Invalid codepoint, replace with unicode replacement character + result.push_back(0xFFFD); + return; + } + result.push_back(codepoint); +} + #else // _WIN32 std::wstring utf8_to_wide(std::string_view input) @@ -180,6 +190,29 @@ std::string wide_to_utf8(std::wstring_view input) return out; } +void wide_add_codepoint(std::wstring &result, char32_t codepoint) +{ + if (codepoint < 0x10000) { + if (0xD800 <= codepoint && codepoint <= 0xDFFF) { + // Invalid codepoint, part of a surrogate pair + // Replace with unicode replacement character + result.push_back(0xFFFD); + return; + } + result.push_back((wchar_t) codepoint); + return; + } + codepoint -= 0x10000; + if (codepoint >= 0x100000) { + // original codepoint was above 0x10FFFF, so invalid + // replace with unicode replacement character + result.push_back(0xFFFD); + return; + } + result.push_back((wchar_t) ((codepoint >> 10) | 0xD800)); + result.push_back((wchar_t) ((codepoint & 0x3FF) | 0xDC00)); +} + #endif // _WIN32 @@ -668,13 +701,20 @@ std::string wrap_rows(std::string_view from, unsigned row_len, bool has_color_co * We get the argument "White", translated, and create a template string with "@1" instead of it. * We finally get the template "@1 Wool" that was used in the beginning, which we translate * before filling it again. + * + * The \x1bT marking the beginning of a translated string allows two '@'-separated arguments: + * - The first one is the textdomain/context in which the string is to be translated. Most often, + * this is the name of the mod which asked for the translation. + * - The second argument, if present, should be an integer; it is used to decide which plural form + * to use, for languages containing several plural forms. 
*/ static void translate_all(std::wstring_view s, size_t &i, Translations *translations, std::wstring &res); static void translate_string(std::wstring_view s, Translations *translations, - const std::wstring &textdomain, size_t &i, std::wstring &res) + const std::wstring &textdomain, size_t &i, std::wstring &res, + bool use_plural, unsigned long int number) { std::vector args; int arg_number = 1; @@ -751,8 +791,17 @@ static void translate_string(std::wstring_view s, Translations *translations, } // Translate the template. - const std::wstring &toutput = translations ? - translations->getTranslation(textdomain, output) : output; + std::wstring toutput; + if (translations != nullptr) { + if (use_plural) + toutput = translations->getPluralTranslation( + textdomain, output, number); + else + toutput = translations->getTranslation( + textdomain, output); + } else { + toutput = output; + } // Put back the arguments in the translated template. size_t j = 0; @@ -835,10 +884,37 @@ static void translate_all(std::wstring_view s, size_t &i, } else if (parts[0] == L"T") { // Beginning of translated string. std::wstring textdomain; + bool use_plural = false; + unsigned long int number = 0; if (parts.size() > 1) textdomain = parts[1]; + if (parts.size() > 2 && parts[2] != L"") { + // parts[2] should contain a number used for selecting + // the plural form. + // However, we can't blindly cast it to an unsigned long int, + // as it might be too large for that. + // + // We follow the advice of gettext and reduce integers larger than 1000000 + // to something in the range [1000000, 2000000), with the same last 6 digits. 
+ // + // https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html + constexpr unsigned long int max = 1000000; + + use_plural = true; + number = 0; + for (wchar_t c : parts[2]) { /* iterate as wchar_t: narrowing each element to char would let non-ASCII characters alias the ASCII digits and pass the range test below */ + if (L'0' <= c && c <= L'9') { + number = (10 * number + (unsigned long int)(c - L'0')); + if (number >= 2 * max) number = (number % max) + max; + } else { + // Invalid number + use_plural = false; + break; + } + } + } std::wstring translated; - translate_string(s, translations, textdomain, i, translated); + translate_string(s, translations, textdomain, i, translated, use_plural, number); res.append(translated); } else { // Another escape sequence, such as colors. Preserve it. diff --git a/src/util/string.h b/src/util/string.h index aae1167b6..50e208966 100644 --- a/src/util/string.h +++ b/src/util/string.h @@ -32,6 +32,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #include #include #include +#include #include class Translations; @@ -87,6 +88,8 @@ struct FlagDesc { std::wstring utf8_to_wide(std::string_view input); std::string wide_to_utf8(std::wstring_view input); +void wide_add_codepoint(std::wstring &result, char32_t codepoint); + std::string urlencode(std::string_view str); std::string urldecode(std::string_view str); @@ -325,19 +328,30 @@ inline std::string lowercase(std::string_view str) } +inline bool my_isspace(const char c) +{ + return std::isspace(static_cast<unsigned char>(c)); /* std::isspace is undefined for negative values other than EOF, so convert to unsigned char first */ +} + +inline bool my_isspace(const wchar_t c) +{ + return std::iswspace(c); +} + /** * @param str * @return A view of \p str with leading and trailing whitespace removed.
*/ -inline std::string_view trim(std::string_view str) +template +inline std::basic_string_view trim(const std::basic_string_view &str) { size_t front = 0; size_t back = str.size(); - while (front < back && std::isspace(str[front])) + while (front < back && my_isspace(str[front])) ++front; - while (back > front && std::isspace(str[back - 1])) + while (back > front && my_isspace(str[back - 1])) --back; return str.substr(front, back - front); @@ -351,16 +365,24 @@ inline std::string_view trim(std::string_view str) * @param str * @return A copy of \p str with leading and trailing whitespace removed. */ -inline std::string trim(std::string &&str) +template +inline std::basic_string trim(std::basic_string &&str) { - std::string ret(trim(std::string_view(str))); + std::basic_string ret(trim(std::basic_string_view(str))); return ret; } -// The above declaration causes ambiguity with char pointers so we have to fix that: -inline std::string_view trim(const char *str) +template +inline std::basic_string_view trim(const std::basic_string &str) { - return trim(std::string_view(str)); + return trim(std::basic_string_view(str)); +} + +// The above declaration causes ambiguity with char pointers so we have to fix that: +template +inline std::basic_string_view trim(const T *str) +{ + return trim(std::basic_string_view(str)); }