From c6ef5ab2595d55192e7952694c6b301f2b64f65c Mon Sep 17 00:00:00 2001 From: sfan5 Date: Wed, 21 Aug 2024 21:34:46 +0200 Subject: [PATCH] Sanitize formspec fields server-side (#14878) --- doc/lua_api.md | 3 ++ src/network/serverpackethandler.cpp | 17 ++++++++--- src/unittest/test_utilities.cpp | 27 +++++++++++++++++ src/util/string.cpp | 47 +++++++++++++++++++++++++++++ src/util/string.h | 10 ++++++ 5 files changed, 99 insertions(+), 5 deletions(-) diff --git a/doc/lua_api.md b/doc/lua_api.md index 07c7b3c2e..a43f517a0 100644 --- a/doc/lua_api.md +++ b/doc/lua_api.md @@ -2625,6 +2625,9 @@ background elements are drawn before all other elements. **WARNING**: do _not_ use an element name starting with `key_`; those names are reserved to pass key press events to formspec! +**WARNING**: names and values of elements cannot contain binary data such as ASCII +control characters. For values, escape sequences used by the engine are an exception to this. + **WARNING**: Minetest allows you to add elements to every single formspec instance using `player:set_formspec_prepend()`, which may be the reason backgrounds are appearing when you don't expect them to, or why things are styled differently diff --git a/src/network/serverpackethandler.cpp b/src/network/serverpackethandler.cpp index 9de39229b..c0718d43b 100644 --- a/src/network/serverpackethandler.cpp +++ b/src/network/serverpackethandler.cpp @@ -1351,15 +1351,22 @@ static bool pkt_read_formspec_fields(NetworkPacket *pkt, StringMap &fields) u16 field_count; *pkt >> field_count; - u64 length = 0; + size_t length = 0; for (u16 k = 0; k < field_count; k++) { - std::string fieldname; + std::string fieldname, fieldvalue; *pkt >> fieldname; - fields[fieldname] = pkt->readLongString(); + fieldvalue = pkt->readLongString(); - length += fieldname.size(); - length += fields[fieldname].size(); + fieldname = sanitize_untrusted(fieldname, false); + // We'd love to strip escapes here but some formspec elements reflect data + // from the server (e.g. dropdown), which can contain translations. + fieldvalue = sanitize_untrusted(fieldvalue); + + length += fieldname.size() + fieldvalue.size(); + + fields[std::move(fieldname)] = std::move(fieldvalue); } + // 640K ought to be enough for anyone return length < 640 * 1024; } diff --git a/src/unittest/test_utilities.cpp b/src/unittest/test_utilities.cpp index 996b418e3..a05421c9b 100644 --- a/src/unittest/test_utilities.cpp +++ b/src/unittest/test_utilities.cpp @@ -61,6 +61,7 @@ public: void testSanitizeDirName(); void testIsBlockInSight(); void testColorizeURL(); + void testSanitizeUntrusted(); }; static TestUtilities g_test_instance; @@ -95,6 +96,7 @@ void TestUtilities::runTests(IGameDef *gamedef) TEST(testSanitizeDirName); TEST(testIsBlockInSight); TEST(testColorizeURL); + TEST(testSanitizeUntrusted); } //////////////////////////////////////////////////////////////////////////////// @@ -743,3 +745,28 @@ void TestUtilities::testColorizeURL() warningstream << "Test skipped." << std::endl; #endif } + +void TestUtilities::testSanitizeUntrusted() +{ + std::string_view t1{u8"AnästhesieausrĂ¼stung"}; + UASSERTEQ(auto, sanitize_untrusted(t1), t1); + + std::string_view t2{"stop\x00here", 9}; + UASSERTEQ(auto, sanitize_untrusted(t2), "stop"); + + UASSERTEQ(auto, sanitize_untrusted("\x01\x08\x13\x1dhello\r\n\tworld"), "hello\n\tworld"); + + std::string_view t3{"some \x1b(T@whatever)text\x1b" "E here"}; + UASSERTEQ(auto, sanitize_untrusted(t3), t3); + auto t3_sanitized = sanitize_untrusted(t3, false); + UASSERT(str_starts_with(t3_sanitized, "some ") && str_ends_with(t3_sanitized, " here")); + UASSERT(t3_sanitized.find('\x1b') == std::string::npos); + + UASSERTEQ(auto, sanitize_untrusted("\x1b[31m"), "[31m"); + + // edge cases + for (bool keep : {true, false}) { + UASSERTEQ(auto, sanitize_untrusted("\x1b", keep), ""); + UASSERTEQ(auto, sanitize_untrusted("\x1b(", keep), "("); + } +} diff --git a/src/util/string.cpp b/src/util/string.cpp index 0c896e6ec..73d1d6907 100644 --- a/src/util/string.cpp +++ b/src/util/string.cpp @@ -950,6 +950,53 @@ std::string sanitizeDirName(std::string_view str, std::string_view optional_pref return wide_to_utf8(safe_name); } +template +void remove_indexed(std::string &s, F pred) +{ + size_t j = 0; + for (size_t i = 0; i < s.length();) { + if (pred(s, i++)) + j++; + if (i != j) + s[j] = s[i]; + } + s.resize(j); +} + +std::string sanitize_untrusted(std::string_view str, bool keep_escapes) +{ + // truncate on NULL + std::string s{str.substr(0, str.find('\0'))}; + + // remove control characters except tab, feed and escape + s.erase(std::remove_if(s.begin(), s.end(), [] (unsigned char c) { + return c < 9 || (c >= 13 && c < 27) || (c >= 28 && c < 32); + }), s.end()); + + if (!keep_escapes) { + s.erase(std::remove(s.begin(), s.end(), '\x1b'), s.end()); + return s; + } + // Note: Minetest escapes generally just look like \x1b# or \x1b(###) + // where # is a single character and ### any number of characters. + // Here we additionally assume that the first character in the sequence + // is [A-Za-z], to enable us to filter foreign types of escapes that might + // be unsafe e.g. ANSI escapes in a terminal. + const auto &check = [] (char c) { + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); + }; + remove_indexed(s, [&check] (const std::string &s, size_t i) { + if (s[i] != '\x1b') + return true; + if (i+1 >= s.length()) + return false; + if (s[i+1] == '(') + return i+2 < s.length() && check(s[i+2]); // long-form escape + else + return check(s[i+1]); // short-form escape + }); + return s; +} void safe_print_string(std::ostream &os, std::string_view str) { diff --git a/src/util/string.h b/src/util/string.h index bddbc62ce..ad3d09818 100644 --- a/src/util/string.h +++ b/src/util/string.h @@ -761,6 +761,16 @@ inline irr::core::stringw utf8_to_stringw(std::string_view input) */ std::string sanitizeDirName(std::string_view str, std::string_view optional_prefix); +/** + * Sanitize an untrusted string (e.g. from the network). This will get strip + * control characters and (optionally) any MT-style escape sequences too. + * Note that they won't be removed cleanly but rather just broken, unlike with + * unescape_enriched. + * Line breaks and UTF-8 is permitted. + */ +[[nodiscard]] +std::string sanitize_untrusted(std::string_view str, bool keep_escapes = true); + /** * Prints a sanitized version of a string without control characters. * '\t' and '\n' are allowed, as are UTF-8 control characters (e.g. RTL).