diff --git a/app/blueprints/api/endpoints.py b/app/blueprints/api/endpoints.py index a40cd760..8b0fbc85 100644 --- a/app/blueprints/api/endpoints.py +++ b/app/blueprints/api/endpoints.py @@ -134,7 +134,8 @@ def package_view_client(package: Package): formspec_version = get_int_or_abort(request.args["formspec_version"]) include_images = is_yes(request.args.get("include_images", "true")) html = render_markdown(data["long_description"]) - data["long_description"] = html_to_minetest(html, formspec_version, include_images) + page_url = package.get_url("packages.view", absolute=True) + data["long_description"] = html_to_minetest(html, page_url, formspec_version, include_images) data["info_hypertext"] = package_info_as_hypertext(package, formspec_version) @@ -155,10 +156,11 @@ def package_view_client(package: Package): @is_package_page @cors_allowed def package_hypertext(package): - formspec_version = request.args["formspec_version"] + formspec_version = get_int_or_abort(request.args["formspec_version"]) include_images = is_yes(request.args.get("include_images", "true")) html = render_markdown(package.desc) - return jsonify(html_to_minetest(html, formspec_version, include_images)) + page_url = package.get_url("packages.view", absolute=True) + return jsonify(html_to_minetest(html, page_url, formspec_version, include_images)) @bp.route("/api/packages///", methods=["PUT"]) @@ -847,14 +849,14 @@ def json_schema(): @csrf.exempt @cors_allowed def hypertext(): - formspec_version = request.args["formspec_version"] + formspec_version = get_int_or_abort(request.args["formspec_version"]) include_images = is_yes(request.args.get("include_images", "true")) html = request.data.decode("utf-8") if request.content_type == "text/markdown": html = render_markdown(html) - return jsonify(html_to_minetest(html, formspec_version, include_images)) + return jsonify(html_to_minetest(html, "", formspec_version, include_images)) @bp.route("/api/collections/") diff --git a/app/flatpages/help/api.md 
b/app/flatpages/help/api.md index 4ffddf3f..62fb5a27 100644 --- a/app/flatpages/help/api.md +++ b/app/flatpages/help/api.md @@ -106,7 +106,7 @@ curl -X DELETE https://content.minetest.net/api/delete-token/ \ * `info_hypertext` is the info sidebar as a hypertext object. * Query arguments * `formspec_version`: Required. See /hypertext/ below. - * `include_images`: Optional, defaults to true. + * `include_images`: Optional, defaults to true. If true, images use `<img>`. If false, they're linked. * `protocol_version`: Optional, used to get the correct release. * `engine_version`: Optional, used to get the correct release. Ex: `5.3.0`. * GET `/api/packages///hypertext/` @@ -114,7 +114,7 @@ curl -X DELETE https://content.minetest.net/api/delete-token/ \ to be used in a `hypertext` formspec element. * Query arguments: * `formspec_version`: Required, maximum supported formspec version. - * `include_images`: Optional, defaults to true. + * `include_images`: Optional, defaults to true. If true, images use `<img>`. If false, they're linked. * Returns JSON dictionary with following key: * `head`: markup for suggested styling and custom tags, prepend to the body before displaying. * `body`: markup for long description. @@ -543,7 +543,7 @@ Supported query parameters: * Content-Type: `text/html` or `text/markdown`. * Query arguments: * `formspec_version`: Required, maximum supported formspec version. Ie: 6 - * `include_images`: Optional, defaults to true. + * `include_images`: Optional, defaults to true. If true, images use `<img>`. If false, they're linked. * Returns JSON dictionary with following key: * `head`: markup for suggested styling and custom tags, prepend to the body before displaying. * `body`: markup for long description. 
diff --git a/app/markdown.py b/app/markdown.py index 0b6c43b3..9e2f0dbe 100644 --- a/app/markdown.py +++ b/app/markdown.py @@ -76,6 +76,7 @@ ALLOWED_ATTRIBUTES = { "code": allow_class, "div": allow_class, "span": allow_class, + "table": ["id"], } ALLOWED_PROTOCOLS = {"http", "https", "mailto"} diff --git a/app/tests/unit/utils/test_minetest_hypertext.py b/app/tests/unit/utils/test_minetest_hypertext.py index 4d6df866..1245c9ff 100644 --- a/app/tests/unit/utils/test_minetest_hypertext.py +++ b/app/tests/unit/utils/test_minetest_hypertext.py @@ -70,9 +70,11 @@ You may leave a game and return to normal playing mode at anytime by typing: The Conquer GUI is the central place for monitoring your kingdom. Once in a session, you can view it by pressing the inventory key (I), or by punching/right-clicking the keep node. """ +page_url = "https://example.com/a/b/" + def test_conquer(): - assert html_to_minetest(conquer_html)["body"].strip() == conquer_expected.strip() + assert html_to_minetest(conquer_html, page_url)["body"].strip() == conquer_expected.strip() def test_images(): @@ -81,10 +83,40 @@ def test_images(): """ expected = "" - result = html_to_minetest(html) + result = html_to_minetest(html, page_url) assert result["body"].strip() == expected.strip() assert len(result["images"]) == 1 - assert result["images"]["image_0"] == "/path/to/img.png" + assert result["images"]["image_0"] == "https://example.com/path/to/img.png" + + +def test_images_removed(): + html = """ + alt + """ + + expected = "Image: alt" + result = html_to_minetest(html, page_url, 7, False) + assert result["body"].strip() == expected.strip() + assert len(result["images"]) == 0 + assert result["links"]["image_0"] == "https://example.com/path/to/img.png" + + +def test_links_relative_absolute(): + html = """ + Relative + Absolute + Other domain + """ + + expected = "Relative " \ + "Absolute " \ + "Other domain" + + result = html_to_minetest(html, page_url, 7, False) + assert result["body"].strip() == 
expected.strip() + assert result["links"]["link_0"] == "https://example.com/a/b/relative" + assert result["links"]["link_1"] == "https://example.com/absolute" + assert result["links"]["link_2"] == "https://www.minetest.net/downloads/" def test_bullets(): @@ -102,15 +134,61 @@ def test_bullets(): "• sub two\n\n" \ "• four\n" - result = html_to_minetest(html) + result = html_to_minetest(html, page_url) assert result["body"].strip() == expected.strip() +def test_table(): + html = """ + + + + + +
Col ACol BCol C
A1B1C1
A2B2C2
A3B3C3
+

Heading

+ + + + + +
Col ACol BCol C
A1B1C1
A2B2C2
A3B3C3
+ """ + + expected = "(view table in browser)\n\n" \ + "Heading\n" \ + "(view table in browser)" + result = html_to_minetest(html, page_url) + assert result["body"].strip() == expected.strip() + assert result["links"]["link_0"] == f"{page_url}#with-id" + assert result["links"]["link_1"] == f"{page_url}#heading" + + def test_inline(): html = """ One two three """ expected = "One two three" - result = html_to_minetest(html) + result = html_to_minetest(html, page_url) + assert result["body"].strip() == expected.strip() + + +def test_escape(): + html = r""" + One t\w<o> three + """ + + expected = r"One t\\w\ three" + result = html_to_minetest(html, page_url) + assert result["body"].strip() == expected.strip() + + +def test_unknown_attr(): + html = r""" + link + """ + + expected = r"link" + result = html_to_minetest(html, page_url) assert result["body"].strip() == expected.strip() diff --git a/app/utils/minetest_hypertext.py b/app/utils/minetest_hypertext.py index 85c33c79..cacfe53b 100644 --- a/app/utils/minetest_hypertext.py +++ b/app/utils/minetest_hypertext.py @@ -17,6 +17,7 @@ from html.parser import HTMLParser import re import sys +from urllib.parse import urljoin from flask_babel import gettext @@ -33,10 +34,14 @@ assert normalize_whitespace(" one three\nfour\n\n") == " one three four " # Styles and custom tags HEAD = normalize_whitespace(""" - + """).strip() +def escape_hypertext(text): + return text.replace("\\", "\\\\").replace("<", "\\<").replace(">", "\\>") + + def get_attributes(attrs): retval = {} for attr in attrs: @@ -49,12 +54,14 @@ def make_indent(w): class MinetestHTMLParser(HTMLParser): - def __init__(self, include_images): + def __init__(self, page_url: str, include_images: bool): super().__init__() + self.page_url = page_url self.include_images = include_images self.completed_text = "" self.current_line = "" + self.last_id = None self.links = {} self.images = {} self.image_tooltips = {} @@ -66,10 +73,19 @@ class MinetestHTMLParser(HTMLParser): 
self.completed_text += self.current_line.rstrip() + "\n" self.current_line = "" + def resolve_url(self, url: str) -> str: + if self.page_url == "": + return url + else: + return urljoin(self.page_url, url) + def handle_starttag(self, tag, attrs): if self.is_preserving or self.remove_until: return + attr_by_name = get_attributes(attrs) + self.last_id = get_attributes(attrs).get("id", self.last_id) + if tag == "p": pass elif tag == "pre": @@ -78,7 +94,16 @@ class MinetestHTMLParser(HTMLParser): elif tag == "table": # Tables are currently unsupported and removed self.remove_until = "table" - self.current_line += "(table removed)" + + url = self.page_url + if self.last_id is not None: + url = url + "#" + self.last_id + + name = f"link_{len(self.links)}" + self.links[name] = url + self.current_line += f"" + self.current_line += escape_hypertext(gettext("(view table in browser)")) + self.current_line += "" self.finish_line() elif tag == "br": self.finish_line() @@ -89,25 +114,29 @@ class MinetestHTMLParser(HTMLParser): self.finish_line() self.current_line += "" elif tag == "a": - for attr in attrs: - if attr[0] == "href": - name = f"link_{len(self.links)}" - self.links[name] = attr[1] - self.current_line += f"" - break + if "href" in attr_by_name: + name = f"link_{len(self.links)}" + self.links[name] = self.resolve_url(attr_by_name["href"]) + self.current_line += f"" else: self.current_line += "" elif tag == "img": - attr_by_value = get_attributes(attrs) - if "src" in attr_by_value and self.include_images: + if "src" in attr_by_name: name = f"image_{len(self.images)}" - self.images[name] = attr_by_value["src"] - width = attr_by_value.get("width", 128) - height = attr_by_value.get("height", 128) - self.current_line += f"" + if self.include_images: + self.images[name] = self.resolve_url(attr_by_name["src"]) + width = attr_by_name.get("width", 128) + height = attr_by_name.get("height", 128) + self.current_line += f"" - if "alt" in attr_by_value: - self.image_tooltips[name] 
= attr_by_value["alt"] + if "alt" in attr_by_name: + self.image_tooltips[name] = attr_by_name["alt"] + else: + self.links[name] = self.resolve_url(attr_by_name["src"]) + label = gettext("Image") + if "alt" in attr_by_name: + label = f"{label}: {attr_by_name['alt']}" + self.current_line += f"{escape_hypertext(label)}" elif tag == "b" or tag == "strong": self.current_line += "" elif tag == "i" or tag == "em": @@ -175,7 +204,7 @@ class MinetestHTMLParser(HTMLParser): if self.current_line.strip() == "": data = data.lstrip() - self.current_line += data + self.current_line += escape_hypertext(data) def handle_entityref(self, name): to_value = { @@ -192,8 +221,8 @@ class MinetestHTMLParser(HTMLParser): self.current_line += f"&{name};" -def html_to_minetest(html, formspec_version=7, include_images=True): - parser = MinetestHTMLParser(include_images) +def html_to_minetest(html, page_url: str, formspec_version: int = 7, include_images: bool = True): + parser = MinetestHTMLParser(page_url, include_images) parser.feed(html) parser.finish_line() @@ -212,19 +241,19 @@ def package_info_as_hypertext(package: Package, formspec_version: int = 7): def add_value(label, value): nonlocal body - body += f"{label}\n{value}\n\n" + body += f"{label}: {escape_hypertext(str(value))}\n\n" def add_list(label, items): nonlocal body - body += label + "\n" + body += label + ": " for i, item in enumerate(items): if i != 0: - body += ", " - body += item + body += ", " + body += f"{escape_hypertext(str(item))}" if len(items) == 0: - body += "-" + body += "" + escape_hypertext(gettext("none")) + "" body += "\n\n" @@ -235,7 +264,7 @@ def package_info_as_hypertext(package: Package, formspec_version: int = 7): def make_game_link(game): key = f"link_{len(links)}" links[key] = game.get_url("packages.view", absolute=True) - return f"{game.title}" + return f"{game.title}" [supported, unsupported] = package.get_sorted_game_support_pair() supports_all_games = package.supports_all_games or len(supported) == 0