Hypertext: Fix various issues

* Change link color
* Return absolute URLs
* Provide link to tables (with anchor)
* Provide link to image when include_images=false
* Escape backslashes
* Make package info more compact
This commit is contained in:
rubenwardy 2024-04-05 18:17:07 +01:00
parent fc565eee92
commit 609354cd35
5 changed files with 149 additions and 39 deletions

@ -134,7 +134,8 @@ def package_view_client(package: Package):
formspec_version = get_int_or_abort(request.args["formspec_version"]) formspec_version = get_int_or_abort(request.args["formspec_version"])
include_images = is_yes(request.args.get("include_images", "true")) include_images = is_yes(request.args.get("include_images", "true"))
html = render_markdown(data["long_description"]) html = render_markdown(data["long_description"])
data["long_description"] = html_to_minetest(html, formspec_version, include_images) page_url = package.get_url("packages.view", absolute=True)
data["long_description"] = html_to_minetest(html, page_url, formspec_version, include_images)
data["info_hypertext"] = package_info_as_hypertext(package, formspec_version) data["info_hypertext"] = package_info_as_hypertext(package, formspec_version)
@ -155,10 +156,11 @@ def package_view_client(package: Package):
@is_package_page @is_package_page
@cors_allowed @cors_allowed
def package_hypertext(package):
    """Return the package's description converted to Minetest hypertext, as JSON.

    Query arguments read from the request:
      * ``formspec_version`` -- required; parsed with ``get_int_or_abort``.
      * ``include_images`` -- optional; defaults to ``"true"``.
    """
    args = request.args
    version = get_int_or_abort(args["formspec_version"])
    with_images = is_yes(args.get("include_images", "true"))

    rendered = render_markdown(package.desc)
    # Absolute URL of the package page, passed to the converter as its page URL.
    base_url = package.get_url("packages.view", absolute=True)

    converted = html_to_minetest(rendered, base_url, version, with_images)
    return jsonify(converted)
@bp.route("/api/packages/<author>/<name>/", methods=["PUT"]) @bp.route("/api/packages/<author>/<name>/", methods=["PUT"])
@ -847,14 +849,14 @@ def json_schema():
@csrf.exempt @csrf.exempt
@cors_allowed @cors_allowed
def hypertext():
    """Convert the request body (HTML or Markdown) to Minetest hypertext, as JSON.

    The body is decoded as UTF-8. If the request's media type is
    ``text/markdown`` it is first rendered with ``render_markdown``;
    otherwise it is treated as HTML. An empty page URL is passed to the
    converter.

    Query arguments read from the request:
      * ``formspec_version`` -- required; parsed with ``get_int_or_abort``.
      * ``include_images`` -- optional; defaults to ``"true"``.
    """
    formspec_version = get_int_or_abort(request.args["formspec_version"])
    include_images = is_yes(request.args.get("include_images", "true"))

    html = request.data.decode("utf-8")
    # Compare the bare media type rather than the raw header, so that
    # "text/markdown; charset=utf-8" is still recognised as Markdown.
    if request.mimetype == "text/markdown":
        html = render_markdown(html)

    return jsonify(html_to_minetest(html, "", formspec_version, include_images))
@bp.route("/api/collections/") @bp.route("/api/collections/")

@ -106,7 +106,7 @@ curl -X DELETE https://content.minetest.net/api/delete-token/ \
* `info_hypertext` is the info sidebar as a hypertext object. * `info_hypertext` is the info sidebar as a hypertext object.
* Query arguments * Query arguments
* `formspec_version`: Required. See /hypertext/ below. * `formspec_version`: Required. See /hypertext/ below.
* `include_images`: Optional, defaults to true. * `include_images`: Optional, defaults to true. If true, images use `<img>`. If false, they're linked.
* `protocol_version`: Optional, used to get the correct release. * `protocol_version`: Optional, used to get the correct release.
* `engine_version`: Optional, used to get the correct release. Ex: `5.3.0`. * `engine_version`: Optional, used to get the correct release. Ex: `5.3.0`.
* GET `/api/packages/<author>/<name>/hypertext/` * GET `/api/packages/<author>/<name>/hypertext/`
@ -114,7 +114,7 @@ curl -X DELETE https://content.minetest.net/api/delete-token/ \
to be used in a `hypertext` formspec element. to be used in a `hypertext` formspec element.
* Query arguments: * Query arguments:
* `formspec_version`: Required, maximum supported formspec version. * `formspec_version`: Required, maximum supported formspec version.
* `include_images`: Optional, defaults to true. * `include_images`: Optional, defaults to true. If true, images use `<img>`. If false, they're linked.
* Returns JSON dictionary with following key: * Returns JSON dictionary with following key:
* `head`: markup for suggested styling and custom tags, prepend to the body before displaying. * `head`: markup for suggested styling and custom tags, prepend to the body before displaying.
* `body`: markup for long description. * `body`: markup for long description.
@ -543,7 +543,7 @@ Supported query parameters:
* Content-Type: `text/html` or `text/markdown`. * Content-Type: `text/html` or `text/markdown`.
* Query arguments: * Query arguments:
* `formspec_version`: Required, maximum supported formspec version. Ie: 6 * `formspec_version`: Required, maximum supported formspec version. Ie: 6
* `include_images`: Optional, defaults to true. * `include_images`: Optional, defaults to true. If true, images use `<img>`. If false, they're linked.
* Returns JSON dictionary with following key: * Returns JSON dictionary with following key:
* `head`: markup for suggested styling and custom tags, prepend to the body before displaying. * `head`: markup for suggested styling and custom tags, prepend to the body before displaying.
* `body`: markup for long description. * `body`: markup for long description.

@ -76,6 +76,7 @@ ALLOWED_ATTRIBUTES = {
"code": allow_class, "code": allow_class,
"div": allow_class, "div": allow_class,
"span": allow_class, "span": allow_class,
"table": ["id"],
} }
ALLOWED_PROTOCOLS = {"http", "https", "mailto"} ALLOWED_PROTOCOLS = {"http", "https", "mailto"}

@ -70,9 +70,11 @@ You may leave a game and return to normal playing mode at anytime by typing:
The Conquer GUI is the central place for monitoring your kingdom. Once in a session, you can view it by pressing the inventory key (I), or by punching/right-clicking the keep node. The Conquer GUI is the central place for monitoring your kingdom. Once in a session, you can view it by pressing the inventory key (I), or by punching/right-clicking the keep node.
""" """
page_url = "https://example.com/a/b/"
def test_conquer():
    """The ``conquer_html`` fixture converts to the ``conquer_expected`` body."""
    body = html_to_minetest(conquer_html, page_url)["body"]
    assert body.strip() == conquer_expected.strip()
def test_images(): def test_images():
@ -81,10 +83,40 @@ def test_images():
""" """
expected = "<img name=image_0 width=128 height=128>" expected = "<img name=image_0 width=128 height=128>"
result = html_to_minetest(html) result = html_to_minetest(html, page_url)
assert result["body"].strip() == expected.strip() assert result["body"].strip() == expected.strip()
assert len(result["images"]) == 1 assert len(result["images"]) == 1
assert result["images"]["image_0"] == "/path/to/img.png" assert result["images"]["image_0"] == "https://example.com/path/to/img.png"
def test_images_removed():
    """With include_images=False an <img> becomes a link action, not an image."""
    markup = """
<img src="/path/to/img.png" alt="alt">
"""
    wanted_body = "<action name=image_0><u>Image: alt</u></action>"

    converted = html_to_minetest(markup, page_url, 7, False)

    assert converted["body"].strip() == wanted_body.strip()
    assert len(converted["images"]) == 0
    assert converted["links"]["image_0"] == "https://example.com/path/to/img.png"
def test_links_relative_absolute():
    """Link targets are resolved against ``page_url``; full URLs are kept."""
    markup = """
<a href="relative">Relative</a>
<a href="/absolute">Absolute</a>
<a href="https://www.minetest.net/downloads/">Other domain</a>
"""
    wanted_body = " ".join([
        "<action name=link_0><u>Relative</u></action>",
        "<action name=link_1><u>Absolute</u></action>",
        "<action name=link_2><u>Other domain</u></action>",
    ])
    wanted_links = {
        "link_0": "https://example.com/a/b/relative",
        "link_1": "https://example.com/absolute",
        "link_2": "https://www.minetest.net/downloads/",
    }

    converted = html_to_minetest(markup, page_url, 7, False)

    assert converted["body"].strip() == wanted_body.strip()
    for link_name, target in wanted_links.items():
        assert converted["links"][link_name] == target
def test_bullets(): def test_bullets():
@ -102,15 +134,61 @@ def test_bullets():
"<img name=blank.png width=32 height=1>• sub two\n\n" \ "<img name=blank.png width=32 height=1>• sub two\n\n" \
"<img name=blank.png width=16 height=1>• four\n" "<img name=blank.png width=16 height=1>• four\n"
result = html_to_minetest(html) result = html_to_minetest(html, page_url)
assert result["body"].strip() == expected.strip() assert result["body"].strip() == expected.strip()
def test_table():
    """Each table is replaced by a link anchored at the most recent element id."""
    markup = """
<table id="with-id">
<tr><th>Col A</th><th>Col B</th><th>Col C</th></tr>
<tr><td>A1</td><td>B1</td><td>C1</td>
<tr><td>A2</td><td>B2</td><td>C2</td>
<tr><td>A3</td><td>B3</td><td>C3</td>
</table>
<h3 id="heading">Heading</h3>
<table>
<tr><th>Col A</th><th>Col B</th><th>Col C</th></tr>
<tr><td>A1</td><td>B1</td><td>C1</td>
<tr><td>A2</td><td>B2</td><td>C2</td>
<tr><td>A3</td><td>B3</td><td>C3</td>
</table>
"""
    wanted_body = (
        "<action name=link_0><u>(view table in browser)</u></action>\n\n"
        "<b>Heading</b>\n"
        "<action name=link_1><u>(view table in browser)</u></action>"
    )

    converted = html_to_minetest(markup, page_url)

    assert converted["body"].strip() == wanted_body.strip()
    # First table carries its own id; the second falls back to the heading's id.
    assert converted["links"]["link_0"] == page_url + "#with-id"
    assert converted["links"]["link_1"] == page_url + "#heading"
def test_inline():
    """Nested bold/italic markup is carried over unchanged."""
    markup = """
<b>One <i>two</i> three</b>
"""
    wanted_body = "<b>One <i>two</i> three</b>"

    body = html_to_minetest(markup, page_url)["body"]

    assert body.strip() == wanted_body.strip()
def test_escape():
    """Backslashes and entity-encoded angle brackets come out backslash-escaped."""
    markup = r"""
<b>One <i>t\w&lt;o&gt;</i> three</b>
"""
    wanted_body = r"<b>One <i>t\\w\<o\></i> three</b>"

    body = html_to_minetest(markup, page_url)["body"]

    assert body.strip() == wanted_body.strip()
def test_unknown_attr():
    """An extra, unknown attribute on a link does not affect the generated markup."""
    markup = r"""
<a href="https://example.com" url="http://www.minetest.net">link</a>
"""
    wanted_body = r"<action name=link_0><u>link</u></action>"

    body = html_to_minetest(markup, page_url)["body"]

    assert body.strip() == wanted_body.strip()

@ -17,6 +17,7 @@
from html.parser import HTMLParser from html.parser import HTMLParser
import re import re
import sys import sys
from urllib.parse import urljoin
from flask_babel import gettext from flask_babel import gettext
@ -33,10 +34,14 @@ assert normalize_whitespace(" one three\nfour\n\n") == " one three four "
# Styles and custom tags # Styles and custom tags
HEAD = normalize_whitespace(""" HEAD = normalize_whitespace("""
<tag name=code color=#7bf font=mono> <tag name=code color=#7bf font=mono>
<tag name=action color=#77f hovercolor=#aaf> <tag name=action color=#4CDAFA hovercolor=#97EAFC>
""").strip() """).strip()
def escape_hypertext(text):
    """Return *text* with a backslash placed before every backslash, ``<`` and ``>``."""
    # One pass over the string; each special character maps to its escaped form.
    escapes = str.maketrans({"\\": "\\\\", "<": "\\<", ">": "\\>"})
    return text.translate(escapes)
def get_attributes(attrs): def get_attributes(attrs):
retval = {} retval = {}
for attr in attrs: for attr in attrs:
@ -49,12 +54,14 @@ def make_indent(w):
class MinetestHTMLParser(HTMLParser): class MinetestHTMLParser(HTMLParser):
def __init__(self, include_images): def __init__(self, page_url: str, include_images: bool):
super().__init__() super().__init__()
self.page_url = page_url
self.include_images = include_images self.include_images = include_images
self.completed_text = "" self.completed_text = ""
self.current_line = "" self.current_line = ""
self.last_id = None
self.links = {} self.links = {}
self.images = {} self.images = {}
self.image_tooltips = {} self.image_tooltips = {}
@ -66,10 +73,19 @@ class MinetestHTMLParser(HTMLParser):
self.completed_text += self.current_line.rstrip() + "\n" self.completed_text += self.current_line.rstrip() + "\n"
self.current_line = "" self.current_line = ""
def resolve_url(self, url: str) -> str:
    """Resolve *url* against this parser's page URL.

    When ``self.page_url`` is the empty string, *url* is returned unchanged;
    otherwise the two are combined with ``urllib.parse.urljoin``.
    """
    base = self.page_url
    return url if base == "" else urljoin(base, url)
def handle_starttag(self, tag, attrs): def handle_starttag(self, tag, attrs):
if self.is_preserving or self.remove_until: if self.is_preserving or self.remove_until:
return return
attr_by_name = get_attributes(attrs)
self.last_id = get_attributes(attrs).get("id", self.last_id)
if tag == "p": if tag == "p":
pass pass
elif tag == "pre": elif tag == "pre":
@ -78,7 +94,16 @@ class MinetestHTMLParser(HTMLParser):
elif tag == "table": elif tag == "table":
# Tables are currently unsupported and removed # Tables are currently unsupported and removed
self.remove_until = "table" self.remove_until = "table"
self.current_line += "<i>(table removed)</i>"
url = self.page_url
if self.last_id is not None:
url = url + "#" + self.last_id
name = f"link_{len(self.links)}"
self.links[name] = url
self.current_line += f"<action name={name}><u>"
self.current_line += escape_hypertext(gettext("(view table in browser)"))
self.current_line += "</u></action>"
self.finish_line() self.finish_line()
elif tag == "br": elif tag == "br":
self.finish_line() self.finish_line()
@ -89,25 +114,29 @@ class MinetestHTMLParser(HTMLParser):
self.finish_line() self.finish_line()
self.current_line += "<b>" self.current_line += "<b>"
elif tag == "a": elif tag == "a":
for attr in attrs: if "href" in attr_by_name:
if attr[0] == "href": name = f"link_{len(self.links)}"
name = f"link_{len(self.links)}" self.links[name] = self.resolve_url(attr_by_name["href"])
self.links[name] = attr[1] self.current_line += f"<action name={name}><u>"
self.current_line += f"<action name={name}><u>"
break
else: else:
self.current_line += "<action><u>" self.current_line += "<action><u>"
elif tag == "img": elif tag == "img":
attr_by_value = get_attributes(attrs) if "src" in attr_by_name:
if "src" in attr_by_value and self.include_images:
name = f"image_{len(self.images)}" name = f"image_{len(self.images)}"
self.images[name] = attr_by_value["src"] if self.include_images:
width = attr_by_value.get("width", 128) self.images[name] = self.resolve_url(attr_by_name["src"])
height = attr_by_value.get("height", 128) width = attr_by_name.get("width", 128)
self.current_line += f"<img name={name} width={width} height={height}>" height = attr_by_name.get("height", 128)
self.current_line += f"<img name={name} width={width} height={height}>"
if "alt" in attr_by_value: if "alt" in attr_by_name:
self.image_tooltips[name] = attr_by_value["alt"] self.image_tooltips[name] = attr_by_name["alt"]
else:
self.links[name] = self.resolve_url(attr_by_name["src"])
label = gettext("Image")
if "alt" in attr_by_name:
label = f"{label}: {attr_by_name['alt']}"
self.current_line += f"<action name={name}><u>{escape_hypertext(label)}</u></action>"
elif tag == "b" or tag == "strong": elif tag == "b" or tag == "strong":
self.current_line += "<b>" self.current_line += "<b>"
elif tag == "i" or tag == "em": elif tag == "i" or tag == "em":
@ -175,7 +204,7 @@ class MinetestHTMLParser(HTMLParser):
if self.current_line.strip() == "": if self.current_line.strip() == "":
data = data.lstrip() data = data.lstrip()
self.current_line += data self.current_line += escape_hypertext(data)
def handle_entityref(self, name): def handle_entityref(self, name):
to_value = { to_value = {
@ -192,8 +221,8 @@ class MinetestHTMLParser(HTMLParser):
self.current_line += f"&{name};" self.current_line += f"&{name};"
def html_to_minetest(html, formspec_version=7, include_images=True): def html_to_minetest(html, page_url: str, formspec_version: int = 7, include_images: bool = True):
parser = MinetestHTMLParser(include_images) parser = MinetestHTMLParser(page_url, include_images)
parser.feed(html) parser.feed(html)
parser.finish_line() parser.finish_line()
@ -212,19 +241,19 @@ def package_info_as_hypertext(package: Package, formspec_version: int = 7):
def add_value(label, value): def add_value(label, value):
nonlocal body nonlocal body
body += f"{label}\n<b>{value}</b>\n\n" body += f"{label}: <b>{escape_hypertext(str(value))}</b>\n\n"
def add_list(label, items): def add_list(label, items):
nonlocal body nonlocal body
body += label + "\n<b>" body += label + ": "
for i, item in enumerate(items): for i, item in enumerate(items):
if i != 0: if i != 0:
body += "</b>, <b>" body += ", "
body += item body += f"<b>{escape_hypertext(str(item))}</b>"
if len(items) == 0: if len(items) == 0:
body += "-" body += "<i>" + escape_hypertext(gettext("none")) + "</i>"
body += "</b>\n\n" body += "</b>\n\n"
@ -235,7 +264,7 @@ def package_info_as_hypertext(package: Package, formspec_version: int = 7):
def make_game_link(game):
    """Register a link to *game*'s package page and return its action markup.

    The link is stored in the enclosing ``links`` mapping under the next free
    ``link_N`` key, and the returned markup is an underlined ``<action>``
    tag referring to that key.
    """
    key = f"link_{len(links)}"
    links[key] = game.get_url("packages.view", absolute=True)
    # The title is free text: escape it so "<", ">" or a backslash cannot
    # break the surrounding markup, matching how other values are emitted.
    title = escape_hypertext(str(game.title))
    return f"<action name={key}><u>{title}</u></action>"
[supported, unsupported] = package.get_sorted_game_support_pair() [supported, unsupported] = package.get_sorted_game_support_pair()
supports_all_games = package.supports_all_games or len(supported) == 0 supports_all_games = package.supports_all_games or len(supported) == 0