mirror of
https://github.com/minetest/contentdb.git
synced 2025-01-08 22:17:34 +01:00
hypertext: Add support for nested lists
This commit is contained in:
parent
0a06e41497
commit
2a0545210b
@ -75,12 +75,17 @@ def test_bullets():
|
|||||||
html = """
|
html = """
|
||||||
<ul>
|
<ul>
|
||||||
<li>One</li>
|
<li>One</li>
|
||||||
<li>two three</li>
|
<li>two three<ul><li>sub one</li><li>sub two</li></ul></li>
|
||||||
<li>four</li>
|
<li>four</li>
|
||||||
</ul>
|
</ul>
|
||||||
"""
|
"""
|
||||||
|
|
||||||
expected = "• One\n• two three\n• four\n"
|
expected = "<img name=blank.png width=16 height=1>• One\n" \
|
||||||
|
"<img name=blank.png width=16 height=1>• two three\n" \
|
||||||
|
"<img name=blank.png width=32 height=1>• sub one\n" \
|
||||||
|
"<img name=blank.png width=32 height=1>• sub two\n\n" \
|
||||||
|
"<img name=blank.png width=16 height=1>• four\n"
|
||||||
|
|
||||||
result = html_to_minetest(html)
|
result = html_to_minetest(html)
|
||||||
assert result["body"].strip() == expected.strip()
|
assert result["body"].strip() == expected.strip()
|
||||||
|
|
||||||
|
@ -24,52 +24,59 @@ def get_attributes(attrs):
|
|||||||
return retval
|
return retval
|
||||||
|
|
||||||
|
|
||||||
|
def make_indent(w):
    """Return a blank spacer image tag that indents text by ``w`` levels.

    The tag references ``blank.png`` with a height of 1 and a width of
    16 units per indent level, so ``make_indent(2)`` yields a width of 32.
    The parser prepends this to each list item's bullet.

    :param w: indent level (number of 16-unit steps).
    :return: the ``<img ...>`` markup string for the spacer.
    """
    return f"<img name=blank.png width={16 * w} height=1>"
|
||||||
|
|
||||||
|
|
||||||
class MinetestHTMLParser(HTMLParser):
|
class MinetestHTMLParser(HTMLParser):
|
||||||
def __init__(self, include_images):
|
def __init__(self, include_images):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.include_images = include_images
|
self.include_images = include_images
|
||||||
|
|
||||||
self.text_buffer = ""
|
self.completed_text = ""
|
||||||
self.has_line_started = False
|
self.current_line = ""
|
||||||
self.links = {}
|
self.links = {}
|
||||||
self.images = {}
|
self.images = {}
|
||||||
self.image_tooltips = {}
|
self.image_tooltips = {}
|
||||||
self.is_preserving = False
|
self.is_preserving = False
|
||||||
self.remove_until = None
|
self.remove_until = None
|
||||||
|
self.indent_level = 0
|
||||||
|
|
||||||
|
def finish_line(self):
    """Flush the line under construction into the completed output.

    Trailing whitespace is stripped from ``current_line``, a newline is
    appended, and the result is added to ``completed_text``. The line
    buffer is then reset so the next line starts empty. Calling this with
    an empty ``current_line`` emits a blank line.
    """
    self.completed_text += self.current_line.rstrip() + "\n"
    self.current_line = ""
|
||||||
|
|
||||||
def handle_starttag(self, tag, attrs):
|
def handle_starttag(self, tag, attrs):
|
||||||
if self.is_preserving or self.remove_until:
|
if self.is_preserving or self.remove_until:
|
||||||
return
|
return
|
||||||
|
|
||||||
print("OPEN", tag, file=sys.stderr)
|
|
||||||
|
|
||||||
self.has_line_started = True
|
|
||||||
if tag == "p":
|
if tag == "p":
|
||||||
self.has_line_started = False
|
pass
|
||||||
elif tag == "pre":
|
elif tag == "pre":
|
||||||
self.text_buffer += "<code>"
|
self.current_line += "<code>"
|
||||||
self.is_preserving = True
|
self.is_preserving = True
|
||||||
self.has_line_started = False
|
|
||||||
elif tag == "table":
|
elif tag == "table":
|
||||||
# Tables are currently unsupported and removed
|
# Tables are currently unsupported and removed
|
||||||
self.remove_until = "table"
|
self.remove_until = "table"
|
||||||
self.text_buffer += "<i>(table removed)</i>\n"
|
self.current_line += "<i>(table removed)</i>"
|
||||||
|
self.finish_line()
|
||||||
elif tag == "br":
|
elif tag == "br":
|
||||||
self.text_buffer += "\n"
|
self.finish_line()
|
||||||
self.has_line_started = False
|
|
||||||
elif tag == "h1" or tag == "h2":
|
elif tag == "h1" or tag == "h2":
|
||||||
self.text_buffer += "\n<big>"
|
self.finish_line()
|
||||||
|
self.current_line += "<big>"
|
||||||
elif tag == "h3" or tag == "h4" or tag == "h5":
|
elif tag == "h3" or tag == "h4" or tag == "h5":
|
||||||
self.text_buffer += "\n<b>"
|
self.finish_line()
|
||||||
|
self.current_line += "<b>"
|
||||||
elif tag == "a":
|
elif tag == "a":
|
||||||
for attr in attrs:
|
for attr in attrs:
|
||||||
if attr[0] == "href":
|
if attr[0] == "href":
|
||||||
name = f"link_{len(self.links)}"
|
name = f"link_{len(self.links)}"
|
||||||
self.links[name] = attr[1]
|
self.links[name] = attr[1]
|
||||||
self.text_buffer += f"<action name={name}><u>"
|
self.current_line += f"<action name={name}><u>"
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
self.text_buffer += "<action><u>"
|
self.current_line += "<action><u>"
|
||||||
elif tag == "img":
|
elif tag == "img":
|
||||||
attr_by_value = get_attributes(attrs)
|
attr_by_value = get_attributes(attrs)
|
||||||
if "src" in attr_by_value and self.include_images:
|
if "src" in attr_by_value and self.include_images:
|
||||||
@ -77,23 +84,29 @@ class MinetestHTMLParser(HTMLParser):
|
|||||||
self.images[name] = attr_by_value["src"]
|
self.images[name] = attr_by_value["src"]
|
||||||
width = attr_by_value.get("width", 128)
|
width = attr_by_value.get("width", 128)
|
||||||
height = attr_by_value.get("height", 128)
|
height = attr_by_value.get("height", 128)
|
||||||
self.text_buffer += f"<img name={name} width={width} height={height}>"
|
self.current_line += f"<img name={name} width={width} height={height}>"
|
||||||
|
|
||||||
if "alt" in attr_by_value:
|
if "alt" in attr_by_value:
|
||||||
self.image_tooltips[name] = attr_by_value["alt"]
|
self.image_tooltips[name] = attr_by_value["alt"]
|
||||||
elif tag == "b" or tag == "strong":
|
elif tag == "b" or tag == "strong":
|
||||||
self.text_buffer += "<b>"
|
self.current_line += "<b>"
|
||||||
elif tag == "i" or tag == "em":
|
elif tag == "i" or tag == "em":
|
||||||
self.text_buffer += "<i>"
|
self.current_line += "<i>"
|
||||||
elif tag == "u":
|
elif tag == "u":
|
||||||
self.text_buffer += "<u>"
|
self.current_line += "<u>"
|
||||||
elif tag == "li":
|
elif tag == "li":
|
||||||
self.has_line_started = False
|
if self.current_line.strip() != "":
|
||||||
self.text_buffer += "• "
|
self.finish_line()
|
||||||
|
else:
|
||||||
|
self.current_line = ""
|
||||||
|
|
||||||
|
self.current_line += make_indent(self.indent_level) + "• "
|
||||||
elif tag == "code":
|
elif tag == "code":
|
||||||
self.text_buffer += "<code>"
|
self.current_line += "<code>"
|
||||||
elif tag == "span" or tag == "ul":
|
elif tag == "span":
|
||||||
pass
|
pass
|
||||||
|
elif tag == "ul":
|
||||||
|
self.indent_level += 1
|
||||||
else:
|
else:
|
||||||
print("UNKNOWN TAG ", tag, attrs, file=sys.stderr)
|
print("UNKNOWN TAG ", tag, attrs, file=sys.stderr)
|
||||||
|
|
||||||
@ -103,52 +116,46 @@ class MinetestHTMLParser(HTMLParser):
|
|||||||
self.remove_until = None
|
self.remove_until = None
|
||||||
return
|
return
|
||||||
|
|
||||||
print("CLOSE", tag, file=sys.stderr)
|
|
||||||
|
|
||||||
if tag == "pre":
|
if tag == "pre":
|
||||||
self.text_buffer = self.text_buffer.rstrip()
|
self.current_line = self.current_line.rstrip() + "</code>"
|
||||||
self.text_buffer += "</code>\n"
|
self.finish_line()
|
||||||
self.is_preserving = False
|
self.is_preserving = False
|
||||||
self.has_line_started = False
|
|
||||||
elif self.is_preserving:
|
elif self.is_preserving:
|
||||||
return
|
return
|
||||||
elif tag == "p":
|
elif tag == "p":
|
||||||
self.text_buffer = self.text_buffer.rstrip()
|
self.current_line = self.current_line.rstrip()
|
||||||
self.text_buffer += "\n"
|
self.finish_line()
|
||||||
self.has_line_started = False
|
|
||||||
elif tag == "h1" or tag == "h2":
|
elif tag == "h1" or tag == "h2":
|
||||||
self.text_buffer += "</big>\n"
|
self.current_line += "</big>"
|
||||||
self.has_line_started = False
|
self.finish_line()
|
||||||
elif tag == "h3" or tag == "h4" or tag == "h5":
|
elif tag == "h3" or tag == "h4" or tag == "h5":
|
||||||
self.text_buffer += "</b>\n"
|
self.current_line += "</b>"
|
||||||
self.has_line_started = False
|
self.finish_line()
|
||||||
elif tag == "a":
|
elif tag == "a":
|
||||||
self.text_buffer += "</u></action>"
|
self.current_line += "</u></action>"
|
||||||
elif tag == "code":
|
elif tag == "code":
|
||||||
self.text_buffer += "</code>"
|
self.current_line += "</code>"
|
||||||
elif tag == "b" or tag == "strong":
|
elif tag == "b" or tag == "strong":
|
||||||
self.text_buffer += "</b>"
|
self.current_line += "</b>"
|
||||||
elif tag == "i" or tag == "em":
|
elif tag == "i" or tag == "em":
|
||||||
self.text_buffer += "</i>"
|
self.current_line += "</i>"
|
||||||
elif tag == "u":
|
elif tag == "u":
|
||||||
self.text_buffer += "</u>"
|
self.current_line += "</u>"
|
||||||
elif tag == "li":
|
elif tag == "li":
|
||||||
self.text_buffer += "\n"
|
self.finish_line()
|
||||||
# else:
|
elif tag == "ul":
|
||||||
# print("END", tag, file=sys.stderr)
|
self.indent_level = max(self.indent_level - 1, 0)
|
||||||
|
|
||||||
def handle_data(self, data):
    """Append a run of text content to the line under construction.

    Text is dropped entirely while ``remove_until`` is set. Outside of
    preserved (``<pre>``) content the text is passed through
    ``normalize_whitespace`` and, when the current line has no visible
    content yet, its leading whitespace is removed so lines never start
    with stray spaces.

    NOTE(review): the nesting of the ``lstrip`` branch under the
    ``is_preserving`` check is reconstructed from a flattened diff view;
    confirm against the repository.

    :param data: the raw text between tags, as delivered by ``HTMLParser``.
    """
    if self.remove_until:
        return

    if not self.is_preserving:
        data = normalize_whitespace(data)
        if self.current_line.strip() == "":
            data = data.lstrip()

    self.current_line += data
||||||
|
|
||||||
def handle_entityref(self, name):
|
def handle_entityref(self, name):
|
||||||
to_value = {
|
to_value = {
|
||||||
@ -160,17 +167,19 @@ class MinetestHTMLParser(HTMLParser):
|
|||||||
}
|
}
|
||||||
|
|
||||||
if name in to_value:
|
if name in to_value:
|
||||||
self.text_buffer += to_value[name]
|
self.current_line += to_value[name]
|
||||||
else:
|
else:
|
||||||
self.text_buffer += f"&{name};"
|
self.current_line += f"&{name};"
|
||||||
|
|
||||||
|
|
||||||
def html_to_minetest(html, formspec_version=6, include_images=True):
|
def html_to_minetest(html, formspec_version=6, include_images=True):
|
||||||
parser = MinetestHTMLParser(include_images)
|
parser = MinetestHTMLParser(include_images)
|
||||||
parser.feed(html)
|
parser.feed(html)
|
||||||
|
parser.finish_line()
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"head": HEAD,
|
"head": HEAD,
|
||||||
"body": parser.text_buffer.strip() + "\n\n",
|
"body": parser.completed_text.strip() + "\n",
|
||||||
"links": parser.links,
|
"links": parser.links,
|
||||||
"images": parser.images,
|
"images": parser.images,
|
||||||
"image_tooltips": parser.image_tooltips,
|
"image_tooltips": parser.image_tooltips,
|
||||||
|
Loading…
Reference in New Issue
Block a user