contentdb/app/markdown.py
2021-08-21 05:40:20 +01:00

126 lines
2.9 KiB
Python

from functools import partial
import bleach
from bleach import Cleaner
from bleach.linkifier import LinkifyFilter
from bs4 import BeautifulSoup
from markdown import Markdown
from flask import Markup
from markdown.extensions import Extension
from markdown.inlinepatterns import SimpleTagInlineProcessor
# Based on
# https://github.com/Wenzil/mdx_bleach/blob/master/mdx_bleach/whitelist.py
#
# License: MIT
ALLOWED_TAGS = [
"h1", "h2", "h3", "h4", "h5", "h6", "hr",
"ul", "ol", "li",
"p",
"br",
"pre",
"code",
"blockquote",
"strong",
"em",
"a",
"img",
"table", "thead", "tbody", "tr", "th", "td",
"div", "span", "del", "s",
]
ALLOWED_CSS = [
"highlight", "codehilite",
"hll", "c", "err", "g", "k", "l", "n", "o", "x", "p", "ch", "cm", "cp", "cpf", "c1", "cs",
"gd", "ge", "gr", "gh", "gi", "go", "gp", "gs", "gu", "gt", "kc", "kd", "kn", "kp", "kr",
"kt", "ld", "m", "s", "na", "nb", "nc", "no", "nd", "ni", "ne", "nf", "nl", "nn", "nx",
"py", "nt", "nv", "ow", "w", "mb", "mf", "mh", "mi", "mo", "sa", "sb", "sc", "dl", "sd",
"s2", "se", "sh", "si", "sx", "sr", "s1", "ss", "bp", "fm", "vc", "vg", "vi", "vm", "il",
]
def allow_class(_tag, name, value):
return name == "class" and value in ALLOWED_CSS
ALLOWED_ATTRIBUTES = {
"h1": ["id"],
"h2": ["id"],
"h3": ["id"],
"h4": ["id"],
"a": ["href", "title"],
"img": ["src", "title", "alt"],
"code": allow_class,
"div": allow_class,
"span": allow_class,
}
ALLOWED_PROTOCOLS = ["http", "https", "mailto"]
md = None
def render_markdown(source):
html = md.convert(source)
cleaner = Cleaner(
tags=ALLOWED_TAGS,
attributes=ALLOWED_ATTRIBUTES,
protocols=ALLOWED_PROTOCOLS,
filters=[partial(LinkifyFilter, callbacks=bleach.linkifier.DEFAULT_CALLBACKS)])
return cleaner.clean(html)
class DelInsExtension(Extension):
def extendMarkdown(self, md):
del_proc = SimpleTagInlineProcessor(r'(\~\~)(.+?)(\~\~)', 'del')
md.inlinePatterns.register(del_proc, 'del', 200)
ins_proc = SimpleTagInlineProcessor(r'(\+\+)(.+?)(\+\+)', 'ins')
md.inlinePatterns.register(ins_proc, 'ins', 200)
MARKDOWN_EXTENSIONS = ["fenced_code", "tables", "codehilite", "toc", DelInsExtension()]
MARKDOWN_EXTENSION_CONFIG = {
"fenced_code": {},
"tables": {},
"codehilite": {
"guess_lang": False,
}
}
def init_markdown(app):
global md
md = Markdown(extensions=MARKDOWN_EXTENSIONS,
extension_configs=MARKDOWN_EXTENSION_CONFIG,
output_format="html5")
@app.template_filter()
def markdown(source):
return Markup(render_markdown(source))
def get_headings(html: str):
soup = BeautifulSoup(html, "html.parser")
headings = soup.find_all(["h1", "h2", "h3"])
root = []
stack = []
for heading in headings:
this = { "link": heading.get("id") or "", "text": heading.text, "children": [] }
this_level = int(heading.name[1:]) - 1
while this_level <= len(stack):
stack.pop()
if len(stack) > 0:
stack[-1]["children"].append(this)
else:
root.append(this)
stack.append(this)
return root