From 3f62a41952022d0784d9c4e3b441a6d0bfca7b88 Mon Sep 17 00:00:00 2001 From: rubenwardy Date: Thu, 4 Jul 2024 21:29:56 +0100 Subject: [PATCH] Check packages for broken links when submitting for approval Half of #546 --- app/blueprints/packages/packages.py | 3 ++ app/markdown.py | 7 +++ app/models/packages.py | 6 +++ app/tasks/pkgtasks.py | 63 ++++++++++++++++++++++++- app/templates/packages/view.html | 2 +- app/templates/todo/topics_mismatch.html | 2 +- app/utils/models.py | 14 +++++- 7 files changed, 91 insertions(+), 6 deletions(-) diff --git a/app/blueprints/packages/packages.py b/app/blueprints/packages/packages.py index 65a46b24..7eebd676 100644 --- a/app/blueprints/packages/packages.py +++ b/app/blueprints/packages/packages.py @@ -36,6 +36,7 @@ from app.querybuilder import QueryBuilder from app.rediscache import has_key, set_temp_key from app.tasks.importtasks import import_repo_screenshot, check_zip_release, remove_package_game_support, \ update_package_game_support +from app.tasks.pkgtasks import check_package_on_submit from app.tasks.webhooktasks import post_discord_webhook from . import bp, get_package_tabs @@ -436,6 +437,8 @@ def move_to_state(package): db.session.commit() + check_package_on_submit.delay(package.id) + if package.state == PackageState.CHANGES_NEEDED: flash(gettext("Please comment what changes are needed in the approval thread"), "warning") if package.review_thread: diff --git a/app/markdown.py b/app/markdown.py index 9e2f0dbe..19be525a 100644 --- a/app/markdown.py +++ b/app/markdown.py @@ -15,6 +15,7 @@ # along with this program. If not, see . from functools import partial +from urllib.parse import urljoin import bleach from bleach import Cleaner @@ -205,3 +206,9 @@ def get_user_mentions(html: str) -> set: soup = BeautifulSoup(html, "html.parser") links = soup.select("a[data-username]") return set([x.get("data-username") for x in links]) + + +def get_links(html: str, url: str) -> set: + soup = BeautifulSoup(html, "html.parser") + links = soup.select("a[href]") + return set([urljoin(url, x.get("href")) for x in links]) diff --git a/app/models/packages.py b/app/models/packages.py index 14373488..271acf04 100644 --- a/app/models/packages.py +++ b/app/models/packages.py @@ -452,6 +452,12 @@ class Package(db.Model): def donate_url_actual(self): return self.donate_url or self.author.donate_url + @property + def forums_url(self) -> typing.Optional[str]: + if self.forums is None: + return None + + return "https://forum.minetest.net/viewtopic.php?t=" + str(self.forums) enable_game_support_detection = db.Column(db.Boolean, nullable=False, default=True) diff --git a/app/tasks/pkgtasks.py b/app/tasks/pkgtasks.py index 0a294b64..0f6d2b54 100644 --- a/app/tasks/pkgtasks.py +++ b/app/tasks/pkgtasks.py @@ -16,12 +16,15 @@ import datetime import re +from typing import Optional +import requests from sqlalchemy import or_, and_ +from app.markdown import get_links, render_markdown from app.models import Package, db, PackageState, AuditLogEntry -from app.tasks import celery -from app.utils import post_bot_message +from app.tasks import celery, TaskError +from app.utils import post_bot_message, post_to_approval_thread, get_system_user @celery.task() @@ -103,3 +106,59 @@ def clear_removed_packages(all_packages: bool): db.session.commit() return f"Deleted {count} soft deleted packages packages" + + +def _url_exists(url: str) -> bool: + try: + with requests.get(url, stream=True) as response: + try: + response.raise_for_status() + return True + except requests.exceptions.HTTPError: + return False + except requests.exceptions.ConnectionError: + return False + + +def check_for_dead_links(package: Package) -> set[str]: + links: list[Optional[str]] = [ + package.repo, + package.website, + package.issueTracker, + package.forums_url, + package.video_url, + package.donate_url_actual, + package.translation_url, + ] + + if package.desc: + links.extend(get_links(render_markdown(package.desc), package.get_url("packages.view", absolute=True))) + + bad_urls = set() + + for link in links: + if link is None: + continue + + if not _url_exists(link): + bad_urls.add(link) + + return bad_urls + + +@celery.task() +def check_package_on_submit(package_id: int): + package = Package.query.get(package_id) + if package is None: + raise TaskError("No such package") + + bad_urls = check_for_dead_links(package) + if len(bad_urls) > 0: + marked = f"Marked {package.title} as Changed Needed" + msg = "The following broken links were found on your package:\n\n" + "\n".join([f"- {x}" for x in bad_urls]) + + system_user = get_system_user() + post_to_approval_thread(package, system_user, marked, is_status_update=True, create_thread=True) + post_to_approval_thread(package, system_user, msg, is_status_update=False, create_thread=True) + package.state = PackageState.CHANGES_NEEDED + db.session.commit() diff --git a/app/templates/packages/view.html b/app/templates/packages/view.html index 212cec69..cc55b38a 100644 --- a/app/templates/packages/view.html +++ b/app/templates/packages/view.html @@ -221,7 +221,7 @@ {% endif %} {% if package.forums %} - + {{ _("Forums") }} diff --git a/app/templates/todo/topics_mismatch.html b/app/templates/todo/topics_mismatch.html index cc913190..bc1e6820 100644 --- a/app/templates/todo/topics_mismatch.html +++ b/app/templates/todo/topics_mismatch.html @@ -54,7 +54,7 @@ Mismatched Topics
diff --git a/app/utils/models.py b/app/utils/models.py index 2e7bc404..4fe3396a 100644 --- a/app/utils/models.py +++ b/app/utils/models.py @@ -148,10 +148,20 @@ def post_bot_message(package: Package, title: str, message: str, session=None): thread.replies.append(reply) -def post_to_approval_thread(package: Package, user: User, message: str, is_status_update=True): +def post_to_approval_thread(package: Package, user: User, message: str, is_status_update=True, create_thread=False): thread = package.review_thread if thread is None: - return + if create_thread: + thread = Thread() + thread.author = user + thread.title = "Package approval comments" + thread.private = True + thread.package = package + db.session.add(thread) + db.session.flush() + package.review_thread = thread + else: + return reply = ThreadReply() reply.thread = thread