From 100697127189b8e45c91f5906c1d157d6dc130fc Mon Sep 17 00:00:00 2001
From: rubenwardy
Date: Thu, 4 Jul 2024 21:52:24 +0100
Subject: [PATCH] Add admin action to check for broken links

Fixes #546
---
Review note: _check_package() returns None (not "") when a package has no
broken links, so check_package_on_submit must test the message for
truthiness. With `msg != ""` every READY_FOR_REVIEW package would be marked
as needing changes. check_package_for_broken_links already uses `if msg:`.

 app/blueprints/admin/actions.py |  8 +++++++-
 app/tasks/pkgtasks.py           | 36 ++++++++++++++++++++++++++++-----
 2 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/app/blueprints/admin/actions.py b/app/blueprints/admin/actions.py
index 18fa6280..0f5c3f72 100644
--- a/app/blueprints/admin/actions.py
+++ b/app/blueprints/admin/actions.py
@@ -29,7 +29,7 @@ from app.tasks.forumtasks import import_topic_list, check_all_forum_accounts
 from app.tasks.importtasks import import_repo_screenshot, check_zip_release, check_for_updates, update_all_game_support, \
 	import_languages
 from app.tasks.usertasks import import_github_user_ids
-from app.tasks.pkgtasks import notify_about_git_forum_links, clear_removed_packages
+from app.tasks.pkgtasks import notify_about_git_forum_links, clear_removed_packages, check_package_for_broken_links
 from app.utils import add_notification, get_system_user
 
 actions = {}
@@ -404,3 +404,9 @@ def delete_empty_threads():
 	flash(f"Deleted {count} threads", "success")
 
 	return redirect(url_for("admin.admin_page"))
+
+
+@action("DANGER: Check for broken links in all packages")
+def check_for_broken_links():
+	for package in Package.query.filter_by(state=PackageState.APPROVED).order_by(db.desc(Package.score)).limit(100).all():
+		check_package_for_broken_links.delay(package.id)
diff --git a/app/tasks/pkgtasks.py b/app/tasks/pkgtasks.py
index 1aadc9a3..1cf6b04b 100644
--- a/app/tasks/pkgtasks.py
+++ b/app/tasks/pkgtasks.py
@@ -16,6 +16,7 @@
 
 import datetime
 import re
+import sys
 from typing import Optional
 
 import requests
@@ -114,12 +115,13 @@ def _url_exists(url: str) -> str:
 			response.raise_for_status()
 			return ""
 	except requests.exceptions.HTTPError as e:
+		print(f" - [{e.response.status_code}] {url}", file=sys.stderr)
 		return str(e.response.status_code)
 	except requests.exceptions.ConnectionError:
 		return "ConnectionError"
 
 
-def check_for_dead_links(package: Package) -> dict[str, str]:
+def _check_for_dead_links(package: Package) -> dict[str, str]:
 	links: list[Optional[str]] = [
 		package.repo,
 		package.website,
@@ -133,6 +135,8 @@ def check_for_dead_links(package: Package) -> dict[str, str]:
 	if package.desc:
 		links.extend(get_links(render_markdown(package.desc), package.get_url("packages.view", absolute=True)))
 
+	print(f"Checking {package.title} ({len(links)} links) for broken links", file=sys.stderr)
+
 	bad_urls = {}
 
 	for link in links:
@@ -146,20 +150,42 @@ def check_for_dead_links(package: Package) -> dict[str, str]:
 	return bad_urls
 
 
+def _check_package(package: Package) -> Optional[str]:
+	bad_urls = _check_for_dead_links(package)
+	if len(bad_urls) > 0:
+		return ("The following broken links were found on your package:\n\n" +
+				"\n".join([f"- {link} [{res}]" for link, res in bad_urls.items()]))
+
+	return None
+
+
 @celery.task()
 def check_package_on_submit(package_id: int):
 	package = Package.query.get(package_id)
 	if package is None:
 		raise TaskError("No such package")
 
-	bad_urls = check_for_dead_links(package)
-	if len(bad_urls) > 0:
+	if package.state != PackageState.READY_FOR_REVIEW:
+		return
+
+	msg = _check_package(package)
+	if msg:  # _check_package() returns None, not "", when no links are broken
 		marked = f"Marked {package.title} as Changed Needed"
-		msg = ("The following broken links were found on your package:\n\n" +
-				"\n".join([f"- {link} [{res}]" for link, res in bad_urls.items()]))
 
 		system_user = get_system_user()
 		post_to_approval_thread(package, system_user, marked, is_status_update=True, create_thread=True)
 		post_to_approval_thread(package, system_user, msg, is_status_update=False, create_thread=True)
 		package.state = PackageState.CHANGES_NEEDED
 		db.session.commit()
+
+
+@celery.task(rate_limit="5/m")
+def check_package_for_broken_links(package_id: int):
+	package = Package.query.get(package_id)
+	if package is None:
+		raise TaskError("No such package")
+
+	msg = _check_package(package)
+	if msg:
+		post_bot_message(package, "Broken links", msg)
+		db.session.commit()