Add admin action to check for broken links

Fixes #546
This commit is contained in:
rubenwardy 2024-07-04 21:52:24 +01:00
parent d738e19ce9
commit 1006971271
2 changed files with 38 additions and 6 deletions

@ -29,7 +29,7 @@ from app.tasks.forumtasks import import_topic_list, check_all_forum_accounts
from app.tasks.importtasks import import_repo_screenshot, check_zip_release, check_for_updates, update_all_game_support, \ from app.tasks.importtasks import import_repo_screenshot, check_zip_release, check_for_updates, update_all_game_support, \
import_languages import_languages
from app.tasks.usertasks import import_github_user_ids from app.tasks.usertasks import import_github_user_ids
from app.tasks.pkgtasks import notify_about_git_forum_links, clear_removed_packages from app.tasks.pkgtasks import notify_about_git_forum_links, clear_removed_packages, check_package_for_broken_links
from app.utils import add_notification, get_system_user from app.utils import add_notification, get_system_user
actions = {} actions = {}
@ -404,3 +404,9 @@ def delete_empty_threads():
flash(f"Deleted {count} threads", "success") flash(f"Deleted {count} threads", "success")
return redirect(url_for("admin.admin_page")) return redirect(url_for("admin.admin_page"))
@action("DANGER: Check for broken links in all packages")
def check_for_broken_links():
    """Queue a broken-link check for the highest-scoring approved packages.

    Despite the action label, only the top 100 approved packages (ordered by
    descending score) are queued. One ``check_package_for_broken_links`` task
    is dispatched per package.

    Returns:
        A redirect back to the admin page, after flashing how many checks
        were queued (same pattern as the other admin actions in this module).
    """
    packages = (
        Package.query
        .filter_by(state=PackageState.APPROVED)
        .order_by(db.desc(Package.score))
        .limit(100)
        .all()
    )
    for package in packages:
        check_package_for_broken_links.delay(package.id)

    # Give the admin feedback instead of silently returning nothing.
    flash(f"Queued broken link checks for {len(packages)} packages", "success")
    return redirect(url_for("admin.admin_page"))

@ -16,6 +16,7 @@
import datetime import datetime
import re import re
import sys
from typing import Optional from typing import Optional
import requests import requests
@ -114,12 +115,13 @@ def _url_exists(url: str) -> str:
response.raise_for_status() response.raise_for_status()
return "" return ""
except requests.exceptions.HTTPError as e: except requests.exceptions.HTTPError as e:
print(f" - [{e.response.status_code}] {url}", file=sys.stderr)
return str(e.response.status_code) return str(e.response.status_code)
except requests.exceptions.ConnectionError: except requests.exceptions.ConnectionError:
return "ConnectionError" return "ConnectionError"
def check_for_dead_links(package: Package) -> dict[str, str]: def _check_for_dead_links(package: Package) -> dict[str, str]:
links: list[Optional[str]] = [ links: list[Optional[str]] = [
package.repo, package.repo,
package.website, package.website,
@ -133,6 +135,8 @@ def check_for_dead_links(package: Package) -> dict[str, str]:
if package.desc: if package.desc:
links.extend(get_links(render_markdown(package.desc), package.get_url("packages.view", absolute=True))) links.extend(get_links(render_markdown(package.desc), package.get_url("packages.view", absolute=True)))
print(f"Checking {package.title} ({len(links)} links) for broken links", file=sys.stderr)
bad_urls = {} bad_urls = {}
for link in links: for link in links:
@ -146,20 +150,42 @@ def check_for_dead_links(package: Package) -> dict[str, str]:
return bad_urls return bad_urls
def _check_package(package: Package) -> Optional[str]:
    """Build a human-readable report of the broken links in a package.

    Args:
        package: The package whose links should be checked.

    Returns:
        A message with one bullet per broken link, in the form
        ``- <link> [<result>]``, or ``None`` when ``_check_for_dead_links``
        reports nothing.
    """
    broken = _check_for_dead_links(package)
    if not broken:
        return None

    bullets = "\n".join(f"- {link} [{res}]" for link, res in broken.items())
    return "The following broken links were found on your package:\n\n" + bullets
@celery.task()
def check_package_on_submit(package_id: int):
    """Check a submitted package for broken links and bounce it if any exist.

    If the package is still awaiting review and has broken links, two posts
    are made to its approval thread (a status update and the list of broken
    links) and the package is moved to ``CHANGES_NEEDED``.

    Args:
        package_id: Primary key of the package to check.

    Raises:
        TaskError: If no package with ``package_id`` exists.
    """
    package = Package.query.get(package_id)
    if package is None:
        raise TaskError("No such package")

    # Only act on packages that are still waiting for review.
    if package.state != PackageState.READY_FOR_REVIEW:
        return

    msg = _check_package(package)
    # _check_package returns None (not "") when nothing is broken, so use a
    # truthiness check; comparing against "" would flag every package.
    if not msg:
        return

    marked = f"Marked {package.title} as Changed Needed"
    system_user = get_system_user()
    post_to_approval_thread(package, system_user, marked, is_status_update=True, create_thread=True)
    post_to_approval_thread(package, system_user, msg, is_status_update=False, create_thread=True)
    package.state = PackageState.CHANGES_NEEDED
    db.session.commit()
@celery.task(rate_limit="5/m")
def check_package_for_broken_links(package_id: int):
    """Check one package for broken links and notify via a bot message.

    The task is registered with a Celery rate limit of ``5/m``.

    Args:
        package_id: Primary key of the package to check.

    Raises:
        TaskError: If no package with ``package_id`` exists.
    """
    pkg = Package.query.get(package_id)
    if pkg is None:
        raise TaskError("No such package")

    report = _check_package(pkg)
    if not report:
        # Nothing broken: no message to post.
        return

    post_bot_message(pkg, "Broken links", report)
    db.session.commit()