Fix various issues with broken link checking

This commit is contained in:
rubenwardy 2024-07-04 22:57:26 +01:00
parent b31268c9f2
commit 576d9dd3e0
2 changed files with 41 additions and 24 deletions

@ -17,6 +17,8 @@
import datetime import datetime
import re import re
import sys import sys
from time import sleep
from urllib.parse import urlparse
from typing import Optional from typing import Optional
import requests import requests
@ -76,13 +78,7 @@ def notify_about_git_forum_links():
"package will improve the user experience.\n\nHere are some URLs you might wish to replace:\n" "package will improve the user experience.\n\nHere are some URLs you might wish to replace:\n"
for x in links: for x in links:
line = f"\n* {x[1].replace('%', '')} -> {x[0].get_url('packages.view', absolute=True)}" msg += f"\n* {x[1].replace('%', '')} -> {x[0].get_url('packages.view', absolute=True)}"
line_added = msg + line
if len(line_added) > 2000 - 150:
post_bot_message(package, title, msg)
msg = f"(...continued)\n{line}"
else:
msg = line_added
post_bot_message(package, title, msg) post_bot_message(package, title, msg)
@ -111,11 +107,14 @@ def clear_removed_packages(all_packages: bool):
def _url_exists(url: str) -> str: def _url_exists(url: str) -> str:
try: try:
with requests.get(url, stream=True, timeout=10) as response: headers = {
"User-Agent": "Mozilla/5.0 (compatible; ContentDB link checker; +https://content.minetest.net/)",
}
with requests.get(url, stream=True, headers=headers, timeout=10) as response:
response.raise_for_status() response.raise_for_status()
return "" return ""
except requests.exceptions.HTTPError as e: except requests.exceptions.HTTPError as e:
print(f" - [{e.response.status_code}] {url}", file=sys.stderr) print(f" - [{e.response.status_code}] <{url}>", file=sys.stderr)
return str(e.response.status_code) return str(e.response.status_code)
except requests.exceptions.ConnectionError: except requests.exceptions.ConnectionError:
return "ConnectionError" return "ConnectionError"
@ -143,10 +142,16 @@ def _check_for_dead_links(package: Package) -> dict[str, str]:
if link is None: if link is None:
continue continue
url = urlparse(link)
if url.scheme != "http" and url.scheme != "https":
continue
res = _url_exists(link) res = _url_exists(link)
if res != "": if res != "":
bad_urls[link] = res bad_urls[link] = res
sleep(0.5)
return bad_urls return bad_urls

@ -120,6 +120,29 @@ def add_system_audit_log(severity: AuditSeverity, title: str, url: str, package=
return add_audit_log(severity, get_system_user(), title, url, package, description) return add_audit_log(severity, get_system_user(), title, url, package, description)
def add_replies(thread: Thread, user: User, message: str, continuation: str = "(continued)\n\n", is_status_update=False):
    """
    Append `message` to `thread` as one or more replies authored by `user`.

    Messages longer than 1900 characters are split across several replies.
    A split is made at the last newline inside the 1900-character window when
    there is one; otherwise the text is cut at exactly 1900 characters. Every
    reply after the first is prefixed with `continuation`.

    An empty `message` creates no reply at all.

    Replies are only appended to `thread.replies`; nothing is added to a
    session here.
    NOTE(review): presumably the relationship cascade persists them - confirm.
    NOTE(review): the prefix and the "\\n\\n" suffix are added on top of the
    1900 characters, so a long `continuation` can push a reply past whatever
    hard limit applies to comments - confirm against the model's limit.

    :param thread: Thread to append the replies to.
    :param user: Author assigned to every created reply.
    :param message: Full text to post.
    :param continuation: Text prepended to the second and later replies.
    :param is_status_update: Copied onto every created reply.
    """
    max_chunk = 1900

    is_first = True
    while message != "":
        if len(message) > max_chunk:
            idx = message[:max_chunk].rfind("\n")
            if idx <= 0:
                # No usable newline in the window: rfind gave -1 (none) or 0
                # (only a leading one). Slicing at -1 would emit nearly the
                # whole message as one reply, and slicing at 0 would consume
                # nothing and loop forever, so cut at the limit instead.
                this_reply = message[:max_chunk]
                message = message[max_chunk:]
            else:
                this_reply = message[:idx] + "\n\n"
                # Drop the newline(s) at the split point so the remainder
                # never starts with the separator that was just consumed.
                message = message[idx:].lstrip("\n")
        else:
            this_reply = message
            message = ""

        reply = ThreadReply()
        reply.thread = thread
        reply.author = user
        reply.is_status_update = is_status_update
        if is_first:
            reply.comment = this_reply
        else:
            reply.comment = f"{continuation}{this_reply}"

        thread.replies.append(reply)
        is_first = False
def post_bot_message(package: Package, title: str, message: str, session=None): def post_bot_message(package: Package, title: str, message: str, session=None):
if session is None: if session is None:
session = db.session session = db.session
@ -137,16 +160,12 @@ def post_bot_message(package: Package, title: str, message: str, session=None):
session.add(thread) session.add(thread)
session.flush() session.flush()
reply = ThreadReply() add_replies(thread, system_user,
reply.thread = thread f"**{title}**\n\n{message}\n\nThis is an automated message, but you can reply if you need help",
reply.author = system_user continuation=f"(continued)\n\n**{title}**\n\n")
reply.comment = "**{}**\n\n{}\n\nThis is an automated message, but you can reply if you need help".format(title, message)
session.add(reply)
add_notification(thread.watchers, system_user, NotificationType.BOT, title, thread.get_view_url(), thread.package, session) add_notification(thread.watchers, system_user, NotificationType.BOT, title, thread.get_view_url(), thread.package, session)
thread.replies.append(reply)
def post_to_approval_thread(package: Package, user: User, message: str, is_status_update=True, create_thread=False): def post_to_approval_thread(package: Package, user: User, message: str, is_status_update=True, create_thread=False):
thread = package.review_thread thread = package.review_thread
@ -163,12 +182,7 @@ def post_to_approval_thread(package: Package, user: User, message: str, is_statu
else: else:
return return
reply = ThreadReply() add_replies(thread, user, message, is_status_update=is_status_update)
reply.thread = thread
reply.author = user
reply.is_status_update = is_status_update
reply.comment = message
db.session.add(reply)
if is_status_update: if is_status_update:
msg = f"{message} - {thread.title}" msg = f"{message} - {thread.title}"
@ -177,8 +191,6 @@ def post_to_approval_thread(package: Package, user: User, message: str, is_statu
add_notification(thread.watchers, user, NotificationType.THREAD_REPLY, msg, thread.get_view_url(), package) add_notification(thread.watchers, user, NotificationType.THREAD_REPLY, msg, thread.get_view_url(), package)
thread.replies.append(reply)
def get_games_from_csv(session: sqlalchemy.orm.Session, csv: str) -> List[Package]:
    """
    Look up games from a comma-separated string of names.

    The string is split on commas, each entry has its surrounding whitespace
    removed, and the resulting list is passed to `get_games_from_list`, whose
    result is returned unchanged.
    """
    stripped_names = list(map(str.strip, csv.split(",")))
    return get_games_from_list(session, stripped_names)