Fix various issues with broken link checking

This commit is contained in:
rubenwardy 2024-07-04 22:57:26 +01:00
parent b31268c9f2
commit 576d9dd3e0
2 changed files with 41 additions and 24 deletions

@ -17,6 +17,8 @@
import datetime
import re
import sys
from time import sleep
from urllib.parse import urlparse
from typing import Optional
import requests
@ -76,13 +78,7 @@ def notify_about_git_forum_links():
"package will improve the user experience.\n\nHere are some URLs you might wish to replace:\n"
for x in links:
line = f"\n* {x[1].replace('%', '')} -> {x[0].get_url('packages.view', absolute=True)}"
line_added = msg + line
if len(line_added) > 2000 - 150:
post_bot_message(package, title, msg)
msg = f"(...continued)\n{line}"
else:
msg = line_added
msg += f"\n* {x[1].replace('%', '')} -> {x[0].get_url('packages.view', absolute=True)}"
post_bot_message(package, title, msg)
@ -111,11 +107,14 @@ def clear_removed_packages(all_packages: bool):
def _url_exists(url: str) -> str:
try:
with requests.get(url, stream=True, timeout=10) as response:
headers = {
"User-Agent": "Mozilla/5.0 (compatible; ContentDB link checker; +https://content.minetest.net/)",
}
with requests.get(url, stream=True, headers=headers, timeout=10) as response:
response.raise_for_status()
return ""
except requests.exceptions.HTTPError as e:
print(f" - [{e.response.status_code}] {url}", file=sys.stderr)
print(f" - [{e.response.status_code}] <{url}>", file=sys.stderr)
return str(e.response.status_code)
except requests.exceptions.ConnectionError:
return "ConnectionError"
@ -143,10 +142,16 @@ def _check_for_dead_links(package: Package) -> dict[str, str]:
if link is None:
continue
url = urlparse(link)
if url.scheme != "http" and url.scheme != "https":
continue
res = _url_exists(link)
if res != "":
bad_urls[link] = res
sleep(0.5)
return bad_urls

@ -120,6 +120,29 @@ def add_system_audit_log(severity: AuditSeverity, title: str, url: str, package=
return add_audit_log(severity, get_system_user(), title, url, package, description)
def add_replies(thread: Thread, user: User, message: str, continuation: str = "(continued)\n\n", is_status_update=False):
    """
    Append `message` to `thread` as one or more replies authored by `user`.

    A message longer than 1900 characters is split into several replies,
    at the last line break inside the first 1900 characters when there is
    one, otherwise at exactly 1900 characters. Every reply after the first
    is prefixed with `continuation`; the prefix is not counted against the
    1900 character budget. An empty message adds no replies.

    Replies are only appended to `thread.replies`; this function does not
    touch a database session itself.

    :param thread: Thread that receives the replies.
    :param user: Author assigned to every reply.
    :param message: Full text to post.
    :param continuation: Text prepended to the second and later replies.
    :param is_status_update: Copied onto every reply created.
    """
    # NOTE(review): presumably leaves headroom under a larger comment length
    # limit so the continuation prefix still fits - confirm against the model.
    limit = 1900

    is_first = True
    while message != "":
        if len(message) > limit:
            idx = message.rfind("\n", 0, limit)
            if idx > 0:
                this_reply = message[:idx] + "\n\n"
                # Drop the newline we split on so the remainder does not
                # start with it; otherwise the next pass could find its only
                # line break at index 0 and never make progress.
                message = message[idx + 1:]
            else:
                # No usable line break (none at all, or only at index 0):
                # split mid-line rather than emit an oversized or empty
                # chunk. This always consumes `limit` characters.
                this_reply = message[:limit]
                message = message[limit:]
        else:
            this_reply = message
            message = ""

        reply = ThreadReply()
        reply.thread = thread
        reply.author = user
        reply.is_status_update = is_status_update
        if is_first:
            reply.comment = this_reply
        else:
            reply.comment = f"{continuation}{this_reply}"

        thread.replies.append(reply)
        is_first = False
def post_bot_message(package: Package, title: str, message: str, session=None):
if session is None:
session = db.session
@ -137,16 +160,12 @@ def post_bot_message(package: Package, title: str, message: str, session=None):
session.add(thread)
session.flush()
reply = ThreadReply()
reply.thread = thread
reply.author = system_user
reply.comment = "**{}**\n\n{}\n\nThis is an automated message, but you can reply if you need help".format(title, message)
session.add(reply)
add_replies(thread, system_user,
f"**{title}**\n\n{message}\n\nThis is an automated message, but you can reply if you need help",
continuation=f"(continued)\n\n**{title}**\n\n")
add_notification(thread.watchers, system_user, NotificationType.BOT, title, thread.get_view_url(), thread.package, session)
thread.replies.append(reply)
def post_to_approval_thread(package: Package, user: User, message: str, is_status_update=True, create_thread=False):
thread = package.review_thread
@ -163,12 +182,7 @@ def post_to_approval_thread(package: Package, user: User, message: str, is_statu
else:
return
reply = ThreadReply()
reply.thread = thread
reply.author = user
reply.is_status_update = is_status_update
reply.comment = message
db.session.add(reply)
add_replies(thread, user, message, is_status_update=is_status_update)
if is_status_update:
msg = f"{message} - {thread.title}"
@ -177,8 +191,6 @@ def post_to_approval_thread(package: Package, user: User, message: str, is_statu
add_notification(thread.watchers, user, NotificationType.THREAD_REPLY, msg, thread.get_view_url(), package)
thread.replies.append(reply)
def get_games_from_csv(session: sqlalchemy.orm.Session, csv: str) -> List[Package]:
    """
    Look up games from a comma-separated string of names.

    `csv` is split on commas, surrounding whitespace is stripped from each
    piece, and the resulting list is handed to `get_games_from_list` together
    with `session`. Whatever that function returns is returned unchanged.
    """
    names = []
    for raw_name in csv.split(","):
        names.append(raw_name.strip())
    return get_games_from_list(session, names)