From 211be30cf4775dd122accb62f9e703b1d0e8184c Mon Sep 17 00:00:00 2001 From: rubenwardy Date: Fri, 5 Jul 2024 01:20:33 +0100 Subject: [PATCH] Fix various things with broken link detection --- app/tasks/pkgtasks.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/app/tasks/pkgtasks.py b/app/tasks/pkgtasks.py index 90d997d7..c76fbe26 100644 --- a/app/tasks/pkgtasks.py +++ b/app/tasks/pkgtasks.py @@ -15,6 +15,7 @@ # along with this program. If not, see <https://www.gnu.org/licenses/>. import datetime +import random import re import sys from time import sleep @@ -22,6 +23,7 @@ from urllib.parse import urlparse from typing import Optional import requests +import urllib3 from sqlalchemy import or_, and_ from app.markdown import get_links, render_markdown @@ -118,10 +120,12 @@ def _url_exists(url: str) -> str: return str(e.response.status_code) except requests.exceptions.ConnectionError: return "ConnectionError" + except urllib3.exceptions.ReadTimeoutError: + return "timeout" def _check_for_dead_links(package: Package) -> dict[str, str]: - links: list[Optional[str]] = [ + links: set[Optional[str]] = { package.repo, package.website, package.issueTracker, @@ -129,10 +133,10 @@ def _check_for_dead_links(package: Package) -> dict[str, str]: package.video_url, package.donate_url_actual, package.translation_url, - ] + } if package.desc: - links.extend(get_links(render_markdown(package.desc), package.get_url("packages.view", absolute=True))) + links.update(get_links(render_markdown(package.desc), package.get_url("packages.view", absolute=True))) print(f"Checking {package.title} ({len(links)} links) for broken links", file=sys.stderr) @@ -150,7 +154,8 @@ def _check_for_dead_links(package: Package) -> dict[str, str]: if res != "": bad_urls[link] = res - sleep(0.5) + # Prevent leaking information + sleep(random.uniform(0.4, 0.6)) return bad_urls @@ -159,7 +164,7 @@ def _check_package(package: Package) -> Optional[str]: bad_urls = _check_for_dead_links(package) if len(bad_urls) > 
0: return ("The following broken links were found on your package:\n\n" + - "\n".join([f"- {link} [{res}]" for link, res in bad_urls.items()])) + "\n".join([f"- <{link}> [{res}]" for link, res in bad_urls.items()])) return None