Fix various things with broken link detection

This commit is contained in:
rubenwardy 2024-07-05 01:20:33 +01:00
parent 9bf91f17d6
commit 211be30cf4

@ -15,6 +15,7 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import datetime
import random
import re
import sys
from time import sleep
@ -22,6 +23,7 @@ from urllib.parse import urlparse
from typing import Optional
import requests
import urllib3
from sqlalchemy import or_, and_
from app.markdown import get_links, render_markdown
@ -118,10 +120,12 @@ def _url_exists(url: str) -> str:
return str(e.response.status_code)
except requests.exceptions.ConnectionError:
return "ConnectionError"
except urllib3.exceptions.ReadTimeoutError:
return "timeout"
def _check_for_dead_links(package: Package) -> dict[str, str]:
links: list[Optional[str]] = [
links: set[Optional[str]] = {
package.repo,
package.website,
package.issueTracker,
@ -129,10 +133,10 @@ def _check_for_dead_links(package: Package) -> dict[str, str]:
package.video_url,
package.donate_url_actual,
package.translation_url,
]
}
if package.desc:
links.extend(get_links(render_markdown(package.desc), package.get_url("packages.view", absolute=True)))
links.update(get_links(render_markdown(package.desc), package.get_url("packages.view", absolute=True)))
print(f"Checking {package.title} ({len(links)} links) for broken links", file=sys.stderr)
@ -150,7 +154,8 @@ def _check_for_dead_links(package: Package) -> dict[str, str]:
if res != "":
bad_urls[link] = res
sleep(0.5)
# Randomise the delay (instead of a fixed 0.5s) to prevent leaking information
sleep(random.uniform(0.4, 0.6))
return bad_urls
@ -159,7 +164,7 @@ def _check_package(package: Package) -> Optional[str]:
bad_urls = _check_for_dead_links(package)
if len(bad_urls) > 0:
return ("The following broken links were found on your package:\n\n" +
"\n".join([f"- {link} [{res}]" for link, res in bad_urls.items()]))
"\n".join([f"- <{link}> [{res}]" for link, res in bad_urls.items()]))
return None