mirror of
https://github.com/minetest/contentdb.git
synced 2025-01-03 11:47:28 +01:00
Fix various things with broken link detection
This commit is contained in:
parent
9bf91f17d6
commit
211be30cf4
@ -15,6 +15,7 @@
|
|||||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
|
import random
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
from time import sleep
|
from time import sleep
|
||||||
@ -22,6 +23,7 @@ from urllib.parse import urlparse
|
|||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
import urllib3
|
||||||
from sqlalchemy import or_, and_
|
from sqlalchemy import or_, and_
|
||||||
|
|
||||||
from app.markdown import get_links, render_markdown
|
from app.markdown import get_links, render_markdown
|
||||||
@ -118,10 +120,12 @@ def _url_exists(url: str) -> str:
|
|||||||
return str(e.response.status_code)
|
return str(e.response.status_code)
|
||||||
except requests.exceptions.ConnectionError:
|
except requests.exceptions.ConnectionError:
|
||||||
return "ConnectionError"
|
return "ConnectionError"
|
||||||
|
except urllib3.exceptions.ReadTimeoutError:
|
||||||
|
return "timeout"
|
||||||
|
|
||||||
|
|
||||||
def _check_for_dead_links(package: Package) -> dict[str, str]:
|
def _check_for_dead_links(package: Package) -> dict[str, str]:
|
||||||
links: list[Optional[str]] = [
|
links: set[Optional[str]] = {
|
||||||
package.repo,
|
package.repo,
|
||||||
package.website,
|
package.website,
|
||||||
package.issueTracker,
|
package.issueTracker,
|
||||||
@ -129,10 +133,10 @@ def _check_for_dead_links(package: Package) -> dict[str, str]:
|
|||||||
package.video_url,
|
package.video_url,
|
||||||
package.donate_url_actual,
|
package.donate_url_actual,
|
||||||
package.translation_url,
|
package.translation_url,
|
||||||
]
|
}
|
||||||
|
|
||||||
if package.desc:
|
if package.desc:
|
||||||
links.extend(get_links(render_markdown(package.desc), package.get_url("packages.view", absolute=True)))
|
links.update(get_links(render_markdown(package.desc), package.get_url("packages.view", absolute=True)))
|
||||||
|
|
||||||
print(f"Checking {package.title} ({len(links)} links) for broken links", file=sys.stderr)
|
print(f"Checking {package.title} ({len(links)} links) for broken links", file=sys.stderr)
|
||||||
|
|
||||||
@ -150,7 +154,8 @@ def _check_for_dead_links(package: Package) -> dict[str, str]:
|
|||||||
if res != "":
|
if res != "":
|
||||||
bad_urls[link] = res
|
bad_urls[link] = res
|
||||||
|
|
||||||
sleep(0.5)
|
# Prevent leaking information
|
||||||
|
sleep(random.uniform(0.4, 0.6))
|
||||||
|
|
||||||
return bad_urls
|
return bad_urls
|
||||||
|
|
||||||
@ -159,7 +164,7 @@ def _check_package(package: Package) -> Optional[str]:
|
|||||||
bad_urls = _check_for_dead_links(package)
|
bad_urls = _check_for_dead_links(package)
|
||||||
if len(bad_urls) > 0:
|
if len(bad_urls) > 0:
|
||||||
return ("The following broken links were found on your package:\n\n" +
|
return ("The following broken links were found on your package:\n\n" +
|
||||||
"\n".join([f"- {link} [{res}]" for link, res in bad_urls.items()]))
|
"\n".join([f"- <{link}> [{res}]" for link, res in bad_urls.items()]))
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user