Check packages for broken links when submitting for approval

Half of #546
This commit is contained in:
rubenwardy 2024-07-04 21:29:56 +01:00
parent a38a650dc1
commit 3f62a41952
7 changed files with 91 additions and 6 deletions

@ -36,6 +36,7 @@ from app.querybuilder import QueryBuilder
from app.rediscache import has_key, set_temp_key from app.rediscache import has_key, set_temp_key
from app.tasks.importtasks import import_repo_screenshot, check_zip_release, remove_package_game_support, \ from app.tasks.importtasks import import_repo_screenshot, check_zip_release, remove_package_game_support, \
update_package_game_support update_package_game_support
from app.tasks.pkgtasks import check_package_on_submit
from app.tasks.webhooktasks import post_discord_webhook from app.tasks.webhooktasks import post_discord_webhook
from . import bp, get_package_tabs from . import bp, get_package_tabs
@ -436,6 +437,8 @@ def move_to_state(package):
db.session.commit() db.session.commit()
check_package_on_submit.delay(package.id)
if package.state == PackageState.CHANGES_NEEDED: if package.state == PackageState.CHANGES_NEEDED:
flash(gettext("Please comment what changes are needed in the approval thread"), "warning") flash(gettext("Please comment what changes are needed in the approval thread"), "warning")
if package.review_thread: if package.review_thread:

@ -15,6 +15,7 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>. # along with this program. If not, see <https://www.gnu.org/licenses/>.
from functools import partial from functools import partial
from urllib.parse import urljoin
import bleach import bleach
from bleach import Cleaner from bleach import Cleaner
@ -205,3 +206,9 @@ def get_user_mentions(html: str) -> set:
soup = BeautifulSoup(html, "html.parser") soup = BeautifulSoup(html, "html.parser")
links = soup.select("a[data-username]") links = soup.select("a[data-username]")
return set([x.get("data-username") for x in links]) return set([x.get("data-username") for x in links])
def get_links(html: str, url: str) -> set:
soup = BeautifulSoup(html, "html.parser")
links = soup.select("a[href]")
return set([urljoin(url, x.get("href")) for x in links])

@ -452,6 +452,12 @@ class Package(db.Model):
def donate_url_actual(self): def donate_url_actual(self):
return self.donate_url or self.author.donate_url return self.donate_url or self.author.donate_url
@property
def forums_url(self) -> typing.Optional[str]:
if self.forums is None:
return None
return "https://forum.minetest.net/viewtopic.php?t=" + str(self.forums)
enable_game_support_detection = db.Column(db.Boolean, nullable=False, default=True) enable_game_support_detection = db.Column(db.Boolean, nullable=False, default=True)

@ -16,12 +16,15 @@
import datetime import datetime
import re import re
from typing import Optional
import requests
from sqlalchemy import or_, and_ from sqlalchemy import or_, and_
from app.markdown import get_links, render_markdown
from app.models import Package, db, PackageState, AuditLogEntry from app.models import Package, db, PackageState, AuditLogEntry
from app.tasks import celery from app.tasks import celery, TaskError
from app.utils import post_bot_message from app.utils import post_bot_message, post_to_approval_thread, get_system_user
@celery.task() @celery.task()
@ -103,3 +106,59 @@ def clear_removed_packages(all_packages: bool):
db.session.commit() db.session.commit()
return f"Deleted {count} soft deleted packages packages" return f"Deleted {count} soft deleted packages packages"
def _url_exists(url: str) -> bool:
try:
with requests.get(url, stream=True) as response:
try:
response.raise_for_status()
return True
except requests.exceptions.HTTPError:
return False
except requests.exceptions.ConnectionError:
return False
def check_for_dead_links(package: Package) -> set[str]:
links: list[Optional[str]] = [
package.repo,
package.website,
package.issueTracker,
package.forums_url,
package.video_url,
package.donate_url_actual,
package.translation_url,
]
if package.desc:
links.extend(get_links(render_markdown(package.desc), package.get_url("packages.view", absolute=True)))
bad_urls = set()
for link in links:
if link is None:
continue
if not _url_exists(link):
bad_urls.add(link)
return bad_urls
@celery.task()
def check_package_on_submit(package_id: int):
package = Package.query.get(package_id)
if package is None:
raise TaskError("No such package")
bad_urls = check_for_dead_links(package)
if len(bad_urls) > 0:
marked = f"Marked {package.title} as Changed Needed"
msg = "The following broken links were found on your package:\n\n" + "\n".join([f"- {x}" for x in bad_urls])
system_user = get_system_user()
post_to_approval_thread(package, system_user, marked, is_status_update=True, create_thread=True)
post_to_approval_thread(package, system_user, msg, is_status_update=False, create_thread=True)
package.state = PackageState.CHANGES_NEEDED
db.session.commit()

@ -221,7 +221,7 @@
</a> </a>
{% endif %} {% endif %}
{% if package.forums %} {% if package.forums %}
<a class="btn" href="https://forum.minetest.net/viewtopic.php?t={{ package.forums }}"> <a class="btn" href="{{ package.forums_url }}">
<i class="fas fa-comments"></i> <i class="fas fa-comments"></i>
<span class="count">{{ _("Forums") }}</span> <span class="count">{{ _("Forums") }}</span>
</a> </a>

@ -54,7 +54,7 @@ Mismatched Topics
<div class="col-sm"></div> <div class="col-sm"></div>
<div class="col-sm-auto"> <div class="col-sm-auto">
<a class="btn btn-secondary me-2" href="{{ package.get_url('packages.create_edit') }}">Edit</a> <a class="btn btn-secondary me-2" href="{{ package.get_url('packages.create_edit') }}">Edit</a>
<a class="btn btn-secondary me-2" href="https://forum.minetest.net/viewtopic.php?t={{ package.forums }}">Forums</a> <a class="btn btn-secondary me-2" href="{{ package.forums_url }}">Forums</a>
</div> </div>
</div> </div>
</li> </li>

@ -148,9 +148,19 @@ def post_bot_message(package: Package, title: str, message: str, session=None):
thread.replies.append(reply) thread.replies.append(reply)
def post_to_approval_thread(package: Package, user: User, message: str, is_status_update=True): def post_to_approval_thread(package: Package, user: User, message: str, is_status_update=True, create_thread=False):
thread = package.review_thread thread = package.review_thread
if thread is None: if thread is None:
if create_thread:
thread = Thread()
thread.author = user
thread.title = "Package approval comments"
thread.private = True
thread.package = package
db.session.add(thread)
db.session.flush()
package.review_thread = thread
else:
return return
reply = ThreadReply() reply = ThreadReply()