Add admin task to check all zip files

This commit is contained in:
rubenwardy 2024-07-08 21:24:02 +01:00
parent a62f68ea5a
commit 1f29938186
2 changed files with 52 additions and 16 deletions

@ -27,7 +27,7 @@ from app.models import PackageRelease, db, Package, PackageState, PackageScreens
from app.tasks.emails import send_pending_digests
from app.tasks.forumtasks import import_topic_list, check_all_forum_accounts
from app.tasks.importtasks import import_repo_screenshot, check_zip_release, check_for_updates, update_all_game_support, \
import_languages
import_languages, check_all_zip_files
from app.tasks.usertasks import import_github_user_ids
from app.tasks.pkgtasks import notify_about_git_forum_links, clear_removed_packages, check_package_for_broken_links
from app.utils import add_notification, get_system_user
@ -315,6 +315,13 @@ def do_notify_git_forums_links():
return redirect(url_for("tasks.check", id=task_id, r=url_for("admin.admin_page")))
@action("Check all zip files")
def do_check_all_zip_files():
task_id = uuid()
check_all_zip_files.apply_async((), task_id=task_id)
return redirect(url_for("tasks.check", id=task_id, r=url_for("admin.admin_page")))
@action("DANGER: Delete less popular removed packages")
def del_less_popular_removed_packages():
task_id = uuid()

@ -18,6 +18,7 @@ import datetime
import json
import os
import shutil
import sys
from json import JSONDecodeError
from zipfile import ZipFile
@ -282,22 +283,32 @@ def update_translations(package: Package, tree: PackageTreeNode):
.update(to_update)
def safe_extract_zip(temp_dir: str, archive_path: str) -> bool:
with ZipFile(archive_path, 'r') as zf:
# No more than 256MB
total_size = sum(e.file_size for e in zf.infolist())
if total_size > 256 * 1024 * 1024:
def _check_zip_file(temp_dir: str, zf: ZipFile) -> bool:
# No more than 300MB
total_size = sum(e.file_size for e in zf.infolist())
if total_size > 300 * 1024 * 1024:
print(f"zip file is too large: {total_size} bytes", file=sys.stderr)
return False
# Check paths
for member in zf.infolist():
# We've had cases of newlines in filenames
if "\n" in member.filename:
print("zip file member contains invalid characters", file=sys.stderr)
return False
# Check paths
for member in zf.infolist():
# We've had cases of newlines in filenames
if "\n" in member.filename:
return False
file_path = os.path.realpath(os.path.join(temp_dir, member.filename))
if not file_path.startswith(os.path.realpath(temp_dir)):
print(f"zip file contains path out-of-bounds: {file_path}", file=sys.stderr)
return False
file_path = os.path.realpath(os.path.join(temp_dir, member.filename))
if not file_path.startswith(os.path.realpath(temp_dir)):
return False
return True
def _safe_extract_zip(temp_dir: str, archive_path: str) -> bool:
with ZipFile(archive_path, 'r') as zf:
if not _check_zip_file(temp_dir, zf):
return False
# Extract all
for member in zf.infolist():
@ -315,7 +326,7 @@ def check_zip_release(self, id, path):
raise TaskError("No package attached to release")
with get_temp_dir() as temp:
if not safe_extract_zip(temp, path):
if not _safe_extract_zip(temp, path):
release.approved = False
db.session.commit()
raise Exception(f"Unsafe zip file at {path}")
@ -327,6 +338,24 @@ def check_zip_release(self, id, path):
db.session.commit()
@celery.task()
def check_all_zip_files():
result = []
with get_temp_dir() as temp:
releases = PackageRelease.query.all()
for release in releases:
with ZipFile(release.file_path, 'r') as zf:
if not _check_zip_file(temp, zf):
print(f"Unsafe zip file for {release.package.get_id} at {release.file_path}", file=sys.stderr)
result.append({
"package": release.package.get_id(),
"file": release.file_path,
})
return json.dumps(result)
@celery.task(bind=True)
def import_languages(self, id, path):
release = PackageRelease.query.get(id)
@ -336,7 +365,7 @@ def import_languages(self, id, path):
raise TaskError("No package attached to release")
with get_temp_dir() as temp:
if not safe_extract_zip(temp, path):
if not _safe_extract_zip(temp, path):
raise Exception(f"Unsafe zip file at {path}")
try: