From 1f299381866f4862879c59460a1f1d32a2b8d6bc Mon Sep 17 00:00:00 2001 From: rubenwardy Date: Mon, 8 Jul 2024 21:24:02 +0100 Subject: [PATCH] Add admin task to check all zip files --- app/blueprints/admin/actions.py | 9 ++++- app/tasks/importtasks.py | 59 ++++++++++++++++++++++++--------- 2 files changed, 52 insertions(+), 16 deletions(-) diff --git a/app/blueprints/admin/actions.py b/app/blueprints/admin/actions.py index 430de653..6bb4ebf0 100644 --- a/app/blueprints/admin/actions.py +++ b/app/blueprints/admin/actions.py @@ -27,7 +27,7 @@ from app.models import PackageRelease, db, Package, PackageState, PackageScreens from app.tasks.emails import send_pending_digests from app.tasks.forumtasks import import_topic_list, check_all_forum_accounts from app.tasks.importtasks import import_repo_screenshot, check_zip_release, check_for_updates, update_all_game_support, \ - import_languages + import_languages, check_all_zip_files from app.tasks.usertasks import import_github_user_ids from app.tasks.pkgtasks import notify_about_git_forum_links, clear_removed_packages, check_package_for_broken_links from app.utils import add_notification, get_system_user @@ -315,6 +315,13 @@ def do_notify_git_forums_links(): return redirect(url_for("tasks.check", id=task_id, r=url_for("admin.admin_page"))) +@action("Check all zip files") +def do_check_all_zip_files(): + task_id = uuid() + check_all_zip_files.apply_async((), task_id=task_id) + return redirect(url_for("tasks.check", id=task_id, r=url_for("admin.admin_page"))) + + @action("DANGER: Delete less popular removed packages") def del_less_popular_removed_packages(): task_id = uuid() diff --git a/app/tasks/importtasks.py b/app/tasks/importtasks.py index c382137b..09b7e281 100644 --- a/app/tasks/importtasks.py +++ b/app/tasks/importtasks.py @@ -18,6 +18,7 @@ import datetime import json import os import shutil +import sys from json import JSONDecodeError from zipfile import ZipFile @@ -282,22 +283,32 @@ def update_translations(package: Package, tree: PackageTreeNode): .update(to_update) -def safe_extract_zip(temp_dir: str, archive_path: str) -> bool: - with ZipFile(archive_path, 'r') as zf: - # No more than 256MB - total_size = sum(e.file_size for e in zf.infolist()) - if total_size > 256 * 1024 * 1024: +def _check_zip_file(temp_dir: str, zf: ZipFile) -> bool: + # No more than 300MB + total_size = sum(e.file_size for e in zf.infolist()) + if total_size > 300 * 1024 * 1024: + print(f"zip file is too large: {total_size} bytes", file=sys.stderr) + return False + + # Check paths + for member in zf.infolist(): + # We've had cases of newlines in filenames + if "\n" in member.filename: + print("zip file member contains invalid characters", file=sys.stderr) return False - # Check paths - for member in zf.infolist(): - # We've had cases of newlines in filenames - if "\n" in member.filename: - return False + file_path = os.path.realpath(os.path.join(temp_dir, member.filename)) + if not file_path.startswith(os.path.realpath(temp_dir)): + print(f"zip file contains path out-of-bounds: {file_path}", file=sys.stderr) + return False - file_path = os.path.realpath(os.path.join(temp_dir, member.filename)) - if not file_path.startswith(os.path.realpath(temp_dir)): - return False + return True + + +def _safe_extract_zip(temp_dir: str, archive_path: str) -> bool: + with ZipFile(archive_path, 'r') as zf: + if not _check_zip_file(temp_dir, zf): + return False # Extract all for member in zf.infolist(): @@ -315,7 +326,7 @@ def check_zip_release(self, id, path): raise TaskError("No package attached to release") with get_temp_dir() as temp: - if not safe_extract_zip(temp, path): + if not _safe_extract_zip(temp, path): release.approved = False db.session.commit() raise Exception(f"Unsafe zip file at {path}") @@ -327,6 +338,24 @@ def check_zip_release(self, id, path): db.session.commit() +@celery.task() +def check_all_zip_files(): + result = [] + + with get_temp_dir() as temp: + releases = PackageRelease.query.all() + for release in releases: + with ZipFile(release.file_path, 'r') as zf: + if not _check_zip_file(temp, zf): + print(f"Unsafe zip file for {release.package.get_id} at {release.file_path}", file=sys.stderr) + result.append({ + "package": release.package.get_id(), + "file": release.file_path, + }) + + return json.dumps(result) + + @celery.task(bind=True) def import_languages(self, id, path): release = PackageRelease.query.get(id) @@ -336,7 +365,7 @@ def import_languages(self, id, path): raise TaskError("No package attached to release") with get_temp_dir() as temp: - if not safe_extract_zip(temp, path): + if not _safe_extract_zip(temp, path): raise Exception(f"Unsafe zip file at {path}") try: