contentdb/app/tasks/importtasks.py

585 lines
18 KiB
Python
Raw Normal View History

2020-07-12 17:34:25 +02:00
# ContentDB
2021-01-29 18:17:51 +01:00
# Copyright (C) 2018-21 rubenwardy
2018-05-17 16:18:20 +02:00
#
# This program is free software: you can redistribute it and/or modify
2021-01-30 17:59:42 +01:00
# it under the terms of the GNU Affero General Public License as published by
2018-05-17 16:18:20 +02:00
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2021-01-30 17:59:42 +01:00
# GNU Affero General Public License for more details.
2018-05-17 16:18:20 +02:00
#
2021-01-30 17:59:42 +01:00
# You should have received a copy of the GNU Affero General Public License
2018-05-17 16:18:20 +02:00
# along with this program. If not, see <https://www.gnu.org/licenses/>.
2023-06-19 20:32:36 +02:00
import datetime
2021-02-03 00:58:59 +01:00
import json
import os
import shutil
2024-07-08 22:24:02 +02:00
import sys
2023-06-19 20:32:36 +02:00
from json import JSONDecodeError
2024-08-05 13:11:44 +02:00
from zipfile import ZipFile, BadZipFile
2023-06-19 20:32:36 +02:00
import gitdb
from flask import url_for
from git import GitCommandError
from git_archive_all import GitArchiver
from kombu import uuid
from sqlalchemy import and_
from sqlalchemy.dialects.postgresql import insert
2023-06-19 20:32:36 +02:00
from app.models import AuditSeverity, db, NotificationType, PackageRelease, MetaPackage, Dependency, PackageType, \
MinetestRelease, Package, PackageState, PackageScreenshot, PackageUpdateTrigger, PackageUpdateConfig, \
PackageGameSupport, PackageTranslation, Language
from app.tasks import celery, TaskError
from app.utils import random_string, post_bot_message, add_system_notification, add_system_audit_log, \
get_games_from_list, add_audit_log
from app.utils.git import clone_repo, get_latest_tag, get_latest_commit, get_temp_dir, get_release_notes
from .minetestcheck import build_tree, MinetestCheckError, ContentType, PackageTreeNode
from .webhooktasks import post_discord_webhook
2023-06-19 20:32:36 +02:00
from app import app
from app.logic.LogicError import LogicError
from app.logic.packages import do_edit_package, ALIASES
from app.logic.game_support import game_support_update, game_support_set, game_support_update_all, game_support_remove
2023-06-19 20:32:36 +02:00
from app.utils.image import get_image_size
2020-08-18 17:12:27 +02:00
2018-06-05 23:34:57 +02:00
@celery.task()
2023-06-19 22:27:49 +02:00
def get_meta(urlstr, author):
with clone_repo(urlstr, recursive=True) as repo:
try:
tree = build_tree(repo.working_tree_dir, author=author, repo=urlstr)
except MinetestCheckError as err:
raise TaskError(str(err))
2020-01-19 02:22:33 +01:00
result = {"name": tree.name, "type": tree.type.name}
2018-05-27 23:03:54 +02:00
for key in ["title", "repo", "issueTracker", "forumId", "description", "short_description"]:
result[key] = tree.get(key)
2018-05-27 23:03:54 +02:00
result["forums"] = result.get("forumId")
2023-06-19 20:32:36 +02:00
readme_path = tree.get_readme_path()
if readme_path:
with open(readme_path, "r") as f:
result["long_description"] = f.read()
try:
with open(os.path.join(tree.baseDir, ".cdb.json"), "r") as f:
data = json.loads(f.read())
for key, value in data.items():
result[key] = value
except LogicError as e:
raise TaskError(e.message)
except JSONDecodeError as e:
raise TaskError("Whilst reading .cdb.json: " + str(e))
except IOError:
pass
for alias, to in ALIASES.items():
if alias in result:
result[to] = result[alias]
2018-05-12 18:28:04 +02:00
for key, value in result.items():
if isinstance(value, set):
result[key] = list(value)
2018-05-27 23:03:54 +02:00
return result
2018-05-11 16:04:17 +02:00
2018-05-15 20:35:59 +02:00
@celery.task()
2023-06-19 22:27:49 +02:00
def update_all_game_support():
game_support_update_all(db.session)
db.session.commit()
@celery.task()
def update_package_game_support(package_id: int):
package = Package.query.get(package_id)
game_support_update(db.session, package, None)
db.session.commit()
@celery.task()
def remove_package_game_support(package_id: int):
package = Package.query.get(package_id)
game_support_remove(db.session, package)
db.session.commit()
2023-06-19 22:27:49 +02:00
def post_release_check_update(self, release: PackageRelease, path):
2020-08-18 17:12:27 +02:00
try:
tree: PackageTreeNode = build_tree(path, expected_type=ContentType[release.package.type.name],
2020-12-04 03:23:04 +01:00
author=release.package.author.username, name=release.package.name)
2020-08-18 17:12:27 +02:00
if tree.name is not None and release.package.name != tree.name and tree.type == ContentType.MOD:
raise MinetestCheckError(f"Expected {tree.relative} to have technical name {release.package.name}, instead has name {tree.name}")
2020-08-18 17:12:27 +02:00
cache = {}
2023-06-19 22:27:49 +02:00
def get_meta_packages(names):
2020-08-18 17:12:27 +02:00
return [ MetaPackage.GetOrCreate(x, cache) for x in names ]
2023-06-19 20:32:36 +02:00
provides = tree.get_mod_names()
2020-08-18 17:12:27 +02:00
package = release.package
old_provided_names = set([x.name for x in package.provides])
2020-08-18 17:12:27 +02:00
package.provides.clear()
# If new mods were added, add to audit log
if package.state == PackageState.APPROVED:
new_provides = []
for modname in provides:
if modname not in old_provided_names:
new_provides.append(modname)
if len(new_provides) > 0:
msg = "Added mods: " + (", ".join(new_provides))
add_audit_log(AuditSeverity.NORMAL, package.author, msg + " (Post release hook)", release.get_edit_url(), package)
# Do any of these new mods exist elsewhere?
new_names = get_meta_packages(new_provides)
if any(map(lambda x: x.packages.filter(Package.state == PackageState.APPROVED).count() > 0, new_names)):
discord_msg = (msg + " " + package.get_url("packages.similar", absolute=True) +
"#" + new_provides[0] +
"\nSome of these mods exist elsewhere, possible RTaN issue")
post_discord_webhook.delay(package.author.display_name, discord_msg, True,
package.title, package.short_desc, package.get_thumb_url(2, True, "png"))
2023-06-19 22:27:49 +02:00
package.provides.extend(get_meta_packages(tree.get_mod_names()))
2020-08-18 17:12:27 +02:00
2022-09-01 23:45:53 +02:00
# Delete all mod name dependencies
2020-08-18 17:12:27 +02:00
package.dependencies.filter(Dependency.meta_package != None).delete()
# Get raw dependencies
depends = tree.fold("meta", "depends")
optional_depends = tree.fold("meta", "optional_depends")
# Filter out provides
for mod in provides:
depends.discard(mod)
optional_depends.discard(mod)
# Raise error on unresolved game dependencies
if package.type == PackageType.GAME and len(depends) > 0:
deps = ", ".join(depends)
raise MinetestCheckError("Game has unresolved hard dependencies: " + deps)
if package.state != PackageState.APPROVED and tree.find_license_file() is None:
2023-12-30 18:30:49 +01:00
raise MinetestCheckError(
"You need to add a LICENSE.txt/.md or COPYING file to your package. See the 'Copyright Guide' for more info")
2020-08-18 18:08:17 +02:00
# Add dependencies
2023-06-19 22:27:49 +02:00
for meta in get_meta_packages(depends):
2020-08-18 17:12:27 +02:00
db.session.add(Dependency(package, meta=meta, optional=False))
2023-06-19 22:27:49 +02:00
for meta in get_meta_packages(optional_depends):
2020-08-18 17:12:27 +02:00
db.session.add(Dependency(package, meta=meta, optional=True))
# Read translations
2024-03-03 02:13:05 +01:00
update_translations(package, tree)
2020-08-18 17:12:27 +02:00
# Update min/max
if tree.meta.get("min_minetest_version"):
release.min_rel = MinetestRelease.get(tree.meta["min_minetest_version"], None)
if tree.meta.get("max_minetest_version"):
release.max_rel = MinetestRelease.get(tree.meta["max_minetest_version"], None)
2021-02-03 00:58:59 +01:00
try:
2021-07-22 15:04:08 +02:00
with open(os.path.join(tree.baseDir, ".cdb.json"), "r") as f:
data = json.loads(f.read())
do_edit_package(package.author, package, False, False, data, "Post release hook")
2021-02-03 00:58:59 +01:00
except LogicError as e:
raise TaskError(e.message)
except JSONDecodeError as e:
raise TaskError("Whilst reading .cdb.json: " + str(e))
2021-07-22 15:04:08 +02:00
except IOError:
pass
2021-02-03 00:58:59 +01:00
# Build release notes from git log
if release.commit_hash and release.release_notes is None:
previous_release = package.releases.filter(PackageRelease.id != release.id).first()
if previous_release and previous_release.commit_hash:
release.release_notes = get_release_notes(package.repo, previous_release.commit_hash, release.commit_hash)
# Update game support
2023-01-12 20:48:18 +01:00
if package.type == PackageType.MOD or package.type == PackageType.TXP:
game_is_supported = {}
if "supported_games" in tree.meta:
for game in get_games_from_list(db.session, tree.meta["supported_games"]):
game_is_supported[game.id] = True
has_star = any(map(lambda x: x.strip() == "*", tree.meta["supported_games"]))
if has_star:
if package.type == PackageType.TXP or \
package.supported_games.filter(and_(
PackageGameSupport.confidence == 1, PackageGameSupport.supports == True)).count() > 0:
raise TaskError("The package depends on a game-specific mod, and so cannot support all games.")
package.supports_all_games = True
if "unsupported_games" in tree.meta:
for game in get_games_from_list(db.session, tree.meta["unsupported_games"]):
game_is_supported[game.id] = False
game_support_set(db.session, package, game_is_supported, 10)
if package.type == PackageType.MOD:
errors = game_support_update(db.session, package, old_provided_names)
if len(errors) != 0:
raise TaskError("Error validating game support:\n\n" + "\n".join([f"- {x}" for x in errors]))
2020-08-18 17:12:27 +02:00
return tree
except (MinetestCheckError, TaskError, LogicError) as err:
db.session.rollback()
error_message = err.value if hasattr(err, "value") else str(err)
2022-06-25 04:30:12 +02:00
task_url = url_for('tasks.check', id=self.request.id)
msg = f"{error_message}\n\n[View Release]({release.get_edit_url()}) | [View Task]({task_url})"
post_bot_message(release.package, f"Release {release.title} validation failed", msg)
2020-08-18 17:12:27 +02:00
if "Fails validation" not in release.title:
release.title += " (Fails validation)"
release.task_id = self.request.id
release.approved = False
db.session.commit()
raise TaskError(error_message)
2020-08-18 17:12:27 +02:00
2024-03-03 02:13:05 +01:00
def update_translations(package: Package, tree: PackageTreeNode):
allowed_languages = set([x[0] for x in db.session.query(Language.id).all()])
allowed_languages.discard("en")
conn = db.session.connection()
for language in tree.get_supported_languages():
if language not in allowed_languages:
continue
values = {
"package_id": package.id,
"language_id": language,
}
stmt = insert(PackageTranslation).values(**values)
stmt = stmt.on_conflict_do_nothing(
index_elements=[PackageTranslation.package_id, PackageTranslation.language_id],
)
conn.execute(stmt)
raw_translations = tree.get_translations(tree.get("textdomain", tree.name))
for raw_translation in raw_translations:
if raw_translation.language not in allowed_languages:
continue
to_update = {
"title": raw_translation.entries.get(tree.get("title", package.title)),
"short_desc": raw_translation.entries.get(tree.get("description", package.short_desc)),
}
PackageTranslation.query \
.filter_by(package_id=package.id, language_id=raw_translation.language) \
.update(to_update)
2024-07-08 22:24:02 +02:00
def _check_zip_file(temp_dir: str, zf: ZipFile) -> bool:
# No more than 300MB
total_size = sum(e.file_size for e in zf.infolist())
if total_size > 300 * 1024 * 1024:
print(f"zip file is too large: {total_size} bytes", file=sys.stderr)
return False
# Check paths
for member in zf.infolist():
# We've had cases of newlines in filenames
if "\n" in member.filename:
print("zip file member contains invalid characters", file=sys.stderr)
return False
2024-07-08 22:24:02 +02:00
file_path = os.path.realpath(os.path.join(temp_dir, member.filename))
if not file_path.startswith(os.path.realpath(temp_dir)):
print(f"zip file contains path out-of-bounds: {file_path}", file=sys.stderr)
return False
return True
2024-07-08 22:24:02 +02:00
def _safe_extract_zip(temp_dir: str, archive_path: str) -> bool:
2024-08-05 13:11:44 +02:00
try:
with ZipFile(archive_path, 'r') as zf:
if not _check_zip_file(temp_dir, zf):
return False
# Extract all
for member in zf.infolist():
zf.extract(member, temp_dir)
except BadZipFile as e:
raise TaskError(str(e))
return True
2020-08-18 17:12:27 +02:00
@celery.task(bind=True)
2023-06-19 22:27:49 +02:00
def check_zip_release(self, id, path):
2020-08-18 17:12:27 +02:00
release = PackageRelease.query.get(id)
if release is None:
raise TaskError("No such release!")
elif release.package is None:
raise TaskError("No package attached to release")
with get_temp_dir() as temp:
2024-07-08 22:24:02 +02:00
if not _safe_extract_zip(temp, path):
release.approved = False
db.session.commit()
raise Exception(f"Unsafe zip file at {path}")
2020-08-18 17:12:27 +02:00
2023-06-19 22:27:49 +02:00
post_release_check_update(self, release, temp)
2020-01-19 02:37:15 +01:00
release.task_id = None
release.approve(release.package.author)
db.session.commit()
2024-07-08 22:24:02 +02:00
@celery.task()
def check_all_zip_files():
result = []
2024-07-09 01:13:41 +02:00
releases = PackageRelease.query.all()
for release in releases:
with ZipFile(release.file_path, 'r') as zf:
if not _check_zip_file("/tmp/example", zf):
print(f"Unsafe zip file for {release.package.get_id()} at {release.file_path}", file=sys.stderr)
result.append({
"package": release.package.get_id(),
"file": release.file_path,
})
2024-07-08 22:24:02 +02:00
return json.dumps(result)
2024-03-03 02:13:05 +01:00
@celery.task(bind=True)
def import_languages(self, id, path):
release = PackageRelease.query.get(id)
if release is None:
raise TaskError("No such release!")
elif release.package is None:
raise TaskError("No package attached to release")
with get_temp_dir() as temp:
2024-07-08 22:24:02 +02:00
if not _safe_extract_zip(temp, path):
raise Exception(f"Unsafe zip file at {path}")
2024-03-03 02:13:05 +01:00
try:
tree: PackageTreeNode = build_tree(temp,
expected_type=ContentType[release.package.type.name],
author=release.package.author.username,
name=release.package.name,
strict=False)
update_translations(release.package, tree)
db.session.commit()
except (MinetestCheckError, TaskError, LogicError) as err:
db.session.rollback()
task_url = url_for('tasks.check', id=self.request.id)
msg = f"{err}\n\n[View Release]({release.get_edit_url()}) | [View Task]({task_url})"
post_bot_message(release.package, f"Release {release.title} validation failed", msg)
db.session.commit()
raise TaskError(str(err))
2020-08-18 17:12:27 +02:00
@celery.task(bind=True)
2023-06-19 22:27:49 +02:00
def make_vcs_release(self, id, branch):
release = PackageRelease.query.get(id)
if release is None:
raise TaskError("No such release!")
elif release.package is None:
raise TaskError("No package attached to release")
with clone_repo(release.package.repo, ref=branch, recursive=True) as repo:
release.commit_hash = repo.head.object.hexsha
2023-06-19 22:27:49 +02:00
post_release_check_update(self, release, repo.working_tree_dir)
2020-01-19 02:22:33 +01:00
2023-06-19 22:27:49 +02:00
filename = random_string(10) + ".zip"
dest_path = os.path.join(app.config["UPLOAD_DIR"], filename)
assert not os.path.isfile(dest_path)
archiver = GitArchiver(prefix=release.package.name, force_sub=True, main_repo_abspath=repo.working_tree_dir)
archiver.create(dest_path)
assert os.path.isfile(dest_path)
file_stats = os.stat(dest_path)
if file_stats.st_size / (1024 * 1024) > 100:
os.remove(dest_path)
raise TaskError("The .zip file created from Git is too large - needs to be less than 100MB")
release.url = "/uploads/" + filename
release.task_id = None
release.approve(release.package.author)
db.session.commit()
return release.url
2018-05-15 20:35:59 +02:00
@celery.task()
2023-06-19 22:27:49 +02:00
def import_repo_screenshot(id):
2018-05-15 20:35:59 +02:00
package = Package.query.get(id)
if package is None or package.state == PackageState.DELETED:
2018-05-15 20:35:59 +02:00
raise Exception("Unexpected none package")
try:
with clone_repo(package.repo) as repo:
for ext in ["png", "jpg", "jpeg"]:
sourcePath = repo.working_tree_dir + "/screenshot." + ext
if os.path.isfile(sourcePath):
2023-06-19 22:27:49 +02:00
filename = random_string(10) + "." + ext
destPath = os.path.join(app.config["UPLOAD_DIR"], filename)
shutil.copyfile(sourcePath, destPath)
ss = PackageScreenshot()
ss.approved = True
ss.package = package
ss.title = "screenshot.png"
ss.url = "/uploads/" + filename
2022-01-26 03:51:40 +01:00
ss.width, ss.height = get_image_size(destPath)
if ss.is_too_small():
return None
db.session.add(ss)
db.session.commit()
return "/uploads/" + filename
except TaskError as e:
# ignore download errors
print(e)
pass
2018-06-05 23:34:57 +02:00
print("screenshot.png does not exist")
2018-05-15 20:35:59 +02:00
return None
def check_update_config_impl(package):
config = package.update_config
if config.trigger == PackageUpdateTrigger.COMMIT:
tag = None
2021-02-01 23:42:58 +01:00
commit = get_latest_commit(package.repo, package.update_config.ref)
release_notes = None
elif config.trigger == PackageUpdateTrigger.TAG:
tag, commit, release_notes = get_latest_tag(package.repo)
else:
raise TaskError("Unknown update trigger")
if commit is None:
return
if config.last_commit == commit:
if tag and config.last_tag != tag:
config.last_tag = tag
db.session.commit()
return
if not config.last_commit:
config.last_commit = commit
config.last_tag = tag
db.session.commit()
return
if package.releases.filter_by(commit_hash=commit).count() > 0:
return
if config.make_release:
rel = PackageRelease()
rel.package = package
rel.name = tag if tag else datetime.datetime.utcnow().strftime("%Y-%m-%d")
rel.title = rel.name
rel.release_notes = release_notes
rel.url = ""
rel.task_id = uuid()
db.session.add(rel)
2021-01-30 18:10:38 +01:00
2021-01-30 19:25:00 +01:00
msg = "Created release {} (Git Update Detection)".format(rel.title)
2023-06-19 22:27:49 +02:00
add_system_audit_log(AuditSeverity.NORMAL, msg, package.get_url("packages.view"), package)
2021-01-30 18:10:38 +01:00
db.session.commit()
2023-06-19 22:27:49 +02:00
make_vcs_release.apply_async((rel.id, commit), task_id=rel.task_id)
elif config.outdated_at is None:
config.set_outdated()
if config.trigger == PackageUpdateTrigger.COMMIT:
msg_last = ""
if config.last_commit:
msg_last = " The last commit was {}".format(config.last_commit[0:5])
msg = "New commit {} found on the Git repo, is the package outdated?{}" \
.format(commit[0:5], msg_last)
else:
msg_last = ""
if config.last_tag:
msg_last = " The last tag was {}".format(config.last_tag)
msg = "New tag {} found on the Git repo.{}" \
.format(tag, msg_last)
for user in package.maintainers:
2023-06-19 22:27:49 +02:00
add_system_notification(user, NotificationType.BOT,
msg, url_for("todo.view_user", username=user.username, _external=False), package)
config.last_commit = commit
config.last_tag = tag
db.session.commit()
2020-12-15 23:20:40 +01:00
@celery.task(bind=True)
def check_update_config(self, package_id):
package: Package = Package.query.get(package_id)
if package is None:
raise TaskError("No such package!")
elif package.update_config is None:
raise TaskError("No update config attached to package")
2020-12-15 23:20:40 +01:00
err = None
try:
check_update_config_impl(package)
2020-12-15 23:20:40 +01:00
except GitCommandError as e:
# This is needed to stop the backtrace being weird
err = e.stderr
except gitdb.exc.BadName as e:
err = "Unable to find the reference " + (package.update_config.ref or "?") + "\n" + e.stderr
except TaskError as e:
err = e.value
2020-12-15 23:20:40 +01:00
if err:
err = err.replace("stderr: ", "") \
.replace("Cloning into '/tmp/", "Cloning into '") \
.strip()
2022-06-25 04:30:12 +02:00
msg = "Error: {}.\n\n[Change update configuration]({}) | [View task]({})" \
.format(err, package.get_url("packages.update_config"), url_for("tasks.check", id=self.request.id))
2020-12-15 23:51:57 +01:00
post_bot_message(package, "Failed to check git repository", msg)
2020-12-15 23:20:40 +01:00
db.session.commit()
return
@celery.task
def check_for_updates():
for update_config in PackageUpdateConfig.query.all():
update_config: PackageUpdateConfig
if not update_config.package.approved:
continue
if update_config.package.repo is None:
db.session.delete(update_config)
continue
check_update_config.delay(update_config.package_id)
db.session.commit()