contentdb/app/tasks/importtasks.py

384 lines
10 KiB
Python
Raw Normal View History

2020-07-12 17:34:25 +02:00
# ContentDB
2018-05-17 16:18:20 +02:00
# Copyright (C) 2018 rubenwardy
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import flask, json, os, git, tempfile, shutil, gitdb
from git import GitCommandError
from git_archive_all import GitArchiver
2018-12-23 00:03:38 +01:00
from flask_sqlalchemy import SQLAlchemy
2018-05-15 20:35:59 +02:00
from urllib.error import HTTPError
import urllib.request
from urllib.parse import urlparse, quote_plus, urlsplit
2020-01-19 02:37:15 +01:00
from zipfile import ZipFile
from app import app
from app.models import *
from app.tasks import celery, TaskError
from app.utils import randomString, getExtension
2020-01-19 02:22:33 +01:00
from .minetestcheck import build_tree, MinetestCheckError, ContentType
from .minetestcheck.config import parse_conf
2018-06-05 20:59:07 +02:00
2020-08-18 17:12:27 +02:00
# Process-wide cache of the downloaded mod list; filled on first use by
# getKrockList() and never refreshed afterwards.
krock_list_cache = None
krock_list_cache_by_name = None


def getKrockList():
    """Return the mod list from Krock's mod search, downloading it at most once.

    Returns a tuple ``(entries, entries_by_name)``:

    * ``entries`` is a list of dicts with the keys ``title``, ``author``,
      ``name``, ``topicId`` and ``link``.
    * ``entries_by_name`` maps each ``name`` to the list of entries sharing
      that name (the same dict objects as in ``entries``).

    Raw items are skipped when they lack ``title``/``author``/``topicId``/
    ``link``, when ``link`` is empty, or when the title contains no
    ``[name]`` token made of letters, digits and underscores.

    Any error from the download or from JSON decoding propagates to the
    caller; in that case nothing is cached and the next call retries.
    """
    global krock_list_cache
    global krock_list_cache_by_name

    if krock_list_cache is None:
        # Kept as a function-scope import, matching the original block, so
        # this edit does not depend on the file's top-level imports.
        import re

        # Raw string: "\[" in a normal string literal is an invalid escape.
        name_pattern = re.compile(r"\[([A-Za-z0-9_]+)\]")

        with urllib.request.urlopen("https://krock-works.uk.to/minetest/modList.php") as response:
            contents = response.read().decode("utf-8")
        raw_entries = json.loads(contents)

        def to_entry(raw):
            # Build the cached dict for one raw item, or None if unusable.
            has_fields = "title" in raw and "author" in raw and \
                    "topicId" in raw and "link" in raw and raw["link"] != ""
            if not has_fields:
                return None

            match = name_pattern.search(raw["title"])
            if match is None:
                return None

            return {
                "title": raw["title"],
                "author": raw["author"],
                "name": match.group(1),
                "topicId": raw["topicId"],
                "link": raw["link"],
            }

        entries = [entry for entry in map(to_entry, raw_entries) if entry is not None]

        by_name = {}
        for entry in entries:
            by_name.setdefault(entry["name"], []).append(entry)

        # Publish both caches only once they are fully built.
        krock_list_cache_by_name = by_name
        krock_list_cache = entries

    return krock_list_cache, krock_list_cache_by_name
2020-08-18 17:12:27 +02:00
def findModInfo(author, name, link):
    """Look up a mod in the list returned by getKrockList().

    Matching is attempted in this order:

    1. By ``name``: if exactly one entry has that name it is returned;
       otherwise the first entry with that name whose ``author`` equals
       ``author`` is returned.
    2. By ``link``: only when ``link`` is longer than 15 characters, the
       first entry whose ``link`` contains it as a substring is returned.

    Returns the matching entry dict, or None when nothing matches.
    """
    entries, by_name = getKrockList()

    if name is not None and name in by_name:
        candidates = by_name[name]
        if len(candidates) == 1:
            return candidates[0]

        same_author = next((c for c in candidates if c["author"] == author), None)
        if same_author is not None:
            return same_author

    # Very short links are not used for substring matching.
    if link is not None and len(link) > 15:
        return next((e for e in entries if link in e["link"]), None)

    return None
2020-08-18 17:12:27 +02:00
def generateGitURL(urlstr):
    """Rewrite ``urlstr`` into the URL form used for cloning.

    The original scheme and fragment are dropped. The result always starts
    with ``http://:@`` (empty username and password -- presumably so git
    never prompts for credentials; confirm), followed by the host and path
    and, when present, ``?`` plus the query string.
    """
    _scheme, netloc, path, query, _frag = urlsplit(urlstr)

    url = "http://:@" + netloc + path
    if query:
        # urlsplit() strips the "?" separator, so it has to be re-added.
        url += "?" + query
    return url
2018-06-05 01:10:47 +02:00
2020-01-19 02:37:15 +01:00
def getTempDir():
    """Return a randomly named path inside the system temporary directory.

    Only the path is built; nothing is created on disk here.
    """
    base = tempfile.gettempdir()
    leaf = randomString(10)
    return os.path.join(base, leaf)
def cloneRepo(urlstr, ref=None, recursive=False):
    """Clone a repository from an unvalidated URL into a fresh temp directory.

    urlstr:    repository URL; it is rewritten by generateGitURL() first.
    ref:       when None, a depth-1 clone of the default branch is made.
               Otherwise an empty repo is initialised, "origin" is fetched,
               ``ref`` is pulled and all submodules are initialised.
    recursive: passed to the clone when ``ref`` is None. When ``ref`` is
               given, submodules are updated regardless of this flag.

    Returns a tuple ``(path, repo)`` on success. The caller is responsible
    for deleting the returned directory.

    Raises TaskError when git fails or the reference cannot be resolved.
    On any failure the temporary directory is removed before raising.
    """
    gitDir = getTempDir()

    err = None
    try:
        gitUrl = generateGitURL(urlstr)
        print("Cloning from " + gitUrl)

        if ref is None:
            repo = git.Repo.clone_from(gitUrl, gitDir,
                    progress=None, env=None, depth=1, recursive=recursive, kill_after_timeout=15)
        else:
            repo = git.Repo.init(gitDir)
            origin = repo.create_remote("origin", url=gitUrl)
            assert origin.exists()
            origin.fetch()
            origin.pull(ref)

            for submodule in repo.submodules:
                submodule.update(init=True)

        return gitDir, repo

    except GitCommandError as e:
        # The TaskError is raised outside the handler on purpose:
        # this is needed to stop the backtrace being weird.
        err = e.stderr or ""

    except gitdb.exc.BadName as e:
        # BadName carries no `stderr` attribute; use its message instead.
        err = "Unable to find the reference " + (ref or "?") + "\n" + str(e)

    except BaseException:
        # Unexpected failure: nothing is returned, so nobody else can clean up.
        shutil.rmtree(gitDir, ignore_errors=True)
        raise

    # Only reached on a handled failure; the caller never sees gitDir.
    shutil.rmtree(gitDir, ignore_errors=True)

    raise TaskError(err.replace("stderr: ", "")
            .replace("Cloning into '" + gitDir + "'...", "")
            .strip())
2018-06-05 01:10:47 +02:00
2018-06-05 23:34:57 +02:00
@celery.task()
def getMeta(urlstr, author):
    """Clone ``urlstr`` and return the package metadata found in it as a dict.

    The result has the keys ``name``, ``provides``, ``type``, ``depends``,
    ``optional_depends``, ``title``, ``repo``, ``issueTracker``,
    ``forumId``, ``description`` and ``short_description``. Mods the package
    provides itself are removed from both dependency sets, and every set
    value is converted to a list before returning.

    Raises TaskError when cloning fails or when build_tree() reports a
    MinetestCheckError. The clone is removed in every case.
    """
    gitDir, _ = cloneRepo(urlstr, recursive=True)

    try:
        tree = build_tree(gitDir, author=author, repo=urlstr)
    except MinetestCheckError as err:
        raise TaskError(str(err))
    finally:
        # Remove the clone even when validation fails; previously it leaked.
        shutil.rmtree(gitDir, ignore_errors=True)

    result = {
        "name": tree.name,
        "provides": tree.getModNames(),
        "type": tree.type.name,
    }

    for key in ["depends", "optional_depends"]:
        result[key] = tree.fold("meta", key)

    for key in ["title", "repo", "issueTracker", "forumId", "description", "short_description"]:
        result[key] = tree.get(key)

    # A package never depends on the mods it ships itself.
    for mod in result["provides"]:
        result["depends"].discard(mod)
        result["optional_depends"].discard(mod)

    # Replace sets by lists (values only; the key set is not modified).
    for key, value in result.items():
        if isinstance(value, set):
            result[key] = list(value)

    return result
2018-05-11 16:04:17 +02:00
2018-05-15 20:35:59 +02:00
2020-08-18 17:12:27 +02:00
def postReleaseCheckUpdate(self, release, path):
    """Validate the content at ``path`` and sync the release's package with it.

    self:    the bound task; only ``self.request.id`` is read, on failure.
    release: the release being processed; its package's ``provides`` and
             meta-package dependencies are rebuilt from the content tree.
    path:    directory holding the extracted or cloned content.

    Returns the tree produced by build_tree().

    On MinetestCheckError the session is rolled back, the release is marked
    "(Fails validation)", unapproved and tagged with the task id, that
    change is committed, and TaskError is raised. On success nothing is
    committed here; that is left to the caller.
    """
    try:
        tree = build_tree(
            path,
            expected_type=ContentType[release.package.type.name],
            author=release.package.author.username,
            name=release.package.name,
        )

        meta_cache = {}

        def getMetaPackages(names):
            return [MetaPackage.GetOrCreate(n, meta_cache) for n in names]

        provided_mods = tree.getModNames()

        package = release.package
        package.provides.clear()
        package.provides.extend(getMetaPackages(tree.getModNames()))

        # Drop every existing meta-package dependency.
        # NOTE(review): `!= None` is kept as-is; this looks like a SQLAlchemy
        # column comparison, where `is not None` would not build a filter.
        package.dependencies.filter(Dependency.meta_package != None).delete()

        # Raw dependency names, minus whatever the package provides itself.
        hard_deps = tree.fold("meta", "depends")
        soft_deps = tree.fold("meta", "optional_depends")
        for mod in provided_mods:
            hard_deps.discard(mod)
            soft_deps.discard(mod)

        # A game must not have hard dependencies left over.
        if package.type == PackageType.GAME and hard_deps:
            raise MinetestCheckError(
                "Game has unresolved hard dependencies: " + ", ".join(hard_deps))

        # Hard dependencies first, then optional ones.
        for names, is_optional in ((hard_deps, False), (soft_deps, True)):
            for meta in getMetaPackages(names):
                db.session.add(Dependency(package, meta=meta, optional=is_optional))

        # Update the supported version range when the content declares one.
        min_version = tree.meta.get("min_minetest_version")
        if min_version:
            release.min_rel = MinetestRelease.get(min_version, None)

        max_version = tree.meta.get("max_minetest_version")
        if max_version:
            release.max_rel = MinetestRelease.get(max_version, None)

        return tree

    except MinetestCheckError as err:
        # Discard the partial changes above before recording the failure.
        db.session.rollback()

        if "Fails validation" not in release.title:
            release.title = release.title + " (Fails validation)"

        release.task_id = self.request.id
        release.approved = False
        db.session.commit()

        raise TaskError(str(err))
2020-01-19 02:59:00 +01:00
@celery.task(bind=True)
def updateMetaFromRelease(self, id, path):
    """Re-read package metadata from the release zip at ``path``.

    id:   primary key of the PackageRelease to update.
    path: filesystem path of the release's zip archive.

    The archive is extracted to a temporary directory, checked with
    postReleaseCheckUpdate(), and the session is committed. The temporary
    directory is removed afterwards in every case.

    Raises TaskError when the release or its package does not exist, or
    when validation fails.
    """
    release = PackageRelease.query.get(id)
    if release is None:
        raise TaskError("No such release!")
    elif release.package is None:
        raise TaskError("No package attached to release")

    print("updateMetaFromRelease: {} for {}/{}"
            .format(id, release.package.author.display_name, release.package.name))

    temp = getTempDir()
    try:
        with ZipFile(path, 'r') as zip_ref:
            zip_ref.extractall(temp)

        postReleaseCheckUpdate(self, release, temp)
        db.session.commit()

    finally:
        # `temp` does not exist yet if the archive could not be opened;
        # ignore_errors stops the cleanup from masking the real exception.
        shutil.rmtree(temp, ignore_errors=True)
2020-01-19 02:59:00 +01:00
2020-08-18 17:12:27 +02:00
@celery.task(bind=True)
def checkZipRelease(self, id, path):
    """Validate an uploaded release zip and approve the release on success.

    id:   primary key of the PackageRelease to check.
    path: filesystem path of the uploaded zip archive.

    The archive is extracted to a temporary directory and checked with
    postReleaseCheckUpdate(). On success the release's task id is cleared,
    the release is approved on behalf of the package author, and the
    session is committed. The temporary directory is removed in every case.

    Raises TaskError when the release or its package does not exist, or
    when validation fails.
    """
    release = PackageRelease.query.get(id)
    if release is None:
        raise TaskError("No such release!")
    elif release.package is None:
        raise TaskError("No package attached to release")

    temp = getTempDir()
    try:
        with ZipFile(path, 'r') as zip_ref:
            zip_ref.extractall(temp)

        postReleaseCheckUpdate(self, release, temp)

        release.task_id = None
        release.approve(release.package.author)
        db.session.commit()

    finally:
        # `temp` does not exist yet if the archive could not be opened;
        # ignore_errors stops the cleanup from masking the real exception.
        shutil.rmtree(temp, ignore_errors=True)
2020-08-18 17:12:27 +02:00
@celery.task(bind=True)
def makeVCSRelease(self, id, branch):
    """Build a release archive from the package's repository.

    id:     primary key of the PackageRelease to fill in.
    branch: reference passed to cloneRepo() as ``ref``.

    The repository is cloned, checked with postReleaseCheckUpdate(), and
    archived (submodules included) into a randomly named zip under
    ``UPLOAD_DIR``. The release then gets its URL and commit hash, is
    approved on behalf of the package author, and is committed. Finally
    updateMetaFromRelease is queued for the new archive.

    Returns the release's new URL.

    Raises TaskError when the release or its package does not exist, when
    cloning fails, or when validation fails. The clone is removed in every
    case once cloning has succeeded.
    """
    release = PackageRelease.query.get(id)
    if release is None:
        raise TaskError("No such release!")
    elif release.package is None:
        raise TaskError("No package attached to release")

    gitDir, repo = cloneRepo(release.package.repo, ref=branch, recursive=True)

    try:
        # Inside the try so that a validation failure still removes the clone.
        postReleaseCheckUpdate(self, release, gitDir)

        filename = randomString(10) + ".zip"
        destPath = os.path.join(app.config["UPLOAD_DIR"], filename)

        assert(not os.path.isfile(destPath))
        archiver = GitArchiver(force_sub=True, main_repo_abspath=gitDir)
        archiver.create(destPath)
        assert(os.path.isfile(destPath))

        release.url = "/uploads/" + filename
        release.task_id = None
        release.commit_hash = repo.head.object.hexsha
        release.approve(release.package.author)
        db.session.commit()

        updateMetaFromRelease.delay(release.id, destPath)

        return release.url
    finally:
        shutil.rmtree(gitDir)
2018-05-15 20:35:59 +02:00
@celery.task()
def importRepoScreenshot(id):
    """Import ``screenshot.{png,jpg,jpeg}`` from a package's repository root.

    id: primary key of the Package.

    The repository is cloned, and the first of ``screenshot.png``,
    ``screenshot.jpg`` and ``screenshot.jpeg`` that exists is copied into
    ``UPLOAD_DIR`` under a random name and stored as an approved
    PackageScreenshot.

    Returns the ``/uploads/...`` URL of the imported screenshot, or None
    when cloning failed or no screenshot file exists.

    Raises Exception when the package does not exist or is deleted.
    """
    package = Package.query.get(id)
    if package is None or package.state == PackageState.DELETED:
        raise Exception("Unexpected none package")

    # Fetch the repository; a failed download is not treated as an error.
    try:
        gitDir, _ = cloneRepo(package.repo)
    except TaskError as e:
        print(e)
        return None

    # Find and import the first screenshot that exists.
    try:
        for ext in ("png", "jpg", "jpeg"):
            sourcePath = gitDir + "/screenshot." + ext
            if not os.path.isfile(sourcePath):
                continue

            filename = randomString(10) + "." + ext
            destPath = os.path.join(app.config["UPLOAD_DIR"], filename)
            shutil.copyfile(sourcePath, destPath)

            url = "/uploads/" + filename

            ss = PackageScreenshot()
            ss.approved = True
            ss.package = package
            ss.title = "screenshot.png"
            ss.url = url
            db.session.add(ss)
            db.session.commit()

            return url
    finally:
        # Runs on both the early return above and the fall-through below.
        shutil.rmtree(gitDir)

    print("screenshot.png does not exist")
    return None
@celery.task(bind=True)
def importForeignDownloads(self, id):
    """Download a release hosted elsewhere into ``UPLOAD_DIR``.

    id: primary key of the PackageRelease.

    Nothing happens when the release URL does not start with "http".
    Otherwise the file is downloaded under a random name that keeps the
    URL's extension, the release URL is pointed at the local copy, and the
    session is committed.

    When the download fails with URLError, the session is rolled back and
    the release is marked unapproved and tagged with this task's id.

    Raises TaskError when the release or its package does not exist.
    """
    release = PackageRelease.query.get(id)
    if release is None:
        raise TaskError("No such release!")
    if release.package is None:
        raise TaskError("No package attached to release")
    if not release.url.startswith("http"):
        return

    try:
        ext = getExtension(release.url)
        filename = randomString(10) + "." + ext
        filepath = os.path.join(app.config["UPLOAD_DIR"], filename)
        urllib.request.urlretrieve(release.url, filepath)

        release.url = "/uploads/" + filename
        db.session.commit()

    except urllib.error.URLError:
        # Undo the pending URL change, then record the failure.
        db.session.rollback()

        release.task_id = self.request.id
        release.approved = False
        db.session.commit()