Split importtasks.py

This commit is contained in:
rubenwardy 2021-02-01 22:42:58 +00:00
parent c6a973f7e1
commit a78fe8ceb9
5 changed files with 122 additions and 170 deletions

@ -26,7 +26,7 @@ from wtforms.validators import InputRequired, Length
from app.models import *
from app.tasks.forumtasks import importTopicList, checkAllForumAccounts
from app.tasks.importtasks import importRepoScreenshot, checkZipRelease, importForeignDownloads, check_for_updates
from app.tasks.importtasks import importRepoScreenshot, checkZipRelease, check_for_updates
from app.utils import rank_required, addAuditLog, addNotification
from . import bp
@ -74,21 +74,6 @@ def admin_page():
return redirect(url_for("todo.view_editor"))
elif action == "importforeign":
releases = PackageRelease.query.filter(PackageRelease.url.like("http%")).all()
tasks = []
for release in releases:
tasks.append(importForeignDownloads.s(release.id))
result = group(tasks).apply_async()
while not result.ready():
import time
time.sleep(0.1)
return redirect(url_for("todo.view_editor"))
elif action == "importmodlist":
task = importTopicList.delay()
return redirect(url_for("tasks.check", id=task.id, r=url_for("todo.topics")))

@ -357,31 +357,6 @@ class Package(db.Model):
def getIsFOSS(self):
    """Tell whether both the package licence and its media licence are FOSS.

    The media licence is only consulted when the main licence's ``is_foss``
    is truthy.
    """
    code_is_foss = self.license.is_foss
    return code_is_foss and self.media_license.is_foss
def getIsOnGitHub(self):
    """Tell whether the package's repository URL has ``github.com`` as host.

    Returns False when no repository URL is set.
    """
    repo_url = self.repo
    return repo_url is not None and urlparse(repo_url).netloc == "github.com"
def getGitHubFullName(self):
    """Split the package's GitHub repository URL into owner and repo name.

    :return: A ``(user, repo)`` tuple, or None when no repository URL is
        set, the host is not ``github.com``, or the URL path is not exactly
        ``/<user>/<repo>`` (an optional trailing slash is accepted).
    """
    if self.repo is None:
        return None

    url = urlparse(self.repo)
    if url.netloc != "github.com":
        return None

    import re
    m = re.search(r"^\/([^\/]+)\/([^\/]+)\/?$", url.path)
    if m is None:
        return None

    user = m.group(1)
    repo = m.group(2)
    # Only drop ".git" when it is the suffix. Replacing it everywhere
    # would corrupt names such as "user.github.io".
    if repo.endswith(".git"):
        repo = repo[:-len(".git")]
    return user, repo
def getSortedDependencies(self, is_hard=None):
query = self.dependencies
if is_hard is not None:

@ -15,116 +15,19 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import os, git, tempfile, shutil, gitdb, contextlib, datetime
import os, shutil, gitdb
from zipfile import ZipFile
from git import GitCommandError
from git_archive_all import GitArchiver
from urllib.error import HTTPError
import urllib.request
from urllib.parse import urlsplit
from zipfile import ZipFile
from kombu import uuid
from app.models import *
from app.tasks import celery, TaskError
from app.utils import randomString, getExtension, post_bot_message, addSystemNotification, addSystemAuditLog
from app.utils import randomString, post_bot_message, addSystemNotification, addSystemAuditLog
from app.utils.git import clone_repo, get_latest_tag, get_latest_commit, get_temp_dir
from .minetestcheck import build_tree, MinetestCheckError, ContentType
def generateGitURL(urlstr):
    """Rewrite a repository URL into the form that is handed to git.

    The result always uses the ``http`` scheme and carries an empty ``:@``
    user-info section in front of the host. The original scheme and any
    fragment are discarded.

    NOTE(review): the empty user-info section presumably keeps git from
    prompting for credentials -- confirm.

    :param urlstr: Repository URL string.
    :return: ``http://:@<netloc><path>``, followed by ``?<query>`` when the
        input URL has a query string.
    """
    _scheme, netloc, path, query, _frag = urlsplit(urlstr)

    url = "http://:@" + netloc + path
    if query:
        # urlsplit() strips the "?" separator, so it has to be put back;
        # otherwise the query text is glued straight onto the path.
        url += "?" + query
    return url
@contextlib.contextmanager
def get_temp_dir():
    """Context manager yielding a fresh path for a temporary directory.

    Only the path is generated here (a random name under the system temp
    directory); the directory itself is not created, so the caller is
    expected to create it. When the ``with`` block exits the directory tree
    is removed, including when the block raises.

    :return: Yields the path as a string.
    """
    temp = os.path.join(tempfile.gettempdir(), randomString(10))
    try:
        yield temp
    finally:
        # ignore_errors: the directory may never have been created, and a
        # cleanup problem must not hide an exception from the with-body.
        shutil.rmtree(temp, ignore_errors=True)
# Clones a repo from an unvalidated URL into a temporary directory.
# This is a context manager: it yields the `git.Repo` on success and removes
# the temporary directory again when the `with` block exits, whether or not
# an error occurred. The checkout must not be used after the block.
# Throws `TaskError` on failure. Git errors raised inside the `with` block
# are converted to `TaskError` as well.
@contextlib.contextmanager
def clone_repo(urlstr, ref=None, recursive=False):
    gitDir = os.path.join(tempfile.gettempdir(), randomString(10))

    try:
        gitUrl = generateGitURL(urlstr)
        print("Cloning from " + gitUrl)

        if ref is None:
            # No particular ref requested: a depth-1 clone is enough.
            repo = git.Repo.clone_from(gitUrl, gitDir,
                    progress=None, env=None, depth=1, recursive=recursive, kill_after_timeout=15)
        else:
            assert ref != ""

            # A specific ref is wanted: fetch from the remote into an empty
            # repository and check the ref out.
            repo = git.Repo.init(gitDir)
            origin = repo.create_remote("origin", url=gitUrl)
            assert origin.exists()
            origin.fetch()
            repo.git.checkout(ref)

            for submodule in repo.submodules:
                submodule.update(init=True)

        yield repo
        return

    except GitCommandError as e:
        # This is needed to stop the backtrace being weird
        err = e.stderr

    except gitdb.exc.BadName as e:
        # BadName has no `stderr` attribute; its message comes from str(e).
        err = "Unable to find the reference " + (ref or "?") + "\n" + str(e)

    finally:
        # Always clean up, including on failure. The directory may not exist
        # if cloning failed early, hence ignore_errors.
        shutil.rmtree(gitDir, ignore_errors=True)

    raise TaskError(err.replace("stderr: ", "")
            .replace("Cloning into '" + gitDir + "'...", "")
            .strip())
def get_commit_hash(git_url, ref_name=None):
    """Look up the commit hash that a ref on a remote repository points at.

    The remote is queried with ``ls_remote``; nothing is cloned.

    :param git_url: Repository URL; it is passed through generateGitURL()
        before use.
    :param ref_name: Branch name, looked up as ``refs/heads/<ref_name>``.
        When empty or None, ``HEAD`` is looked up instead.
    :return: The hash string listed for the ref, or None when the remote
        does not list that ref.
    """
    git_url = generateGitURL(git_url)
    wanted = "refs/heads/" + ref_name if ref_name else "HEAD"

    remote_refs = {}
    for line in git.cmd.Git().ls_remote(git_url).split('\n'):
        commit_hash, sep, name = line.partition('\t')
        if not sep:
            # Not a "<hash>\t<ref>" line, e.g. the single empty string
            # produced when the remote lists no refs at all.
            continue
        remote_refs[name] = commit_hash

    return remote_refs.get(wanted)
def get_latest_tag(git_url):
    """Find the last tag listed for a remote repository and its commit.

    The remote is fetched into a temporary repository, its tags are listed
    sorted by ``creatordate``, and the last entry is taken.

    :param git_url: Repository URL; it is passed through generateGitURL()
        like the other git helpers in this module.
    :return: ``(tag, commit_hash)``, or ``(None, None)`` when no tags are
        listed. ``commit_hash`` is the result of ``rev_parse`` on
        ``<tag>^{}``.
    """
    with get_temp_dir() as git_dir:
        repo = git.Repo.init(git_dir)
        origin = repo.create_remote("origin", url=generateGitURL(git_url))
        origin.fetch()

        listing = repo.git.ls_remote(tags=True, sort="creatordate")
        refs = [line for line in listing.split('\n') if line.strip()]
        if not refs:
            return None, None

        tag = refs[-1].split('\t')[1]
        if tag.startswith("refs/tags/"):
            tag = tag[len("refs/tags/"):]
        if tag.endswith("^{}"):
            # ls-remote also lists the peeled entry "<tag>^{}" for
            # annotated tags; the marker is not part of the tag name.
            tag = tag[:-len("^{}")]

        commit_hash = repo.git.rev_parse(tag + "^{}")
        return tag, commit_hash
@celery.task()
def getMeta(urlstr, author):
with clone_repo(urlstr, recursive=True) as repo:
@ -290,38 +193,12 @@ def importRepoScreenshot(id):
return None
@celery.task(bind=True)
def importForeignDownloads(self, id):
    """Download an externally hosted release file into the upload directory.

    Releases whose URL does not start with ``http`` are left untouched. On
    success the release URL is rewritten to the ``/uploads/`` path of the
    downloaded file. If the download raises ``URLError`` the session is
    rolled back, this task's id is recorded on the release, and the release
    is marked as not approved.

    :param id: Primary key of the PackageRelease to import.
    :raises TaskError: When the release does not exist or has no package.
    """
    release = PackageRelease.query.get(id)

    # Guard clauses: bail out before touching the network.
    if release is None:
        raise TaskError("No such release!")
    if release.package is None:
        raise TaskError("No package attached to release")
    if not release.url.startswith("http"):
        return

    try:
        extension = getExtension(release.url)
        filename = randomString(10) + "." + extension
        upload_dir = app.config["UPLOAD_DIR"]
        destination = os.path.join(upload_dir, filename)

        urllib.request.urlretrieve(release.url, destination)

        release.url = "/uploads/" + filename
        db.session.commit()
    except urllib.error.URLError:
        db.session.rollback()

        release.task_id = self.request.id
        release.approved = False
        db.session.commit()
def check_update_config_impl(package):
config = package.update_config
if config.trigger == PackageUpdateTrigger.COMMIT:
tag = None
commit = get_commit_hash(package.repo, package.update_config.ref)
commit = get_latest_commit(package.repo, package.update_config.ref)
elif config.trigger == PackageUpdateTrigger.TAG:
tag, commit = get_latest_tag(package.repo)
else:

@ -35,7 +35,6 @@
<option value="div">------</option>
<option value="checkreleases">Validate all Zip releases</option>
<option value="importmodlist">Import forum topics</option>
<option value="importforeign">Import foreign release downloads</option>
<option value="checkusers">Check forum users</option>
<option value="importscreenshots">Import screenshots from VCS</option>
<option value="addupdateconfig">Add update configs</option>

116
app/utils/git.py Normal file

@ -0,0 +1,116 @@
# ContentDB
# Copyright (C) 2018-21 rubenwardy
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import contextlib, git, gitdb, os, shutil, tempfile
from urllib.parse import urlsplit
from git import GitCommandError
from app.tasks import TaskError
from app.utils import randomString
def generateGitURL(urlstr):
    """Rewrite a repository URL into the form that is handed to git.

    The result always uses the ``http`` scheme and carries an empty ``:@``
    user-info section in front of the host. The original scheme and any
    fragment are discarded.

    NOTE(review): the empty user-info section presumably keeps git from
    prompting for credentials -- confirm.

    :param urlstr: Repository URL string.
    :return: ``http://:@<netloc><path>``, followed by ``?<query>`` when the
        input URL has a query string.
    """
    _scheme, netloc, path, query, _frag = urlsplit(urlstr)

    url = "http://:@" + netloc + path
    if query:
        # urlsplit() strips the "?" separator, so it has to be put back;
        # otherwise the query text is glued straight onto the path.
        url += "?" + query
    return url
@contextlib.contextmanager
def get_temp_dir():
    """Context manager yielding a fresh path for a temporary directory.

    Only the path is generated here (a random name under the system temp
    directory); the directory itself is not created, so the caller is
    expected to create it. When the ``with`` block exits the directory tree
    is removed, including when the block raises.

    :return: Yields the path as a string.
    """
    temp = os.path.join(tempfile.gettempdir(), randomString(10))
    try:
        yield temp
    finally:
        # ignore_errors: the directory may never have been created, and a
        # cleanup problem must not hide an exception from the with-body.
        shutil.rmtree(temp, ignore_errors=True)
# Clones a repo from an unvalidated URL into a temporary directory.
# This is a context manager: it yields the `git.Repo` on success and removes
# the temporary directory again when the `with` block exits, whether or not
# an error occurred. The checkout must not be used after the block.
# Throws `TaskError` on failure. Git errors raised inside the `with` block
# are converted to `TaskError` as well.
@contextlib.contextmanager
def clone_repo(urlstr, ref=None, recursive=False):
    gitDir = os.path.join(tempfile.gettempdir(), randomString(10))

    try:
        gitUrl = generateGitURL(urlstr)
        print("Cloning from " + gitUrl)

        if ref is None:
            # No particular ref requested: a depth-1 clone is enough.
            repo = git.Repo.clone_from(gitUrl, gitDir,
                    progress=None, env=None, depth=1, recursive=recursive, kill_after_timeout=15)
        else:
            assert ref != ""

            # A specific ref is wanted: fetch from the remote into an empty
            # repository and check the ref out.
            repo = git.Repo.init(gitDir)
            origin = repo.create_remote("origin", url=gitUrl)
            assert origin.exists()
            origin.fetch()
            repo.git.checkout(ref)

            for submodule in repo.submodules:
                submodule.update(init=True)

        yield repo
        return

    except GitCommandError as e:
        # This is needed to stop the backtrace being weird
        err = e.stderr

    except gitdb.exc.BadName as e:
        # BadName has no `stderr` attribute; its message comes from str(e).
        err = "Unable to find the reference " + (ref or "?") + "\n" + str(e)

    finally:
        # Always clean up, including on failure. The directory may not exist
        # if cloning failed early, hence ignore_errors.
        shutil.rmtree(gitDir, ignore_errors=True)

    raise TaskError(err.replace("stderr: ", "")
            .replace("Cloning into '" + gitDir + "'...", "")
            .strip())
def get_latest_commit(git_url, ref_name=None):
    """Look up the commit hash that a ref on a remote repository points at.

    The remote is queried with ``ls_remote``; nothing is cloned.

    :param git_url: Repository URL; it is passed through generateGitURL()
        before use.
    :param ref_name: Branch name, looked up as ``refs/heads/<ref_name>``.
        When empty or None, ``HEAD`` is looked up instead.
    :return: The hash string listed for the ref, or None when the remote
        does not list that ref.
    """
    git_url = generateGitURL(git_url)
    wanted = "refs/heads/" + ref_name if ref_name else "HEAD"

    remote_refs = {}
    for line in git.cmd.Git().ls_remote(git_url).split('\n'):
        commit_hash, sep, name = line.partition('\t')
        if not sep:
            # Not a "<hash>\t<ref>" line, e.g. the single empty string
            # produced when the remote lists no refs at all.
            continue
        remote_refs[name] = commit_hash

    return remote_refs.get(wanted)
def get_latest_tag(git_url):
    """Find the last tag listed for a remote repository and its commit.

    The remote is fetched into a temporary repository, its tags are listed
    sorted by ``creatordate``, and the last entry is taken.

    :param git_url: Repository URL; it is passed through generateGitURL()
        like the other git helpers in this module.
    :return: ``(tag, commit_hash)``, or ``(None, None)`` when no tags are
        listed. ``commit_hash`` is the result of ``rev_parse`` on
        ``<tag>^{}``.
    """
    with get_temp_dir() as git_dir:
        repo = git.Repo.init(git_dir)
        origin = repo.create_remote("origin", url=generateGitURL(git_url))
        origin.fetch()

        listing = repo.git.ls_remote(tags=True, sort="creatordate")
        refs = [line for line in listing.split('\n') if line.strip()]
        if not refs:
            return None, None

        tag = refs[-1].split('\t')[1]
        if tag.startswith("refs/tags/"):
            tag = tag[len("refs/tags/"):]
        if tag.endswith("^{}"):
            # ls-remote also lists the peeled entry "<tag>^{}" for
            # annotated tags; the marker is not part of the tag name.
            tag = tag[:-len("^{}")]

        commit_hash = repo.git.rev_parse(tag + "^{}")
        return tag, commit_hash