From a78fe8ceb9305253856783945edb055dc60ecf74 Mon Sep 17 00:00:00 2001 From: rubenwardy Date: Mon, 1 Feb 2021 22:42:58 +0000 Subject: [PATCH] Split importtasks.py --- app/blueprints/admin/admin.py | 17 +---- app/models/packages.py | 25 ------- app/tasks/importtasks.py | 133 ++-------------------------------- app/templates/admin/list.html | 1 - app/utils/git.py | 116 +++++++++++++++++++++++++++++ 5 files changed, 122 insertions(+), 170 deletions(-) create mode 100644 app/utils/git.py diff --git a/app/blueprints/admin/admin.py b/app/blueprints/admin/admin.py index 47881db9..313e34fe 100644 --- a/app/blueprints/admin/admin.py +++ b/app/blueprints/admin/admin.py @@ -26,7 +26,7 @@ from wtforms.validators import InputRequired, Length from app.models import * from app.tasks.forumtasks import importTopicList, checkAllForumAccounts -from app.tasks.importtasks import importRepoScreenshot, checkZipRelease, importForeignDownloads, check_for_updates +from app.tasks.importtasks import importRepoScreenshot, checkZipRelease, check_for_updates from app.utils import rank_required, addAuditLog, addNotification from . 
import bp @@ -74,21 +74,6 @@ def admin_page(): return redirect(url_for("todo.view_editor")) - elif action == "importforeign": - releases = PackageRelease.query.filter(PackageRelease.url.like("http%")).all() - - tasks = [] - for release in releases: - tasks.append(importForeignDownloads.s(release.id)) - - result = group(tasks).apply_async() - - while not result.ready(): - import time - time.sleep(0.1) - - return redirect(url_for("todo.view_editor")) - elif action == "importmodlist": task = importTopicList.delay() return redirect(url_for("tasks.check", id=task.id, r=url_for("todo.topics"))) diff --git a/app/models/packages.py b/app/models/packages.py index ccdd6cf6..d75e4a73 100644 --- a/app/models/packages.py +++ b/app/models/packages.py @@ -357,31 +357,6 @@ class Package(db.Model): def getIsFOSS(self): return self.license.is_foss and self.media_license.is_foss - def getIsOnGitHub(self): - if self.repo is None: - return False - - url = urlparse(self.repo) - return url.netloc == "github.com" - - def getGitHubFullName(self): - if self.repo is None: - return None - - url = urlparse(self.repo) - if url.netloc != "github.com": - return None - - import re - m = re.search(r"^\/([^\/]+)\/([^\/]+)\/?$", url.path) - if m is None: - return - - user = m.group(1) - repo = m.group(2).replace(".git", "") - - return user, repo - def getSortedDependencies(self, is_hard=None): query = self.dependencies if is_hard is not None: diff --git a/app/tasks/importtasks.py b/app/tasks/importtasks.py index 960830b5..aeee25e2 100644 --- a/app/tasks/importtasks.py +++ b/app/tasks/importtasks.py @@ -15,116 +15,19 @@ # along with this program. If not, see <https://www.gnu.org/licenses/>. 
-import os, git, tempfile, shutil, gitdb, contextlib, datetime +import os, shutil, gitdb +from zipfile import ZipFile from git import GitCommandError from git_archive_all import GitArchiver -from urllib.error import HTTPError -import urllib.request -from urllib.parse import urlsplit -from zipfile import ZipFile - from kombu import uuid from app.models import * from app.tasks import celery, TaskError -from app.utils import randomString, getExtension, post_bot_message, addSystemNotification, addSystemAuditLog +from app.utils import randomString, post_bot_message, addSystemNotification, addSystemAuditLog +from app.utils.git import clone_repo, get_latest_tag, get_latest_commit, get_temp_dir from .minetestcheck import build_tree, MinetestCheckError, ContentType -def generateGitURL(urlstr): - scheme, netloc, path, query, frag = urlsplit(urlstr) - - return "http://:@" + netloc + path + query - - -@contextlib.contextmanager -def get_temp_dir(): - temp = os.path.join(tempfile.gettempdir(), randomString(10)) - yield temp - shutil.rmtree(temp) - - -# Clones a repo from an unvalidated URL. -# Returns a tuple of path and repo on sucess. -# Throws `TaskError` on failure. -# Caller is responsible for deleting returned directory. 
-@contextlib.contextmanager -def clone_repo(urlstr, ref=None, recursive=False): - gitDir = os.path.join(tempfile.gettempdir(), randomString(10)) - - err = None - try: - gitUrl = generateGitURL(urlstr) - print("Cloning from " + gitUrl) - - if ref is None: - repo = git.Repo.clone_from(gitUrl, gitDir, - progress=None, env=None, depth=1, recursive=recursive, kill_after_timeout=15) - else: - assert ref != "" - - repo = git.Repo.init(gitDir) - origin = repo.create_remote("origin", url=gitUrl) - assert origin.exists() - origin.fetch() - repo.git.checkout(ref) - - for submodule in repo.submodules: - submodule.update(init=True) - - yield repo - shutil.rmtree(gitDir) - return - - except GitCommandError as e: - # This is needed to stop the backtrace being weird - err = e.stderr - - except gitdb.exc.BadName as e: - err = "Unable to find the reference " + (ref or "?") + "\n" + e.stderr - - raise TaskError(err.replace("stderr: ", "") \ - .replace("Cloning into '" + gitDir + "'...", "") \ - .strip()) - - -def get_commit_hash(git_url, ref_name=None): - git_url = generateGitURL(git_url) - - if ref_name: - ref_name = "refs/heads/" + ref_name - else: - ref_name = "HEAD" - - g = git.cmd.Git() - - remote_refs = {} - for ref in g.ls_remote(git_url).split('\n'): - hash_ref_list = ref.split('\t') - remote_refs[hash_ref_list[1]] = hash_ref_list[0] - - return remote_refs.get(ref_name) - - -def get_latest_tag(git_url): - with get_temp_dir() as git_dir: - repo = git.Repo.init(git_dir) - origin = repo.create_remote("origin", url=git_url) - origin.fetch() - - refs = repo.git.ls_remote(tags=True, sort="creatordate").split('\n') - refs = [ref for ref in refs if ref.strip() != ""] - if len(refs) == 0: - return None, None - - last_ref = refs[-1] - hash_ref_list = last_ref.split('\t') - - tag = hash_ref_list[1].replace("refs/tags/", "") - commit_hash = repo.git.rev_parse(tag + "^{}") - return tag, commit_hash - - @celery.task() def getMeta(urlstr, author): with clone_repo(urlstr, recursive=True) as 
repo: @@ -290,38 +193,12 @@ def importRepoScreenshot(id): return None -@celery.task(bind=True) -def importForeignDownloads(self, id): - release = PackageRelease.query.get(id) - if release is None: - raise TaskError("No such release!") - elif release.package is None: - raise TaskError("No package attached to release") - elif not release.url.startswith("http"): - return - - try: - ext = getExtension(release.url) - filename = randomString(10) + "." + ext - filepath = os.path.join(app.config["UPLOAD_DIR"], filename) - urllib.request.urlretrieve(release.url, filepath) - - release.url = "/uploads/" + filename - db.session.commit() - - except urllib.error.URLError: - db.session.rollback() - release.task_id = self.request.id - release.approved = False - db.session.commit() - - def check_update_config_impl(package): config = package.update_config if config.trigger == PackageUpdateTrigger.COMMIT: tag = None - commit = get_commit_hash(package.repo, package.update_config.ref) + commit = get_latest_commit(package.repo, package.update_config.ref) elif config.trigger == PackageUpdateTrigger.TAG: tag, commit = get_latest_tag(package.repo) else: diff --git a/app/templates/admin/list.html b/app/templates/admin/list.html index 8c75e1d2..ddf8fc48 100644 --- a/app/templates/admin/list.html +++ b/app/templates/admin/list.html @@ -35,7 +35,6 @@ - diff --git a/app/utils/git.py b/app/utils/git.py new file mode 100644 index 00000000..7d800f95 --- /dev/null +++ b/app/utils/git.py @@ -0,0 +1,116 @@ +# ContentDB +# Copyright (C) 2018-21 rubenwardy +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + + +import contextlib, git, gitdb, os, shutil, tempfile +from urllib.parse import urlsplit +from git import GitCommandError + +from app.tasks import TaskError +from app.utils import randomString + +def generateGitURL(urlstr): + scheme, netloc, path, query, frag = urlsplit(urlstr) + + return "http://:@" + netloc + path + query + + +@contextlib.contextmanager +def get_temp_dir(): + temp = os.path.join(tempfile.gettempdir(), randomString(10)) + yield temp + shutil.rmtree(temp) + + +# Clones a repo from an unvalidated URL. +# Returns a tuple of path and repo on success. +# Throws `TaskError` on failure. +# Caller is responsible for deleting returned directory. +@contextlib.contextmanager +def clone_repo(urlstr, ref=None, recursive=False): + gitDir = os.path.join(tempfile.gettempdir(), randomString(10)) + + err = None + try: + gitUrl = generateGitURL(urlstr) + print("Cloning from " + gitUrl) + + if ref is None: + repo = git.Repo.clone_from(gitUrl, gitDir, + progress=None, env=None, depth=1, recursive=recursive, kill_after_timeout=15) + else: + assert ref != "" + + repo = git.Repo.init(gitDir) + origin = repo.create_remote("origin", url=gitUrl) + assert origin.exists() + origin.fetch() + repo.git.checkout(ref) + + for submodule in repo.submodules: + submodule.update(init=True) + + yield repo + shutil.rmtree(gitDir) + return + + except GitCommandError as e: + # This is needed to stop the backtrace being weird + err = e.stderr + + except gitdb.exc.BadName as e: + err = "Unable to find the reference " + (ref or "?") + "\n" + e.stderr + + raise TaskError(err.replace("stderr: ", "") \ + .replace("Cloning into '" + gitDir + "'...", "") \ + .strip()) + + +def get_latest_commit(git_url, ref_name=None): + git_url = generateGitURL(git_url) + + if ref_name: + ref_name = "refs/heads/" + ref_name + 
else: + ref_name = "HEAD" + + g = git.cmd.Git() + + remote_refs = {} + for ref in g.ls_remote(git_url).split('\n'): + hash_ref_list = ref.split('\t') + remote_refs[hash_ref_list[1]] = hash_ref_list[0] + + return remote_refs.get(ref_name) + + +def get_latest_tag(git_url): + with get_temp_dir() as git_dir: + repo = git.Repo.init(git_dir) + origin = repo.create_remote("origin", url=git_url) + origin.fetch() + + refs = repo.git.ls_remote(tags=True, sort="creatordate").split('\n') + refs = [ref for ref in refs if ref.strip() != ""] + if len(refs) == 0: + return None, None + + last_ref = refs[-1] + hash_ref_list = last_ref.split('\t') + + tag = hash_ref_list[1].replace("refs/tags/", "") + commit_hash = repo.git.rev_parse(tag + "^{}") + return tag, commit_hash