def _get_or_create_user(forums_username: str, cache: Optional[dict] = None) -> Optional[User]:
    """Return the user linked to a forum account, creating one if needed.

    Lookup order:
      1. ``cache`` (when supplied), keyed by the raw forum username.
      2. An existing ``User`` whose ``forums_username`` equals ``forums_username``.
      3. Otherwise a new ``User`` is built from the sanitised forum username
         and added to ``db.session``. This function does not commit.

    Args:
        forums_username: The account name as it appears on the forums.
        cache: Optional mapping of forum username -> ``User`` shared between
            calls. It is read and updated in place when provided.

    Returns:
        The existing or newly created user, or ``None`` when the sanitised
        name already belongs to another account (matched against either
        ``username`` or ``forums_username``), so no user can be created.
    """
    # Compare against None instead of relying on truthiness: callers hand in
    # an initially empty dict, and an empty dict is falsy, so a plain
    # ``if cache:`` would never read from or populate the cache.
    if cache is not None:
        cached = cache.get(forums_username)
        if cached is not None:
            return cached

    user = User.query.filter_by(forums_username=forums_username).first()
    if user is None:
        cdb_username = make_valid_username(forums_username)

        # The sanitised name must not collide with any existing account,
        # whether by site username or by a claimed forum username.
        clash = User.query.filter(
            or_(User.username == cdb_username, User.forums_username == cdb_username)
        ).first()
        if clash is not None:
            return None

        user = User(cdb_username)
        user.forums_username = forums_username
        user.display_name = forums_username
        db.session.add(user)

    if cache is not None:
        cache[forums_username] = user
    return user
def check_forum_account(forums_username, force_replace_pic=False): if profile is None: return - user = User.query.filter_by(forums_username=forums_username).first() - - # Create user - needs_saving = False + user = _get_or_create_user(forums_username) if user is None: - user = User(forums_username) - user.forums_username = forums_username - db.session.add(user) + return + + needs_saving = False # Get GitHub username github_username = profile.get("github") if github_username is not None and github_username.strip() != "": - print("Updated GitHub username for " + user.display_name + " to " + github_username) + print("Updated GitHub username for " + user.display_name + " to " + github_username, file=sys.stderr) user.github_username = github_username update_github_user_id_raw(user) needs_saving = True @@ -104,7 +127,7 @@ regex_title = re.compile(r"^((?:\[[^\]]+\] *)*)([^\[]+) *((?:\[[^\]]+\] *)*)[^\[ def parse_title(title): m = regex_title.match(title) if m is None: - print("Invalid title format: " + title) + print("Invalid title format: " + title, file=sys.stderr) return title, get_name_from_taglist(title) else: return m.group(2).strip(), get_name_from_taglist(m.group(3)) @@ -124,7 +147,7 @@ def get_links_from_mod_search(): pass except urllib.error.URLError: - print("Unable to open krocks mod search!") + print("Unable to open krocks mod search!", file=sys.stderr) return links return links @@ -135,37 +158,23 @@ def import_topic_list(): links_by_id = get_links_from_mod_search() info_by_id = {} - get_topics_from_forum(11, out=info_by_id, extra={'type': PackageType.MOD, 'wip': False}) - get_topics_from_forum(9, out=info_by_id, extra={'type': PackageType.MOD, 'wip': True}) get_topics_from_forum(15, out=info_by_id, extra={'type': PackageType.GAME, 'wip': False}) get_topics_from_forum(50, out=info_by_id, extra={'type': PackageType.GAME, 'wip': True}) + get_topics_from_forum(11, out=info_by_id, extra={'type': PackageType.MOD, 'wip': False}) + get_topics_from_forum(9, 
out=info_by_id, extra={'type': PackageType.MOD, 'wip': True}) + get_topics_from_forum(4, out=info_by_id, extra={'type': PackageType.TXP, 'wip': False}) # Caches username_to_user = {} topics_by_id = {} for topic in ForumTopic.query.all(): - topics_by_id[topic.topic_id] = topic + if topic.topic_id in info_by_id: + topics_by_id[topic.topic_id] = topic + else: + db.session.delete(topic) + print(f"Deleting topic {topic.topic_id} title {topic.title}", file=sys.stderr) - def get_or_create_user(username): - user = username_to_user.get(username) - if user: - return user - - if not is_username_valid(username): - return None - - user = User.query.filter_by(forums_username=username).first() - if user is None: - user = User.query.filter_by(username=username).first() - if user: - return None - - user = User(username) - user.forums_username = username - db.session.add(user) - - username_to_user[username] = user - return user + username_conflicts = set() # Create or update for info in info_by_id.values(): @@ -173,9 +182,9 @@ def import_topic_list(): # Get author username = info["author"] - user = get_or_create_user(username) + user = _get_or_create_user(username, username_to_user) if user is None: - print("Error! 
def is_username_valid(username: str) -> bool:
    """Return True when ``username`` is acceptable as an account name.

    A valid username is not ``None``, is at least two characters long,
    consists only of ASCII letters, digits, ``.``, ``_`` and ``-``, and is
    not made up entirely of dots.
    """
    if username is None or len(username) < 2:
        return False

    # fullmatch rather than match with ``^...$``: ``$`` also matches just
    # before a trailing newline, which would let "name\n" through.
    if re.fullmatch(r"[A-Za-z0-9._-]*", username) is None:
        return False

    # Reject names such as ".." that contain nothing but dots.
    return re.fullmatch(r"\.*", username) is None


def make_valid_username(username: str) -> str:
    """Replace each run of disallowed characters in ``username`` with ``_``.

    Allowed characters are ASCII letters, digits, ``.``, ``_`` and ``-``.
    Only the character set is normalised: the result may still be rejected
    by ``is_username_valid`` (for example when it is shorter than two
    characters or consists solely of dots).
    """
    return re.sub(r"[^A-Za-z0-9._-]+", "_", username)
re.compile(r"^.*t=([0-9]+).*$") def parse_forum_list_page(id, page, out, extra=None): num_per_page = 30 start = page*num_per_page+1 - print(" - Fetching page {} (topics {}-{})".format(page, start, start+num_per_page)) + print(" - Fetching page {} (topics {}-{})".format(page, start, start+num_per_page), file=sys.stderr) url = "https://forum.minetest.net/viewforum.php?f=" + str(id) + "&start=" + str(start) r = urllib.request.urlopen(url).read().decode("utf-8") @@ -154,7 +155,7 @@ def parse_forum_list_page(id, page, out, extra=None): views = topic.find(class_="views").find(text=True) if id in out: - print(" - got {} again, title: {}".format(id, title)) + print(" - got {} again, title: {}".format(id, title), file=sys.stderr) assert title == out[id]['title'] return False @@ -177,7 +178,7 @@ def parse_forum_list_page(id, page, out, extra=None): def get_topics_from_forum(id, out, extra=None): - print("Fetching all topics from forum {}".format(id)) + print("Fetching all topics from forum {}".format(id), file=sys.stderr) page = 0 while parse_forum_list_page(id, page, out, extra): page = page + 1