contentdb/app/tasks/forumtasks.py

218 lines
6.1 KiB
Python
Raw Normal View History

2020-07-12 17:34:25 +02:00
# ContentDB
2021-01-30 17:59:42 +01:00
# Copyright (C) 2018-21 rubenwardy
2018-05-17 16:18:20 +02:00
#
# This program is free software: you can redistribute it and/or modify
2021-01-30 17:59:42 +01:00
# it under the terms of the GNU Affero General Public License as published by
2018-05-17 16:18:20 +02:00
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2021-01-30 17:59:42 +01:00
# GNU Affero General Public License for more details.
2018-05-17 16:18:20 +02:00
#
2021-01-30 17:59:42 +01:00
# You should have received a copy of the GNU Affero General Public License
2018-05-17 16:18:20 +02:00
# along with this program. If not, see <https://www.gnu.org/licenses/>.
2023-06-19 20:32:36 +02:00
import json
import re
import sys
import urllib.request
from typing import Optional
2023-06-19 20:32:36 +02:00
from urllib.parse import urljoin
2018-05-17 16:18:20 +02:00
from sqlalchemy import or_
2023-06-19 20:32:36 +02:00
from app.models import User, db, PackageType, ForumTopic
2018-05-14 00:31:42 +02:00
from app.tasks import celery
from app.utils import make_valid_username
2023-06-19 22:27:49 +02:00
from app.utils.phpbbparser import get_profile, get_topics_from_forum
from .usertasks import set_profile_picture_from_url, update_github_user_id_raw
2018-05-14 00:31:42 +02:00
def _get_or_create_user(forums_username: str, cache: Optional[dict] = None) -> Optional[User]:
    """
    Look up the user linked to a forum account, creating one if necessary.

    :param forums_username: Username on the forums.
    :param cache: Optional dict mapping forum usernames to users. A hit is
        returned without querying the database, and the resolved user is
        stored back into it. An empty dict is a valid (cold) cache.
    :return: The existing or newly added user, or None when the ContentDB
        username derived from `forums_username` already belongs to another
        user.
    """
    # Compare against None instead of relying on truthiness: callers start
    # with an empty dict, which is falsy and would otherwise never be filled.
    if cache is not None:
        user = cache.get(forums_username)
        if user:
            return user

    user = User.query.filter_by(forums_username=forums_username).first()
    if user is None:
        cdb_username = make_valid_username(forums_username)
        conflict = User.query.filter(
            or_(User.username == cdb_username, User.forums_username == cdb_username)).first()
        if conflict:
            # The derived username is already taken by a different user
            return None

        user = User(cdb_username)
        user.forums_username = forums_username
        user.display_name = forums_username
        # Only added to the session here; this function does not commit.
        db.session.add(user)

    if cache is not None:
        cache[forums_username] = user
    return user
2018-05-14 00:31:42 +02:00
@celery.task()
def check_forum_account(forums_username, force_replace_pic=False):
    """
    Sync a user's GitHub username and profile picture from their forum profile.

    :param forums_username: Forum username whose profile is fetched.
    :param force_replace_pic: When truthy, the profile picture is treated as
        replaceable even if the user already has a non-forum picture.
    :return: True if a GitHub username was stored and committed, False if
        not. None when the profile could not be fetched, does not exist, or
        no user could be resolved for the forum username.
    """
    print("### Checking " + forums_username, file=sys.stderr)

    try:
        forum_profile = get_profile("https://forum.minetest.net", forums_username)
    except OSError as err:
        print(err, file=sys.stderr)
        return

    if forum_profile is None:
        return

    user = _get_or_create_user(forums_username)
    if user is None:
        return

    # Get GitHub username; only a non-blank value counts as a change
    github_username = forum_profile.get("github")
    changed = github_username is not None and github_username.strip() != ""
    if changed:
        print("Updated GitHub username for " + user.display_name + " to " + github_username, file=sys.stderr)
        user.github_username = github_username
        update_github_user_id_raw(user)

    # Avatars given as absolute URLs are discarded; the rest are resolved
    # against the forum's base URL further down.
    avatar = forum_profile.avatar
    if avatar and avatar.startswith("http"):
        avatar = None

    # Save
    if changed:
        db.session.commit()

    if not avatar:
        return changed

    avatar = urljoin("https://forum.minetest.net/", avatar)
    print(f"####### Picture: {avatar}", file=sys.stderr)
    print(f"####### User pp {user.profile_pic}", file=sys.stderr)

    # Replace only when the user has no picture, still uses a forum-hosted
    # one, or the caller forces it.
    current_pic = user.profile_pic
    replaceable = (
        current_pic is None
        or current_pic == ""
        or current_pic.startswith("https://forum.minetest.net")
        or force_replace_pic
    )
    if replaceable and avatar.startswith("https://forum.minetest.net"):
        print(f"####### Queueing", file=sys.stderr)
        set_profile_picture_from_url.delay(user.username, avatar)

    return changed
2020-12-04 04:08:54 +01:00
@celery.task()
def check_all_forum_accounts():
    """Run `check_forum_account` for every user that has a forum username set."""
    linked_users = User.query.filter(User.forums_username.isnot(None)).all()
    for linked_user in linked_users:
        # Called directly (not via .delay), so each check runs inside this task
        check_forum_account(linked_user.forums_username)
# A tag is a run of lowercase letters, digits and underscores in square brackets
regex_tag = re.compile(r"\[([a-z0-9_]+)\]")

# Tags that are never accepted as a name
BANNED_NAMES = ["mod", "game", "old", "outdated", "wip", "api", "beta", "alpha", "git"]


def get_name_from_taglist(taglist):
    """
    Pick a name out of a string of `[tag]` markers.

    Tags are examined from last to first. A tag is rejected when it is 30
    characters or longer, is listed in BANNED_NAMES, or consists only of
    digits with at most one leading letter (e.g. `1` or `v5`).

    :param taglist: Text containing zero or more `[tag]` markers.
    :return: The last acceptable tag, or None if there is none.
    """
    candidates = regex_tag.findall(taglist)
    for candidate in candidates[::-1]:
        if len(candidate) >= 30:
            continue
        if candidate in BANNED_NAMES:
            continue
        if re.match(r"^[a-z]?[0-9]+$", candidate):
            continue
        return candidate

    return None
2023-06-19 20:32:36 +02:00
# Groups: (1) leading [tags], (2) the title text, (3) trailing [tags].
# Any remaining text without an opening bracket is allowed at the end.
regex_title = re.compile(r"^((?:\[[^\]]+\] *)*)([^\[]+) *((?:\[[^\]]+\] *)*)[^\[]*$")


def parse_title(title):
    """
    Split a topic title into its display title and a name taken from its tags.

    :param title: Raw topic title.
    :return: Tuple `(title, name)`. When the title matches `regex_title`,
        `title` is the stripped text between the tag groups and `name` is
        taken from the trailing tags only. Otherwise the raw title is
        returned unchanged and `name` is searched for in the whole string.
        `name` is None when no acceptable tag is found.
    """
    parsed = regex_title.match(title)
    if parsed is not None:
        _leading_tags, text, trailing_tags = parsed.groups()
        return text.strip(), get_name_from_taglist(trailing_tags)

    print("Invalid title format: " + title, file=sys.stderr)
    return title, get_name_from_taglist(title)
2023-06-19 22:27:49 +02:00
def get_links_from_mod_search():
    """
    Fetch the topic id to link mapping from Krock's mod search.

    Failures are reported on stderr and never raised for an unreachable
    service or an unparsable response, so the caller can carry on without
    links.

    :return: Dict mapping integer topic ids to the entry's `link` value.
        Empty when the service cannot be reached or the response is not a
        JSON list.
    """
    url = "https://krock-works.uk.to/minetest/modList.php"

    try:
        # The context manager closes the HTTP response even if reading fails
        with urllib.request.urlopen(url) as response:
            raw = response.read()
    except urllib.error.URLError:
        print("Unable to open krocks mod search!", file=sys.stderr)
        return {}

    try:
        # UnicodeDecodeError and json.JSONDecodeError are both ValueErrors
        entries = json.loads(raw.decode("utf-8"))
    except ValueError:
        entries = None

    if not isinstance(entries, list):
        print("Unable to parse krocks mod search!", file=sys.stderr)
        return {}

    links = {}
    for entry in entries:
        try:
            link = entry.get("link")
            if link is not None:
                links[int(entry["topicId"])] = link
        except (AttributeError, KeyError, TypeError, ValueError):
            # Skip a malformed entry (not an object, missing or non-numeric
            # topic id) instead of discarding the whole list.
            continue

    return links
2023-06-19 22:27:49 +02:00
@celery.task()
def import_topic_list():
    """
    Synchronise the ForumTopic table with the topics listed on the forums.

    Stored topics that are no longer present in the fetched listing are
    deleted; every fetched topic is created or updated. Topics whose author
    cannot be resolved to a user are skipped, and those usernames are
    reported on stderr at the end. All changes are committed once.
    """
    links_by_id = get_links_from_mod_search()

    # Each call is given the package type and WIP flag for one forum id via
    # `extra`, and collects into the shared `info_by_id` dict.
    # NOTE(review): presumably keyed by topic id - confirm in phpbbparser.
    info_by_id = {}
    get_topics_from_forum(15, out=info_by_id, extra={'type': PackageType.GAME, 'wip': False})
    get_topics_from_forum(50, out=info_by_id, extra={'type': PackageType.GAME, 'wip': True})
    get_topics_from_forum(11, out=info_by_id, extra={'type': PackageType.MOD, 'wip': False})
    get_topics_from_forum(9, out=info_by_id, extra={'type': PackageType.MOD, 'wip': True})
    get_topics_from_forum(4, out=info_by_id, extra={'type': PackageType.TXP, 'wip': False})

    # Caches
    username_to_user = {}
    topics_by_id = {}
    for topic in ForumTopic.query.all():
        if topic.topic_id in info_by_id:
            topics_by_id[topic.topic_id] = topic
        else:
            db.session.delete(topic)
            print(f"Deleting topic {topic.topic_id} title {topic.title}", file=sys.stderr)

    username_conflicts = set()

    # Create or update
    for info in info_by_id.values():
        topic_id = int(info["id"])

        # Get author
        username = info["author"]
        user = _get_or_create_user(username, username_to_user)
        if user is None:
            username_conflicts.add(username)
            continue

        # Get / add row
        topic = topics_by_id.get(topic_id)
        if topic is None:
            topic = ForumTopic()
            db.session.add(topic)

        # Parse title
        title, name = parse_title(info["title"])

        # Get link
        link = links_by_id.get(topic_id)

        # Fill row
        topic.topic_id = topic_id
        topic.author = user
        topic.type = info["type"]
        topic.title = title
        topic.name = name
        topic.link = link
        topic.wip = info["wip"]
        topic.posts = int(info["posts"])
        topic.views = int(info["views"])
        topic.created_at = info["date"]

    db.session.commit()

    if username_conflicts:
        # Reported on stderr like every other diagnostic in this module;
        # sorted so the output is stable between runs.
        print("The following forum usernames could not be created: " + ", ".join(sorted(username_conflicts)),
              file=sys.stderr)