mirror of
https://github.com/minetest/contentdb.git
synced 2025-01-03 03:37:28 +01:00
parent
12545c69ac
commit
ca961cb35f
@ -111,9 +111,9 @@ def recalc_scores():
|
|||||||
|
|
||||||
|
|
||||||
@action("Import forum topic list")
def do_import_topic_list():
    """Queue the forum topic import and redirect to the task status page.

    The status page is given the admin page URL through the ``r`` query
    argument (presumably the location to return to once the task is done).
    """
    # Start the background job first so its id can be put into the URL.
    task = import_topic_list.delay()
    return_url = url_for("admin.admin_page")
    status_url = url_for("tasks.check", id=task.id, r=return_url)
    return redirect(status_url)
|
||||||
|
|
||||||
|
|
||||||
@action("Check all forum accounts")
|
@action("Check all forum accounts")
|
||||||
|
@ -18,15 +18,41 @@ import json
|
|||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
import urllib.request
|
import urllib.request
|
||||||
|
from typing import Optional
|
||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
|
from sqlalchemy import or_
|
||||||
|
|
||||||
from app.models import User, db, PackageType, ForumTopic
|
from app.models import User, db, PackageType, ForumTopic
|
||||||
from app.tasks import celery
|
from app.tasks import celery
|
||||||
from app.utils import is_username_valid
|
from app.utils import make_valid_username
|
||||||
from app.utils.phpbbparser import get_profile, get_topics_from_forum
|
from app.utils.phpbbparser import get_profile, get_topics_from_forum
|
||||||
from .usertasks import set_profile_picture_from_url, update_github_user_id_raw
|
from .usertasks import set_profile_picture_from_url, update_github_user_id_raw
|
||||||
|
|
||||||
|
|
||||||
|
def _get_or_create_user(forums_username: str, cache: Optional[dict] = None) -> Optional[User]:
    """Return the user linked to a forum username, creating one if needed.

    Lookup order:

    1. ``cache`` (when supplied), keyed by the forum username.
    2. An existing ``User`` whose ``forums_username`` matches.
    3. Otherwise a new ``User`` is created with a username derived from the
       forum username via ``make_valid_username`` and added to the session
       (the caller is responsible for committing).

    Args:
        forums_username: The author's username on the forums.
        cache: Optional dict mapping forum usernames to ``User`` objects.
            It is read and updated in place, and may be empty.

    Returns:
        The matching or newly created user, or ``None`` when the derived
        username is already taken by a different account (as a username or
        as a forum username), in which case no user can be created.
    """
    # Compare against None rather than relying on truthiness: an empty dict
    # is falsy, so `if cache:` would never use (or fill) a fresh cache.
    if cache is not None:
        user = cache.get(forums_username)
        if user:
            return user

    user = User.query.filter_by(forums_username=forums_username).first()
    if user is None:
        cdb_username = make_valid_username(forums_username)

        # Refuse to create an account whose name collides with another user.
        conflict = User.query.filter(
            or_(User.username == cdb_username, User.forums_username == cdb_username)
        ).first()
        if conflict:
            return None

        user = User(cdb_username)
        user.forums_username = forums_username
        user.display_name = forums_username
        db.session.add(user)

    if cache is not None:
        cache[forums_username] = user
    return user
|
||||||
|
|
||||||
|
|
||||||
@celery.task()
|
@celery.task()
|
||||||
def check_forum_account(forums_username, force_replace_pic=False):
|
def check_forum_account(forums_username, force_replace_pic=False):
|
||||||
print("### Checking " + forums_username, file=sys.stderr)
|
print("### Checking " + forums_username, file=sys.stderr)
|
||||||
@ -39,19 +65,16 @@ def check_forum_account(forums_username, force_replace_pic=False):
|
|||||||
if profile is None:
|
if profile is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
user = User.query.filter_by(forums_username=forums_username).first()
|
user = _get_or_create_user(forums_username)
|
||||||
|
|
||||||
# Create user
|
|
||||||
needs_saving = False
|
|
||||||
if user is None:
|
if user is None:
|
||||||
user = User(forums_username)
|
return
|
||||||
user.forums_username = forums_username
|
|
||||||
db.session.add(user)
|
needs_saving = False
|
||||||
|
|
||||||
# Get GitHub username
|
# Get GitHub username
|
||||||
github_username = profile.get("github")
|
github_username = profile.get("github")
|
||||||
if github_username is not None and github_username.strip() != "":
|
if github_username is not None and github_username.strip() != "":
|
||||||
print("Updated GitHub username for " + user.display_name + " to " + github_username)
|
print("Updated GitHub username for " + user.display_name + " to " + github_username, file=sys.stderr)
|
||||||
user.github_username = github_username
|
user.github_username = github_username
|
||||||
update_github_user_id_raw(user)
|
update_github_user_id_raw(user)
|
||||||
needs_saving = True
|
needs_saving = True
|
||||||
@ -104,7 +127,7 @@ regex_title = re.compile(r"^((?:\[[^\]]+\] *)*)([^\[]+) *((?:\[[^\]]+\] *)*)[^\[
|
|||||||
def parse_title(title):
    """Split a forum topic title into a (title, name) pair.

    When the title matches ``regex_title``, the second capture group
    (stripped) is the title and the name is taken from the third capture
    group. Otherwise a warning is written to stderr, the title is returned
    unchanged, and the name is looked up in the whole title instead.
    """
    matched = regex_title.match(title)
    if matched is not None:
        clean_title = matched.group(2).strip()
        return clean_title, get_name_from_taglist(matched.group(3))

    print("Invalid title format: " + title, file=sys.stderr)
    return title, get_name_from_taglist(title)
|
||||||
@ -124,7 +147,7 @@ def get_links_from_mod_search():
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
except urllib.error.URLError:
|
except urllib.error.URLError:
|
||||||
print("Unable to open krocks mod search!")
|
print("Unable to open krocks mod search!", file=sys.stderr)
|
||||||
return links
|
return links
|
||||||
|
|
||||||
return links
|
return links
|
||||||
@ -135,37 +158,23 @@ def import_topic_list():
|
|||||||
links_by_id = get_links_from_mod_search()
|
links_by_id = get_links_from_mod_search()
|
||||||
|
|
||||||
info_by_id = {}
|
info_by_id = {}
|
||||||
get_topics_from_forum(11, out=info_by_id, extra={'type': PackageType.MOD, 'wip': False})
|
|
||||||
get_topics_from_forum(9, out=info_by_id, extra={'type': PackageType.MOD, 'wip': True})
|
|
||||||
get_topics_from_forum(15, out=info_by_id, extra={'type': PackageType.GAME, 'wip': False})
|
get_topics_from_forum(15, out=info_by_id, extra={'type': PackageType.GAME, 'wip': False})
|
||||||
get_topics_from_forum(50, out=info_by_id, extra={'type': PackageType.GAME, 'wip': True})
|
get_topics_from_forum(50, out=info_by_id, extra={'type': PackageType.GAME, 'wip': True})
|
||||||
|
get_topics_from_forum(11, out=info_by_id, extra={'type': PackageType.MOD, 'wip': False})
|
||||||
|
get_topics_from_forum(9, out=info_by_id, extra={'type': PackageType.MOD, 'wip': True})
|
||||||
|
get_topics_from_forum(4, out=info_by_id, extra={'type': PackageType.TXP, 'wip': False})
|
||||||
|
|
||||||
# Caches
|
# Caches
|
||||||
username_to_user = {}
|
username_to_user = {}
|
||||||
topics_by_id = {}
|
topics_by_id = {}
|
||||||
for topic in ForumTopic.query.all():
|
for topic in ForumTopic.query.all():
|
||||||
topics_by_id[topic.topic_id] = topic
|
if topic.topic_id in info_by_id:
|
||||||
|
topics_by_id[topic.topic_id] = topic
|
||||||
|
else:
|
||||||
|
db.session.delete(topic)
|
||||||
|
print(f"Deleting topic {topic.topic_id} title {topic.title}", file=sys.stderr)
|
||||||
|
|
||||||
def get_or_create_user(username):
|
username_conflicts = set()
|
||||||
user = username_to_user.get(username)
|
|
||||||
if user:
|
|
||||||
return user
|
|
||||||
|
|
||||||
if not is_username_valid(username):
|
|
||||||
return None
|
|
||||||
|
|
||||||
user = User.query.filter_by(forums_username=username).first()
|
|
||||||
if user is None:
|
|
||||||
user = User.query.filter_by(username=username).first()
|
|
||||||
if user:
|
|
||||||
return None
|
|
||||||
|
|
||||||
user = User(username)
|
|
||||||
user.forums_username = username
|
|
||||||
db.session.add(user)
|
|
||||||
|
|
||||||
username_to_user[username] = user
|
|
||||||
return user
|
|
||||||
|
|
||||||
# Create or update
|
# Create or update
|
||||||
for info in info_by_id.values():
|
for info in info_by_id.values():
|
||||||
@ -173,9 +182,9 @@ def import_topic_list():
|
|||||||
|
|
||||||
# Get author
|
# Get author
|
||||||
username = info["author"]
|
username = info["author"]
|
||||||
user = get_or_create_user(username)
|
user = _get_or_create_user(username, username_to_user)
|
||||||
if user is None:
|
if user is None:
|
||||||
print("Error! Unable to create user {}".format(username), file=sys.stderr)
|
username_conflicts.add(username)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Get / add row
|
# Get / add row
|
||||||
@ -203,3 +212,6 @@ def import_topic_list():
|
|||||||
topic.created_at = info["date"]
|
topic.created_at = info["date"]
|
||||||
|
|
||||||
db.session.commit()
|
db.session.commit()
|
||||||
|
|
||||||
|
if len(username_conflicts) > 0:
|
||||||
|
print("The following forum usernames could not be created: " + (", ".join(username_conflicts)))
|
||||||
|
@ -16,6 +16,15 @@
|
|||||||
|
|
||||||
import user_agents
|
import user_agents
|
||||||
|
|
||||||
|
from app.utils import make_valid_username
|
||||||
|
|
||||||
|
|
||||||
|
def test_make_valid_username():
    """Check make_valid_username on valid names and names needing replacement."""
    expected_by_input = {
        # Already valid: returned unchanged.
        "rubenwardy": "rubenwardy",
        "Test123._-": "Test123._-",
        # Disallowed characters are replaced with an underscore.
        "Foo Bar": "Foo_Bar",
        "François": "Fran_ois",
    }
    for raw, expected in expected_by_input.items():
        assert make_valid_username(raw) == expected
|
||||||
|
|
||||||
|
|
||||||
def test_web_is_not_bot():
|
def test_web_is_not_bot():
|
||||||
assert not user_agents.parse("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0").is_bot
|
assert not user_agents.parse("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0").is_bot
|
||||||
|
@ -28,11 +28,15 @@ from .user import *
|
|||||||
YESES = ["yes", "true", "1", "on"]
|
YESES = ["yes", "true", "1", "on"]
|
||||||
|
|
||||||
|
|
||||||
def is_username_valid(username: str) -> bool:
    """Return True if ``username`` is an acceptable username.

    A valid username is not None, has at least two characters, contains only
    ASCII letters, digits, ``.``, ``_`` and ``-``, and does not consist
    solely of dots.

    Always returns a real ``bool`` (never ``None`` or a match object).
    """
    if username is None or len(username) < 2:
        return False

    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing newline, which would let a name such as "ab\n" through.
    if re.fullmatch(r"[A-Za-z0-9._-]*", username) is None:
        return False

    # Reject names made up of nothing but dots (e.g. "..").
    return re.fullmatch(r"\.*", username) is None
|
||||||
|
|
||||||
|
|
||||||
|
def make_valid_username(username: str) -> str:
    """Replace every run of disallowed characters in ``username`` with ``_``.

    Allowed characters are ASCII letters, digits, ``.``, ``_`` and ``-``; a
    run of one or more other characters becomes a single underscore. Only
    characters are substituted, so very short or all-dot inputs come back
    unchanged.
    """
    disallowed_run = re.compile(r"[^A-Za-z0-9._-]+")
    return disallowed_run.sub("_", username)
|
||||||
|
|
||||||
|
|
||||||
def is_yes(val):
    """Tell whether ``val`` is one of the affirmative strings in ``YESES``.

    The comparison is case-insensitive. A falsy ``val`` (such as ``None`` or
    an empty string) is returned as-is rather than converted to ``False``.
    """
    if not val:
        return val
    return val.lower() in YESES
|
||||||
|
|
||||||
|
@ -3,6 +3,7 @@
|
|||||||
# Source: https://github.com/rubenwardy/python_phpbb_parser
|
# Source: https://github.com/rubenwardy/python_phpbb_parser
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
import sys
|
||||||
import urllib
|
import urllib
|
||||||
import urllib.parse as urlparse
|
import urllib.parse as urlparse
|
||||||
import urllib.request
|
import urllib.request
|
||||||
@ -121,7 +122,7 @@ regex_id = re.compile(r"^.*t=([0-9]+).*$")
|
|||||||
def parse_forum_list_page(id, page, out, extra=None):
|
def parse_forum_list_page(id, page, out, extra=None):
|
||||||
num_per_page = 30
|
num_per_page = 30
|
||||||
start = page*num_per_page+1
|
start = page*num_per_page+1
|
||||||
print(" - Fetching page {} (topics {}-{})".format(page, start, start+num_per_page))
|
print(" - Fetching page {} (topics {}-{})".format(page, start, start+num_per_page), file=sys.stderr)
|
||||||
|
|
||||||
url = "https://forum.minetest.net/viewforum.php?f=" + str(id) + "&start=" + str(start)
|
url = "https://forum.minetest.net/viewforum.php?f=" + str(id) + "&start=" + str(start)
|
||||||
r = urllib.request.urlopen(url).read().decode("utf-8")
|
r = urllib.request.urlopen(url).read().decode("utf-8")
|
||||||
@ -154,7 +155,7 @@ def parse_forum_list_page(id, page, out, extra=None):
|
|||||||
views = topic.find(class_="views").find(text=True)
|
views = topic.find(class_="views").find(text=True)
|
||||||
|
|
||||||
if id in out:
|
if id in out:
|
||||||
print(" - got {} again, title: {}".format(id, title))
|
print(" - got {} again, title: {}".format(id, title), file=sys.stderr)
|
||||||
assert title == out[id]['title']
|
assert title == out[id]['title']
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@ -177,7 +178,7 @@ def parse_forum_list_page(id, page, out, extra=None):
|
|||||||
|
|
||||||
|
|
||||||
def get_topics_from_forum(id, out, extra=None):
|
def get_topics_from_forum(id, out, extra=None):
|
||||||
print("Fetching all topics from forum {}".format(id))
|
print("Fetching all topics from forum {}".format(id), file=sys.stderr)
|
||||||
page = 0
|
page = 0
|
||||||
while parse_forum_list_page(id, page, out, extra):
|
while parse_forum_list_page(id, page, out, extra):
|
||||||
page = page + 1
|
page = page + 1
|
||||||
|
Loading…
Reference in New Issue
Block a user