contentdb/app/utils/url.py

47 lines
1.6 KiB
Python
Raw Normal View History

# ContentDB
# Copyright (C) 2022 rubenwardy
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import urllib.parse as urlparse
from typing import Optional, Dict, List
def url_set_query(url: str, params: Dict[str, str]) -> str:
url_parts = list(urlparse.urlparse(url))
query = dict(urlparse.parse_qsl(url_parts[4]))
query.update(params)
url_parts[4] = urlparse.urlencode(query)
return urlparse.urlunparse(url_parts)
def url_get_query(parsed_url: urlparse.ParseResult) -> Dict[str, List[str]]:
return urlparse.parse_qs(parsed_url.query)
def clean_youtube_url(url: str) -> Optional[str]:
parsed = urlparse.urlparse(url)
print(parsed)
if (parsed.netloc == "www.youtube.com" or parsed.netloc == "youtube.com") and parsed.path == "/watch":
print(url_get_query(parsed))
video_id = url_get_query(parsed).get("v", [None])[0]
if video_id:
return url_set_query("https://www.youtube.com/watch", {"v": video_id})
elif parsed.netloc == "youtu.be":
return url_set_query("https://www.youtube.com/watch", {"v": parsed.path[1:]})
return None