diff --git a/Dockerfile b/Dockerfile index 57a97819..db55bf2a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,7 +3,7 @@ # First stage to build python wheel -FROM python:3.10.8-slim-bullseye AS builder +FROM python:3.10.9-slim-bullseye AS builder ARG TARGETPLATFORM RUN apt-get update && apt-get install -y --no-install-recommends \ @@ -14,7 +14,7 @@ COPY ./tubearchivist/requirements.txt /requirements.txt RUN pip install --user -r requirements.txt # build final image -FROM python:3.10.8-slim-bullseye as tubearchivist +FROM python:3.10.9-slim-bullseye as tubearchivist ARG TARGETPLATFORM ARG INSTALL_DEBUG diff --git a/tubearchivist/api/README.md b/tubearchivist/api/README.md index 1a95fb8c..2328f685 100644 --- a/tubearchivist/api/README.md +++ b/tubearchivist/api/README.md @@ -221,6 +221,13 @@ Add to queue previously ignored video: } ``` +Download existing video now: +```json +{ + "status": "priority" +} +``` + DELETE /api/download/\/ Forget or delete from download queue diff --git a/tubearchivist/api/views.py b/tubearchivist/api/views.py index c99871dc..7146de72 100644 --- a/tubearchivist/api/views.py +++ b/tubearchivist/api/views.py @@ -13,9 +13,9 @@ from home.src.index.generic import Pagination from home.src.index.reindex import ReindexProgress from home.src.index.video import SponsorBlock, YoutubeVideo from home.src.ta.config import AppConfig -from home.src.ta.helper import UrlListParser from home.src.ta.ta_redis import RedisArchivist, RedisQueue -from home.tasks import check_reindex, extrac_dl, subscribe_to +from home.src.ta.urlparser import Parser +from home.tasks import check_reindex, download_single, extrac_dl, subscribe_to from rest_framework.authentication import ( SessionAuthentication, TokenAuthentication, @@ -395,7 +395,7 @@ class DownloadApiView(ApiBaseView): """ search_base = "ta_download/_doc/" - valid_status = ["pending", "ignore"] + valid_status = ["pending", "ignore", "priority"] def get(self, request, video_id): # pylint: disable=unused-argument @@ 
-411,9 +411,17 @@ class DownloadApiView(ApiBaseView): print(message) return Response({"message": message}, status=400) + pending_video, status_code = PendingInteract(video_id).get_item() + if status_code == 404: + message = f"{video_id}: item not found {status_code}" + return Response({"message": message}, status=404) + print(f"{video_id}: change status to {item_status}") - PendingInteract(video_id=video_id, status=item_status).update_status() - RedisQueue(queue_name="dl_queue").clear_item(video_id) + if item_status == "priority": + download_single.delay(pending_video) + else: + PendingInteract(video_id, item_status).update_status() + RedisQueue(queue_name="dl_queue").clear_item(video_id) return Response(request.data) @@ -476,7 +484,7 @@ class DownloadApiListView(ApiBaseView): pending = [i["youtube_id"] for i in to_add if i["status"] == "pending"] url_str = " ".join(pending) try: - youtube_ids = UrlListParser(url_str).process_list() + youtube_ids = Parser(url_str).parse() except ValueError: message = f"failed to parse: {url_str}" print(message) diff --git a/tubearchivist/home/apps.py b/tubearchivist/home/apps.py index 1356ac80..073afcc7 100644 --- a/tubearchivist/home/apps.py +++ b/tubearchivist/home/apps.py @@ -129,14 +129,18 @@ class StartupCheck: def es_set_vid_type(self): """update path 0.3.0 to 0.3.1, set default vid_type to video""" + index_list = ["ta_video", "ta_download"] data = { "query": { "bool": {"must_not": [{"exists": {"field": "vid_type"}}]} }, - "script": {"source": "ctx._source['vid_type'] = 'video'"}, + "script": {"source": "ctx._source['vid_type'] = 'videos'"}, } - response, _ = ElasticWrap("ta_video/_update_by_query").post(data=data) - print(f"ta_video vid_type index update ran: {response}") + + for index_name in index_list: + path = f"{index_name}/_update_by_query" + response, _ = ElasticWrap(path).post(data=data) + print(f"{index_name} vid_type index update ran: {response}") class HomeConfig(AppConfig): diff --git 
a/tubearchivist/home/config.json b/tubearchivist/home/config.json index de467fb5..aa446399 100644 --- a/tubearchivist/home/config.json +++ b/tubearchivist/home/config.json @@ -15,8 +15,8 @@ "auto_search": false, "auto_download": false, "channel_size": 50, - "live_channel_size": 0, - "shorts_channel_size": 0 + "live_channel_size": 50, + "shorts_channel_size": 50 }, "downloads": { "limit_count": false, diff --git a/tubearchivist/home/src/download/queue.py b/tubearchivist/home/src/download/queue.py index a3338c9f..9e62841c 100644 --- a/tubearchivist/home/src/download/queue.py +++ b/tubearchivist/home/src/download/queue.py @@ -17,7 +17,7 @@ from home.src.es.connect import ElasticWrap, IndexPaginate from home.src.index.playlist import YoutubePlaylist from home.src.index.video_constants import VideoTypeEnum from home.src.ta.config import AppConfig -from home.src.ta.helper import DurationConverter +from home.src.ta.helper import DurationConverter, is_shorts from home.src.ta.ta_redis import RedisArchivist @@ -117,6 +117,12 @@ class PendingInteract: path = f"ta_download/_update/{self.video_id}" _, _ = ElasticWrap(path).post(data=data) + def get_item(self): + """return pending item dict""" + path = f"ta_download/_doc/{self.video_id}" + response, status_code = ElasticWrap(path).get() + return response["_source"], status_code + class PendingList(PendingIndex): """manage the pending videos list""" @@ -156,28 +162,37 @@ class PendingList(PendingIndex): def _process_entry(self, entry): """process single entry from url list""" + vid_type = self._get_vid_type(entry) if entry["type"] == "video": - vid_type = entry.get("vid_type", VideoTypeEnum.VIDEO) self._add_video(entry["url"], vid_type) elif entry["type"] == "channel": - self._parse_channel(entry["url"]) + self._parse_channel(entry["url"], vid_type) elif entry["type"] == "playlist": self._parse_playlist(entry["url"]) PlaylistSubscription().process_url_str([entry], subscribed=False) else: raise ValueError(f"invalid url_type: 
{entry}") - def _add_video(self, url, vid_type=VideoTypeEnum.VIDEO): + @staticmethod + def _get_vid_type(entry): + """add vid type enum if available""" + vid_type_str = entry.get("vid_type") + if not vid_type_str: + return VideoTypeEnum.UNKNOWN + + return VideoTypeEnum(vid_type_str) + + def _add_video(self, url, vid_type): """add video to list""" if url not in self.missing_videos and url not in self.to_skip: self.missing_videos.append((url, vid_type)) else: print(f"{url}: skipped adding already indexed video to download.") - def _parse_channel(self, url): + def _parse_channel(self, url, vid_type): """add all videos of channel to list""" video_results = ChannelSubscription().get_last_youtube_videos( - url, limit=False + url, limit=False, query_filter=vid_type ) for video_id, _, vid_type in video_results: self._add_video(video_id, vid_type) @@ -189,9 +204,8 @@ class PendingList(PendingIndex): video_results = playlist.json_data.get("playlist_entries") youtube_ids = [i["youtube_id"] for i in video_results] for video_id in youtube_ids: - # FIXME: This will need to be adjusted to support Live/Shorts - # from playlists - self._add_video(video_id, VideoTypeEnum.VIDEO) + # match vid_type later + self._add_video(video_id, VideoTypeEnum.UNKNOWN) def add_to_pending(self, status="pending"): """add missing videos to pending list""" @@ -238,7 +252,7 @@ class PendingList(PendingIndex): if idx + 1 % 25 == 0: print("adding to queue progress: " + progress) - def get_youtube_details(self, youtube_id, vid_type=VideoTypeEnum.VIDEO): + def get_youtube_details(self, youtube_id, vid_type=VideoTypeEnum.VIDEOS): """get details from youtubedl for single pending video""" vid = YtWrap(self.yt_obs, self.config).extract(youtube_id) if not vid: @@ -252,9 +266,28 @@ class PendingList(PendingIndex): if vid["live_status"] in ["is_upcoming", "is_live"]: return False + if vid["live_status"] == "was_live": + vid_type = VideoTypeEnum.STREAMS + else: + if self._check_shorts(vid): + vid_type = 
VideoTypeEnum.SHORTS + return self._parse_youtube_details(vid, vid_type) - def _parse_youtube_details(self, vid, vid_type=VideoTypeEnum.VIDEO): + @staticmethod + def _check_shorts(vid): + """check if vid is shorts video""" + if vid["width"] > vid["height"]: + return False + + duration = vid.get("duration") + if duration and isinstance(duration, int): + if duration > 60: + return False + + return is_shorts(vid["id"]) + + def _parse_youtube_details(self, vid, vid_type=VideoTypeEnum.VIDEOS): """parse response""" vid_id = vid.get("id") duration_str = DurationConverter.get_str(vid["duration"]) diff --git a/tubearchivist/home/src/download/subscriptions.py b/tubearchivist/home/src/download/subscriptions.py index df77bdec..f154b490 100644 --- a/tubearchivist/home/src/download/subscriptions.py +++ b/tubearchivist/home/src/download/subscriptions.py @@ -36,30 +36,15 @@ class ChannelSubscription: return all_channels - def get_last_youtube_videos(self, channel_id, limit=True): + def get_last_youtube_videos( + self, channel_id, limit=True, query_filter=VideoTypeEnum.UNKNOWN + ): """get a list of last videos from channel""" - - queries = [ - ( - VideoTypeEnum.VIDEO, - "videos", - self.config["subscriptions"]["channel_size"], - ), - ( - VideoTypeEnum.LIVE, - "streams", - self.config["subscriptions"]["live_channel_size"], - ), - ( - VideoTypeEnum.SHORT, - "shorts", - self.config["subscriptions"]["shorts_channel_size"], - ), - ] + queries = self._build_queries(query_filter, limit) last_videos = [] - for vid_type, url, limit_amount in queries: + for vid_type, limit_amount in queries: obs = { "skip_download": True, "extract_flat": True, @@ -67,8 +52,9 @@ class ChannelSubscription: if limit: obs["playlistend"] = limit_amount + path = vid_type.value channel = YtWrap(obs, self.config).extract( - f"https://www.youtube.com/channel/{channel_id}/{url}" + f"https://www.youtube.com/channel/{channel_id}/{path}" ) if not channel: continue @@ -78,6 +64,36 @@ class ChannelSubscription: return 
last_videos + def _build_queries(self, query_filter, limit): + """build query list for vid_type""" + limit_map = { + "videos": self.config["subscriptions"]["channel_size"], + "streams": self.config["subscriptions"]["live_channel_size"], + "shorts": self.config["subscriptions"]["shorts_channel_size"], + } + + queries = [] + + if query_filter and query_filter.value != "unknown": + if limit: + query_limit = limit_map.get(query_filter.value) + else: + query_limit = False + + queries.append((query_filter, query_limit)) + + return queries + + for query_item, default_limit in limit_map.items(): + if limit: + query_limit = default_limit + else: + query_limit = False + + queries.append((VideoTypeEnum(query_item), query_limit)) + + return queries + def find_missing(self): """add missing videos from subscribed channels to pending""" all_channels = self.get_channels() diff --git a/tubearchivist/home/src/download/yt_dlp_handler.py b/tubearchivist/home/src/download/yt_dlp_handler.py index 636f3fa5..34855484 100644 --- a/tubearchivist/home/src/download/yt_dlp_handler.py +++ b/tubearchivist/home/src/download/yt_dlp_handler.py @@ -104,7 +104,7 @@ class DownloadPostProcess: continue # validate from local - playlists = channel.get_indexed_playlists() + playlists = channel.get_indexed_playlists(active_only=True) all_channel_playlist = [i["playlist_id"] for i in playlists] self._validate_channel_playlist(all_channel_playlist, id_c) @@ -117,6 +117,7 @@ class DownloadPostProcess: playlist.build_json(scrape=True) if not playlist.json_data: playlist.deactivate() + continue playlist.add_vids_to_playlist() playlist.upload_to_es() @@ -188,7 +189,7 @@ class VideoDownloader: youtube_id = youtube_data.get("youtube_id") tmp_vid_type = youtube_data.get( - "vid_type", VideoTypeEnum.VIDEO.value + "vid_type", VideoTypeEnum.VIDEOS.value ) video_type = VideoTypeEnum(tmp_vid_type) print(f"Downloading type: {video_type}") @@ -268,7 +269,7 @@ class VideoDownloader: "youtube_id": i["youtube_id"], # Using 
.value in default val to match what would be # decoded when parsing json if not set - "vid_type": i.get("vid_type", VideoTypeEnum.VIDEO.value), + "vid_type": i.get("vid_type", VideoTypeEnum.VIDEOS.value), } ) for i in pending.all_pending diff --git a/tubearchivist/home/src/es/connect.py b/tubearchivist/home/src/es/connect.py index aac13dcc..ab73ea8e 100644 --- a/tubearchivist/home/src/es/connect.py +++ b/tubearchivist/home/src/es/connect.py @@ -34,9 +34,11 @@ class ElasticWrap: def get(self, data=False): """get data from es""" if data: - response = requests.get(self.url, json=data, auth=self.auth) + response = requests.get( + self.url, json=data, auth=self.auth, timeout=10 + ) else: - response = requests.get(self.url, auth=self.auth) + response = requests.get(self.url, auth=self.auth, timeout=10) if not response.ok: print(response.text) @@ -53,10 +55,16 @@ class ElasticWrap: if data: response = requests.post( - self.url, data=payload, headers=headers, auth=self.auth + self.url, + data=payload, + headers=headers, + auth=self.auth, + timeout=10, ) else: - response = requests.post(self.url, headers=headers, auth=self.auth) + response = requests.post( + self.url, headers=headers, auth=self.auth, timeout=10 + ) if not response.ok: print(response.text) @@ -67,7 +75,9 @@ class ElasticWrap: """put data to es""" if refresh: self.url = f"{self.url}/?refresh=true" - response = requests.put(f"{self.url}", json=data, auth=self.auth) + response = requests.put( + f"{self.url}", json=data, auth=self.auth, timeout=10 + ) if not response.ok: print(response.text) print(data) @@ -80,9 +90,11 @@ class ElasticWrap: if refresh: self.url = f"{self.url}/?refresh=true" if data: - response = requests.delete(self.url, json=data, auth=self.auth) + response = requests.delete( + self.url, json=data, auth=self.auth, timeout=10 + ) else: - response = requests.delete(self.url, auth=self.auth) + response = requests.delete(self.url, auth=self.auth, timeout=10) if not response.ok: print(response.text) 
diff --git a/tubearchivist/home/src/frontend/api_calls.py b/tubearchivist/home/src/frontend/api_calls.py index 96957921..a7d38907 100644 --- a/tubearchivist/home/src/frontend/api_calls.py +++ b/tubearchivist/home/src/frontend/api_calls.py @@ -9,11 +9,10 @@ from home.src.download.subscriptions import ( PlaylistSubscription, ) from home.src.index.playlist import YoutubePlaylist -from home.src.ta.helper import UrlListParser from home.src.ta.ta_redis import RedisArchivist, RedisQueue +from home.src.ta.urlparser import Parser from home.tasks import ( download_pending, - download_single, index_channel_playlists, kill_dl, re_sync_thumbs, @@ -56,7 +55,6 @@ class PostData: "sort_order": self._sort_order, "hide_watched": self._hide_watched, "show_subed_only": self._show_subed_only, - "dlnow": self._dlnow, "show_ignored_only": self._show_ignored_only, "manual-import": self._manual_import, "re-embed": self._re_embed, @@ -125,7 +123,7 @@ class PostData: """unsubscribe from channels or playlists""" id_unsub = self.exec_val print(f"{id_unsub}: unsubscribe") - to_unsub_list = UrlListParser(id_unsub).process_list() + to_unsub_list = Parser(id_unsub).parse() for to_unsub in to_unsub_list: unsub_type = to_unsub["type"] unsub_id = to_unsub["url"] @@ -178,16 +176,6 @@ class PostData: RedisArchivist().set_message(key, message) return {"success": True} - def _dlnow(self): - """start downloading single vid now""" - youtube_id = self.exec_val - print(f"{youtube_id}: downloading now") - running = download_single.delay(youtube_id=youtube_id) - task_id = running.id - print("set task id: " + task_id) - RedisArchivist().set_message("dl_queue_id", task_id) - return {"success": True} - def _show_ignored_only(self): """switch view on /downloads/ to show ignored only""" show_value = self.exec_val diff --git a/tubearchivist/home/src/frontend/watched.py b/tubearchivist/home/src/frontend/watched.py index 8978b961..2ce80180 100644 --- a/tubearchivist/home/src/frontend/watched.py +++ 
b/tubearchivist/home/src/frontend/watched.py @@ -6,7 +6,7 @@ functionality: from datetime import datetime from home.src.es.connect import ElasticWrap -from home.src.ta.helper import UrlListParser +from home.src.ta.urlparser import Parser class WatchState: @@ -34,7 +34,7 @@ class WatchState: def _dedect_type(self): """find youtube id type""" print(self.youtube_id) - url_process = UrlListParser(self.youtube_id).process_list() + url_process = Parser(self.youtube_id).parse() url_type = url_process[0]["type"] return url_type diff --git a/tubearchivist/home/src/index/channel.py b/tubearchivist/home/src/index/channel.py index 90e0c116..894ce322 100644 --- a/tubearchivist/home/src/index/channel.py +++ b/tubearchivist/home/src/index/channel.py @@ -392,14 +392,16 @@ class YoutubeChannel(YouTubeItem): all_entries = [(i["id"], i["title"]) for i in playlists["entries"]] self.all_playlists = all_entries - def get_indexed_playlists(self): + def get_indexed_playlists(self, active_only=False): """get all indexed playlists from channel""" - data = { - "query": { - "term": {"playlist_channel_id": {"value": self.youtube_id}} - }, - "sort": [{"playlist_channel.keyword": {"order": "desc"}}], - } + must_list = [ + {"term": {"playlist_channel_id": {"value": self.youtube_id}}} + ] + if active_only: + must_list.append({"term": {"playlist_active": {"value": True}}}) + + data = {"query": {"bool": {"must": must_list}}} + all_playlists = IndexPaginate("ta_playlist", data).get_results() return all_playlists diff --git a/tubearchivist/home/src/index/reindex.py b/tubearchivist/home/src/index/reindex.py index 5b65afcc..b8058c66 100644 --- a/tubearchivist/home/src/index/reindex.py +++ b/tubearchivist/home/src/index/reindex.py @@ -100,7 +100,7 @@ class ReindexOutdated(ReindexBase): refresh_key = reindex_config["refresh_key"] now_lte = self.now - self.interval * 24 * 60 * 60 must_list = [ - {"match": {"active": True}}, + {"match": {reindex_config["active_key"]: True}}, {"range": {refresh_key: {"lte": 
now_lte}}}, ] data = { @@ -205,7 +205,7 @@ class Reindex(ReindexBase): def reindex_all(self): """reindex all in queue""" - if self.cookie_invalid(): + if not self.cookie_is_valid(): print("[reindex] cookie invalid, exiting...") return @@ -246,7 +246,7 @@ class Reindex(ReindexBase): try: self._reindex_single_video_call(youtube_id) except FileNotFoundError: - ChannelUrlFixer(youtube_id, self.config) + ChannelUrlFixer(youtube_id, self.config).run() self._reindex_single_video_call(youtube_id) def _reindex_single_video_call(self, youtube_id): @@ -333,8 +333,8 @@ class Reindex(ReindexBase): handler.get_indexed() self.all_indexed_ids = [i["youtube_id"] for i in handler.all_videos] - def cookie_invalid(self): - """return true if cookie is enabled and invalid""" + def cookie_is_valid(self): + """return true if cookie is enabled and valid""" if not self.config["downloads"]["cookie_import"]: return False diff --git a/tubearchivist/home/src/index/video.py b/tubearchivist/home/src/index/video.py index 92cbfdc5..406ad66d 100644 --- a/tubearchivist/home/src/index/video.py +++ b/tubearchivist/home/src/index/video.py @@ -128,7 +128,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle): self, youtube_id, video_overwrites=False, - video_type=VideoTypeEnum.VIDEO, + video_type=VideoTypeEnum.VIDEOS, ): super().__init__(youtube_id) self.channel_id = False @@ -406,7 +406,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle): def index_new_video( - youtube_id, video_overwrites=False, video_type=VideoTypeEnum.VIDEO + youtube_id, video_overwrites=False, video_type=VideoTypeEnum.VIDEOS ): """combined classes to create new video in index""" video = YoutubeVideo( diff --git a/tubearchivist/home/src/index/video_constants.py b/tubearchivist/home/src/index/video_constants.py index d5df23a6..9ccd0464 100644 --- a/tubearchivist/home/src/index/video_constants.py +++ b/tubearchivist/home/src/index/video_constants.py @@ -1,7 +1,12 @@ +"""video constants""" + import enum class VideoTypeEnum(enum.Enum): 
- VIDEO = "video" - LIVE = "live" - SHORT = "short" + """all vid_type fields""" + + VIDEOS = "videos" + STREAMS = "streams" + SHORTS = "shorts" + UNKNOWN = "unknown" diff --git a/tubearchivist/home/src/ta/helper.py b/tubearchivist/home/src/ta/helper.py index af17fc06..f07dc805 100644 --- a/tubearchivist/home/src/ta/helper.py +++ b/tubearchivist/home/src/ta/helper.py @@ -11,9 +11,8 @@ import string import subprocess import unicodedata from datetime import datetime -from urllib.parse import parse_qs, urlparse -from home.src.download.yt_dlp_base import YtWrap +import requests def clean_string(file_name): @@ -136,104 +135,14 @@ def get_mapping(): return index_config -class UrlListParser: - """take a multi line string and detect valid youtube ids""" +def is_shorts(youtube_id): + """check if youtube_id is a shorts video, bot not it it's not a shorts""" + shorts_url = f"https://www.youtube.com/shorts/{youtube_id}" + response = requests.head( + shorts_url, headers=requests_headers(), timeout=10 + ) - def __init__(self, url_str): - self.url_list = [i.strip() for i in url_str.split()] - - def process_list(self): - """loop through the list""" - youtube_ids = [] - for url in self.url_list: - parsed = urlparse(url) - print(f"processing: {url}") - print(parsed) - if not parsed.netloc: - # is not a url - id_type = self.find_valid_id(url) - youtube_id = url - elif "youtube.com" not in url and "youtu.be" not in url: - raise ValueError(f"{url} is not a youtube link") - elif parsed.path: - # is a url - youtube_id, id_type = self.detect_from_url(parsed) - else: - # not detected - raise ValueError(f"failed to detect {url}") - - youtube_ids.append({"url": youtube_id, "type": id_type}) - - return youtube_ids - - def detect_from_url(self, parsed): - """detect from parsed url""" - if parsed.netloc == "youtu.be": - # shortened - youtube_id = parsed.path.strip("/") - _ = self.find_valid_id(youtube_id) - return youtube_id, "video" - - if parsed.query: - # detect from query string - 
query_parsed = parse_qs(parsed.query) - if "v" in query_parsed: - youtube_id = query_parsed["v"][0] - _ = self.find_valid_id(youtube_id) - return youtube_id, "video" - - if "list" in query_parsed: - youtube_id = query_parsed["list"][0] - return youtube_id, "playlist" - - if parsed.path.startswith("/channel/"): - # channel id in url - youtube_id = parsed.path.split("/")[2] - _ = self.find_valid_id(youtube_id) - return youtube_id, "channel" - - # detect channel with yt_dlp - youtube_id = self.extract_channel_name(parsed.geturl()) - return youtube_id, "channel" - - @staticmethod - def find_valid_id(id_str): - """detect valid id from length of string""" - str_len = len(id_str) - if str_len == 11: - id_type = "video" - elif str_len == 24: - id_type = "channel" - elif str_len in [34, 18] or id_str in ["LL", "WL"]: - id_type = "playlist" - else: - # unable to parse - raise ValueError("not a valid id_str: " + id_str) - - return id_type - - @staticmethod - def extract_channel_name(url): - """find channel id from channel name with yt-dlp help""" - obs_request = { - "skip_download": True, - "extract_flat": True, - "playlistend": 0, - } - url_info = YtWrap(obs_request).extract(url) - channel_id = url_info.get("channel_id", False) - if channel_id: - return channel_id - - url = url_info.get("url", False) - if url: - # handle old channel name redirect with url path split - channel_id = urlparse(url).path.strip("/").split("/")[1] - - return channel_id - - print(f"failed to extract channel id from {url}") - raise ValueError + return response.status_code == 200 class DurationConverter: diff --git a/tubearchivist/home/src/ta/urlparser.py b/tubearchivist/home/src/ta/urlparser.py new file mode 100644 index 00000000..32c6030a --- /dev/null +++ b/tubearchivist/home/src/ta/urlparser.py @@ -0,0 +1,133 @@ +""" +Functionality: +- detect valid youtube ids and links from multi line string +- identify vid_type if possible +""" + +from urllib.parse import parse_qs, urlparse + +from 
home.src.download.yt_dlp_base import YtWrap +from home.src.index.video_constants import VideoTypeEnum + + +class Parser: + """take a multi line string and detect valid youtube ids""" + + def __init__(self, url_str): + self.url_list = [i.strip() for i in url_str.split()] + + def parse(self): + """parse the list""" + ids = [] + for url in self.url_list: + parsed = urlparse(url) + if parsed.netloc: + # is url + identified = self.process_url(parsed) + else: + # is not url + identified = self._find_valid_id(url) + + if "vid_type" not in identified: + identified.update(self._detect_vid_type(parsed.path)) + + ids.append(identified) + + return ids + + def process_url(self, parsed): + """process as url""" + if parsed.netloc == "youtu.be": + # shortened + youtube_id = parsed.path.strip("/") + return self._validate_expected(youtube_id, "video") + + query_parsed = parse_qs(parsed.query) + if "v" in query_parsed: + # video from v query str + youtube_id = query_parsed["v"][0] + return self._validate_expected(youtube_id, "video") + + if "list" in query_parsed: + # playlist from list query str + youtube_id = query_parsed["list"][0] + return self._validate_expected(youtube_id, "playlist") + + all_paths = parsed.path.strip("/").split("/") + if all_paths[0] == "shorts": + # is shorts video + item = self._validate_expected(all_paths[1], "video") + item.update({"vid_type": VideoTypeEnum.SHORTS.value}) + return item + + if all_paths[0] == "channel": + return self._validate_expected(all_paths[1], "channel") + + # detect channel + channel_id = self._extract_channel_name(parsed.geturl()) + return {"type": "channel", "url": channel_id} + + def _validate_expected(self, youtube_id, expected_type): + """raise value error if not matching""" + matched = self._find_valid_id(youtube_id) + if matched["type"] != expected_type: + raise ValueError( + f"{youtube_id} not of expected type {expected_type}" + ) + + return {"type": expected_type, "url": youtube_id} + + def _find_valid_id(self, id_str): + 
"""detect valid id from length of string""" + if id_str in ("LL", "WL"): + return {"type": "playlist", "url": id_str} + + if id_str.startswith("@"): + url = f"https://www.youtube.com/{id_str}" + channel_id = self._extract_channel_name(url) + return {"type": "channel", "url": channel_id} + + len_id_str = len(id_str) + if len_id_str == 11: + item_type = "video" + elif len_id_str == 24: + item_type = "channel" + elif len_id_str in (34, 18): + item_type = "playlist" + else: + raise ValueError(f"not a valid id_str: {id_str}") + + return {"type": item_type, "url": id_str} + + @staticmethod + def _extract_channel_name(url): + """find channel id from channel name with yt-dlp help""" + obs_request = { + "skip_download": True, + "extract_flat": True, + "playlistend": 0, + } + url_info = YtWrap(obs_request).extract(url) + channel_id = url_info.get("channel_id", False) + if channel_id: + return channel_id + + url = url_info.get("url", False) + if url: + # handle old channel name redirect with url path split + channel_id = urlparse(url).path.strip("/").split("/")[1] + + return channel_id + + print(f"failed to extract channel id from {url}") + raise ValueError + + def _detect_vid_type(self, path): + """try to match enum from path, needs to be serializable""" + last = path.strip("/").split("/")[-1] + try: + vid_type = VideoTypeEnum(last).value + except ValueError: + vid_type = VideoTypeEnum.UNKNOWN.value + + return {"vid_type": vid_type} diff --git a/tubearchivist/home/tasks.py b/tubearchivist/home/tasks.py index 034f805a..d5138368 100644 --- a/tubearchivist/home/tasks.py +++ b/tubearchivist/home/tasks.py @@ -6,6 +6,7 @@ Functionality: because tasks are initiated at application start """ +import json import os from celery import Celery, shared_task @@ -24,8 +25,9 @@ from home.src.index.filesystem import ImportFolderScanner, scan_filesystem from home.src.index.reindex import Reindex, ReindexManual, ReindexOutdated from home.src.index.video_constants import VideoTypeEnum from 
home.src.ta.config import AppConfig, ReleaseVersion, ScheduleBuilder -from home.src.ta.helper import UrlListParser, clear_dl_cache +from home.src.ta.helper import clear_dl_cache from home.src.ta.ta_redis import RedisArchivist, RedisQueue +from home.src.ta.urlparser import Parser CONFIG = AppConfig().config REDIS_HOST = os.environ.get("REDIS_HOST") @@ -67,7 +69,7 @@ def update_subscribed(): playlist_videos = [ { "type": "video", - "vid_type": VideoTypeEnum.VIDEO, + "vid_type": VideoTypeEnum.VIDEOS, "url": i, } for i in missing_from_playlists @@ -107,11 +109,15 @@ def download_pending(): @shared_task -def download_single(youtube_id): +def download_single(pending_video): """start download single video now""" queue = RedisQueue(queue_name="dl_queue") - queue.add_priority(youtube_id) - print("Added to queue with priority: " + youtube_id) + to_add = { + "youtube_id": pending_video["youtube_id"], + "vid_type": pending_video["vid_type"], + } + queue.add_priority(json.dumps(to_add)) + print(f"Added to queue with priority: {to_add}") # start queue if needed have_lock = False my_lock = RedisArchivist().get_lock("downloading") @@ -256,9 +262,8 @@ def re_sync_thumbs(): @shared_task def subscribe_to(url_str): """take a list of urls to subscribe to""" - to_subscribe_list = UrlListParser(url_str).process_list() - counter = 1 - for item in to_subscribe_list: + to_subscribe_list = Parser(url_str).parse() + for idx, item in enumerate(to_subscribe_list): to_sub_id = item["url"] if item["type"] == "playlist": PlaylistSubscription().process_url_str([item]) @@ -281,10 +286,9 @@ def subscribe_to(url_str): "status": key, "level": "info", "title": "Subscribing to Channels", - "message": f"Processing {counter} of {len(to_subscribe_list)}", + "message": f"Processing {idx + 1} of {len(to_subscribe_list)}", } RedisArchivist().set_message(key, message=message, expire=True) - counter = counter + 1 @shared_task diff --git a/tubearchivist/home/templates/home/channel_id.html 
b/tubearchivist/home/templates/home/channel_id.html index d23e54df..a4c273fc 100644 --- a/tubearchivist/home/templates/home/channel_id.html +++ b/tubearchivist/home/templates/home/channel_id.html @@ -8,9 +8,15 @@

Videos

-

Live

-

Shorts

-

Playlists

+ {% if has_streams %} +

Streams

+ {% endif %} + {% if has_shorts %} +

Shorts

+ {% endif %} + {% if has_playlists %} +

Playlists

+ {% endif %}

About

{% if has_pending %}

Downloads

diff --git a/tubearchivist/home/templates/home/channel_id_about.html b/tubearchivist/home/templates/home/channel_id_about.html index 2500e600..1688148d 100644 --- a/tubearchivist/home/templates/home/channel_id_about.html +++ b/tubearchivist/home/templates/home/channel_id_about.html @@ -8,9 +8,15 @@

Videos

-

Live

-

Shorts

-

Playlists

+ {% if has_streams %} +

Streams

+ {% endif %} + {% if has_shorts %} +

Shorts

+ {% endif %} + {% if has_playlists %} +

Playlists

+ {% endif %}

About

{% if has_pending %}

Downloads

diff --git a/tubearchivist/home/templates/home/channel_id_playlist.html b/tubearchivist/home/templates/home/channel_id_playlist.html index e43460ca..03b484ab 100644 --- a/tubearchivist/home/templates/home/channel_id_playlist.html +++ b/tubearchivist/home/templates/home/channel_id_playlist.html @@ -8,9 +8,15 @@

Videos

-

Live

-

Shorts

-

Playlists

+ {% if has_streams %} +

Streams

+ {% endif %} + {% if has_shorts %} +

Shorts

+ {% endif %} + {% if has_playlists %} +

Playlists

+ {% endif %}

About

{% if has_pending %}

Downloads

diff --git a/tubearchivist/home/templates/home/downloads.html b/tubearchivist/home/templates/home/downloads.html index c4e76349..4d5bbf5f 100644 --- a/tubearchivist/home/templates/home/downloads.html +++ b/tubearchivist/home/templates/home/downloads.html @@ -73,11 +73,14 @@
video_thumb - {% if show_ignored_only %} - ignored - {% else %} - queued - {% endif %} +
+ {% if show_ignored_only %} + ignored + {% else %} + queued + {% endif %} + {{ video.source.vid_type }} +
diff --git a/tubearchivist/home/urls.py b/tubearchivist/home/urls.py index 6cb0c5ad..8dc8c9d2 100644 --- a/tubearchivist/home/urls.py +++ b/tubearchivist/home/urls.py @@ -47,7 +47,7 @@ urlpatterns = [ name="channel_id", ), path( - "channel//live/", + "channel//streams/", login_required(ChannelIdLiveView.as_view()), name="channel_id_live", ), diff --git a/tubearchivist/home/views.py b/tubearchivist/home/views.py index e5636c57..4918aca4 100644 --- a/tubearchivist/home/views.py +++ b/tubearchivist/home/views.py @@ -38,8 +38,9 @@ from home.src.index.playlist import YoutubePlaylist from home.src.index.reindex import ReindexProgress from home.src.index.video_constants import VideoTypeEnum from home.src.ta.config import AppConfig, ReleaseVersion, ScheduleBuilder -from home.src.ta.helper import UrlListParser, time_parser +from home.src.ta.helper import time_parser from home.src.ta.ta_redis import RedisArchivist +from home.src.ta.urlparser import Parser from home.tasks import extrac_dl, index_channel_playlists, subscribe_to from rest_framework.authtoken.models import Token @@ -456,7 +457,7 @@ class DownloadView(ArchivistResultsView): url_str = request.POST.get("vid_url") print(url_str) try: - youtube_ids = UrlListParser(url_str).process_list() + youtube_ids = Parser(url_str).parse() except ValueError: # failed to process key = "message:add" @@ -488,6 +489,13 @@ class ChannelIdBaseView(ArchivistResultsView): return channel_info + def channel_pages(self, channel_id): + """get additional context for channel pages""" + self.channel_has_pending(channel_id) + self.channel_has_streams(channel_id) + self.channel_has_shorts(channel_id) + self.channel_has_playlist(channel_id) + def channel_has_pending(self, channel_id): """check if channel has pending videos in queue""" path = "ta_download/_search" @@ -501,11 +509,53 @@ class ChannelIdBaseView(ArchivistResultsView): ] } }, + "_source": False, } response, _ = ElasticWrap(path).get(data=data) self.context.update({"has_pending": 
bool(response["hits"]["hits"])}) + def channel_has_streams(self, channel_id): + """check if channel has streams videos""" + data = self.get_type_data("streams", channel_id) + response, _ = ElasticWrap("ta_video/_search").get(data=data) + + self.context.update({"has_streams": bool(response["hits"]["hits"])}) + + def channel_has_shorts(self, channel_id): + """check if channel has shorts videos""" + data = self.get_type_data("shorts", channel_id) + response, _ = ElasticWrap("ta_video/_search").get(data=data) + + self.context.update({"has_shorts": bool(response["hits"]["hits"])}) + + @staticmethod + def get_type_data(vid_type, channel): + """build data query for vid_type""" + return { + "size": 1, + "query": { + "bool": { + "must": [ + {"term": {"vid_type": {"value": vid_type}}}, + {"term": {"channel.channel_id": {"value": channel}}}, + ] + } + }, + "_source": False, + } + + def channel_has_playlist(self, channel_id): + """check if channel has any playlist indexed""" + path = "ta_playlist/_search" + data = { + "size": 1, + "query": {"term": {"playlist_channel_id": {"value": channel_id}}}, + "_source": False, + } + response, _ = ElasticWrap(path).get(data=data) + self.context.update({"has_playlists": bool(response["hits"]["hits"])}) + class ChannelIdView(ChannelIdBaseView): """resolves to /channel/<channel_id>/ @@ -514,7 +564,7 @@ class ChannelIdView(ChannelIdBaseView): view_origin = "home" es_search = "ta_video/_search" - video_types = [VideoTypeEnum.VIDEO] + video_types = [VideoTypeEnum.VIDEOS] def get(self, request, channel_id): """get request""" @@ -522,7 +572,7 @@ class ChannelIdView(ChannelIdBaseView): self._update_view_data(channel_id) self.find_results() self.match_progress() - self.channel_has_pending(channel_id) + self.channel_pages(channel_id) if self.context["results"]: channel_info = self.context["results"][0]["source"]["channel"] @@ -584,11 +634,11 @@ class ChannelIdView(ChannelIdBaseView): class ChannelIdLiveView(ChannelIdView): - """resolves to /channel/<channel_id>/live/ + 
"""resolves to /channel//streams/ display single channel page from channel_id """ - video_types = [VideoTypeEnum.LIVE] + video_types = [VideoTypeEnum.STREAMS] class ChannelIdShortsView(ChannelIdView): @@ -596,7 +646,7 @@ class ChannelIdShortsView(ChannelIdView): display single channel page from channel_id """ - video_types = [VideoTypeEnum.SHORT] + video_types = [VideoTypeEnum.SHORTS] class ChannelIdAboutView(ChannelIdBaseView): @@ -609,7 +659,7 @@ class ChannelIdAboutView(ChannelIdBaseView): def get(self, request, channel_id): """handle get request""" self.initiate_vars(request) - self.channel_has_pending(channel_id) + self.channel_pages(channel_id) response, _ = ElasticWrap(f"ta_channel/_doc/{channel_id}").get() channel_info = SearchProcess(response).process() @@ -657,7 +707,7 @@ class ChannelIdPlaylistView(ChannelIdBaseView): self.initiate_vars(request) self._update_view_data(channel_id) self.find_results() - self.channel_has_pending(channel_id) + self.channel_pages(channel_id) channel_info = self.get_channel_meta(channel_id) channel_name = channel_info["channel_name"] diff --git a/tubearchivist/requirements.txt b/tubearchivist/requirements.txt index a92e749e..f2e56a7c 100644 --- a/tubearchivist/requirements.txt +++ b/tubearchivist/requirements.txt @@ -1,13 +1,13 @@ beautifulsoup4==4.11.1 celery==5.2.7 -Django==4.1.4 +Django==4.1.5 django-auth-ldap==4.1.0 django-cors-headers==3.13.0 djangorestframework==3.14.0 -Pillow==9.3.0 +Pillow==9.4.0 redis==4.4.0 requests==2.28.1 ryd-client==0.0.6 uWSGI==2.0.21 -whitenoise==6.2.0 -yt_dlp==2022.11.11 +whitenoise==6.3.0 +yt_dlp==2023.1.2 diff --git a/tubearchivist/static/css/style.css b/tubearchivist/static/css/style.css index 4e747951..56ab4e35 100644 --- a/tubearchivist/static/css/style.css +++ b/tubearchivist/static/css/style.css @@ -462,7 +462,7 @@ video:-webkit-full-screen { overflow: hidden; } -.video-item:hover .video-thumb span { +.video-item:hover .video-tags { opacity: 1; } @@ -486,16 +486,20 @@ 
video:-webkit-full-screen { position: relative; } -.video-thumb span { +.video-tags { position: absolute; top: 5px; - left: 5px; - background-color: var(--accent-font-light); + left: 0; padding: 5px; opacity: 0; transition: 300ms ease-in-out; } +.video-tags span { + background-color: var(--accent-font-light); + padding: 5px; +} + .video-play img { width: 40px; filter: var(--img-filter); @@ -645,6 +649,7 @@ video:-webkit-full-screen { .info-box-item { display: flex; + flex-wrap: wrap; align-items: center; padding: 15px; background-color: var(--highlight-bg); diff --git a/tubearchivist/static/script.js b/tubearchivist/static/script.js index 56174c4e..1028f84a 100644 --- a/tubearchivist/static/script.js +++ b/tubearchivist/static/script.js @@ -193,8 +193,8 @@ function toIgnore(button) { function downloadNow(button) { let youtube_id = button.getAttribute('data-id'); - let payload = JSON.stringify({ dlnow: youtube_id }); - sendPost(payload); + let apiEndpoint = '/api/download/' + youtube_id + '/'; + apiRequest(apiEndpoint, 'POST', { status: 'priority' }); document.getElementById(youtube_id).remove(); setTimeout(function () { checkMessages();