From 98f5b66826ba980cd8e7bcbdc9593c75f834ff23 Mon Sep 17 00:00:00 2001 From: Chance Turner Date: Sun, 1 Jan 2023 21:11:06 -0600 Subject: [PATCH] Lives and shorts (#395) * Initial shot at adding in live and short videos * Fix for typo on config variable names * Remove was_live youtube flag * Adding startup script to support setting vid_type on ta_video for videos created prior to support for live and shorts * Linting updates * More linting fixes * Removing title variable from loop tuple unpacking --- tubearchivist/home/apps.py | 12 +++++ tubearchivist/home/config.json | 4 +- tubearchivist/home/src/download/queue.py | 31 ++++++----- .../home/src/download/subscriptions.py | 54 ++++++++++++++----- .../home/src/download/yt_dlp_handler.py | 35 ++++++++++-- tubearchivist/home/src/es/index_mapping.json | 6 +++ tubearchivist/home/src/frontend/forms.py | 6 +++ tubearchivist/home/src/index/video.py | 19 +++++-- .../home/src/index/video_constants.py | 7 +++ tubearchivist/home/tasks.py | 21 ++++++-- .../home/templates/home/channel_id.html | 2 + .../home/templates/home/channel_id_about.html | 2 + .../templates/home/channel_id_playlist.html | 2 + .../home/templates/home/settings.html | 10 ++++ tubearchivist/home/urls.py | 12 +++++ tubearchivist/home/views.py | 32 ++++++++++- 16 files changed, 214 insertions(+), 41 deletions(-) create mode 100644 tubearchivist/home/src/index/video_constants.py diff --git a/tubearchivist/home/apps.py b/tubearchivist/home/apps.py index 7a70d6eb..1356ac80 100644 --- a/tubearchivist/home/apps.py +++ b/tubearchivist/home/apps.py @@ -36,6 +36,7 @@ class StartupCheck: clear_dl_cache(self.config_handler.config) self.snapshot_check() self.ta_version_check() + self.es_set_vid_type() self.set_has_run() def get_has_run(self): @@ -126,6 +127,17 @@ class StartupCheck: """remove key if updated now""" ReleaseVersion().is_updated() + def es_set_vid_type(self): + """update path 0.3.0 to 0.3.1, set default vid_type to video""" + data = { + "query": { + "bool": 
{"must_not": [{"exists": {"field": "vid_type"}}]} + }, + "script": {"source": "ctx._source['vid_type'] = 'video'"}, + } + response, _ = ElasticWrap("ta_video/_update_by_query").post(data=data) + print(f"ta_video vid_type index update ran: {response}") + class HomeConfig(AppConfig): """call startup funcs""" diff --git a/tubearchivist/home/config.json b/tubearchivist/home/config.json index 19539f82..de467fb5 100644 --- a/tubearchivist/home/config.json +++ b/tubearchivist/home/config.json @@ -14,7 +14,9 @@ "subscriptions": { "auto_search": false, "auto_download": false, - "channel_size": 50 + "channel_size": 50, + "live_channel_size": 0, + "shorts_channel_size": 0 }, "downloads": { "limit_count": false, diff --git a/tubearchivist/home/src/download/queue.py b/tubearchivist/home/src/download/queue.py index bd35a8d5..a3338c9f 100644 --- a/tubearchivist/home/src/download/queue.py +++ b/tubearchivist/home/src/download/queue.py @@ -15,6 +15,7 @@ from home.src.download.thumbnails import ThumbManager from home.src.download.yt_dlp_base import YtWrap from home.src.es.connect import ElasticWrap, IndexPaginate from home.src.index.playlist import YoutubePlaylist +from home.src.index.video_constants import VideoTypeEnum from home.src.ta.config import AppConfig from home.src.ta.helper import DurationConverter from home.src.ta.ta_redis import RedisArchivist @@ -156,7 +157,8 @@ class PendingList(PendingIndex): def _process_entry(self, entry): """process single entry from url list""" if entry["type"] == "video": - self._add_video(entry["url"]) + vid_type = entry.get("vid_type", VideoTypeEnum.VIDEO) + self._add_video(entry["url"], vid_type) elif entry["type"] == "channel": self._parse_channel(entry["url"]) elif entry["type"] == "playlist": @@ -165,10 +167,10 @@ class PendingList(PendingIndex): else: raise ValueError(f"invalid url_type: {entry}") - def _add_video(self, url): + def _add_video(self, url, vid_type=VideoTypeEnum.VIDEO): """add video to list""" if url not in 
self.missing_videos and url not in self.to_skip: - self.missing_videos.append(url) + self.missing_videos.append((url, vid_type)) else: print(f"{url}: skipped adding already indexed video to download.") @@ -177,9 +179,8 @@ class PendingList(PendingIndex): video_results = ChannelSubscription().get_last_youtube_videos( url, limit=False ) - youtube_ids = [i[0] for i in video_results] - for video_id in youtube_ids: - self._add_video(video_id) + for video_id, _, vid_type in video_results: + self._add_video(video_id, vid_type) def _parse_playlist(self, url): """add all videos of playlist to list""" @@ -188,16 +189,18 @@ class PendingList(PendingIndex): video_results = playlist.json_data.get("playlist_entries") youtube_ids = [i["youtube_id"] for i in video_results] for video_id in youtube_ids: - self._add_video(video_id) + # FIXME: This will need to be adjusted to support Live/Shorts + # from playlists + self._add_video(video_id, VideoTypeEnum.VIDEO) def add_to_pending(self, status="pending"): """add missing videos to pending list""" self.get_channels() bulk_list = [] - for idx, youtube_id in enumerate(self.missing_videos): - print(f"{youtube_id}: add to download queue") - video_details = self.get_youtube_details(youtube_id) + for idx, (youtube_id, vid_type) in enumerate(self.missing_videos): + print(f"{youtube_id} ({vid_type}): add to download queue") + video_details = self.get_youtube_details(youtube_id, vid_type) if not video_details: continue @@ -235,7 +238,7 @@ class PendingList(PendingIndex): if idx + 1 % 25 == 0: print("adding to queue progress: " + progress) - def get_youtube_details(self, youtube_id): + def get_youtube_details(self, youtube_id, vid_type=VideoTypeEnum.VIDEO): """get details from youtubedl for single pending video""" vid = YtWrap(self.yt_obs, self.config).extract(youtube_id) if not vid: @@ -249,9 +252,9 @@ class PendingList(PendingIndex): if vid["live_status"] in ["is_upcoming", "is_live"]: return False - return self._parse_youtube_details(vid) + 
return self._parse_youtube_details(vid, vid_type) - def _parse_youtube_details(self, vid): + def _parse_youtube_details(self, vid, vid_type=VideoTypeEnum.VIDEO): """parse response""" vid_id = vid.get("id") duration_str = DurationConverter.get_str(vid["duration"]) @@ -271,6 +274,8 @@ class PendingList(PendingIndex): "duration": duration_str, "published": published, "timestamp": int(datetime.now().timestamp()), + # Pulling enum value out so it is serializable + "vid_type": vid_type.value, } if self.all_channels: youtube_details.update( diff --git a/tubearchivist/home/src/download/subscriptions.py b/tubearchivist/home/src/download/subscriptions.py index 95ff358a..df77bdec 100644 --- a/tubearchivist/home/src/download/subscriptions.py +++ b/tubearchivist/home/src/download/subscriptions.py @@ -10,6 +10,7 @@ from home.src.download.yt_dlp_base import YtWrap from home.src.es.connect import IndexPaginate from home.src.index.channel import YoutubeChannel from home.src.index.playlist import YoutubePlaylist +from home.src.index.video_constants import VideoTypeEnum from home.src.ta.config import AppConfig from home.src.ta.ta_redis import RedisArchivist @@ -37,19 +38,44 @@ class ChannelSubscription: def get_last_youtube_videos(self, channel_id, limit=True): """get a list of last videos from channel""" - obs = { - "skip_download": True, - "extract_flat": True, - } - if limit: - obs["playlistend"] = self.config["subscriptions"]["channel_size"] - url = f"https://www.youtube.com/channel/{channel_id}/videos" - channel = YtWrap(obs, self.config).extract(url) - if not channel: - return False + queries = [ + ( + VideoTypeEnum.VIDEO, + "videos", + self.config["subscriptions"]["channel_size"], + ), + ( + VideoTypeEnum.LIVE, + "streams", + self.config["subscriptions"]["live_channel_size"], + ), + ( + VideoTypeEnum.SHORT, + "shorts", + self.config["subscriptions"]["shorts_channel_size"], + ), + ] + + last_videos = [] + + for vid_type, url, limit_amount in queries: + obs = { + 
"skip_download": True, + "extract_flat": True, + } + if limit: + obs["playlistend"] = limit_amount + + channel = YtWrap(obs, self.config).extract( + f"https://www.youtube.com/channel/{channel_id}/{url}" + ) + if not channel: + continue + last_videos.extend( + [(i["id"], i["title"], vid_type) for i in channel["entries"]] + ) - last_videos = [(i["id"], i["title"]) for i in channel["entries"]] return last_videos def find_missing(self): @@ -67,9 +93,9 @@ class ChannelSubscription: last_videos = self.get_last_youtube_videos(channel_id) if last_videos: - for video in last_videos: - if video[0] not in pending.to_skip: - missing_videos.append(video[0]) + for video_id, _, vid_type in last_videos: + if video_id not in pending.to_skip: + missing_videos.append((video_id, vid_type)) # notify message = { "status": "message:rescan", diff --git a/tubearchivist/home/src/download/yt_dlp_handler.py b/tubearchivist/home/src/download/yt_dlp_handler.py index c25b161d..636f3fa5 100644 --- a/tubearchivist/home/src/download/yt_dlp_handler.py +++ b/tubearchivist/home/src/download/yt_dlp_handler.py @@ -6,6 +6,7 @@ functionality: - move to archive """ +import json import os import shutil from datetime import datetime @@ -18,6 +19,7 @@ from home.src.index.channel import YoutubeChannel from home.src.index.comments import CommentList from home.src.index.playlist import YoutubePlaylist from home.src.index.video import YoutubeVideo, index_new_video +from home.src.index.video_constants import VideoTypeEnum from home.src.ta.config import AppConfig from home.src.ta.helper import clean_string, ignore_filelist from home.src.ta.ta_redis import RedisArchivist, RedisQueue @@ -174,10 +176,23 @@ class VideoDownloader: queue.trim(limit_queue - 1) while True: - youtube_id = queue.get_next() - if not youtube_id: + youtube_data = queue.get_next() + if not youtube_data: break + try: + youtube_data = json.loads(youtube_data) + except json.JSONDecodeError: # This may not be necessary + continue + + youtube_id =
youtube_data.get("youtube_id") + + tmp_vid_type = youtube_data.get( + "vid_type", VideoTypeEnum.VIDEO.value + ) + video_type = VideoTypeEnum(tmp_vid_type) + print(f"Downloading type: {video_type}") + success = self._dl_single_vid(youtube_id) if not success: continue @@ -191,7 +206,9 @@ class VideoDownloader: RedisArchivist().set_message(self.MSG, mess_dict, expire=60) vid_dict = index_new_video( - youtube_id, video_overwrites=self.video_overwrites + youtube_id, + video_overwrites=self.video_overwrites, + video_type=video_type, ) self.channels.add(vid_dict["channel"]["channel_id"]) self.videos.add(vid_dict["youtube_id"]) @@ -245,7 +262,17 @@ class VideoDownloader: RedisArchivist().set_message(self.MSG, mess_dict, expire=True) pending = PendingList() pending.get_download() - to_add = [i["youtube_id"] for i in pending.all_pending] + to_add = [ + json.dumps( + { + "youtube_id": i["youtube_id"], + # Using .value in default val to match what would be + # decoded when parsing json if not set + "vid_type": i.get("vid_type", VideoTypeEnum.VIDEO.value), + } + ) + for i in pending.all_pending + ] if not to_add: # there is nothing pending print("download queue is empty") diff --git a/tubearchivist/home/src/es/index_mapping.json b/tubearchivist/home/src/es/index_mapping.json index e3af1d8f..b47ea449 100644 --- a/tubearchivist/home/src/es/index_mapping.json +++ b/tubearchivist/home/src/es/index_mapping.json @@ -178,6 +178,9 @@ "youtube_id": { "type": "keyword" }, + "vid_type": { + "type": "keyword" + }, "published": { "type": "date" }, @@ -324,6 +327,9 @@ }, "youtube_id": { "type": "keyword" + }, + "vid_type": { + "type": "keyword" } }, "expected_set": { diff --git a/tubearchivist/home/src/frontend/forms.py b/tubearchivist/home/src/frontend/forms.py index ba376266..85c132a6 100644 --- a/tubearchivist/home/src/frontend/forms.py +++ b/tubearchivist/home/src/frontend/forms.py @@ -101,6 +101,12 @@ class ApplicationSettingsForm(forms.Form): subscriptions_channel_size = 
forms.IntegerField( required=False, min_value=1 ) + subscriptions_live_channel_size = forms.IntegerField( + required=False, min_value=0 + ) + subscriptions_shorts_channel_size = forms.IntegerField( + required=False, min_value=0 + ) downloads_limit_count = forms.IntegerField(required=False) downloads_limit_speed = forms.IntegerField(required=False) downloads_throttledratelimit = forms.IntegerField(required=False) diff --git a/tubearchivist/home/src/index/video.py b/tubearchivist/home/src/index/video.py index 1e3cfb84..92cbfdc5 100644 --- a/tubearchivist/home/src/index/video.py +++ b/tubearchivist/home/src/index/video.py @@ -15,6 +15,7 @@ from home.src.index import comments as ta_comments from home.src.index import playlist as ta_playlist from home.src.index.generic import YouTubeItem from home.src.index.subtitle import YoutubeSubtitle +from home.src.index.video_constants import VideoTypeEnum from home.src.ta.helper import DurationConverter, clean_string, randomizor from home.src.ta.ta_redis import RedisArchivist from ryd_client import ryd_client @@ -123,10 +124,16 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle): index_name = "ta_video" yt_base = "https://www.youtube.com/watch?v=" - def __init__(self, youtube_id, video_overwrites=False): + def __init__( + self, + youtube_id, + video_overwrites=False, + video_type=VideoTypeEnum.VIDEO, + ): super().__init__(youtube_id) self.channel_id = False self.video_overwrites = video_overwrites + self.video_type = video_type self.es_path = f"{self.index_name}/_doc/{youtube_id}" self.offline_import = False @@ -189,6 +196,8 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle): "vid_last_refresh": last_refresh, "date_downloaded": last_refresh, "youtube_id": self.youtube_id, + # Using .value to make json encodable + "vid_type": self.video_type.value, "active": True, } @@ -396,9 +405,13 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle): _, _ = ElasticWrap(path).post(data=data) -def index_new_video(youtube_id, 
video_overwrites=False): +def index_new_video( + youtube_id, video_overwrites=False, video_type=VideoTypeEnum.VIDEO +): """combined classes to create new video in index""" - video = YoutubeVideo(youtube_id, video_overwrites=video_overwrites) + video = YoutubeVideo( + youtube_id, video_overwrites=video_overwrites, video_type=video_type + ) video.build_json() if not video.json_data: raise ValueError("failed to get metadata for " + youtube_id) diff --git a/tubearchivist/home/src/index/video_constants.py b/tubearchivist/home/src/index/video_constants.py new file mode 100644 index 00000000..d5df23a6 --- /dev/null +++ b/tubearchivist/home/src/index/video_constants.py @@ -0,0 +1,7 @@ +import enum + + +class VideoTypeEnum(enum.Enum): + VIDEO = "video" + LIVE = "live" + SHORT = "short" diff --git a/tubearchivist/home/tasks.py b/tubearchivist/home/tasks.py index f6383503..034f805a 100644 --- a/tubearchivist/home/tasks.py +++ b/tubearchivist/home/tasks.py @@ -22,6 +22,7 @@ from home.src.es.index_setup import ElasitIndexWrap from home.src.index.channel import YoutubeChannel from home.src.index.filesystem import ImportFolderScanner, scan_filesystem from home.src.index.reindex import Reindex, ReindexManual, ReindexOutdated +from home.src.index.video_constants import VideoTypeEnum from home.src.ta.config import AppConfig, ReleaseVersion, ScheduleBuilder from home.src.ta.helper import UrlListParser, clear_dl_cache from home.src.ta.ta_redis import RedisArchivist, RedisQueue @@ -58,10 +59,22 @@ def update_subscribed(): missing_from_channels = channel_handler.find_missing() playlist_handler = PlaylistSubscription() missing_from_playlists = playlist_handler.find_missing() - missing = missing_from_channels + missing_from_playlists - if missing: - youtube_ids = [{"type": "video", "url": i} for i in missing] - pending_handler = PendingList(youtube_ids=youtube_ids) + if missing_from_channels or missing_from_playlists: + channel_videos = [ + {"type": "video", "vid_type": vid_type, "url": 
vid_id} + for vid_id, vid_type in missing_from_channels + ] + playlist_videos = [ + { + "type": "video", + "vid_type": VideoTypeEnum.VIDEO, + "url": i, + } + for i in missing_from_playlists + ] + pending_handler = PendingList( + youtube_ids=channel_videos + playlist_videos + ) pending_handler.parse_url_list() pending_handler.add_to_pending() diff --git a/tubearchivist/home/templates/home/channel_id.html b/tubearchivist/home/templates/home/channel_id.html index a9fd843e..d23e54df 100644 --- a/tubearchivist/home/templates/home/channel_id.html +++ b/tubearchivist/home/templates/home/channel_id.html @@ -8,6 +8,8 @@

Videos

+

Live

+

Shorts

Playlists

About

{% if has_pending %} diff --git a/tubearchivist/home/templates/home/channel_id_about.html b/tubearchivist/home/templates/home/channel_id_about.html index 9f227b0a..2500e600 100644 --- a/tubearchivist/home/templates/home/channel_id_about.html +++ b/tubearchivist/home/templates/home/channel_id_about.html @@ -8,6 +8,8 @@

Videos

+

Live

+

Shorts

Playlists

About

{% if has_pending %} diff --git a/tubearchivist/home/templates/home/channel_id_playlist.html b/tubearchivist/home/templates/home/channel_id_playlist.html index 9fd51b58..e43460ca 100644 --- a/tubearchivist/home/templates/home/channel_id_playlist.html +++ b/tubearchivist/home/templates/home/channel_id_playlist.html @@ -8,6 +8,8 @@

Videos

+

Live

+

Shorts

Playlists

About

{% if has_pending %} diff --git a/tubearchivist/home/templates/home/settings.html b/tubearchivist/home/templates/home/settings.html index 95e2d6ca..02607493 100644 --- a/tubearchivist/home/templates/home/settings.html +++ b/tubearchivist/home/templates/home/settings.html @@ -38,6 +38,16 @@ Videos to scan to find new items for the Rescan subscriptions task, max recommended 50.
{{ app_form.subscriptions_channel_size }}
+
+

YouTube Live page size: {{ config.subscriptions.live_channel_size }}

+ Live Videos to scan to find new items for the Rescan subscriptions task, max recommended 50, 0 to disable.
+ {{ app_form.subscriptions_live_channel_size }} +
+
+

YouTube Shorts page size: {{ config.subscriptions.shorts_channel_size }}

+ Shorts Videos to scan to find new items for the Rescan subscriptions task, max recommended 50, 0 to disable.
+ {{ app_form.subscriptions_shorts_channel_size }} +

Downloads

diff --git a/tubearchivist/home/urls.py b/tubearchivist/home/urls.py index 521ed3b2..6cb0c5ad 100644 --- a/tubearchivist/home/urls.py +++ b/tubearchivist/home/urls.py @@ -7,7 +7,9 @@ from django.urls import path from home.views import ( AboutView, ChannelIdAboutView, + ChannelIdLiveView, ChannelIdPlaylistView, + ChannelIdShortsView, ChannelIdView, ChannelView, DownloadView, @@ -44,6 +46,16 @@ urlpatterns = [ login_required(ChannelIdView.as_view()), name="channel_id", ), + path( + "channel//live/", + login_required(ChannelIdLiveView.as_view()), + name="channel_id_live", + ), + path( + "channel//shorts/", + login_required(ChannelIdShortsView.as_view()), + name="channel_id_shorts", + ), path( "channel//about/", login_required(ChannelIdAboutView.as_view()), diff --git a/tubearchivist/home/views.py b/tubearchivist/home/views.py index bdf9b27d..e5636c57 100644 --- a/tubearchivist/home/views.py +++ b/tubearchivist/home/views.py @@ -3,7 +3,7 @@ Functionality: - all views for home app - holds base classes to inherit from """ - +import enum import json import urllib.parse from time import sleep @@ -36,6 +36,7 @@ from home.src.index.channel import YoutubeChannel, channel_overwrites from home.src.index.generic import Pagination from home.src.index.playlist import YoutubePlaylist from home.src.index.reindex import ReindexProgress +from home.src.index.video_constants import VideoTypeEnum from home.src.ta.config import AppConfig, ReleaseVersion, ScheduleBuilder from home.src.ta.helper import UrlListParser, time_parser from home.src.ta.ta_redis import RedisArchivist @@ -513,6 +514,7 @@ class ChannelIdView(ChannelIdBaseView): view_origin = "home" es_search = "ta_video/_search" + video_types = [VideoTypeEnum.VIDEO] def get(self, request, channel_id): """get request""" @@ -542,10 +544,20 @@ class ChannelIdView(ChannelIdBaseView): def _update_view_data(self, channel_id): """update view specific data dict""" + vid_type_terms = [] + for t in self.video_types: + if t and isinstance(t, 
enum.Enum): + vid_type_terms.append(t.value) + else: + print( + "Invalid value passed into video_types on " + + f"ChannelIdView: {t}" + ) self.data["query"] = { "bool": { "must": [ - {"term": {"channel.channel_id": {"value": channel_id}}} + {"term": {"channel.channel_id": {"value": channel_id}}}, + {"terms": {"vid_type": vid_type_terms}}, ] } } @@ -571,6 +583,22 @@ class ChannelIdView(ChannelIdBaseView): return redirect("channel_id", channel_id, permanent=True) +class ChannelIdLiveView(ChannelIdView): + """resolves to /channel//live/ + display single channel page from channel_id + """ + + video_types = [VideoTypeEnum.LIVE] + + +class ChannelIdShortsView(ChannelIdView): + """resolves to /channel//shorts/ + display single channel page from channel_id + """ + + video_types = [VideoTypeEnum.SHORT] + + class ChannelIdAboutView(ChannelIdBaseView): """resolves to /channel//about/ show metadata, handle per channel conf