From ddc46858118fc41e7402a77840f341a5d348aedd Mon Sep 17 00:00:00 2001 From: Simon Date: Sat, 11 May 2024 19:16:35 +0200 Subject: [PATCH] decouple playlist video id matching to individual --- .../home/src/download/subscriptions.py | 5 --- .../home/src/download/yt_dlp_handler.py | 2 - tubearchivist/home/src/index/channel.py | 17 +-------- tubearchivist/home/src/index/playlist.py | 37 ++++++++++--------- tubearchivist/home/src/index/reindex.py | 14 ------- 5 files changed, 22 insertions(+), 53 deletions(-) diff --git a/tubearchivist/home/src/download/subscriptions.py b/tubearchivist/home/src/download/subscriptions.py index c6d85a9c..06c4ad0e 100644 --- a/tubearchivist/home/src/download/subscriptions.py +++ b/tubearchivist/home/src/download/subscriptions.py @@ -174,10 +174,6 @@ class PlaylistSubscription: def process_url_str(self, new_playlists, subscribed=True): """process playlist subscribe form url_str""" - data = {"query": {"match_all": {}}, "_source": ["youtube_id"]} - all_indexed = IndexPaginate("ta_video", data).get_results() - all_youtube_ids = [i["youtube_id"] for i in all_indexed] - for idx, playlist in enumerate(new_playlists): playlist_id = playlist["url"] if not playlist["type"] == "playlist": @@ -185,7 +181,6 @@ class PlaylistSubscription: continue playlist_h = YoutubePlaylist(playlist_id) - playlist_h.all_youtube_ids = all_youtube_ids playlist_h.build_json() if not playlist_h.json_data: message = f"{playlist_h.youtube_id}: failed to extract data" diff --git a/tubearchivist/home/src/download/yt_dlp_handler.py b/tubearchivist/home/src/download/yt_dlp_handler.py index 91308003..72591c73 100644 --- a/tubearchivist/home/src/download/yt_dlp_handler.py +++ b/tubearchivist/home/src/download/yt_dlp_handler.py @@ -109,10 +109,8 @@ class DownloadPostProcess: def _validate_channel_playlist(self, all_channel_playlist, id_c): """scan channel for playlist needing update""" - all_youtube_ids = [i["youtube_id"] for i in self.pending.all_videos] for id_p, playlist_id in enumerate(all_channel_playlist): playlist = YoutubePlaylist(playlist_id) - playlist.all_youtube_ids = all_youtube_ids playlist.build_json(scrape=True) if not playlist.json_data: playlist.deactivate() diff --git a/tubearchivist/home/src/index/channel.py b/tubearchivist/home/src/index/channel.py index fe7b205a..d2ac197c 100644 --- a/tubearchivist/home/src/index/channel.py +++ b/tubearchivist/home/src/index/channel.py @@ -8,7 +8,6 @@ import json import os from datetime import datetime -from home.src.download import queue # partial import from home.src.download.thumbnails import ThumbManager from home.src.download.yt_dlp_base import YtWrap from home.src.es.connect import ElasticWrap, IndexPaginate @@ -267,13 +266,12 @@ class YoutubeChannel(YouTubeItem): print(f"{self.youtube_id}: no playlists found.") return - all_youtube_ids = self.get_all_video_ids() total = len(self.all_playlists) for idx, playlist in enumerate(self.all_playlists): if self.task: self._notify_single_playlist(idx, total) - self._index_single_playlist(playlist, all_youtube_ids) + self._index_single_playlist(playlist) print("add playlist: " + playlist[1]) def _notify_single_playlist(self, idx, total): @@ -286,10 +284,9 @@ class YoutubeChannel(YouTubeItem): self.task.send_progress(message, progress=(idx + 1) / total) @staticmethod - def _index_single_playlist(playlist, all_youtube_ids): + def _index_single_playlist(playlist): """add single playlist if needed""" playlist = YoutubePlaylist(playlist[0]) - playlist.all_youtube_ids = all_youtube_ids playlist.build_json() if not playlist.json_data: return @@ -303,16 +300,6 @@ class YoutubeChannel(YouTubeItem): playlist.add_vids_to_playlist() playlist.get_playlist_art() - @staticmethod - def get_all_video_ids(): - """match all playlists with videos""" - handler = queue.PendingList() - handler.get_download() - handler.get_indexed() - all_youtube_ids = [i["youtube_id"] for i in handler.all_videos] - - return all_youtube_ids - def get_channel_videos(self): """get all videos from channel""" data = { diff --git a/tubearchivist/home/src/index/playlist.py b/tubearchivist/home/src/index/playlist.py index 196a8844..0af5274f 100644 --- a/tubearchivist/home/src/index/playlist.py +++ b/tubearchivist/home/src/index/playlist.py @@ -8,7 +8,7 @@ import json from datetime import datetime from home.src.download.thumbnails import ThumbManager -from home.src.es.connect import ElasticWrap +from home.src.es.connect import ElasticWrap, IndexPaginate from home.src.index.generic import YouTubeItem from home.src.index.video import YoutubeVideo @@ -28,7 +28,6 @@ class YoutubePlaylist(YouTubeItem): super().__init__(youtube_id) self.all_members = False self.nav = False - self.all_youtube_ids = [] def build_json(self, scrape=False): """collection to create json_data""" @@ -45,7 +44,8 @@ class YoutubePlaylist(YouTubeItem): return self.process_youtube_meta() - self.get_entries() + ids_found = self.get_local_vids() + self.get_entries(ids_found) self.json_data["playlist_entries"] = self.all_members self.json_data["playlist_subscribed"] = subscribed @@ -69,25 +69,31 @@ class YoutubePlaylist(YouTubeItem): "playlist_type": "regular", } - def get_entries(self, playlistend=False): - """get all videos in playlist""" - if playlistend: - # implement playlist end - print(playlistend) + def get_local_vids(self) -> list[str]: + """get local video ids from youtube entries""" + entries = self.youtube_meta["entries"] + data = { + "query": {"terms": {"youtube_id": [i["id"] for i in entries]}}, + "_source": ["youtube_id"], + } + indexed_vids = IndexPaginate("ta_video", data).get_results() + ids_found = [i["youtube_id"] for i in indexed_vids] + + return ids_found + + def get_entries(self, ids_found) -> None: + """get all videos in playlist, match downloaded with ids_found""" all_members = [] for idx, entry in enumerate(self.youtube_meta["entries"]): - if self.all_youtube_ids: - downloaded = entry["id"] in self.all_youtube_ids - else: - downloaded = False if not entry["channel"]: continue + to_append = { "youtube_id": entry["id"], "title": entry["title"], "uploader": entry["channel"], "idx": idx, - "downloaded": downloaded, + "downloaded": entry["id"] in ids_found, } all_members.append(to_append) @@ -130,14 +136,11 @@ class YoutubePlaylist(YouTubeItem): def update_playlist(self): """update metadata for playlist with data from YouTube""" - self.get_from_es() - subscribed = self.json_data["playlist_subscribed"] - self.get_from_youtube() + self.build_json(scrape=True) if not self.json_data: # return false to deactivate return False - self.json_data["playlist_subscribed"] = subscribed self.upload_to_es() return True diff --git a/tubearchivist/home/src/index/reindex.py b/tubearchivist/home/src/index/reindex.py index ca2bf37a..20098a2d 100644 --- a/tubearchivist/home/src/index/reindex.py +++ b/tubearchivist/home/src/index/reindex.py @@ -10,7 +10,6 @@ from datetime import datetime from time import sleep from home.models import CustomPeriodicTask -from home.src.download.queue import PendingList from home.src.download.subscriptions import ChannelSubscription from home.src.download.thumbnails import ThumbManager from home.src.download.yt_dlp_base import CookieHandler @@ -243,7 +242,6 @@ class Reindex(ReindexBase): def __init__(self, task=False): super().__init__() self.task = task - self.all_indexed_ids = False self.processed = { "videos": 0, "channels": 0, @@ -374,7 +372,6 @@ class Reindex(ReindexBase): def _reindex_single_playlist(self, playlist_id): """refresh playlist data""" - self._get_all_videos() playlist = YoutubePlaylist(playlist_id) playlist.get_from_es() if ( @@ -384,7 +381,6 @@ class Reindex(ReindexBase): return subscribed = playlist.json_data["playlist_subscribed"] - playlist.all_youtube_ids = self.all_indexed_ids playlist.build_json(scrape=True) if not playlist.json_data: playlist.deactivate() @@ -395,16 +391,6 @@ class Reindex(ReindexBase): self.processed["playlists"] += 1 return - def _get_all_videos(self): - """add all videos for playlist index validation""" - if self.all_indexed_ids: - return - - handler = PendingList() - handler.get_download() - handler.get_indexed() - self.all_indexed_ids = [i["youtube_id"] for i in handler.all_videos] - def cookie_is_valid(self): """return true if cookie is enabled and valid""" if not self.config["downloads"]["cookie_import"]: