diff --git a/tubearchivist/home/src/index/reindex.py b/tubearchivist/home/src/index/reindex.py index 1bf9cb61..6c7b32d8 100644 --- a/tubearchivist/home/src/index/reindex.py +++ b/tubearchivist/home/src/index/reindex.py @@ -13,7 +13,7 @@ from home.src.download.queue import PendingList from home.src.download.thumbnails import ThumbManager from home.src.download.yt_dlp_base import CookieHandler from home.src.download.yt_dlp_handler import VideoDownloader -from home.src.es.connect import ElasticWrap +from home.src.es.connect import ElasticWrap, IndexPaginate from home.src.index.channel import YoutubeChannel from home.src.index.comments import Comments from home.src.index.playlist import YoutubePlaylist @@ -28,18 +28,21 @@ class ReindexBase: REINDEX_CONFIG = [ { "index_name": "ta_video", + "index_type": "videos", "queue_name": "reindex:ta_video", "active_key": "active", "refresh_key": "vid_last_refresh", }, { "index_name": "ta_channel", + "index_type": "channels", "queue_name": "reindex:ta_channel", "active_key": "channel_active", "refresh_key": "channel_last_refresh", }, { "index_name": "ta_playlist", + "index_type": "playlists", "queue_name": "reindex:ta_playlist", "active_key": "playlist_active", "refresh_key": "playlist_last_refresh", @@ -115,6 +118,93 @@ class ReindexOutdated(ReindexBase): return all_ids +class ReindexManual(ReindexBase): + """ + manually add ids to reindex queue from API + data_example = { + "videos": ["video1", "video2", "video3"], + "channels": ["channel1", "channel2", "channel3"], + "playlists": ["playlist1", "playlist2"], + } + extract_videos to also reindex all videos of channel/playlist + """ + + def __init__(self, extract_videos=False): + super().__init__() + self.extract_videos = extract_videos + self.data = False + + def extract_data(self, data): + """process data""" + self.data = data + for key, values in self.data.items(): + reindex_config = self._get_reindex_config(key) + self.process_index(reindex_config, values) + + def _get_reindex_config(self, index_type): + """get reindex config for index""" + + for reindex_config in self.REINDEX_CONFIG: + if reindex_config["index_type"] == index_type: + return reindex_config + + print(f"reindex type {index_type} not valid") + raise ValueError + + def process_index(self, index_config, values): + """process values per index""" + index_name = index_config["index_name"] + if index_name == "ta_video": + self._add_videos(values) + elif index_name == "ta_channel": + self._add_channels(values) + elif index_name == "ta_playlist": + self._add_playlists(values) + + def _add_videos(self, values): + """add list of videos to reindex queue""" + if not values: + return + + RedisQueue("reindex:ta_video").add_list(values) + + def _add_channels(self, values): + """add list of channels to reindex queue""" + RedisQueue("reindex:ta_channel").add_list(values) + + if self.extract_videos: + for channel_id in values: + all_videos = self._get_channel_videos(channel_id) + self._add_videos(all_videos) + + def _add_playlists(self, values): + """add list of playlists to reindex queue""" + RedisQueue("reindex:ta_playlist").add_list(values) + + if self.extract_videos: + for playlist_id in values: + all_videos = self._get_playlist_videos(playlist_id) + self._add_videos(all_videos) + + def _get_channel_videos(self, channel_id): + """get all videos from channel""" + data = { + "query": {"term": {"channel.channel_id": {"value": channel_id}}}, + "_source": ["youtube_id"], + } + all_results = IndexPaginate("ta_video", data).get_results() + return [i["youtube_id"] for i in all_results] + + def _get_playlist_videos(self, playlist_id): + """get all videos from playlist""" + data = { + "query": {"term": {"playlist.keyword": {"value": playlist_id}}}, + "_source": ["youtube_id"], + } + all_results = IndexPaginate("ta_video", data).get_results() + return [i["youtube_id"] for i in all_results] + + class Reindex(ReindexBase): """reindex all documents from redis queue"""