From 646bc1b12e4f799df09cfd89863fc979e80a197b Mon Sep 17 00:00:00 2001 From: simon Date: Sat, 7 Jan 2023 09:45:01 +0700 Subject: [PATCH] filter by vid_type for add to download queue --- tubearchivist/home/src/download/queue.py | 12 ++-- .../home/src/download/subscriptions.py | 58 ++++++++++++------- 2 files changed, 43 insertions(+), 27 deletions(-) diff --git a/tubearchivist/home/src/download/queue.py b/tubearchivist/home/src/download/queue.py index c3571623..dd45fad2 100644 --- a/tubearchivist/home/src/download/queue.py +++ b/tubearchivist/home/src/download/queue.py @@ -162,11 +162,11 @@ class PendingList(PendingIndex): def _process_entry(self, entry): """process single entry from url list""" + vid_type = self._get_vid_type(entry) if entry["type"] == "video": - vid_type = self._get_vid_type(entry) self._add_video(entry["url"], vid_type) elif entry["type"] == "channel": - self._parse_channel(entry["url"]) + self._parse_channel(entry["url"], vid_type) elif entry["type"] == "playlist": self._parse_playlist(entry["url"]) PlaylistSubscription().process_url_str([entry], subscribed=False) @@ -178,21 +178,21 @@ class PendingList(PendingIndex): """add vid type enum if available""" vid_type_str = entry.get("vid_type") if not vid_type_str: - return VideoTypeEnum.VIDEOS + return VideoTypeEnum.UNKNOWN return VideoTypeEnum(vid_type_str) - def _add_video(self, url, vid_type=VideoTypeEnum.VIDEOS): + def _add_video(self, url, vid_type): """add video to list""" if url not in self.missing_videos and url not in self.to_skip: self.missing_videos.append((url, vid_type)) else: print(f"{url}: skipped adding already indexed video to download.") - def _parse_channel(self, url): + def _parse_channel(self, url, vid_type): """add all videos of channel to list""" video_results = ChannelSubscription().get_last_youtube_videos( - url, limit=False + url, limit=False, query_filter=vid_type ) for video_id, _, vid_type in video_results: self._add_video(video_id, vid_type) diff --git a/tubearchivist/home/src/download/subscriptions.py b/tubearchivist/home/src/download/subscriptions.py index afc1490f..f154b490 100644 --- a/tubearchivist/home/src/download/subscriptions.py +++ b/tubearchivist/home/src/download/subscriptions.py @@ -36,30 +36,15 @@ class ChannelSubscription: return all_channels - def get_last_youtube_videos(self, channel_id, limit=True): + def get_last_youtube_videos( + self, channel_id, limit=True, query_filter=VideoTypeEnum.UNKNOWN + ): """get a list of last videos from channel""" - - queries = [ - ( - VideoTypeEnum.VIDEOS, - VideoTypeEnum.VIDEOS.value, - self.config["subscriptions"]["channel_size"], - ), - ( - VideoTypeEnum.STREAMS, - VideoTypeEnum.STREAMS.value, - self.config["subscriptions"]["live_channel_size"], - ), - ( - VideoTypeEnum.SHORTS, - VideoTypeEnum.SHORTS.value, - self.config["subscriptions"]["shorts_channel_size"], - ), - ] + queries = self._build_queries(query_filter, limit) last_videos = [] - for vid_type, url, limit_amount in queries: + for vid_type, limit_amount in queries: obs = { "skip_download": True, "extract_flat": True, @@ -67,8 +52,9 @@ class ChannelSubscription: if limit: obs["playlistend"] = limit_amount + path = vid_type.value channel = YtWrap(obs, self.config).extract( - f"https://www.youtube.com/channel/{channel_id}/{url}" + f"https://www.youtube.com/channel/{channel_id}/{path}" ) if not channel: continue @@ -78,6 +64,36 @@ class ChannelSubscription: return last_videos + def _build_queries(self, query_filter, limit): + """build query list for vid_type""" + limit_map = { + "videos": self.config["subscriptions"]["channel_size"], + "streams": self.config["subscriptions"]["live_channel_size"], + "shorts": self.config["subscriptions"]["shorts_channel_size"], + } + + queries = [] + + if query_filter and query_filter.value != "unknown": + if limit: + query_limit = limit_map.get(query_filter.value) + else: + query_limit = False + + queries.append((query_filter, query_limit)) + + return queries + + for query_item, default_limit in limit_map.items(): + if limit: + query_limit = default_limit + else: + query_limit = False + + queries.append((VideoTypeEnum(query_item), query_limit)) + + return queries + def find_missing(self): """add missing videos from subscribed channels to pending""" all_channels = self.get_channels()