mirror of
https://github.com/tubearchivist/tubearchivist
synced 2024-11-02 09:41:07 +00:00
refactor index_channel_playlists task, move to YoutubeChannel class
This commit is contained in:
parent
c9399f61d0
commit
91a9477bba
@ -12,11 +12,13 @@ from datetime import datetime
|
|||||||
import requests
|
import requests
|
||||||
import yt_dlp
|
import yt_dlp
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
from home.src.download import queue # partial import
|
||||||
from home.src.download.thumbnails import ThumbManager
|
from home.src.download.thumbnails import ThumbManager
|
||||||
from home.src.es.connect import ElasticWrap, IndexPaginate
|
from home.src.es.connect import ElasticWrap, IndexPaginate
|
||||||
from home.src.index.generic import YouTubeItem
|
from home.src.index.generic import YouTubeItem
|
||||||
from home.src.index.playlist import YoutubePlaylist
|
from home.src.index.playlist import YoutubePlaylist
|
||||||
from home.src.ta.helper import clean_string
|
from home.src.ta.helper import clean_string
|
||||||
|
from home.src.ta.ta_redis import RedisArchivist
|
||||||
|
|
||||||
|
|
||||||
class ChannelScraper:
|
class ChannelScraper:
|
||||||
@ -153,6 +155,7 @@ class YoutubeChannel(YouTubeItem):
|
|||||||
def __init__(self, youtube_id):
|
def __init__(self, youtube_id):
|
||||||
super().__init__(youtube_id)
|
super().__init__(youtube_id)
|
||||||
self.es_path = f"{self.index_name}/_doc/{youtube_id}"
|
self.es_path = f"{self.index_name}/_doc/{youtube_id}"
|
||||||
|
self.all_playlists = False
|
||||||
|
|
||||||
def build_json(self, upload=False):
|
def build_json(self, upload=False):
|
||||||
"""get from es or from youtube"""
|
"""get from es or from youtube"""
|
||||||
@ -241,6 +244,64 @@ class YoutubeChannel(YouTubeItem):
|
|||||||
self.delete_es_videos()
|
self.delete_es_videos()
|
||||||
self.del_in_es()
|
self.del_in_es()
|
||||||
|
|
||||||
|
def index_channel_playlists(self):
|
||||||
|
"""add all playlists of channel to index"""
|
||||||
|
mess_dict = {
|
||||||
|
"status": "message:playlistscan",
|
||||||
|
"level": "info",
|
||||||
|
"title": "Looking for playlists",
|
||||||
|
"message": f'Scanning channel "{self.youtube_id}" in progress',
|
||||||
|
}
|
||||||
|
RedisArchivist().set_message("message:playlistscan", mess_dict)
|
||||||
|
self.get_all_playlists()
|
||||||
|
if not self.all_playlists:
|
||||||
|
print(f"{self.youtube_id}: no playlists found.")
|
||||||
|
return
|
||||||
|
|
||||||
|
all_youtube_ids = self.get_all_video_ids()
|
||||||
|
for idx, playlist in enumerate(self.all_playlists):
|
||||||
|
self.notify_single_playlist(idx, playlist)
|
||||||
|
self.index_single_playlist(playlist, all_youtube_ids)
|
||||||
|
|
||||||
|
def notify_single_playlist(self, idx, playlist):
|
||||||
|
"""send notification"""
|
||||||
|
mess_dict = {
|
||||||
|
"status": "message:playlistscan",
|
||||||
|
"level": "info",
|
||||||
|
"title": "Scanning channel for playlists",
|
||||||
|
"message": f"Progress: {idx + 1}/{len(self.all_playlists)}",
|
||||||
|
}
|
||||||
|
RedisArchivist().set_message("message:playlistscan", mess_dict)
|
||||||
|
print("add playlist: " + playlist[1])
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def index_single_playlist(playlist, all_youtube_ids):
|
||||||
|
"""add single playlist if needed"""
|
||||||
|
playlist = YoutubePlaylist(playlist[0])
|
||||||
|
playlist.all_youtube_ids = all_youtube_ids
|
||||||
|
playlist.build_json()
|
||||||
|
if not playlist.json_data:
|
||||||
|
return
|
||||||
|
|
||||||
|
entries = playlist.json_data["playlist_entries"]
|
||||||
|
downloaded = [i for i in entries if i["downloaded"]]
|
||||||
|
if not downloaded:
|
||||||
|
return
|
||||||
|
|
||||||
|
playlist.upload_to_es()
|
||||||
|
playlist.add_vids_to_playlist()
|
||||||
|
playlist.get_playlist_art()
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_all_video_ids():
|
||||||
|
"""match all playlists with videos"""
|
||||||
|
handler = queue.PendingList()
|
||||||
|
handler.get_download()
|
||||||
|
handler.get_indexed()
|
||||||
|
all_youtube_ids = [i["youtube_id"] for i in handler.all_videos]
|
||||||
|
|
||||||
|
return all_youtube_ids
|
||||||
|
|
||||||
def get_all_playlists(self):
|
def get_all_playlists(self):
|
||||||
"""get all playlists owned by this channel"""
|
"""get all playlists owned by this channel"""
|
||||||
url = (
|
url = (
|
||||||
@ -254,8 +315,7 @@ class YoutubeChannel(YouTubeItem):
|
|||||||
}
|
}
|
||||||
playlists = yt_dlp.YoutubeDL(obs).extract_info(url)
|
playlists = yt_dlp.YoutubeDL(obs).extract_info(url)
|
||||||
all_entries = [(i["id"], i["title"]) for i in playlists["entries"]]
|
all_entries = [(i["id"], i["title"]) for i in playlists["entries"]]
|
||||||
|
self.all_playlists = all_entries
|
||||||
return all_entries
|
|
||||||
|
|
||||||
def get_indexed_playlists(self):
|
def get_indexed_playlists(self):
|
||||||
"""get all indexed playlists from channel"""
|
"""get all indexed playlists from channel"""
|
||||||
|
@ -24,7 +24,6 @@ from home.src.index.filesystem import (
|
|||||||
reindex_old_documents,
|
reindex_old_documents,
|
||||||
scan_filesystem,
|
scan_filesystem,
|
||||||
)
|
)
|
||||||
from home.src.index.playlist import YoutubePlaylist
|
|
||||||
from home.src.ta.config import AppConfig, ScheduleBuilder
|
from home.src.ta.config import AppConfig, ScheduleBuilder
|
||||||
from home.src.ta.helper import UrlListParser
|
from home.src.ta.helper import UrlListParser
|
||||||
from home.src.ta.ta_redis import RedisArchivist, RedisQueue
|
from home.src.ta.ta_redis import RedisArchivist, RedisQueue
|
||||||
@ -268,52 +267,7 @@ def index_channel_playlists(channel_id):
|
|||||||
"message": f'Scanning channel "{channel.youtube_id}" in progress',
|
"message": f'Scanning channel "{channel.youtube_id}" in progress',
|
||||||
}
|
}
|
||||||
RedisArchivist().set_message("message:playlistscan", mess_dict)
|
RedisArchivist().set_message("message:playlistscan", mess_dict)
|
||||||
all_playlists = channel.get_all_playlists()
|
channel.index_channel_playlists()
|
||||||
|
|
||||||
if not all_playlists:
|
|
||||||
print(f"no playlists found for channel {channel_id}")
|
|
||||||
return
|
|
||||||
|
|
||||||
handler = PendingList()
|
|
||||||
handler.get_download()
|
|
||||||
handler.get_indexed()
|
|
||||||
all_youtube_ids = [i["youtube_id"] for i in handler.all_videos]
|
|
||||||
|
|
||||||
for idx, (playlist_id, playlist_title) in enumerate(all_playlists):
|
|
||||||
# notify
|
|
||||||
mess_dict = {
|
|
||||||
"status": "message:playlistscan",
|
|
||||||
"level": "info",
|
|
||||||
"title": "Scanning channel for playlists",
|
|
||||||
"message": f"Progress: {idx + 1}/{len(all_playlists)}",
|
|
||||||
}
|
|
||||||
RedisArchivist().set_message("message:playlistscan", mess_dict)
|
|
||||||
print("add playlist: " + playlist_title)
|
|
||||||
|
|
||||||
playlist = YoutubePlaylist(playlist_id)
|
|
||||||
playlist.all_youtube_ids = all_youtube_ids
|
|
||||||
playlist.build_json()
|
|
||||||
|
|
||||||
if not playlist.json_data:
|
|
||||||
# skip if not available
|
|
||||||
continue
|
|
||||||
|
|
||||||
# don't add if no videos downloaded
|
|
||||||
downloaded = [
|
|
||||||
i
|
|
||||||
for i in playlist.json_data["playlist_entries"]
|
|
||||||
if i["downloaded"]
|
|
||||||
]
|
|
||||||
if not downloaded:
|
|
||||||
continue
|
|
||||||
|
|
||||||
playlist.upload_to_es()
|
|
||||||
playlist.add_vids_to_playlist()
|
|
||||||
|
|
||||||
if all_playlists:
|
|
||||||
playlist.get_playlist_art()
|
|
||||||
|
|
||||||
return
|
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
Loading…
Reference in New Issue
Block a user