refactor: new YtWrap base class for yt-dlp calls, #build

Changed:
- new yt-dlp base class
- new cookie class using io_stream
- MetadataFromField postprocessors
This commit is contained in:
simon 2022-05-25 16:15:38 +07:00
commit 88e526f5e5
No known key found for this signature in database
GPG Key ID: 2C15AA5E89985DD4
13 changed files with 145 additions and 231 deletions

View File

@ -3,7 +3,7 @@
from api.src.search_processor import SearchProcess
from api.src.task_processor import TaskHandler
from home.src.download.queue import PendingInteract
from home.src.download.yt_cookie import CookieHandler
from home.src.download.yt_dlp_base import CookieHandler
from home.src.es.connect import ElasticWrap
from home.src.index.generic import Pagination
from home.src.index.video import SponsorBlock
@ -484,6 +484,7 @@ class CookieView(ApiBaseView):
def post(request):
"""handle post request"""
# pylint: disable=unused-argument
validated = CookieHandler().validate()
config = AppConfig().config
validated = CookieHandler(config).validate()
return Response({"cookie_validated": validated})

View File

@ -7,13 +7,12 @@ Functionality:
import json
from datetime import datetime
import yt_dlp
from home.src.download.subscriptions import (
ChannelSubscription,
PlaylistSubscription,
)
from home.src.download.thumbnails import ThumbManager
from home.src.download.yt_cookie import CookieHandler
from home.src.download.yt_dlp_base import YtWrap
from home.src.es.connect import ElasticWrap, IndexPaginate
from home.src.index.playlist import YoutubePlaylist
from home.src.ta.config import AppConfig
@ -133,28 +132,11 @@ class PendingList(PendingIndex):
def __init__(self, youtube_ids=False):
super().__init__()
self.process_config()
self.config = AppConfig().config
self.youtube_ids = youtube_ids
self.to_skip = False
self.missing_videos = False
def process_config(self):
"""add user config to yt_obs"""
config = AppConfig().config
if config["downloads"]["cookie_import"]:
cookie_path = CookieHandler().use()
self.yt_obs.update({"cookiefile": cookie_path})
def close_config(self):
"""remove config after task finished"""
config = AppConfig().config
if config["downloads"]["cookie_import"]:
CookieHandler().hide()
try:
del self.yt_obs["cookiefile"]
except KeyError:
pass
def parse_url_list(self):
"""extract youtube ids from list"""
self.missing_videos = []
@ -235,8 +217,6 @@ class PendingList(PendingIndex):
query_str = "\n".join(bulk_list)
_, _ = ElasticWrap("_bulk").post(query_str, ndjson=True)
self.close_config()
def _notify_add(self, idx):
"""send notification for adding videos to download queue"""
progress = f"{idx + 1}/{len(self.missing_videos)}"
@ -256,11 +236,10 @@ class PendingList(PendingIndex):
def get_youtube_details(self, youtube_id):
"""get details from youtubedl for single pending video"""
try:
vid = yt_dlp.YoutubeDL(self.yt_obs).extract_info(youtube_id)
except yt_dlp.utils.DownloadError:
print(f"{youtube_id}: failed to extract info")
vid = YtWrap(self.yt_obs, self.config).extract(youtube_id)
if not vid:
return False
if vid.get("id") != youtube_id:
# skip premium videos with different id
print(f"{youtube_id}: skipping premium video, id not matching")

View File

@ -4,8 +4,8 @@ Functionality:
- handle playlist subscriptions
"""
import yt_dlp
from home.src.download import queue # partial import
from home.src.download.yt_dlp_base import YtWrap
from home.src.es.connect import IndexPaginate
from home.src.index.channel import YoutubeChannel
from home.src.index.playlist import YoutubePlaylist
@ -17,10 +17,7 @@ class ChannelSubscription:
"""manage the list of channels subscribed"""
def __init__(self):
config = AppConfig().config
self.es_url = config["application"]["es_url"]
self.es_auth = config["application"]["es_auth"]
self.channel_size = config["subscriptions"]["channel_size"]
self.config = AppConfig().config
@staticmethod
def get_channels(subscribed_only=True):
@ -39,23 +36,18 @@ class ChannelSubscription:
def get_last_youtube_videos(self, channel_id, limit=True):
"""get a list of last videos from channel"""
url = f"https://www.youtube.com/channel/{channel_id}/videos"
obs = {
"default_search": "ytsearch",
"quiet": True,
"skip_download": True,
"extract_flat": True,
}
if limit:
obs["playlistend"] = self.channel_size
obs["playlistend"] = self.config["subscriptions"]["channel_size"]
try:
chan = yt_dlp.YoutubeDL(obs).extract_info(url, download=False)
except yt_dlp.utils.DownloadError:
print(f"{channel_id}: failed to extract videos, skipping.")
channel = YtWrap(obs, self.config).extract(channel_id)
if not channel:
return False
last_videos = [(i["id"], i["title"]) for i in chan["entries"]]
last_videos = [(i["id"], i["title"]) for i in channel["entries"]]
return last_videos
def find_missing(self):

View File

@ -1,88 +0,0 @@
"""
functionality:
- import yt cookie from filesystem
- make cookie available for yt-dlp
"""
import os
import yt_dlp
from home.src.ta.config import AppConfig
from home.src.ta.ta_redis import RedisArchivist
class CookieHandler:
    """handle youtube cookie for yt-dlp

    File-based variant: the cookie text lives in redis and is written
    out to COOKIE_PATH on demand so yt-dlp can be pointed at a file.
    """

    # NOTE(review): AppConfig is read at class-definition (import) time,
    # so later config changes are not picked up without a reload — confirm
    CONFIG = AppConfig().config
    CACHE_PATH = CONFIG["application"]["cache_dir"]  # base cache directory
    COOKIE_FILE_NAME = "cookies.google.txt"  # expected file in import dir
    COOKIE_KEY = "cookie"  # redis key holding the raw cookie text
    COOKIE_PATH = "cookie.txt"  # on-disk path handed to yt-dlp

    def import_cookie(self):
        """import cookie from file

        Reads the dropped-in cookie file, stores its content in redis
        without expiry, then deletes the source file.
        """
        import_path = os.path.join(
            self.CACHE_PATH, "import", self.COOKIE_FILE_NAME
        )
        with open(import_path, encoding="utf-8") as cookie_file:
            cookie = cookie_file.read()

        RedisArchivist().set_message(self.COOKIE_KEY, cookie, expire=False)
        os.remove(import_path)
        print("cookie: import successfully")

    def use(self):
        """make cookie available in FS

        Returns COOKIE_PATH, writing the cookie file first if it does
        not exist yet. Raises FileNotFoundError if no cookie was imported.
        """
        cookie = RedisArchivist().get_message(self.COOKIE_KEY)
        # NOTE(review): get_message apparently returns a dict placeholder
        # when the key is missing — confirm against RedisArchivist
        if isinstance(cookie, dict):
            print("no cookie imported")
            raise FileNotFoundError

        if os.path.exists(self.COOKIE_PATH):
            return self.COOKIE_PATH

        with open(self.COOKIE_PATH, "w", encoding="utf-8") as cookie_file:
            cookie_file.write(cookie)

        print("cookie: made available")
        return self.COOKIE_PATH

    def hide(self):
        """hide cookie file if not in use

        Removes the on-disk copy; the cookie itself stays in redis.
        """
        try:
            os.remove(self.COOKIE_PATH)
        except FileNotFoundError:
            print("cookie: not available")
            return

        print("cookie: hidden")

    def revoke(self):
        """revoke cookie

        Removes both the on-disk file and the redis entry.
        """
        self.hide()
        RedisArchivist().del_message(self.COOKIE_KEY)
        print("cookie: revoked")

    def validate(self):
        """validate cookie using the liked videos playlist

        The LL playlist is only accessible when authenticated, so a
        successful extract implies a working cookie. Returns bool.
        """
        try:
            _ = self.use()
        except FileNotFoundError:
            return False

        url = "https://www.youtube.com/playlist?list=LL"
        yt_obs = {
            "quiet": True,
            "skip_download": True,
            "extract_flat": True,
            "cookiefile": self.COOKIE_PATH,
        }
        try:
            response = yt_dlp.YoutubeDL(yt_obs).extract_info(url)
        except yt_dlp.utils.DownloadError:
            print("failed to validate cookie")
            response = False

        return bool(response)

View File

@ -0,0 +1,101 @@
"""
functionality:
- base class to make all calls to yt-dlp
- handle yt-dlp errors
"""
import os
from io import StringIO
import yt_dlp
from home.src.ta.ta_redis import RedisArchivist
class YtWrap:
    """wrap calls to yt

    Central entry point for all yt-dlp usage: merges per-request options
    on top of OBS_BASE, optionally attaches the imported cookie, and
    converts yt-dlp exceptions into falsy return values so callers can
    simply truth-test the result.
    """

    # defaults for every call; obs_request entries override these
    OBS_BASE = {
        "default_search": "ytsearch",
        "quiet": True,
        "check_formats": "selected",
        "socket_timeout": 2,
    }

    def __init__(self, obs_request, config=False):
        self.obs_request = obs_request
        self.config = config
        self.build_obs()

    def build_obs(self):
        """build effective yt-dlp obs from base, request and cookie"""
        self.obs = self.OBS_BASE.copy()
        self.obs.update(self.obs_request)
        self.add_cookie()

    def add_cookie(self):
        """add cookie if enabled

        Guard on self.config first: config defaults to False and some
        callers build YtWrap without one, which would otherwise raise
        TypeError on the subscription below.
        """
        if self.config and self.config["downloads"]["cookie_import"]:
            cookie_io = CookieHandler(self.config).get()
            self.obs["cookiefile"] = cookie_io

    def download(self, url):
        """make download request, return True on success else False"""
        with yt_dlp.YoutubeDL(self.obs) as ydl:
            try:
                ydl.download([url])
            except yt_dlp.utils.DownloadError:
                print(f"{url}: failed to download.")
                return False

        return True

    def extract(self, url):
        """make extract request, return info dict or False on failure"""
        try:
            response = yt_dlp.YoutubeDL(self.obs).extract_info(url)
        except (yt_dlp.utils.ExtractorError, yt_dlp.utils.DownloadError):
            print(f"{url}: failed to get info from youtube")
            response = False

        return response
class CookieHandler:
    """handle youtube cookie for yt-dlp

    Io-stream variant: the cookie lives only in redis and is exposed to
    yt-dlp as an in-memory StringIO, never written to disk.
    """

    def __init__(self, config):
        self.cookie_io = False
        self.config = config

    def get(self):
        """expose the stored cookie as an in-memory text stream"""
        stored = RedisArchivist().get_message("cookie")
        self.cookie_io = StringIO(stored)
        return self.cookie_io

    def import_cookie(self):
        """move the dropped-in cookie file from the import dir to redis"""
        import_path = os.path.join(
            self.config["application"]["cache_dir"],
            "import",
            "cookies.google.txt",
        )
        with open(import_path, encoding="utf-8") as cookie_file:
            RedisArchivist().set_message(
                "cookie", cookie_file.read(), expire=False
            )

        os.remove(import_path)
        print("cookie: import successful")

    @staticmethod
    def revoke():
        """drop the stored cookie from redis"""
        RedisArchivist().del_message("cookie")
        print("cookie: revoked")

    def validate(self):
        """validate cookie by extracting the liked videos playlist"""
        response = YtWrap(
            {"skip_download": True, "extract_flat": True}, self.config
        ).extract("LL")
        return bool(response)

View File

@ -9,12 +9,10 @@ functionality:
import os
import shutil
from datetime import datetime
from time import sleep
import yt_dlp
from home.src.download.queue import PendingList
from home.src.download.subscriptions import PlaylistSubscription
from home.src.download.yt_cookie import CookieHandler
from home.src.download.yt_dlp_base import YtWrap
from home.src.es.connect import ElasticWrap, IndexPaginate
from home.src.index.channel import YoutubeChannel
from home.src.index.playlist import YoutubePlaylist
@ -41,7 +39,6 @@ class DownloadPostProcess:
self.auto_delete_all()
self.auto_delete_overwrites()
self.validate_playlists()
self.pending.close_config()
def auto_delete_all(self):
"""handle auto delete"""
@ -174,11 +171,10 @@ class VideoDownloader:
if not youtube_id:
break
try:
self._dl_single_vid(youtube_id)
except yt_dlp.utils.DownloadError:
print("failed to download " + youtube_id)
success = self._dl_single_vid(youtube_id)
if not success:
continue
vid_dict = index_new_video(
youtube_id, video_overwrites=self.video_overwrites
)
@ -293,9 +289,6 @@ class VideoDownloader:
self.obs["ratelimit"] = (
self.config["downloads"]["limit_speed"] * 1024
)
if self.config["downloads"]["cookie_import"]:
cookie_path = CookieHandler().use()
self.obs["cookiefile"] = cookie_path
throttle = self.config["downloads"]["throttledratelimit"]
if throttle:
@ -359,13 +352,7 @@ class VideoDownloader:
if youtube_id in file_name:
obs["outtmpl"] = os.path.join(dl_cache, file_name)
with yt_dlp.YoutubeDL(obs) as ydl:
try:
ydl.download([youtube_id])
except yt_dlp.utils.DownloadError:
print("retry failed download: " + youtube_id)
sleep(10)
ydl.download([youtube_id])
success = YtWrap(obs, self.config).download(youtube_id)
if self.obs["writethumbnail"]:
# webp files don't get cleaned up automatically
@ -375,6 +362,8 @@ class VideoDownloader:
file_path = os.path.join(dl_cache, file_name)
os.remove(file_path)
return success
def move_to_archive(self, vid_dict):
"""move downloaded video from cache to archive"""
videos = self.config["application"]["videos"]

View File

@ -10,10 +10,10 @@ import re
from datetime import datetime
import requests
import yt_dlp
from bs4 import BeautifulSoup
from home.src.download import queue # partial import
from home.src.download.thumbnails import ThumbManager
from home.src.download.yt_dlp_base import YtWrap
from home.src.es.connect import ElasticWrap, IndexPaginate
from home.src.index.generic import YouTubeItem
from home.src.index.playlist import YoutubePlaylist
@ -314,12 +314,8 @@ class YoutubeChannel(YouTubeItem):
f"https://www.youtube.com/channel/{self.youtube_id}"
+ "/playlists?view=1&sort=dd&shelf_id=0"
)
obs = {
"quiet": True,
"skip_download": True,
"extract_flat": True,
}
playlists = yt_dlp.YoutubeDL(obs).extract_info(url)
obs = {"skip_download": True, "extract_flat": True}
playlists = YtWrap(obs, self.config).extract(url)
all_entries = [(i["id"], i["title"]) for i in playlists["entries"]]
self.all_playlists = all_entries

View File

@ -12,7 +12,6 @@ import shutil
import subprocess
from home.src.download.queue import PendingList
from home.src.download.yt_cookie import CookieHandler
from home.src.download.yt_dlp_handler import VideoDownloader
from home.src.es.connect import ElasticWrap
from home.src.index.reindex import Reindex
@ -309,12 +308,6 @@ def scan_filesystem():
def reindex_old_documents():
"""daily refresh of old documents"""
handler = Reindex()
if handler.config["downloads"]["cookie_import"]:
CookieHandler().use()
try:
handler.check_outdated()
handler.reindex()
RedisArchivist().set_message("last_reindex", handler.now, expire=False)
finally:
if handler.config["downloads"]["cookie_import"]:
CookieHandler().hide()
handler.check_outdated()
handler.reindex()
RedisArchivist().set_message("last_reindex", handler.now, expire=False)

View File

@ -5,8 +5,7 @@ functionality:
import math
import yt_dlp
from home.src.download.yt_cookie import CookieHandler
from home.src.download.yt_dlp_base import YtWrap
from home.src.es.connect import ElasticWrap
from home.src.ta.config import AppConfig
from home.src.ta.ta_redis import RedisArchivist
@ -19,42 +18,22 @@ class YouTubeItem:
index_name = False
yt_base = False
yt_obs = {
"quiet": True,
"default_search": "ytsearch",
"skip_download": True,
"check_formats": "selected",
"socket_timeout": 3,
"noplaylist": True,
}
def __init__(self, youtube_id):
self.youtube_id = youtube_id
self.config = False
self.app_conf = False
self.youtube_meta = False
self.json_data = False
self._get_conf()
def _get_conf(self):
"""read user conf"""
self.config = AppConfig().config
self.app_conf = self.config["application"]
if self.config["downloads"]["cookie_import"]:
cookie_path = CookieHandler().use()
self.yt_obs.update({"cookiefile": cookie_path})
self.youtube_meta = False
self.json_data = False
def get_from_youtube(self):
"""use yt-dlp to get meta data from youtube"""
print(f"{self.youtube_id}: get metadata from youtube")
try:
yt_item = yt_dlp.YoutubeDL(self.yt_obs)
response = yt_item.extract_info(self.yt_base + self.youtube_id)
except (
yt_dlp.utils.ExtractorError,
yt_dlp.utils.DownloadError,
):
print(f"{self.youtube_id}: failed to get info from youtube")
response = False
url = self.yt_base + self.youtube_id
response = YtWrap(self.yt_obs, self.config).extract(url)
self.youtube_meta = response

View File

@ -18,12 +18,7 @@ class YoutubePlaylist(YouTubeItem):
es_path = False
index_name = "ta_playlist"
yt_obs = {
"default_search": "ytsearch",
"quiet": True,
"skip_download": True,
"extract_flat": True,
}
yt_obs = {"extract_flat": True}
yt_base = "https://www.youtube.com/playlist?list="
def __init__(self, youtube_id):

View File

@ -11,7 +11,7 @@ import unicodedata
from datetime import datetime
from urllib.parse import parse_qs, urlparse
import yt_dlp
from home.src.download.yt_dlp_base import YtWrap
def clean_string(file_name):
@ -184,14 +184,12 @@ class UrlListParser:
@staticmethod
def extract_channel_name(url):
"""find channel id from channel name with yt-dlp help"""
obs = {
"default_search": "ytsearch",
"quiet": True,
obs_request = {
"skip_download": True,
"extract_flat": True,
"playlistend": 0,
}
url_info = yt_dlp.YoutubeDL(obs).extract_info(url, download=False)
url_info = YtWrap(obs_request).extract(url)
try:
channel_id = url_info["channel_id"]
except KeyError as error:

View File

@ -8,7 +8,6 @@ import json
import os
import redis
from home.src.ta.helper import ignore_filelist
class RedisBase:
@ -95,27 +94,6 @@ class RedisArchivist(RedisBase):
return all_messages
@staticmethod
def monitor_cache_dir(cache_dir):
"""
look at download cache dir directly as alternative progress info
"""
dl_cache = os.path.join(cache_dir, "download")
all_cache_file = os.listdir(dl_cache)
cache_file = ignore_filelist(all_cache_file)
if cache_file:
filename = cache_file[0][12:].replace("_", " ").split(".")[0]
mess_dict = {
"status": "message:download",
"level": "info",
"title": "Downloading: " + filename,
"message": "",
}
else:
return False
return mess_dict
class RedisQueue(RedisBase):
"""dynamically interact with the download queue in redis"""

View File

@ -14,7 +14,7 @@ from django.contrib.auth.forms import AuthenticationForm
from django.http import JsonResponse
from django.shortcuts import redirect, render
from django.views import View
from home.src.download.yt_cookie import CookieHandler
from home.src.download.yt_dlp_base import CookieHandler
from home.src.es.index_setup import get_available_backups
from home.src.frontend.api_calls import PostData
from home.src.frontend.forms import (
@ -795,18 +795,19 @@ class SettingsView(View):
def post(self, request):
"""handle form post to update settings"""
user_form = UserSettingsForm(request.POST)
config_handler = AppConfig()
if user_form.is_valid():
user_form_post = user_form.cleaned_data
if any(user_form_post.values()):
AppConfig().set_user_config(user_form_post, request.user.id)
config_handler.set_user_config(user_form_post, request.user.id)
app_form = ApplicationSettingsForm(request.POST)
if app_form.is_valid():
app_form_post = app_form.cleaned_data
if app_form_post:
print(app_form_post)
updated = AppConfig().update_config(app_form_post)
self.post_process_updated(updated)
updated = config_handler.update_config(app_form_post)
self.post_process_updated(updated, config_handler.config)
scheduler_form = SchedulerSettingsForm(request.POST)
if scheduler_form.is_valid():
@ -819,7 +820,7 @@ class SettingsView(View):
return redirect("settings", permanent=True)
@staticmethod
def post_process_updated(updated):
def post_process_updated(updated, config):
"""apply changes for config"""
if not updated:
return
@ -827,9 +828,9 @@ class SettingsView(View):
for config_value, updated_value in updated:
if config_value == "cookie_import":
if updated_value:
CookieHandler().import_cookie()
CookieHandler(config).import_cookie()
else:
CookieHandler().revoke()
CookieHandler(config).revoke()
def progress(request):