Mirror of https://github.com/tubearchivist/tubearchivist, synced 2024-11-02 09:41:07 +00:00

Commit c39ce61b2c: offline import, #build

Changed:
- added offline import for videos and channels
- thumbnail manager rewrite
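The core of this change is the thumbnail manager rewrite: as the diff below shows, ThumbManager is now constructed per item with an item_id and an item_type ("video", "channel" or "playlist") instead of acting as a bulk downloader. A minimal usage sketch, assuming it runs inside the Tube Archivist Django app; the ids and URLs are placeholders:

```python
# usage sketch of the reworked per-item ThumbManager (names taken from this diff);
# the ids and urls below are placeholders, not real YouTube data
from home.src.download.thumbnails import ThumbManager

video_id = "dQw4w9WgXcQ"  # placeholder 11 character video id

# one instance per item, item_type defaults to "video"
handler = ThumbManager(video_id)
rel_path = handler.vid_thumb_path()  # e.g. "videos/d/dQw4w9WgXcQ.jpg", relative to the cache dir
handler.download_video_thumb("https://i.ytimg.com/vi/dQw4w9WgXcQ/maxresdefault.jpg")

# channels take a (thumb_url, banner_url) tuple via the generic download() dispatcher
ThumbManager("UC_placeholder_channel_id", item_type="channel").download(
    ("https://example.com/channel_thumb.jpg", "https://example.com/channel_banner.jpg")
)

# playlists download a single thumbnail url
ThumbManager("PL_placeholder_playlist_id", item_type="playlist").download_playlist_thumb(
    "https://example.com/playlist_thumb.jpg"
)
```

Bulk validation moves into ThumbValidator().download_missing() and thumbnail embedding into ThumbFilesystem().sync(), both visible in the tasks.py hunks further down.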
@@ -220,3 +220,12 @@ Second best way to support the development is to provide for caffeinated beverages:
* [Paypal.me](https://paypal.me/bbilly1) for a one time coffee
* [Paypal Subscription](https://www.paypal.com/webapps/billing/plans/subscribe?plan_id=P-03770005GR991451KMFGVPMQ) for a monthly coffee
* [ko-fi.com](https://ko-fi.com/bbilly1) for an alternative platform

## Sponsor
Big thank you to [Digitalocean](https://www.digitalocean.com/) for generously donating credit for the tubearchivist.com VPS and buildserver.
<p>
<a href="https://www.digitalocean.com/">
<img src="https://opensource.nyc3.cdn.digitaloceanspaces.com/attribution/assets/PoweredByDO/DO_Powered_by_Badge_blue.svg" width="201px">
</a>
</p>
@@ -74,7 +74,7 @@ class SearchProcess:
media_url = urllib.parse.quote(video_dict["media_url"])
vid_last_refresh = date_praser(video_dict["vid_last_refresh"])
published = date_praser(video_dict["published"])
vid_thumb_url = ThumbManager().vid_thumb_path(video_id)
vid_thumb_url = ThumbManager(video_id).vid_thumb_path()
channel = self._process_channel(video_dict["channel"])

if "subtitles" in video_dict:
@@ -113,7 +113,7 @@ class SearchProcess:
def _process_download(self, download_dict):
"""run on single download item"""
video_id = download_dict["youtube_id"]
vid_thumb_url = ThumbManager().vid_thumb_path(video_id)
vid_thumb_url = ThumbManager(video_id).vid_thumb_path()
published = date_praser(download_dict["published"])

download_dict.update(
@@ -161,10 +161,7 @@ class PendingList(PendingIndex):
self._parse_channel(entry["url"])
elif entry["type"] == "playlist":
self._parse_playlist(entry["url"])
new_thumbs = PlaylistSubscription().process_url_str(
[entry], subscribed=False
)
ThumbManager().download_playlist(new_thumbs)
PlaylistSubscription().process_url_str([entry], subscribed=False)
else:
raise ValueError(f"invalid url_type: {entry}")

@@ -198,7 +195,6 @@ class PendingList(PendingIndex):
self.get_channels()
bulk_list = []

thumb_handler = ThumbManager()
for idx, youtube_id in enumerate(self.missing_videos):
video_details = self.get_youtube_details(youtube_id)
if not video_details:
@@ -209,8 +205,9 @@ class PendingList(PendingIndex):
bulk_list.append(json.dumps(action))
bulk_list.append(json.dumps(video_details))

thumb_needed = [(youtube_id, video_details["vid_thumb_url"])]
thumb_handler.download_vid(thumb_needed)
url = video_details["vid_thumb_url"]
ThumbManager(youtube_id).download_video_thumb(url)

self._notify_add(idx)

if bulk_list:
@@ -5,6 +5,7 @@ Functionality:
"""

from home.src.download import queue # partial import
from home.src.download.thumbnails import ThumbManager
from home.src.download.yt_dlp_base import YtWrap
from home.src.es.connect import IndexPaginate
from home.src.index.channel import YoutubeChannel
@@ -129,11 +130,9 @@ class PlaylistSubscription:
all_indexed = IndexPaginate("ta_video", data).get_results()
all_youtube_ids = [i["youtube_id"] for i in all_indexed]

new_thumbs = []
for idx, playlist in enumerate(new_playlists):
url_type = playlist["type"]
playlist_id = playlist["url"]
if not url_type == "playlist":
if not playlist["type"] == "playlist":
print(f"{playlist_id} not a playlist, skipping...")
continue

@@ -144,8 +143,11 @@ class PlaylistSubscription:
playlist_h.upload_to_es()
playlist_h.add_vids_to_playlist()
self.channel_validate(playlist_h.json_data["playlist_channel_id"])
thumb = playlist_h.json_data["playlist_thumbnail"]
new_thumbs.append((playlist_id, thumb))

url = playlist_h.json_data["playlist_thumbnail"]
thumb = ThumbManager(playlist_id, item_type="playlist")
thumb.download_playlist_thumb(url)

# notify
message = {
"status": "message:subplaylist",
@@ -157,8 +159,6 @@ class PlaylistSubscription:
"message:subplaylist", message=message, expire=True
)

return new_thumbs

@staticmethod
def channel_validate(channel_id):
"""make sure channel of playlist is there"""
@@ -6,136 +6,66 @@ functionality:

import base64
import os
from collections import Counter
from io import BytesIO
from time import sleep

import requests
from home.src.download import queue # partial import
from home.src.download import subscriptions # partial import
from home.src.es.connect import IndexPaginate
from home.src.ta.config import AppConfig
from home.src.ta.helper import ignore_filelist
from home.src.ta.ta_redis import RedisArchivist
from mutagen.mp4 import MP4, MP4Cover
from PIL import Image, ImageFile, ImageFilter

ImageFile.LOAD_TRUNCATED_IMAGES = True


class ThumbManager:
"""handle thumbnails related functions"""
class ThumbManagerBase:
"""base class for thumbnail management"""

CONFIG = AppConfig().config
MEDIA_DIR = CONFIG["application"]["videos"]
CACHE_DIR = CONFIG["application"]["cache_dir"]
VIDEO_DIR = os.path.join(CACHE_DIR, "videos")
CHANNEL_DIR = os.path.join(CACHE_DIR, "channels")
PLAYLIST_DIR = os.path.join(CACHE_DIR, "playlists")

def get_all_thumbs(self):
|
||||
"""get all video artwork already downloaded"""
|
||||
all_thumb_folders = ignore_filelist(os.listdir(self.VIDEO_DIR))
|
||||
all_thumbs = []
|
||||
for folder in all_thumb_folders:
|
||||
folder_path = os.path.join(self.VIDEO_DIR, folder)
|
||||
if os.path.isfile(folder_path):
|
||||
self.update_path(folder)
|
||||
all_thumbs.append(folder_path)
|
||||
continue
|
||||
# raise exemption here in a future version
|
||||
# raise FileExistsError("video cache dir has files inside")
|
||||
def __init__(self, item_id, item_type, fallback=False):
|
||||
self.item_id = item_id
|
||||
self.item_type = item_type
|
||||
self.fallback = fallback
|
||||
|
||||
all_folder_thumbs = ignore_filelist(os.listdir(folder_path))
|
||||
all_thumbs.extend(all_folder_thumbs)
|
||||
def download_raw(self, url):
|
||||
"""download thumbnail for video"""
|
||||
if not url:
|
||||
return self.get_fallback()
|
||||
|
||||
return all_thumbs
|
||||
for i in range(3):
|
||||
try:
|
||||
response = requests.get(url, stream=True)
|
||||
if response.ok:
|
||||
return Image.open(response.raw)
|
||||
if response.status_code == 404:
|
||||
return self.get_fallback()
|
||||
|
||||
def update_path(self, file_name):
|
||||
"""reorganize thumbnails into folders as update path from v0.0.5"""
|
||||
folder_name = file_name[0].lower()
|
||||
folder_path = os.path.join(self.VIDEO_DIR, folder_name)
|
||||
old_file = os.path.join(self.VIDEO_DIR, file_name)
|
||||
new_file = os.path.join(folder_path, file_name)
|
||||
os.makedirs(folder_path, exist_ok=True)
|
||||
os.rename(old_file, new_file)
|
||||
except ConnectionError:
|
||||
print(f"{self.item_id}: retry thumbnail download {url}")
|
||||
sleep((i + 1) ** i)
|
||||
|
||||
def get_needed_thumbs(self, missing_only=False):
|
||||
"""get a list of all missing thumbnails"""
|
||||
all_thumbs = self.get_all_thumbs()
|
||||
return False
|
||||
|
||||
pending = queue.PendingList()
|
||||
pending.get_download()
|
||||
pending.get_indexed()
|
||||
def get_fallback(self):
|
||||
"""get fallback thumbnail if not available"""
|
||||
if self.fallback:
|
||||
img_raw = Image.open(self.fallback)
|
||||
return img_raw
|
||||
|
||||
needed_thumbs = []
|
||||
for video in pending.all_videos:
|
||||
youtube_id = video["youtube_id"]
|
||||
thumb_url = video["vid_thumb_url"]
|
||||
if missing_only:
|
||||
if youtube_id + ".jpg" not in all_thumbs:
|
||||
needed_thumbs.append((youtube_id, thumb_url))
|
||||
else:
|
||||
needed_thumbs.append((youtube_id, thumb_url))
|
||||
|
||||
for video in pending.all_pending + pending.all_ignored:
|
||||
youtube_id = video["youtube_id"]
|
||||
thumb_url = video["vid_thumb_url"]
|
||||
if missing_only:
|
||||
if youtube_id + ".jpg" not in all_thumbs:
|
||||
needed_thumbs.append((youtube_id, thumb_url))
|
||||
else:
|
||||
needed_thumbs.append((youtube_id, thumb_url))
|
||||
|
||||
return needed_thumbs
|
||||
|
||||
def get_missing_channels(self):
|
||||
"""get all channel artwork"""
|
||||
all_channel_art = os.listdir(self.CHANNEL_DIR)
|
||||
files = [i[0:24] for i in all_channel_art]
|
||||
cached_channel_ids = [k for (k, v) in Counter(files).items() if v > 1]
|
||||
channel_sub = subscriptions.ChannelSubscription()
|
||||
channels = channel_sub.get_channels(subscribed_only=False)
|
||||
|
||||
missing_channels = []
|
||||
for channel in channels:
|
||||
channel_id = channel["channel_id"]
|
||||
if channel_id not in cached_channel_ids:
|
||||
channel_banner = channel["channel_banner_url"]
|
||||
channel_thumb = channel["channel_thumb_url"]
|
||||
missing_channels.append(
|
||||
(channel_id, channel_thumb, channel_banner)
|
||||
)
|
||||
|
||||
return missing_channels
|
||||
|
||||
def get_missing_playlists(self):
|
||||
"""get all missing playlist artwork"""
|
||||
all_downloaded = ignore_filelist(os.listdir(self.PLAYLIST_DIR))
|
||||
all_ids_downloaded = [i.replace(".jpg", "") for i in all_downloaded]
|
||||
playlist_sub = subscriptions.PlaylistSubscription()
|
||||
playlists = playlist_sub.get_playlists(subscribed_only=False)
|
||||
|
||||
missing_playlists = []
|
||||
for playlist in playlists:
|
||||
playlist_id = playlist["playlist_id"]
|
||||
if playlist_id not in all_ids_downloaded:
|
||||
playlist_thumb = playlist["playlist_thumbnail"]
|
||||
missing_playlists.append((playlist_id, playlist_thumb))
|
||||
|
||||
return missing_playlists
|
||||
|
||||
def get_raw_img(self, img_url, thumb_type):
|
||||
"""get raw image from youtube and handle 404"""
|
||||
try:
|
||||
app_root = self.CONFIG["application"]["app_root"]
|
||||
except KeyError:
|
||||
# lazy keyerror fix to not have to deal with a strange startup
|
||||
# racing contition between the threads in HomeConfig.ready()
|
||||
app_root = "/app"
|
||||
app_root = self.CONFIG["application"]["app_root"]
|
||||
default_map = {
|
||||
"video": os.path.join(
|
||||
app_root, "static/img/default-video-thumb.jpg"
|
||||
),
|
||||
"playlist": os.path.join(
|
||||
app_root, "static/img/default-video-thumb.jpg"
|
||||
),
|
||||
"icon": os.path.join(
|
||||
app_root, "static/img/default-channel-icon.jpg"
|
||||
),
|
||||
@ -143,116 +73,134 @@ class ThumbManager:
|
||||
app_root, "static/img/default-channel-banner.jpg"
|
||||
),
|
||||
}
|
||||
if img_url:
|
||||
try:
|
||||
response = requests.get(img_url, stream=True)
|
||||
except ConnectionError:
|
||||
sleep(5)
|
||||
response = requests.get(img_url, stream=True)
|
||||
if not response.ok and not response.status_code == 404:
|
||||
print("retry thumbnail download for " + img_url)
|
||||
sleep(5)
|
||||
response = requests.get(img_url, stream=True)
|
||||
else:
|
||||
response = False
|
||||
if not response or response.status_code == 404:
|
||||
# use default
|
||||
img_raw = Image.open(default_map[thumb_type])
|
||||
else:
|
||||
# use response
|
||||
img_obj = response.raw
|
||||
img_raw = Image.open(img_obj)
|
||||
|
||||
img_raw = Image.open(default_map[self.item_type])
|
||||
|
||||
return img_raw
|
||||
|
||||
def download_vid(self, missing_thumbs, notify=True):
|
||||
"""download all missing thumbnails from list"""
|
||||
print(f"downloading {len(missing_thumbs)} thumbnails")
|
||||
for idx, (youtube_id, thumb_url) in enumerate(missing_thumbs):
|
||||
folder_path = os.path.join(self.VIDEO_DIR, youtube_id[0].lower())
|
||||
thumb_path = os.path.join(
|
||||
self.CACHE_DIR, self.vid_thumb_path(youtube_id)
|
||||
)
|
||||
|
||||
os.makedirs(folder_path, exist_ok=True)
|
||||
img_raw = self.get_raw_img(thumb_url, "video")
|
||||
class ThumbManager(ThumbManagerBase):
|
||||
"""handle thumbnails related functions"""
|
||||
|
||||
width, height = img_raw.size
|
||||
if not width / height == 16 / 9:
|
||||
new_height = width / 16 * 9
|
||||
offset = (height - new_height) / 2
|
||||
img_raw = img_raw.crop((0, offset, width, height - offset))
|
||||
img_raw.convert("RGB").save(thumb_path)
|
||||
def __init__(self, item_id, item_type="video", fallback=False):
|
||||
super().__init__(item_id, item_type, fallback=fallback)
|
||||
|
||||
progress = f"{idx + 1}/{len(missing_thumbs)}"
|
||||
if notify:
|
||||
mess_dict = {
|
||||
"status": "message:add",
|
||||
"level": "info",
|
||||
"title": "Processing Videos",
|
||||
"message": "Downloading Thumbnails, Progress: " + progress,
|
||||
}
|
||||
if idx + 1 == len(missing_thumbs):
|
||||
expire = 4
|
||||
else:
|
||||
expire = True
|
||||
def download(self, url):
|
||||
"""download thumbnail"""
|
||||
print(f"{self.item_id}: download {self.item_type} thumbnail")
|
||||
if self.item_type == "video":
|
||||
self.download_video_thumb(url)
|
||||
elif self.item_type == "channel":
|
||||
self.download_channel_art(url)
|
||||
elif self.item_type == "playlist":
|
||||
self.download_playlist_thumb(url)
|
||||
|
||||
RedisArchivist().set_message(
|
||||
"message:add", mess_dict, expire=expire
|
||||
)
|
||||
def delete(self):
|
||||
"""delete thumbnail file"""
|
||||
print(f"{self.item_id}: delete {self.item_type} thumbnail")
|
||||
if self.item_type == "video":
|
||||
self.delete_video_thumb()
|
||||
elif self.item_type == "channel":
|
||||
self.delete_channel_thumb()
|
||||
elif self.item_type == "playlist":
|
||||
self.delete_playlist_thumb()
|
||||
|
||||
if idx + 1 % 25 == 0:
|
||||
print("thumbnail progress: " + progress)
|
||||
def download_video_thumb(self, url, skip_existing=False):
|
||||
"""pass url for video thumbnail"""
|
||||
folder_path = os.path.join(self.VIDEO_DIR, self.item_id[0].lower())
|
||||
thumb_path = self.vid_thumb_path(absolute=True)
|
||||
|
||||
def download_chan(self, missing_channels):
|
||||
"""download needed artwork for channels"""
|
||||
print(f"downloading {len(missing_channels)} channel artwork")
|
||||
for channel in missing_channels:
|
||||
channel_id, channel_thumb, channel_banner = channel
|
||||
if skip_existing and os.path.exists(thumb_path):
|
||||
return
|
||||
|
||||
thumb_path = os.path.join(
|
||||
self.CHANNEL_DIR, channel_id + "_thumb.jpg"
|
||||
)
|
||||
img_raw = self.get_raw_img(channel_thumb, "icon")
|
||||
img_raw.convert("RGB").save(thumb_path)
|
||||
os.makedirs(folder_path, exist_ok=True)
|
||||
img_raw = self.download_raw(url)
|
||||
width, height = img_raw.size
|
||||
|
||||
banner_path = os.path.join(
|
||||
self.CHANNEL_DIR, channel_id + "_banner.jpg"
|
||||
)
|
||||
img_raw = self.get_raw_img(channel_banner, "banner")
|
||||
img_raw.convert("RGB").save(banner_path)
|
||||
if not width / height == 16 / 9:
|
||||
new_height = width / 16 * 9
|
||||
offset = (height - new_height) / 2
|
||||
img_raw = img_raw.crop((0, offset, width, height - offset))
|
||||
|
||||
mess_dict = {
|
||||
"status": "message:download",
|
||||
"level": "info",
|
||||
"title": "Processing Channels",
|
||||
"message": "Downloading Channel Art.",
|
||||
}
|
||||
key = "message:download"
|
||||
RedisArchivist().set_message(key, mess_dict, expire=True)
|
||||
img_raw.convert("RGB").save(thumb_path)
|
||||
|
||||
def download_playlist(self, missing_playlists):
|
||||
"""download needed artwork for playlists"""
|
||||
print(f"downloading {len(missing_playlists)} playlist artwork")
|
||||
for playlist in missing_playlists:
|
||||
playlist_id, playlist_thumb_url = playlist
|
||||
thumb_path = os.path.join(self.PLAYLIST_DIR, playlist_id + ".jpg")
|
||||
img_raw = self.get_raw_img(playlist_thumb_url, "video")
|
||||
img_raw.convert("RGB").save(thumb_path)
|
||||
def vid_thumb_path(self, absolute=False):
|
||||
"""build expected path for video thumbnail from youtube_id"""
|
||||
folder_name = self.item_id[0].lower()
|
||||
folder_path = os.path.join("videos", folder_name)
|
||||
thumb_path = os.path.join(folder_path, f"{self.item_id}.jpg")
|
||||
if absolute:
|
||||
thumb_path = os.path.join(self.CACHE_DIR, thumb_path)
|
||||
|
||||
mess_dict = {
|
||||
"status": "message:download",
|
||||
"level": "info",
|
||||
"title": "Processing Playlists",
|
||||
"message": "Downloading Playlist Art.",
|
||||
}
|
||||
key = "message:download"
|
||||
RedisArchivist().set_message(key, mess_dict, expire=True)
|
||||
return thumb_path
|
||||
|
||||
def get_base64_blur(self, youtube_id):
|
||||
def download_channel_art(self, urls, skip_existing=False):
|
||||
"""pass tuple of channel thumbnails"""
|
||||
channel_thumb, channel_banner = urls
|
||||
self._download_channel_thumb(channel_thumb, skip_existing)
|
||||
self._download_channel_banner(channel_banner, skip_existing)
|
||||
|
||||
def _download_channel_thumb(self, channel_thumb, skip_existing):
|
||||
"""download channel thumbnail"""
|
||||
|
||||
thumb_path = os.path.join(
|
||||
self.CHANNEL_DIR, f"{self.item_id}_thumb.jpg"
|
||||
)
|
||||
self.item_type = "icon"
|
||||
|
||||
if skip_existing and os.path.exists(thumb_path):
|
||||
return
|
||||
|
||||
img_raw = self.download_raw(channel_thumb)
|
||||
img_raw.convert("RGB").save(thumb_path)
|
||||
|
||||
def _download_channel_banner(self, channel_banner, skip_existing):
|
||||
"""download channel banner"""
|
||||
|
||||
banner_path = os.path.join(
|
||||
self.CHANNEL_DIR, self.item_id + "_banner.jpg"
|
||||
)
|
||||
self.item_type = "banner"
|
||||
if skip_existing and os.path.exists(banner_path):
|
||||
return
|
||||
|
||||
img_raw = self.download_raw(channel_banner)
|
||||
img_raw.convert("RGB").save(banner_path)
|
||||
|
||||
def download_playlist_thumb(self, url, skip_existing=False):
|
||||
"""pass thumbnail url"""
|
||||
thumb_path = os.path.join(self.PLAYLIST_DIR, f"{self.item_id}.jpg")
|
||||
if skip_existing and os.path.exists(thumb_path):
|
||||
return
|
||||
|
||||
img_raw = self.download_raw(url)
|
||||
img_raw.convert("RGB").save(thumb_path)
|
||||
|
||||
def delete_video_thumb(self):
|
||||
"""delete video thumbnail if exists"""
|
||||
thumb_path = self.vid_thumb_path()
|
||||
to_delete = os.path.join(self.CACHE_DIR, thumb_path)
|
||||
if os.path.exists(to_delete):
|
||||
os.remove(to_delete)
|
||||
|
||||
def delete_channel_thumb(self):
|
||||
"""delete all artwork of channel"""
|
||||
thumb = os.path.join(self.CHANNEL_DIR, f"{self.item_id}_thumb.jpg")
|
||||
banner = os.path.join(self.CHANNEL_DIR, f"{self.item_id}_banner.jpg")
|
||||
if os.path.exists(thumb):
|
||||
os.remove(thumb)
|
||||
if os.path.exists(banner):
|
||||
os.remove(banner)
|
||||
|
||||
def delete_playlist_thumb(self):
|
||||
"""delete playlist thumbnail"""
|
||||
thumb_path = os.path.join(self.PLAYLIST_DIR, f"{self.item_id}.jpg")
|
||||
if os.path.exists(thumb_path):
|
||||
os.remove(thumb_path)
|
||||
|
||||
def get_vid_base64_blur(self):
|
||||
"""return base64 encoded placeholder"""
|
||||
img_path = self.vid_thumb_path(youtube_id)
|
||||
file_path = os.path.join(self.CACHE_DIR, img_path)
|
||||
file_path = os.path.join(self.CACHE_DIR, self.vid_thumb_path())
|
||||
img_raw = Image.open(file_path)
|
||||
img_raw.thumbnail((img_raw.width // 20, img_raw.height // 20))
|
||||
img_blur = img_raw.filter(ImageFilter.BLUR)
|
||||
@ -264,40 +212,109 @@ class ThumbManager:
|
||||
|
||||
return data_url
|
||||
|
||||
@staticmethod
|
||||
def vid_thumb_path(youtube_id):
|
||||
"""build expected path for video thumbnail from youtube_id"""
|
||||
folder_name = youtube_id[0].lower()
|
||||
folder_path = os.path.join("videos", folder_name)
|
||||
thumb_path = os.path.join(folder_path, youtube_id + ".jpg")
|
||||
return thumb_path
|
||||
|
||||
def delete_vid_thumb(self, youtube_id):
|
||||
"""delete video thumbnail if exists"""
|
||||
thumb_path = self.vid_thumb_path(youtube_id)
|
||||
to_delete = os.path.join(self.CACHE_DIR, thumb_path)
|
||||
if os.path.exists(to_delete):
|
||||
os.remove(to_delete)
|
||||
class ValidatorCallback:
|
||||
"""handle callback validate thumbnails page by page"""
|
||||
|
||||
def delete_chan_thumb(self, channel_id):
|
||||
"""delete all artwork of channel"""
|
||||
thumb = os.path.join(self.CHANNEL_DIR, channel_id + "_thumb.jpg")
|
||||
banner = os.path.join(self.CHANNEL_DIR, channel_id + "_banner.jpg")
|
||||
if os.path.exists(thumb):
|
||||
os.remove(thumb)
|
||||
if os.path.exists(banner):
|
||||
os.remove(banner)
|
||||
def __init__(self, source, index_name):
|
||||
self.source = source
|
||||
self.index_name = index_name
|
||||
|
||||
def cleanup_downloaded(self):
|
||||
"""find downloaded thumbnails without video indexed"""
|
||||
all_thumbs = self.get_all_thumbs()
|
||||
all_indexed = self.get_needed_thumbs()
|
||||
all_needed_thumbs = [i[0] + ".jpg" for i in all_indexed]
|
||||
for thumb in all_thumbs:
|
||||
if thumb not in all_needed_thumbs:
|
||||
# cleanup
|
||||
youtube_id = thumb.rstrip(".jpg")
|
||||
self.delete_vid_thumb(youtube_id)
|
||||
def run(self):
|
||||
"""run the task for page"""
|
||||
print(f"{self.index_name}: validate artwork")
|
||||
if self.index_name == "ta_video":
|
||||
self._validate_videos()
|
||||
elif self.index_name == "ta_channel":
|
||||
self._validate_channels()
|
||||
elif self.index_name == "ta_playlist":
|
||||
self._validate_playlists()
|
||||
|
||||
def _validate_videos(self):
|
||||
"""check if video thumbnails are correct"""
|
||||
for video in self.source:
|
||||
url = video["_source"]["vid_thumb_url"]
|
||||
handler = ThumbManager(video["_source"]["youtube_id"])
|
||||
handler.download_video_thumb(url, skip_existing=True)
|
||||
|
||||
def _validate_channels(self):
|
||||
"""check if all channel artwork is there"""
|
||||
for channel in self.source:
|
||||
urls = (
|
||||
channel["_source"]["channel_thumb_url"],
|
||||
channel["_source"]["channel_banner_url"],
|
||||
)
|
||||
handler = ThumbManager(channel["_source"]["channel_id"])
|
||||
handler.download_channel_art(urls, skip_existing=True)
|
||||
|
||||
def _validate_playlists(self):
|
||||
"""check if all playlist artwork is there"""
|
||||
for playlist in self.source:
|
||||
url = playlist["_source"]["playlist_thumbnail"]
|
||||
handler = ThumbManager(playlist["_source"]["playlist_id"])
|
||||
handler.download_playlist_thumb(url, skip_existing=True)
|
||||
|
||||
|
||||
class ThumbValidator:
|
||||
"""validate thumbnails"""
|
||||
|
||||
def download_missing(self):
|
||||
"""download all missing artwork"""
|
||||
self.download_missing_videos()
|
||||
self.download_missing_channels()
|
||||
self.download_missing_playlists()
|
||||
|
||||
def download_missing_videos(self):
|
||||
"""get all missing video thumbnails"""
|
||||
data = {
|
||||
"query": {"term": {"active": {"value": True}}},
|
||||
"sort": [{"youtube_id": {"order": "asc"}}],
|
||||
"_source": ["vid_thumb_url", "youtube_id"],
|
||||
}
|
||||
paginate = IndexPaginate(
|
||||
"ta_video", data, size=5000, callback=ValidatorCallback
|
||||
)
|
||||
_ = paginate.get_results()
|
||||
|
||||
def download_missing_channels(self):
|
||||
"""get all missing channel thumbnails"""
|
||||
data = {
|
||||
"query": {"term": {"channel_active": {"value": True}}},
|
||||
"sort": [{"channel_id": {"order": "asc"}}],
|
||||
"_source": {
|
||||
"excludes": ["channel_description", "channel_overwrites"]
|
||||
},
|
||||
}
|
||||
paginate = IndexPaginate(
|
||||
"ta_channel", data, callback=ValidatorCallback
|
||||
)
|
||||
_ = paginate.get_results()
|
||||
|
||||
def download_missing_playlists(self):
|
||||
"""get all missing playlist artwork"""
|
||||
data = {
|
||||
"query": {"term": {"playlist_active": {"value": True}}},
|
||||
"sort": [{"playlist_id": {"order": "asc"}}],
|
||||
"_source": ["playlist_id", "playlist_thumbnail"],
|
||||
}
|
||||
paginate = IndexPaginate(
|
||||
"ta_playlist", data, callback=ValidatorCallback
|
||||
)
|
||||
_ = paginate.get_results()
|
||||
|
||||
|
||||
class ThumbFilesystem:
|
||||
"""filesystem tasks for thumbnails"""
|
||||
|
||||
CONFIG = AppConfig().config
|
||||
CACHE_DIR = CONFIG["application"]["cache_dir"]
|
||||
MEDIA_DIR = CONFIG["application"]["videos"]
|
||||
VIDEO_DIR = os.path.join(CACHE_DIR, "videos")
|
||||
|
||||
def sync(self):
|
||||
"""embed thumbnails to mediafiles"""
|
||||
video_list = self.get_thumb_list()
|
||||
self._embed_thumbs(video_list)
|
||||
|
||||
def get_thumb_list(self):
|
||||
"""get list of mediafiles and matching thumbnails"""
|
||||
@ -307,10 +324,10 @@ class ThumbManager:
|
||||
|
||||
video_list = []
|
||||
for video in pending.all_videos:
|
||||
youtube_id = video["youtube_id"]
|
||||
video_id = video["youtube_id"]
|
||||
media_url = os.path.join(self.MEDIA_DIR, video["media_url"])
|
||||
thumb_path = os.path.join(
|
||||
self.CACHE_DIR, self.vid_thumb_path(youtube_id)
|
||||
self.CACHE_DIR, ThumbManager(video_id).vid_thumb_path()
|
||||
)
|
||||
video_list.append(
|
||||
{
|
||||
@ -322,7 +339,7 @@ class ThumbManager:
|
||||
return video_list
|
||||
|
||||
@staticmethod
|
||||
def write_all_thumbs(video_list):
|
||||
def _embed_thumbs(video_list):
|
||||
"""rewrite the thumbnail into media file"""
|
||||
|
||||
counter = 1
|
||||
@ -340,15 +357,3 @@ class ThumbManager:
|
||||
if counter % 50 == 0:
|
||||
print(f"thumbnail write progress {counter}/{len(video_list)}")
|
||||
counter = counter + 1
|
||||
|
||||
|
||||
def validate_thumbnails():
|
||||
"""check if all thumbnails are there and organized correctly"""
|
||||
handler = ThumbManager()
|
||||
thumbs_to_download = handler.get_needed_thumbs(missing_only=True)
|
||||
handler.download_vid(thumbs_to_download)
|
||||
missing_channels = handler.get_missing_channels()
|
||||
handler.download_chan(missing_channels)
|
||||
missing_playlists = handler.get_missing_playlists()
|
||||
handler.download_playlist(missing_playlists)
|
||||
handler.cleanup_downloaded()
|
||||
|
@@ -119,7 +119,7 @@ class SearchHandler:

if "vid_thumb_url" in hit_keys:
youtube_id = hit["source"]["youtube_id"]
thumb_path = ThumbManager().vid_thumb_path(youtube_id)
thumb_path = ThumbManager(youtube_id).vid_thumb_path()
hit["source"]["vid_thumb_url"] = thumb_path

if "channel_last_refresh" in hit_keys:
@@ -138,7 +138,7 @@ class SearchHandler:

if "subtitle_fragment_id" in hit_keys:
youtube_id = hit["source"]["youtube_id"]
thumb_path = ThumbManager().vid_thumb_path(youtube_id)
thumb_path = ThumbManager(youtube_id).vid_thumb_path()
hit["source"]["vid_thumb_url"] = f"/cache/{thumb_path}"

return hit
@@ -173,30 +173,71 @@ class YoutubeChannel(YouTubeItem):
self.es_path = f"{self.index_name}/_doc/{youtube_id}"
self.all_playlists = False

def build_json(self, upload=False):
def build_json(self, upload=False, fallback=False):
"""get from es or from youtube"""
self.get_from_es()
if self.json_data:
return

self.get_from_youtube()
self.get_from_youtube(fallback)

if upload:
self.upload_to_es()
return

def get_from_youtube(self):
def get_from_youtube(self, fallback=False):
"""use bs4 to scrape channel about page"""
self.json_data = ChannelScraper(self.youtube_id).get_json()

if not self.json_data and fallback:
self._video_fallback(fallback)

self.get_channel_art()

def _video_fallback(self, fallback):
"""use video metadata as fallback"""
print(f"{self.youtube_id}: fallback to video metadata")
self.json_data = {
"channel_active": False,
"channel_last_refresh": int(datetime.now().strftime("%s")),
"channel_subs": fallback.get("channel_follower_count", 0),
"channel_name": fallback["uploader"],
"channel_banner_url": False,
"channel_tvart_url": False,
"channel_id": self.youtube_id,
"channel_subscribed": False,
"channel_description": False,
"channel_thumb_url": False,
"channel_views": 0,
}
self._info_json_fallback()

def _info_json_fallback(self):
"""read channel info.json for additional metadata"""
info_json = os.path.join(
self.config["application"]["cache_dir"],
"import",
f"{self.youtube_id}.info.json",
)
if os.path.exists(info_json):
print(f"{self.youtube_id}: read info.json file")
with open(info_json, "r", encoding="utf-8") as f:
content = json.loads(f.read())

self.json_data.update(
{
"channel_subs": content["channel_follower_count"],
"channel_description": content["description"],
}
)

def get_channel_art(self):
"""download channel art for new channels"""
channel_id = self.youtube_id
channel_thumb = self.json_data["channel_thumb_url"]
channel_banner = self.json_data["channel_banner_url"]
ThumbManager().download_chan(
[(channel_id, channel_thumb, channel_banner)]
urls = (
self.json_data["channel_thumb_url"],
self.json_data["channel_banner_url"],
)
ThumbManager(self.youtube_id, item_type="channel").download(urls)

def sync_to_videos(self):
"""sync new channel_dict to all videos of channel"""
@@ -12,13 +12,16 @@ import shutil
import subprocess

from home.src.download.queue import PendingList
from home.src.download.yt_dlp_handler import VideoDownloader
from home.src.download.thumbnails import ThumbManager
from home.src.es.connect import ElasticWrap
from home.src.index.reindex import Reindex
from home.src.index.video import index_new_video
from home.src.index.video import YoutubeVideo, index_new_video
from home.src.ta.config import AppConfig
from home.src.ta.helper import clean_string, ignore_filelist
from home.src.ta.ta_redis import RedisArchivist
from PIL import Image, ImageFile

ImageFile.LOAD_TRUNCATED_IMAGES = True


class FilesystemScanner:
@@ -157,63 +160,140 @@ class FilesystemScanner:
_, _ = ElasticWrap(path).delete()


class ManualImport:
"""import and indexing existing video files"""
class ImportFolderScanner:
"""import and indexing existing video files
- identify all media files belonging to a video
- identify youtube id
- convert if needed
"""

CONFIG = AppConfig().config
CACHE_DIR = CONFIG["application"]["cache_dir"]
IMPORT_DIR = os.path.join(CACHE_DIR, "import")

EXT_MAP = {
"media": [".mp4", ".mkv", ".webm"],
"metadata": [".json"],
"thumb": [".jpg", ".png", ".webp"],
"subtitle": [".vtt"],
}

def __init__(self):
|
||||
self.identified = self.import_folder_parser()
|
||||
self.to_import = False
|
||||
|
||||
def import_folder_parser(self):
|
||||
"""detect files in import folder"""
|
||||
import_files = os.listdir(self.IMPORT_DIR)
|
||||
to_import = ignore_filelist(import_files)
|
||||
to_import.sort()
|
||||
video_files = [i for i in to_import if not i.endswith(".json")]
|
||||
def scan(self):
|
||||
"""scan and match media files"""
|
||||
all_files = self.get_all_files()
|
||||
self.match_files(all_files)
|
||||
self.process_videos()
|
||||
|
||||
identified = []
|
||||
return self.to_import
|
||||
|
||||
for file_path in video_files:
|
||||
def get_all_files(self):
|
||||
"""get all files in /import"""
|
||||
rel_paths = ignore_filelist(os.listdir(self.IMPORT_DIR))
|
||||
all_files = [os.path.join(self.IMPORT_DIR, i) for i in rel_paths]
|
||||
all_files.sort()
|
||||
|
||||
file_dict = {"video_file": file_path}
|
||||
file_name, _ = os.path.splitext(file_path)
|
||||
|
||||
matching_json = [
|
||||
i
|
||||
for i in to_import
|
||||
if i.startswith(file_name) and i.endswith(".json")
|
||||
]
|
||||
if matching_json:
|
||||
json_file = matching_json[0]
|
||||
youtube_id = self.extract_id_from_json(json_file)
|
||||
file_dict.update({"json_file": json_file})
|
||||
else:
|
||||
youtube_id = self.extract_id_from_filename(file_name)
|
||||
file_dict.update({"json_file": False})
|
||||
|
||||
file_dict.update({"youtube_id": youtube_id})
|
||||
identified.append(file_dict)
|
||||
|
||||
return identified
|
||||
return all_files
|
||||
|
||||
@staticmethod
|
||||
def extract_id_from_filename(file_name):
|
||||
def _get_template():
|
||||
"""base dict for video"""
|
||||
return {
|
||||
"media": False,
|
||||
"video_id": False,
|
||||
"metadata": False,
|
||||
"thumb": False,
|
||||
"subtitle": [],
|
||||
}
|
||||
|
||||
def match_files(self, all_files):
|
||||
"""loop through all files, join what matches"""
|
||||
self.to_import = []
|
||||
|
||||
current_video = self._get_template()
|
||||
last_base = False
|
||||
|
||||
for file_path in all_files:
|
||||
base_name_raw, ext = os.path.splitext(file_path)
|
||||
base_name, _ = os.path.splitext(base_name_raw)
|
||||
|
||||
key, file_path = self._detect_type(file_path, ext)
|
||||
if not key or not file_path:
|
||||
continue
|
||||
|
||||
if base_name != last_base:
|
||||
if last_base:
|
||||
self.to_import.append(current_video)
|
||||
|
||||
current_video = self._get_template()
|
||||
last_base = base_name
|
||||
|
||||
if key == "subtitle":
|
||||
current_video["subtitle"].append(file_path)
|
||||
else:
|
||||
current_video[key] = file_path
|
||||
|
||||
if current_video.get("media"):
|
||||
self.to_import.append(current_video)
|
||||
|
||||
def _detect_type(self, file_path, ext):
|
||||
"""detect metadata type for file"""
|
||||
|
||||
for key, value in self.EXT_MAP.items():
|
||||
if ext in value:
|
||||
return key, file_path
|
||||
|
||||
return False, False
|
||||
|
||||
def process_videos(self):
|
||||
"""loop through all videos"""
|
||||
for current_video in self.to_import:
|
||||
if not current_video["media"]:
|
||||
print(f"{current_video}: no matching media file found.")
|
||||
raise ValueError
|
||||
|
||||
self._detect_youtube_id(current_video)
|
||||
self._dump_thumb(current_video)
|
||||
self._convert_thumb(current_video)
|
||||
self._convert_video(current_video)
|
||||
|
||||
ManualImport(current_video, self.CONFIG).run()
|
||||
|
||||
def _detect_youtube_id(self, current_video):
|
||||
"""find video id from filename or json"""
|
||||
print(current_video)
|
||||
youtube_id = self._extract_id_from_filename(current_video["media"])
|
||||
if youtube_id:
|
||||
current_video["video_id"] = youtube_id
|
||||
return
|
||||
|
||||
youtube_id = self._extract_id_from_json(current_video["metadata"])
|
||||
if youtube_id:
|
||||
current_video["video_id"] = youtube_id
|
||||
return
|
||||
|
||||
print(current_video["media"])
|
||||
raise ValueError("failed to find video id")
|
||||
|
||||
@staticmethod
|
||||
def _extract_id_from_filename(file_name):
|
||||
"""
|
||||
look at the file name for the youtube id
|
||||
expects filename ending in [<youtube_id>].<ext>
|
||||
"""
|
||||
id_search = re.search(r"\[([a-zA-Z0-9_-]{11})\]$", file_name)
|
||||
base_name, _ = os.path.splitext(file_name)
|
||||
id_search = re.search(r"\[([a-zA-Z0-9_-]{11})\]$", base_name)
|
||||
if id_search:
|
||||
youtube_id = id_search.group(1)
|
||||
return youtube_id
|
||||
|
||||
print("failed to extract youtube id for: " + file_name)
|
||||
raise Exception
|
||||
print(f"id extraction failed from filename: {file_name}")
|
||||
|
||||
def extract_id_from_json(self, json_file):
|
||||
return False
|
||||
|
||||
def _extract_id_from_json(self, json_file):
|
||||
"""open json file and extract id"""
|
||||
json_path = os.path.join(self.CACHE_DIR, "import", json_file)
|
||||
with open(json_path, "r", encoding="utf-8") as f:
|
||||
@ -223,66 +303,239 @@ class ManualImport:
|
||||
|
||||
return youtube_id
|
||||
|
||||
def process_import(self):
|
||||
"""go through identified media files"""
|
||||
def _dump_thumb(self, current_video):
|
||||
"""extract embedded thumb before converting"""
|
||||
if current_video["thumb"]:
|
||||
return
|
||||
|
||||
all_videos_added = []
|
||||
media_path = current_video["media"]
|
||||
_, ext = os.path.splitext(media_path)
|
||||
|
||||
for media_file in self.identified:
|
||||
json_file = media_file["json_file"]
|
||||
video_file = media_file["video_file"]
|
||||
youtube_id = media_file["youtube_id"]
|
||||
new_path = False
|
||||
if ext == ".mkv":
|
||||
idx, thumb_type = self._get_mkv_thumb_stream(media_path)
|
||||
if idx:
|
||||
new_path = self.dump_mpv_thumb(media_path, idx, thumb_type)
|
||||
|
||||
video_path = os.path.join(self.CACHE_DIR, "import", video_file)
|
||||
elif ext == ".mp4":
|
||||
thumb_type = self.get_mp4_thumb_type(media_path)
|
||||
if thumb_type:
|
||||
new_path = self.dump_mp4_thumb(media_path, thumb_type)
|
||||
|
||||
self.move_to_cache(video_path, youtube_id)
|
||||
if new_path:
|
||||
current_video["thumb"] = new_path
|
||||
|
||||
# identify and archive
|
||||
vid_dict = index_new_video(youtube_id)
|
||||
VideoDownloader([youtube_id]).move_to_archive(vid_dict)
|
||||
youtube_id = vid_dict["youtube_id"]
|
||||
thumb_url = vid_dict["vid_thumb_url"]
|
||||
all_videos_added.append((youtube_id, thumb_url))
|
||||
def _get_mkv_thumb_stream(self, media_path):
|
||||
"""get stream idx of thumbnail for mkv files"""
|
||||
streams = self._get_streams(media_path)
|
||||
attachments = [
|
||||
i for i in streams["streams"] if i["codec_type"] == "attachment"
|
||||
]
|
||||
|
||||
# cleanup
|
||||
if os.path.exists(video_path):
|
||||
os.remove(video_path)
|
||||
if json_file:
|
||||
json_path = os.path.join(self.CACHE_DIR, "import", json_file)
|
||||
os.remove(json_path)
|
||||
for idx, stream in enumerate(attachments):
|
||||
tags = stream["tags"]
|
||||
if "mimetype" in tags and tags["filename"].startswith("cover"):
|
||||
_, ext = os.path.splitext(tags["filename"])
|
||||
return idx, ext
|
||||
|
||||
return all_videos_added
|
||||
return False, False
|
||||
|
||||
def move_to_cache(self, video_path, youtube_id):
|
||||
"""move identified video file to cache, convert to mp4"""
|
||||
file_name = os.path.split(video_path)[-1]
|
||||
video_file, ext = os.path.splitext(file_name)
|
||||
@staticmethod
|
||||
def dump_mpv_thumb(media_path, idx, thumb_type):
|
||||
"""write cover to disk for mkv"""
|
||||
_, media_ext = os.path.splitext(media_path)
|
||||
new_path = f"{media_path.rstrip(media_ext)}{thumb_type}"
|
||||
subprocess.run(
|
||||
[
|
||||
"ffmpeg",
|
||||
"-v",
|
||||
"quiet",
|
||||
f"-dump_attachment:t:{idx}",
|
||||
new_path,
|
||||
"-i",
|
||||
media_path,
|
||||
],
|
||||
check=False,
|
||||
)
|
||||
|
||||
# make sure youtube_id is in filename
|
||||
if youtube_id not in video_file:
|
||||
video_file = f"{video_file}_{youtube_id}"
|
||||
return new_path
|
||||
|
||||
# move, convert if needed
|
||||
def get_mp4_thumb_type(self, media_path):
|
||||
"""dedect filetype of embedded thumbnail"""
|
||||
streams = self._get_streams(media_path)
|
||||
|
||||
for stream in streams["streams"]:
|
||||
if stream["codec_name"] in ["png", "jpg"]:
|
||||
return stream["codec_name"]
|
||||
|
||||
return False
|
||||
|
||||
def _convert_thumb(self, current_video):
|
||||
"""convert all thumbnails to jpg"""
|
||||
if not current_video["thumb"]:
|
||||
return
|
||||
|
||||
thumb_path = current_video["thumb"]
|
||||
|
||||
base_path, ext = os.path.splitext(thumb_path)
|
||||
if ext == ".jpg":
|
||||
return
|
||||
|
||||
new_path = f"{base_path}.jpg"
|
||||
img_raw = Image.open(thumb_path)
|
||||
img_raw.convert("RGB").save(new_path)
|
||||
|
||||
os.remove(thumb_path)
|
||||
current_video["thumb"] = new_path
|
||||
|
||||
@staticmethod
|
||||
def _get_streams(media_path):
|
||||
"""return all streams from media_path"""
|
||||
streams_raw = subprocess.run(
|
||||
[
|
||||
"ffprobe",
|
||||
"-v",
|
||||
"error",
|
||||
"-show_streams",
|
||||
"-print_format",
|
||||
"json",
|
||||
media_path,
|
||||
],
|
||||
capture_output=True,
|
||||
check=True,
|
||||
)
|
||||
streams = json.loads(streams_raw.stdout.decode())
|
||||
|
||||
return streams
|
||||
|
||||
@staticmethod
|
||||
def dump_mp4_thumb(media_path, thumb_type):
|
||||
"""save cover to disk"""
|
||||
_, ext = os.path.splitext(media_path)
|
||||
new_path = f"{media_path.rstrip(ext)}.{thumb_type}"
|
||||
|
||||
subprocess.run(
|
||||
[
|
||||
"ffmpeg",
|
||||
"-i",
|
||||
media_path,
|
||||
"-map",
|
||||
"0:v",
|
||||
"-map",
|
||||
"-0:V",
|
||||
"-c",
|
||||
"copy",
|
||||
new_path,
|
||||
],
|
||||
check=True,
|
||||
)
|
||||
|
||||
return new_path
|
||||
|
||||
def _convert_video(self, current_video):
|
||||
"""convert if needed"""
|
||||
current_path = current_video["media"]
|
||||
base_path, ext = os.path.splitext(current_path)
|
||||
if ext == ".mp4":
|
||||
new_file = video_file + ext
|
||||
dest_path = os.path.join(self.CACHE_DIR, "download", new_file)
|
||||
shutil.move(video_path, dest_path, copy_function=shutil.copyfile)
|
||||
return
|
||||
|
||||
new_path = base_path + ".mp4"
|
||||
subprocess.run(
|
||||
[
|
||||
"ffmpeg",
|
||||
"-i",
|
||||
current_path,
|
||||
new_path,
|
||||
"-loglevel",
|
||||
"warning",
|
||||
"-stats",
|
||||
],
|
||||
check=True,
|
||||
)
|
||||
current_video["media"] = new_path
|
||||
os.remove(current_path)
|
||||
|
||||
|
||||
class ManualImport:
|
||||
"""import single identified video"""
|
||||
|
||||
def __init__(self, current_video, config):
|
||||
self.current_video = current_video
|
||||
self.config = config
|
||||
|
||||
def run(self):
|
||||
"""run all"""
|
||||
json_data = self.index_metadata()
|
||||
self._move_to_archive(json_data)
|
||||
self._cleanup(json_data)
|
||||
|
||||
def index_metadata(self):
|
||||
"""get metadata from yt or json"""
|
||||
video_id = self.current_video["video_id"]
|
||||
video = YoutubeVideo(video_id)
|
||||
video.build_json(
|
||||
youtube_meta_overwrite=self._get_info_json(),
|
||||
media_path=self.current_video["media"],
|
||||
)
|
||||
if not video.json_data:
|
||||
print(f"{video_id}: manual import failed, and no metadata found.")
|
||||
raise ValueError
|
||||
|
||||
video.check_subtitles()
|
||||
video.upload_to_es()
|
||||
|
||||
if video.offline_import and self.current_video["thumb"]:
|
||||
old_path = self.current_video["thumb"]
|
||||
new_path = ThumbManager(video_id).vid_thumb_path(absolute=True)
|
||||
shutil.move(old_path, new_path, copy_function=shutil.copyfile)
|
||||
else:
|
||||
print(f"processing with ffmpeg: {video_file}")
|
||||
new_file = video_file + ".mp4"
|
||||
dest_path = os.path.join(self.CACHE_DIR, "download", new_file)
|
||||
subprocess.run(
|
||||
[
|
||||
"ffmpeg",
|
||||
"-i",
|
||||
video_path,
|
||||
dest_path,
|
||||
"-loglevel",
|
||||
"warning",
|
||||
"-stats",
|
||||
],
|
||||
check=True,
|
||||
)
|
||||
url = video.json_data["vid_thumb_url"]
|
||||
ThumbManager(video_id).download_video_thumb(url)
|
||||
|
||||
return video.json_data
|
||||
|
||||
def _get_info_json(self):
|
||||
"""read info_json from file"""
|
||||
if not self.current_video["metadata"]:
|
||||
return False
|
||||
|
||||
with open(self.current_video["metadata"], "r", encoding="utf-8") as f:
|
||||
info_json = json.loads(f.read())
|
||||
|
||||
return info_json
|
||||
|
||||
def _move_to_archive(self, json_data):
|
||||
"""move identified media file to archive"""
|
||||
videos = self.config["application"]["videos"]
|
||||
|
||||
channel, file = os.path.split(json_data["media_url"])
|
||||
channel_folder = os.path.join(videos, channel)
|
||||
if not os.path.exists(channel_folder):
|
||||
os.makedirs(channel_folder)
|
||||
|
||||
old_path = self.current_video["media"]
|
||||
new_path = os.path.join(channel_folder, file)
|
||||
shutil.move(old_path, new_path, copy_function=shutil.copyfile)
|
||||
|
||||
def _cleanup(self, json_data):
|
||||
"""cleanup leftover files"""
|
||||
if os.path.exists(self.current_video["metadata"]):
|
||||
os.remove(self.current_video["metadata"])
|
||||
|
||||
if os.path.exists(self.current_video["thumb"]):
|
||||
os.remove(self.current_video["thumb"])
|
||||
|
||||
for subtitle_file in self.current_video["subtitle"]:
|
||||
if os.path.exists(subtitle_file):
|
||||
os.remove(subtitle_file)
|
||||
|
||||
channel_info = os.path.join(
|
||||
self.config["application"]["cache_dir"],
|
||||
"import",
|
||||
f"{json_data['channel']['channel_id']}.info.json",
|
||||
)
|
||||
if os.path.exists(channel_info):
|
||||
os.remove(channel_info)
|
||||
|
||||
|
||||
def scan_filesystem():
|
||||
|
@@ -41,7 +41,6 @@ class YoutubePlaylist(YouTubeItem):
self.process_youtube_meta()
self.get_entries()
self.json_data["playlist_entries"] = self.all_members
self.get_playlist_art()
self.json_data["playlist_subscribed"] = subscribed

def process_youtube_meta(self):
@@ -81,12 +80,10 @@ class YoutubePlaylist(YouTubeItem):

self.all_members = all_members

@staticmethod
def get_playlist_art():
def get_playlist_art(self):
"""download artwork of playlist"""
thumbnails = ThumbManager()
missing_playlists = thumbnails.get_missing_playlists()
thumbnails.download_playlist(missing_playlists)
url = self.json_data["playlist_thumbnail"]
ThumbManager(self.youtube_id, item_type="playlist").download(url)

def add_vids_to_playlist(self):
"""sync the playlist id to videos"""
@@ -145,17 +142,15 @@ class YoutubePlaylist(YouTubeItem):
previous_item = False
else:
previous_item = all_entries[current_idx - 1]
prev_thumb = ThumbManager().vid_thumb_path(
previous_item["youtube_id"]
)
previous_item["vid_thumb"] = prev_thumb
prev_id = previous_item["youtube_id"]
previous_item["vid_thumb"] = ThumbManager(prev_id).vid_thumb_path()

if current_idx == len(all_entries) - 1:
next_item = False
else:
next_item = all_entries[current_idx + 1]
next_thumb = ThumbManager().vid_thumb_path(next_item["youtube_id"])
next_item["vid_thumb"] = next_thumb
next_id = next_item["youtube_id"]
next_item["vid_thumb"] = ThumbManager(next_id).vid_thumb_path()

self.nav = {
"playlist_meta": {
@@ -181,10 +181,10 @@ class Reindex:

video.upload_to_es()

thumb_handler = ThumbManager()
thumb_handler.delete_vid_thumb(youtube_id)
to_download = (youtube_id, video.json_data["vid_thumb_url"])
thumb_handler.download_vid([to_download], notify=False)
thumb_handler = ThumbManager(youtube_id)
thumb_handler.delete_video_thumb()
thumb_handler.download_video_thumb(video.json_data["vid_thumb_url"])

return

@staticmethod
@@ -425,18 +425,23 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
self.channel_id = False
self.video_overwrites = video_overwrites
self.es_path = f"{self.index_name}/_doc/{youtube_id}"
self.offline_import = False

def build_json(self):
def build_json(self, youtube_meta_overwrite=False, media_path=False):
"""build json dict of video"""
self.get_from_youtube()
if not self.youtube_meta:
if not self.youtube_meta and not youtube_meta_overwrite:
return

if not self.youtube_meta:
self.youtube_meta = youtube_meta_overwrite
self.offline_import = True

self._process_youtube_meta()
self._add_channel()
self._add_stats()
self.add_file_path()
self.add_player()
self.add_player(media_path)
if self.config["downloads"]["integrate_ryd"]:
self._get_ryd_stats()

@@ -487,7 +492,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
def _add_channel(self):
"""add channel dict to video json_data"""
channel = ta_channel.YoutubeChannel(self.channel_id)
channel.build_json(upload=True)
channel.build_json(upload=True, fallback=self.youtube_meta)
self.json_data.update({"channel": channel.json_data})

def _add_stats(self):
@@ -495,13 +500,14 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
# likes
like_count = self.youtube_meta.get("like_count", 0)
dislike_count = self.youtube_meta.get("dislike_count", 0)
average_rating = self.youtube_meta.get("average_rating", 0)
self.json_data.update(
{
"stats": {
"view_count": self.youtube_meta["view_count"],
"like_count": like_count,
"dislike_count": dislike_count,
"average_rating": self.youtube_meta["average_rating"],
"average_rating": average_rating,
}
}
)
@@ -518,8 +524,28 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):

raise FileNotFoundError

def add_player(self):
def add_player(self, media_path=False):
"""add player information for new videos"""
vid_path = self._get_vid_path(media_path)

duration_handler = DurationConverter()
duration = duration_handler.get_sec(vid_path)
duration_str = duration_handler.get_str(duration)
self.json_data.update(
{
"player": {
"watched": False,
"duration": duration,
"duration_str": duration_str,
}
}
)

def _get_vid_path(self, media_path=False):
"""get path of media file"""
if media_path:
return media_path

try:
# when indexing from download task
vid_path = self.build_dl_cache_path()
@@ -535,18 +561,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
else:
raise FileNotFoundError("could not find video file") from err

duration_handler = DurationConverter()
duration = duration_handler.get_sec(vid_path)
duration_str = duration_handler.get_str(duration)
self.json_data.update(
{
"player": {
"watched": False,
"duration": duration,
"duration_str": duration_str,
}
}
)
return vid_path

def add_file_path(self):
"""build media_url for where file will be located"""
@@ -15,12 +15,12 @@ from home.src.download.subscriptions import (
ChannelSubscription,
PlaylistSubscription,
)
from home.src.download.thumbnails import ThumbManager, validate_thumbnails
from home.src.download.thumbnails import ThumbFilesystem, ThumbValidator
from home.src.download.yt_dlp_handler import VideoDownloader
from home.src.es.index_setup import backup_all_indexes, restore_from_backup
from home.src.index.channel import YoutubeChannel
from home.src.index.filesystem import (
ManualImport,
ImportFolderScanner,
reindex_old_documents,
scan_filesystem,
)
@@ -150,10 +150,7 @@ def run_manual_import():
try:
have_lock = my_lock.acquire(blocking=False)
if have_lock:
import_handler = ManualImport()
if import_handler.identified:
all_videos_added = import_handler.process_import()
ThumbManager().download_vid(all_videos_added)
ImportFolderScanner().scan()
else:
print("Did not acquire lock form import.")

@@ -204,21 +201,19 @@ def kill_dl(task_id):
def rescan_filesystem():
"""check the media folder for mismatches"""
scan_filesystem()
validate_thumbnails()
ThumbValidator().download_missing()


@shared_task(name="thumbnail_check")
def thumbnail_check():
"""validate thumbnails"""
validate_thumbnails()
ThumbValidator().download_missing()


@shared_task
def re_sync_thumbs():
"""sync thumbnails to mediafiles"""
handler = ThumbManager()
video_list = handler.get_thumb_list()
handler.write_all_thumbs(video_list)
ThumbFilesystem().sync()


@shared_task
@@ -229,9 +224,7 @@ def subscribe_to(url_str):
for item in to_subscribe_list:
to_sub_id = item["url"]
if item["type"] == "playlist":
new_thumbs = PlaylistSubscription().process_url_str([item])
if new_thumbs:
ThumbManager().download_playlist(new_thumbs)
PlaylistSubscription().process_url_str([item])
continue

if item["type"] == "video":
@@ -1,6 +1,7 @@
beautifulsoup4==4.11.1
celery==5.2.7
Django==4.0.6
Django==4.1
django-auth-ldap==4.1.0
django-cors-headers==3.13.0
djangorestframework==3.13.1
Pillow==9.2.0
@@ -9,5 +10,4 @@ requests==2.28.1
ryd-client==0.0.3
uWSGI==2.0.20
whitenoise==6.2.0
yt_dlp==2022.7.18
django-auth-ldap==4.1.0
yt_dlp==2022.8.8