Shorts and Streams, #build

Changed
- Added separate channel pages for streams and shorts
- Changed URL parser to take shorts urls
- Changed UI to hide empty channel tabs
- Fixed playlist validator to only validate active playlists
- Fixed reindex task not triggering for channels and playlists
- [API] Add download now endpoint
This commit is contained in:
simon 2023-01-07 18:45:48 +07:00
commit 61b9110a5c
No known key found for this signature in database
GPG Key ID: 2C15AA5E89985DD4
27 changed files with 429 additions and 231 deletions

View File

@ -3,7 +3,7 @@
# First stage to build python wheel
FROM python:3.10.8-slim-bullseye AS builder
FROM python:3.10.9-slim-bullseye AS builder
ARG TARGETPLATFORM
RUN apt-get update && apt-get install -y --no-install-recommends \
@ -14,7 +14,7 @@ COPY ./tubearchivist/requirements.txt /requirements.txt
RUN pip install --user -r requirements.txt
# build final image
FROM python:3.10.8-slim-bullseye as tubearchivist
FROM python:3.10.9-slim-bullseye as tubearchivist
ARG TARGETPLATFORM
ARG INSTALL_DEBUG

View File

@ -221,6 +221,13 @@ Add to queue previously ignored video:
}
```
Download existing video now:
```json
{
"status": "priority"
}
```
DELETE /api/download/\<video_id>/
Forget or delete from download queue

View File

@ -13,9 +13,9 @@ from home.src.index.generic import Pagination
from home.src.index.reindex import ReindexProgress
from home.src.index.video import SponsorBlock, YoutubeVideo
from home.src.ta.config import AppConfig
from home.src.ta.helper import UrlListParser
from home.src.ta.ta_redis import RedisArchivist, RedisQueue
from home.tasks import check_reindex, extrac_dl, subscribe_to
from home.src.ta.urlparser import Parser
from home.tasks import check_reindex, download_single, extrac_dl, subscribe_to
from rest_framework.authentication import (
SessionAuthentication,
TokenAuthentication,
@ -395,7 +395,7 @@ class DownloadApiView(ApiBaseView):
"""
search_base = "ta_download/_doc/"
valid_status = ["pending", "ignore"]
valid_status = ["pending", "ignore", "priority"]
def get(self, request, video_id):
# pylint: disable=unused-argument
@ -411,8 +411,16 @@ class DownloadApiView(ApiBaseView):
print(message)
return Response({"message": message}, status=400)
pending_video, status_code = PendingInteract(video_id).get_item()
if status_code == 404:
message = f"{video_id}: item not found {status_code}"
return Response({"message": message}, status=404)
print(f"{video_id}: change status to {item_status}")
PendingInteract(video_id=video_id, status=item_status).update_status()
if item_status == "priority":
download_single.delay(pending_video)
else:
PendingInteract(video_id, item_status).update_status()
RedisQueue(queue_name="dl_queue").clear_item(video_id)
return Response(request.data)
@ -476,7 +484,7 @@ class DownloadApiListView(ApiBaseView):
pending = [i["youtube_id"] for i in to_add if i["status"] == "pending"]
url_str = " ".join(pending)
try:
youtube_ids = UrlListParser(url_str).process_list()
youtube_ids = Parser(url_str).parse()
except ValueError:
message = f"failed to parse: {url_str}"
print(message)

View File

@ -129,14 +129,18 @@ class StartupCheck:
def es_set_vid_type(self):
"""update path 0.3.0 to 0.3.1, set default vid_type to video"""
index_list = ["ta_video", "ta_download"]
data = {
"query": {
"bool": {"must_not": [{"exists": {"field": "vid_type"}}]}
},
"script": {"source": "ctx._source['vid_type'] = 'video'"},
"script": {"source": "ctx._source['vid_type'] = 'videos'"},
}
response, _ = ElasticWrap("ta_video/_update_by_query").post(data=data)
print(f"ta_video vid_type index update ran: {response}")
for index_name in index_list:
path = f"{index_name}/_update_by_query"
response, _ = ElasticWrap(path).post(data=data)
print(f"{index_name} vid_type index update ran: {response}")
class HomeConfig(AppConfig):

View File

@ -15,8 +15,8 @@
"auto_search": false,
"auto_download": false,
"channel_size": 50,
"live_channel_size": 0,
"shorts_channel_size": 0
"live_channel_size": 50,
"shorts_channel_size": 50
},
"downloads": {
"limit_count": false,

View File

@ -17,7 +17,7 @@ from home.src.es.connect import ElasticWrap, IndexPaginate
from home.src.index.playlist import YoutubePlaylist
from home.src.index.video_constants import VideoTypeEnum
from home.src.ta.config import AppConfig
from home.src.ta.helper import DurationConverter
from home.src.ta.helper import DurationConverter, is_shorts
from home.src.ta.ta_redis import RedisArchivist
@ -117,6 +117,12 @@ class PendingInteract:
path = f"ta_download/_update/{self.video_id}"
_, _ = ElasticWrap(path).post(data=data)
def get_item(self):
    """return pending item dict and the es status code

    On a missing document Elasticsearch answers 404 with a body that has
    no "_source" key, so use .get() instead of indexing — otherwise this
    raises KeyError before the caller can inspect status_code.
    Returns (source_dict, status_code), source is False when not found.
    """
    path = f"ta_download/_doc/{self.video_id}"
    response, status_code = ElasticWrap(path).get()
    return response.get("_source", False), status_code
class PendingList(PendingIndex):
"""manage the pending videos list"""
@ -156,28 +162,37 @@ class PendingList(PendingIndex):
def _process_entry(self, entry):
"""process single entry from url list"""
vid_type = self._get_vid_type(entry)
if entry["type"] == "video":
vid_type = entry.get("vid_type", VideoTypeEnum.VIDEO)
self._add_video(entry["url"], vid_type)
elif entry["type"] == "channel":
self._parse_channel(entry["url"])
self._parse_channel(entry["url"], vid_type)
elif entry["type"] == "playlist":
self._parse_playlist(entry["url"])
PlaylistSubscription().process_url_str([entry], subscribed=False)
else:
raise ValueError(f"invalid url_type: {entry}")
def _add_video(self, url, vid_type=VideoTypeEnum.VIDEO):
@staticmethod
def _get_vid_type(entry):
    """map the optional vid_type field onto the enum, UNKNOWN if unset"""
    raw_type = entry.get("vid_type")
    # missing or empty vid_type means the parser could not identify it
    return VideoTypeEnum(raw_type) if raw_type else VideoTypeEnum.UNKNOWN
def _add_video(self, url, vid_type):
"""add video to list"""
if url not in self.missing_videos and url not in self.to_skip:
self.missing_videos.append((url, vid_type))
else:
print(f"{url}: skipped adding already indexed video to download.")
def _parse_channel(self, url):
def _parse_channel(self, url, vid_type):
"""add all videos of channel to list"""
video_results = ChannelSubscription().get_last_youtube_videos(
url, limit=False
url, limit=False, query_filter=vid_type
)
for video_id, _, vid_type in video_results:
self._add_video(video_id, vid_type)
@ -189,9 +204,8 @@ class PendingList(PendingIndex):
video_results = playlist.json_data.get("playlist_entries")
youtube_ids = [i["youtube_id"] for i in video_results]
for video_id in youtube_ids:
# FIXME: This will need to be adjusted to support Live/Shorts
# from playlists
self._add_video(video_id, VideoTypeEnum.VIDEO)
# match vid_type later
self._add_video(video_id, VideoTypeEnum.UNKNOWN)
def add_to_pending(self, status="pending"):
"""add missing videos to pending list"""
@ -238,7 +252,7 @@ class PendingList(PendingIndex):
if idx + 1 % 25 == 0:
print("adding to queue progress: " + progress)
def get_youtube_details(self, youtube_id, vid_type=VideoTypeEnum.VIDEO):
def get_youtube_details(self, youtube_id, vid_type=VideoTypeEnum.VIDEOS):
"""get details from youtubedl for single pending video"""
vid = YtWrap(self.yt_obs, self.config).extract(youtube_id)
if not vid:
@ -252,9 +266,28 @@ class PendingList(PendingIndex):
if vid["live_status"] in ["is_upcoming", "is_live"]:
return False
if vid["live_status"] == "was_live":
vid_type = VideoTypeEnum.STREAMS
else:
if self._check_shorts(vid):
vid_type = VideoTypeEnum.SHORTS
return self._parse_youtube_details(vid, vid_type)
def _parse_youtube_details(self, vid, vid_type=VideoTypeEnum.VIDEO):
@staticmethod
def _check_shorts(vid):
    """check if vid is shorts video"""
    # landscape format can never be a shorts video
    # assumes yt-dlp metadata always carries width/height here — TODO confirm
    if vid["width"] > vid["height"]:
        return False

    # shorts are capped at 60 seconds, longer vertical vids are regular
    duration = vid.get("duration")
    if duration and isinstance(duration, int):
        if duration > 60:
            return False

    # vertical and short enough: confirm via the shorts URL helper
    return is_shorts(vid["id"])
def _parse_youtube_details(self, vid, vid_type=VideoTypeEnum.VIDEOS):
"""parse response"""
vid_id = vid.get("id")
duration_str = DurationConverter.get_str(vid["duration"])

View File

@ -36,30 +36,15 @@ class ChannelSubscription:
return all_channels
def get_last_youtube_videos(self, channel_id, limit=True):
def get_last_youtube_videos(
self, channel_id, limit=True, query_filter=VideoTypeEnum.UNKNOWN
):
"""get a list of last videos from channel"""
queries = [
(
VideoTypeEnum.VIDEO,
"videos",
self.config["subscriptions"]["channel_size"],
),
(
VideoTypeEnum.LIVE,
"streams",
self.config["subscriptions"]["live_channel_size"],
),
(
VideoTypeEnum.SHORT,
"shorts",
self.config["subscriptions"]["shorts_channel_size"],
),
]
queries = self._build_queries(query_filter, limit)
last_videos = []
for vid_type, url, limit_amount in queries:
for vid_type, limit_amount in queries:
obs = {
"skip_download": True,
"extract_flat": True,
@ -67,8 +52,9 @@ class ChannelSubscription:
if limit:
obs["playlistend"] = limit_amount
path = vid_type.value
channel = YtWrap(obs, self.config).extract(
f"https://www.youtube.com/channel/{channel_id}/{url}"
f"https://www.youtube.com/channel/{channel_id}/{path}"
)
if not channel:
continue
@ -78,6 +64,36 @@ class ChannelSubscription:
return last_videos
def _build_queries(self, query_filter, limit):
    """build query list of (vid_type, limit) tuples to scrape"""
    # per-type page sizes configured under subscriptions
    sub_config = self.config["subscriptions"]
    limit_map = {
        "videos": sub_config["channel_size"],
        "streams": sub_config["live_channel_size"],
        "shorts": sub_config["shorts_channel_size"],
    }

    if query_filter and query_filter.value != "unknown":
        # caller asked for one specific vid_type only
        query_limit = limit_map.get(query_filter.value) if limit else False
        return [(query_filter, query_limit)]

    # no filter: scrape every known vid_type tab
    return [
        (VideoTypeEnum(type_name), default_limit if limit else False)
        for type_name, default_limit in limit_map.items()
    ]
def find_missing(self):
"""add missing videos from subscribed channels to pending"""
all_channels = self.get_channels()

View File

@ -104,7 +104,7 @@ class DownloadPostProcess:
continue
# validate from local
playlists = channel.get_indexed_playlists()
playlists = channel.get_indexed_playlists(active_only=True)
all_channel_playlist = [i["playlist_id"] for i in playlists]
self._validate_channel_playlist(all_channel_playlist, id_c)
@ -117,6 +117,7 @@ class DownloadPostProcess:
playlist.build_json(scrape=True)
if not playlist.json_data:
playlist.deactivate()
continue
playlist.add_vids_to_playlist()
playlist.upload_to_es()
@ -188,7 +189,7 @@ class VideoDownloader:
youtube_id = youtube_data.get("youtube_id")
tmp_vid_type = youtube_data.get(
"vid_type", VideoTypeEnum.VIDEO.value
"vid_type", VideoTypeEnum.VIDEOS.value
)
video_type = VideoTypeEnum(tmp_vid_type)
print(f"Downloading type: {video_type}")
@ -268,7 +269,7 @@ class VideoDownloader:
"youtube_id": i["youtube_id"],
# Using .value in default val to match what would be
# decoded when parsing json if not set
"vid_type": i.get("vid_type", VideoTypeEnum.VIDEO.value),
"vid_type": i.get("vid_type", VideoTypeEnum.VIDEOS.value),
}
)
for i in pending.all_pending

View File

@ -34,9 +34,11 @@ class ElasticWrap:
def get(self, data=False):
"""get data from es"""
if data:
response = requests.get(self.url, json=data, auth=self.auth)
response = requests.get(
self.url, json=data, auth=self.auth, timeout=10
)
else:
response = requests.get(self.url, auth=self.auth)
response = requests.get(self.url, auth=self.auth, timeout=10)
if not response.ok:
print(response.text)
@ -53,10 +55,16 @@ class ElasticWrap:
if data:
response = requests.post(
self.url, data=payload, headers=headers, auth=self.auth
self.url,
data=payload,
headers=headers,
auth=self.auth,
timeout=10,
)
else:
response = requests.post(self.url, headers=headers, auth=self.auth)
response = requests.post(
self.url, headers=headers, auth=self.auth, timeout=10
)
if not response.ok:
print(response.text)
@ -67,7 +75,9 @@ class ElasticWrap:
"""put data to es"""
if refresh:
self.url = f"{self.url}/?refresh=true"
response = requests.put(f"{self.url}", json=data, auth=self.auth)
response = requests.put(
f"{self.url}", json=data, auth=self.auth, timeout=10
)
if not response.ok:
print(response.text)
print(data)
@ -80,9 +90,11 @@ class ElasticWrap:
if refresh:
self.url = f"{self.url}/?refresh=true"
if data:
response = requests.delete(self.url, json=data, auth=self.auth)
response = requests.delete(
self.url, json=data, auth=self.auth, timeout=10
)
else:
response = requests.delete(self.url, auth=self.auth)
response = requests.delete(self.url, auth=self.auth, timeout=10)
if not response.ok:
print(response.text)

View File

@ -9,11 +9,10 @@ from home.src.download.subscriptions import (
PlaylistSubscription,
)
from home.src.index.playlist import YoutubePlaylist
from home.src.ta.helper import UrlListParser
from home.src.ta.ta_redis import RedisArchivist, RedisQueue
from home.src.ta.urlparser import Parser
from home.tasks import (
download_pending,
download_single,
index_channel_playlists,
kill_dl,
re_sync_thumbs,
@ -56,7 +55,6 @@ class PostData:
"sort_order": self._sort_order,
"hide_watched": self._hide_watched,
"show_subed_only": self._show_subed_only,
"dlnow": self._dlnow,
"show_ignored_only": self._show_ignored_only,
"manual-import": self._manual_import,
"re-embed": self._re_embed,
@ -125,7 +123,7 @@ class PostData:
"""unsubscribe from channels or playlists"""
id_unsub = self.exec_val
print(f"{id_unsub}: unsubscribe")
to_unsub_list = UrlListParser(id_unsub).process_list()
to_unsub_list = Parser(id_unsub).parse()
for to_unsub in to_unsub_list:
unsub_type = to_unsub["type"]
unsub_id = to_unsub["url"]
@ -178,16 +176,6 @@ class PostData:
RedisArchivist().set_message(key, message)
return {"success": True}
def _dlnow(self):
"""start downloading single vid now"""
youtube_id = self.exec_val
print(f"{youtube_id}: downloading now")
running = download_single.delay(youtube_id=youtube_id)
task_id = running.id
print("set task id: " + task_id)
RedisArchivist().set_message("dl_queue_id", task_id)
return {"success": True}
def _show_ignored_only(self):
"""switch view on /downloads/ to show ignored only"""
show_value = self.exec_val

View File

@ -6,7 +6,7 @@ functionality:
from datetime import datetime
from home.src.es.connect import ElasticWrap
from home.src.ta.helper import UrlListParser
from home.src.ta.urlparser import Parser
class WatchState:
@ -34,7 +34,7 @@ class WatchState:
def _dedect_type(self):
"""find youtube id type"""
print(self.youtube_id)
url_process = UrlListParser(self.youtube_id).process_list()
url_process = Parser(self.youtube_id).parse()
url_type = url_process[0]["type"]
return url_type

View File

@ -392,14 +392,16 @@ class YoutubeChannel(YouTubeItem):
all_entries = [(i["id"], i["title"]) for i in playlists["entries"]]
self.all_playlists = all_entries
def get_indexed_playlists(self):
def get_indexed_playlists(self, active_only=False):
"""get all indexed playlists from channel"""
data = {
"query": {
"term": {"playlist_channel_id": {"value": self.youtube_id}}
},
"sort": [{"playlist_channel.keyword": {"order": "desc"}}],
}
must_list = [
{"term": {"playlist_channel_id": {"value": self.youtube_id}}}
]
if active_only:
must_list.append({"term": {"playlist_active": {"value": True}}})
data = {"query": {"bool": {"must": must_list}}}
all_playlists = IndexPaginate("ta_playlist", data).get_results()
return all_playlists

View File

@ -100,7 +100,7 @@ class ReindexOutdated(ReindexBase):
refresh_key = reindex_config["refresh_key"]
now_lte = self.now - self.interval * 24 * 60 * 60
must_list = [
{"match": {"active": True}},
{"match": {reindex_config["active_key"]: True}},
{"range": {refresh_key: {"lte": now_lte}}},
]
data = {
@ -205,7 +205,7 @@ class Reindex(ReindexBase):
def reindex_all(self):
"""reindex all in queue"""
if self.cookie_invalid():
if not self.cookie_is_valid():
print("[reindex] cookie invalid, exiting...")
return
@ -246,7 +246,7 @@ class Reindex(ReindexBase):
try:
self._reindex_single_video_call(youtube_id)
except FileNotFoundError:
ChannelUrlFixer(youtube_id, self.config)
ChannelUrlFixer(youtube_id, self.config).run()
self._reindex_single_video_call(youtube_id)
def _reindex_single_video_call(self, youtube_id):
@ -333,8 +333,8 @@ class Reindex(ReindexBase):
handler.get_indexed()
self.all_indexed_ids = [i["youtube_id"] for i in handler.all_videos]
def cookie_invalid(self):
"""return true if cookie is enabled and invalid"""
def cookie_is_valid(self):
"""return true if cookie is enabled and valid"""
if not self.config["downloads"]["cookie_import"]:
return False

View File

@ -128,7 +128,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
self,
youtube_id,
video_overwrites=False,
video_type=VideoTypeEnum.VIDEO,
video_type=VideoTypeEnum.VIDEOS,
):
super().__init__(youtube_id)
self.channel_id = False
@ -406,7 +406,7 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
def index_new_video(
youtube_id, video_overwrites=False, video_type=VideoTypeEnum.VIDEO
youtube_id, video_overwrites=False, video_type=VideoTypeEnum.VIDEOS
):
"""combined classes to create new video in index"""
video = YoutubeVideo(

View File

@ -1,7 +1,12 @@
"""video constants"""
import enum
class VideoTypeEnum(enum.Enum):
VIDEO = "video"
LIVE = "live"
SHORT = "short"
"""all vid_type fields"""
VIDEOS = "videos"
STREAMS = "streams"
SHORTS = "shorts"
UNKNOWN = "unknown"

View File

@ -11,9 +11,8 @@ import string
import subprocess
import unicodedata
from datetime import datetime
from urllib.parse import parse_qs, urlparse
from home.src.download.yt_dlp_base import YtWrap
import requests
def clean_string(file_name):
@ -136,104 +135,14 @@ def get_mapping():
return index_config
class UrlListParser:
"""take a multi line string and detect valid youtube ids"""
def is_shorts(youtube_id):
"""check if youtube_id is a shorts video, bot not it it's not a shorts"""
shorts_url = f"https://www.youtube.com/shorts/{youtube_id}"
response = requests.head(
shorts_url, headers=requests_headers(), timeout=10
)
def __init__(self, url_str):
self.url_list = [i.strip() for i in url_str.split()]
def process_list(self):
"""loop through the list"""
youtube_ids = []
for url in self.url_list:
parsed = urlparse(url)
print(f"processing: {url}")
print(parsed)
if not parsed.netloc:
# is not a url
id_type = self.find_valid_id(url)
youtube_id = url
elif "youtube.com" not in url and "youtu.be" not in url:
raise ValueError(f"{url} is not a youtube link")
elif parsed.path:
# is a url
youtube_id, id_type = self.detect_from_url(parsed)
else:
# not detected
raise ValueError(f"failed to detect {url}")
youtube_ids.append({"url": youtube_id, "type": id_type})
return youtube_ids
def detect_from_url(self, parsed):
"""detect from parsed url"""
if parsed.netloc == "youtu.be":
# shortened
youtube_id = parsed.path.strip("/")
_ = self.find_valid_id(youtube_id)
return youtube_id, "video"
if parsed.query:
# detect from query string
query_parsed = parse_qs(parsed.query)
if "v" in query_parsed:
youtube_id = query_parsed["v"][0]
_ = self.find_valid_id(youtube_id)
return youtube_id, "video"
if "list" in query_parsed:
youtube_id = query_parsed["list"][0]
return youtube_id, "playlist"
if parsed.path.startswith("/channel/"):
# channel id in url
youtube_id = parsed.path.split("/")[2]
_ = self.find_valid_id(youtube_id)
return youtube_id, "channel"
# detect channel with yt_dlp
youtube_id = self.extract_channel_name(parsed.geturl())
return youtube_id, "channel"
@staticmethod
def find_valid_id(id_str):
"""detect valid id from length of string"""
str_len = len(id_str)
if str_len == 11:
id_type = "video"
elif str_len == 24:
id_type = "channel"
elif str_len in [34, 18] or id_str in ["LL", "WL"]:
id_type = "playlist"
else:
# unable to parse
raise ValueError("not a valid id_str: " + id_str)
return id_type
@staticmethod
def extract_channel_name(url):
"""find channel id from channel name with yt-dlp help"""
obs_request = {
"skip_download": True,
"extract_flat": True,
"playlistend": 0,
}
url_info = YtWrap(obs_request).extract(url)
channel_id = url_info.get("channel_id", False)
if channel_id:
return channel_id
url = url_info.get("url", False)
if url:
# handle old channel name redirect with url path split
channel_id = urlparse(url).path.strip("/").split("/")[1]
return channel_id
print(f"failed to extract channel id from {url}")
raise ValueError
return response.status_code == 200
class DurationConverter:

View File

@ -0,0 +1,133 @@
"""
Functionality:
- detect valid youtube ids and links from multi line string
- identify vid_type if possible
"""
from urllib.parse import parse_qs, urlparse
from home.src.download.yt_dlp_base import YtWrap
from home.src.index.video_constants import VideoTypeEnum
class Parser:
    """take a multi line string and detect valid youtube ids"""

    def __init__(self, url_str):
        # split on any whitespace, one candidate id or url per entry
        self.url_list = [i.strip() for i in url_str.split()]

    def parse(self):
        """parse the list

        returns a list of dicts with keys: type (video/channel/playlist),
        url (the extracted youtube id) and vid_type (serialized enum value)
        """
        ids = []
        for url in self.url_list:
            parsed = urlparse(url)
            if parsed.netloc:
                # is url
                identified = self.process_url(parsed)
            else:
                # is not url
                identified = self._find_valid_id(url)

            if "vid_type" not in identified:
                # derive videos/streams/shorts from the url path if possible
                identified.update(self._detect_vid_type(parsed.path))

            ids.append(identified)

        return ids

    def process_url(self, parsed):
        """process as url"""
        if parsed.netloc == "youtu.be":
            # shortened
            youtube_id = parsed.path.strip("/")
            return self._validate_expected(youtube_id, "video")

        query_parsed = parse_qs(parsed.query)
        if "v" in query_parsed:
            # video from v query str
            youtube_id = query_parsed["v"][0]
            return self._validate_expected(youtube_id, "video")

        if "list" in query_parsed:
            # playlist from list query str
            youtube_id = query_parsed["list"][0]
            return self._validate_expected(youtube_id, "playlist")

        all_paths = parsed.path.strip("/").split("/")
        if all_paths[0] == "shorts":
            # is shorts video
            item = self._validate_expected(all_paths[1], "video")
            item.update({"vid_type": VideoTypeEnum.SHORTS.value})
            return item

        if all_paths[0] == "channel":
            return self._validate_expected(all_paths[1], "channel")

        # detect channel
        channel_id = self._extract_channel_name(parsed.geturl())
        return {"type": "channel", "url": channel_id}

    def _validate_expected(self, youtube_id, expected_type):
        """raise value error if not matching"""
        matched = self._find_valid_id(youtube_id)
        if matched["type"] != expected_type:
            raise ValueError(
                f"{youtube_id} not of expected type {expected_type}"
            )

        return {"type": expected_type, "url": youtube_id}

    def _find_valid_id(self, id_str):
        """detect valid id from length of string"""
        if id_str in ("LL", "WL"):
            # liked videos and watch later special playlists
            return {"type": "playlist", "url": id_str}

        if id_str.startswith("@"):
            # @handle needs a remote lookup to resolve the channel id
            url = f"https://www.youtube.com/{id_str}"
            channel_id = self._extract_channel_name(url)
            return {"type": "channel", "url": channel_id}

        # id length identifies the type: 11 video, 24 channel, 34/18 playlist
        len_id_str = len(id_str)
        if len_id_str == 11:
            item_type = "video"
        elif len_id_str == 24:
            item_type = "channel"
        elif len_id_str in (34, 18):
            item_type = "playlist"
        else:
            raise ValueError(f"not a valid id_str: {id_str}")

        return {"type": item_type, "url": id_str}

    @staticmethod
    def _extract_channel_name(url):
        """find channel id from channel name with yt-dlp help"""
        # flat extraction with playlistend 0: metadata only, no entries
        obs_request = {
            "skip_download": True,
            "extract_flat": True,
            "playlistend": 0,
        }
        url_info = YtWrap(obs_request).extract(url)
        channel_id = url_info.get("channel_id", False)
        if channel_id:
            return channel_id

        url = url_info.get("url", False)
        if url:
            # handle old channel name redirect with url path split
            channel_id = urlparse(url).path.strip("/").split("/")[1]
            return channel_id

        print(f"failed to extract channel id from {url}")
        raise ValueError

    def _detect_vid_type(self, path):
        """try to match enum from path, needs to be serializable"""
        last = path.strip("/").split("/")[-1]
        try:
            vid_type = VideoTypeEnum(last).value
        except ValueError:
            # path segment is not a known tab name, leave as unknown
            vid_type = VideoTypeEnum.UNKNOWN.value

        return {"vid_type": vid_type}

View File

@ -6,6 +6,7 @@ Functionality:
because tasks are initiated at application start
"""
import json
import os
from celery import Celery, shared_task
@ -24,8 +25,9 @@ from home.src.index.filesystem import ImportFolderScanner, scan_filesystem
from home.src.index.reindex import Reindex, ReindexManual, ReindexOutdated
from home.src.index.video_constants import VideoTypeEnum
from home.src.ta.config import AppConfig, ReleaseVersion, ScheduleBuilder
from home.src.ta.helper import UrlListParser, clear_dl_cache
from home.src.ta.helper import clear_dl_cache
from home.src.ta.ta_redis import RedisArchivist, RedisQueue
from home.src.ta.urlparser import Parser
CONFIG = AppConfig().config
REDIS_HOST = os.environ.get("REDIS_HOST")
@ -67,7 +69,7 @@ def update_subscribed():
playlist_videos = [
{
"type": "video",
"vid_type": VideoTypeEnum.VIDEO,
"vid_type": VideoTypeEnum.VIDEOS,
"url": i,
}
for i in missing_from_playlists
@ -107,11 +109,15 @@ def download_pending():
@shared_task
def download_single(youtube_id):
def download_single(pending_video):
"""start download single video now"""
queue = RedisQueue(queue_name="dl_queue")
queue.add_priority(youtube_id)
print("Added to queue with priority: " + youtube_id)
to_add = {
"youtube_id": pending_video["youtube_id"],
"vid_type": pending_video["vid_type"],
}
queue.add_priority(json.dumps(to_add))
print(f"Added to queue with priority: {to_add}")
# start queue if needed
have_lock = False
my_lock = RedisArchivist().get_lock("downloading")
@ -256,9 +262,8 @@ def re_sync_thumbs():
@shared_task
def subscribe_to(url_str):
"""take a list of urls to subscribe to"""
to_subscribe_list = UrlListParser(url_str).process_list()
counter = 1
for item in to_subscribe_list:
to_subscribe_list = Parser(url_str).parse()
for idx, item in enumerate(to_subscribe_list):
to_sub_id = item["url"]
if item["type"] == "playlist":
PlaylistSubscription().process_url_str([item])
@ -281,10 +286,9 @@ def subscribe_to(url_str):
"status": key,
"level": "info",
"title": "Subscribing to Channels",
"message": f"Processing {counter} of {len(to_subscribe_list)}",
"message": f"Processing {idx + 1} of {len(to_subscribe_list)}",
}
RedisArchivist().set_message(key, message=message, expire=True)
counter = counter + 1
@shared_task

View File

@ -8,9 +8,15 @@
</div>
<div class="info-box-item channel-nav">
<a href="{% url 'channel_id' channel_info.channel_id %}"><h3>Videos</h3></a>
<a href="{% url 'channel_id_live' channel_info.channel_id %}"><h3>Live</h3></a>
{% if has_streams %}
<a href="{% url 'channel_id_live' channel_info.channel_id %}"><h3>Streams</h3></a>
{% endif %}
{% if has_shorts %}
<a href="{% url 'channel_id_shorts' channel_info.channel_id %}"><h3>Shorts</h3></a>
{% endif %}
{% if has_playlists %}
<a href="{% url 'channel_id_playlist' channel_info.channel_id %}"><h3>Playlists</h3></a>
{% endif %}
<a href="{% url 'channel_id_about' channel_info.channel_id %}"><h3>About</h3></a>
{% if has_pending %}
<a href="{% url 'downloads' %}?channel={{ channel_info.channel_id }}"><h3>Downloads</h3></a>

View File

@ -8,9 +8,15 @@
</div>
<div class="info-box-item channel-nav">
<a href="{% url 'channel_id' channel_info.channel_id %}"><h3>Videos</h3></a>
<a href="{% url 'channel_id_live' channel_info.channel_id %}"><h3>Live</h3></a>
{% if has_streams %}
<a href="{% url 'channel_id_live' channel_info.channel_id %}"><h3>Streams</h3></a>
{% endif %}
{% if has_shorts %}
<a href="{% url 'channel_id_shorts' channel_info.channel_id %}"><h3>Shorts</h3></a>
{% endif %}
{% if has_playlists %}
<a href="{% url 'channel_id_playlist' channel_info.channel_id %}"><h3>Playlists</h3></a>
{% endif %}
<a href="{% url 'channel_id_about' channel_info.channel_id %}"><h3>About</h3></a>
{% if has_pending %}
<a href="{% url 'downloads' %}?channel={{ channel_info.channel_id }}"><h3>Downloads</h3></a>

View File

@ -8,9 +8,15 @@
</div>
<div class="info-box-item channel-nav">
<a href="{% url 'channel_id' channel_info.channel_id %}"><h3>Videos</h3></a>
<a href="{% url 'channel_id_live' channel_info.channel_id %}"><h3>Live</h3></a>
{% if has_streams %}
<a href="{% url 'channel_id_live' channel_info.channel_id %}"><h3>Streams</h3></a>
{% endif %}
{% if has_shorts %}
<a href="{% url 'channel_id_shorts' channel_info.channel_id %}"><h3>Shorts</h3></a>
{% endif %}
{% if has_playlists %}
<a href="{% url 'channel_id_playlist' channel_info.channel_id %}"><h3>Playlists</h3></a>
{% endif %}
<a href="{% url 'channel_id_about' channel_info.channel_id %}"><h3>About</h3></a>
{% if has_pending %}
<a href="{% url 'downloads' %}?channel={{ channel_info.channel_id }}"><h3>Downloads</h3></a>

View File

@ -73,11 +73,14 @@
<div class="video-thumb-wrap {{ view_style }}">
<div class="video-thumb">
<img src="{{ video.source.vid_thumb_url }}" alt="video_thumb">
<div class="video-tags">
{% if show_ignored_only %}
<span>ignored</span>
{% else %}
<span>queued</span>
{% endif %}
<span>{{ video.source.vid_type }}</span>
</div>
</div>
</div>
<div class="video-desc {{ view_style }}">

View File

@ -47,7 +47,7 @@ urlpatterns = [
name="channel_id",
),
path(
"channel/<slug:channel_id>/live/",
"channel/<slug:channel_id>/streams/",
login_required(ChannelIdLiveView.as_view()),
name="channel_id_live",
),

View File

@ -38,8 +38,9 @@ from home.src.index.playlist import YoutubePlaylist
from home.src.index.reindex import ReindexProgress
from home.src.index.video_constants import VideoTypeEnum
from home.src.ta.config import AppConfig, ReleaseVersion, ScheduleBuilder
from home.src.ta.helper import UrlListParser, time_parser
from home.src.ta.helper import time_parser
from home.src.ta.ta_redis import RedisArchivist
from home.src.ta.urlparser import Parser
from home.tasks import extrac_dl, index_channel_playlists, subscribe_to
from rest_framework.authtoken.models import Token
@ -456,7 +457,7 @@ class DownloadView(ArchivistResultsView):
url_str = request.POST.get("vid_url")
print(url_str)
try:
youtube_ids = UrlListParser(url_str).process_list()
youtube_ids = Parser(url_str).parse()
except ValueError:
# failed to process
key = "message:add"
@ -488,6 +489,13 @@ class ChannelIdBaseView(ArchivistResultsView):
return channel_info
def channel_pages(self, channel_id):
    """get additional context for channel pages"""
    # each call sets one template context flag used by the channel nav:
    # has_pending, has_streams, has_shorts, has_playlists
    self.channel_has_pending(channel_id)
    self.channel_has_streams(channel_id)
    self.channel_has_shorts(channel_id)
    self.channel_has_playlist(channel_id)
def channel_has_pending(self, channel_id):
"""check if channel has pending videos in queue"""
path = "ta_download/_search"
@ -501,11 +509,53 @@ class ChannelIdBaseView(ArchivistResultsView):
]
}
},
"_source": False,
}
response, _ = ElasticWrap(path).get(data=data)
self.context.update({"has_pending": bool(response["hits"]["hits"])})
def channel_has_streams(self, channel_id):
"""check if channel has streams videos"""
data = self.get_type_data("streams", channel_id)
response, _ = ElasticWrap("ta_video/_search").get(data=data)
self.context.update({"has_streams": bool(response["hits"]["hits"])})
def channel_has_shorts(self, channel_id):
"""check if channel has shorts videos"""
data = self.get_type_data("shorts", channel_id)
response, _ = ElasticWrap("ta_video/_search").get(data=data)
self.context.update({"has_shorts": bool(response["hits"]["hits"])})
@staticmethod
def get_type_data(vid_type, channel):
"""build data query for vid_type"""
return {
"size": 1,
"query": {
"bool": {
"must": [
{"term": {"vid_type": {"value": vid_type}}},
{"term": {"channel.channel_id": {"value": channel}}},
]
}
},
"_source": False,
}
def channel_has_playlist(self, channel_id):
"""check if channel has any playlist indexed"""
path = "ta_playlist/_search"
data = {
"size": 1,
"query": {"term": {"playlist_channel_id": {"value": channel_id}}},
"_source": False,
}
response, _ = ElasticWrap(path).get(data=data)
self.context.update({"has_playlists": bool(response["hits"]["hits"])})
class ChannelIdView(ChannelIdBaseView):
"""resolves to /channel/<channel-id>/
@ -514,7 +564,7 @@ class ChannelIdView(ChannelIdBaseView):
view_origin = "home"
es_search = "ta_video/_search"
video_types = [VideoTypeEnum.VIDEO]
video_types = [VideoTypeEnum.VIDEOS]
def get(self, request, channel_id):
"""get request"""
@ -522,7 +572,7 @@ class ChannelIdView(ChannelIdBaseView):
self._update_view_data(channel_id)
self.find_results()
self.match_progress()
self.channel_has_pending(channel_id)
self.channel_pages(channel_id)
if self.context["results"]:
channel_info = self.context["results"][0]["source"]["channel"]
@ -584,11 +634,11 @@ class ChannelIdView(ChannelIdBaseView):
class ChannelIdLiveView(ChannelIdView):
"""resolves to /channel/<channel-id>/live/
"""resolves to /channel/<channel-id>/streams/
display single channel page from channel_id
"""
video_types = [VideoTypeEnum.LIVE]
video_types = [VideoTypeEnum.STREAMS]
class ChannelIdShortsView(ChannelIdView):
@ -596,7 +646,7 @@ class ChannelIdShortsView(ChannelIdView):
display single channel page from channel_id
"""
video_types = [VideoTypeEnum.SHORT]
video_types = [VideoTypeEnum.SHORTS]
class ChannelIdAboutView(ChannelIdBaseView):
@ -609,7 +659,7 @@ class ChannelIdAboutView(ChannelIdBaseView):
def get(self, request, channel_id):
"""handle get request"""
self.initiate_vars(request)
self.channel_has_pending(channel_id)
self.channel_pages(channel_id)
response, _ = ElasticWrap(f"ta_channel/_doc/{channel_id}").get()
channel_info = SearchProcess(response).process()
@ -657,7 +707,7 @@ class ChannelIdPlaylistView(ChannelIdBaseView):
self.initiate_vars(request)
self._update_view_data(channel_id)
self.find_results()
self.channel_has_pending(channel_id)
self.channel_pages(channel_id)
channel_info = self.get_channel_meta(channel_id)
channel_name = channel_info["channel_name"]

View File

@ -1,13 +1,13 @@
beautifulsoup4==4.11.1
celery==5.2.7
Django==4.1.4
Django==4.1.5
django-auth-ldap==4.1.0
django-cors-headers==3.13.0
djangorestframework==3.14.0
Pillow==9.3.0
Pillow==9.4.0
redis==4.4.0
requests==2.28.1
ryd-client==0.0.6
uWSGI==2.0.21
whitenoise==6.2.0
yt_dlp==2022.11.11
whitenoise==6.3.0
yt_dlp==2023.1.2

View File

@ -462,7 +462,7 @@ video:-webkit-full-screen {
overflow: hidden;
}
.video-item:hover .video-thumb span {
.video-item:hover .video-tags {
opacity: 1;
}
@ -486,16 +486,20 @@ video:-webkit-full-screen {
position: relative;
}
.video-thumb span {
.video-tags {
position: absolute;
top: 5px;
left: 5px;
background-color: var(--accent-font-light);
left: 0;
padding: 5px;
opacity: 0;
transition: 300ms ease-in-out;
}
.video-tags span {
background-color: var(--accent-font-light);
padding: 5px;
}
.video-play img {
width: 40px;
filter: var(--img-filter);
@ -645,6 +649,7 @@ video:-webkit-full-screen {
.info-box-item {
display: flex;
flex-wrap: wrap;
align-items: center;
padding: 15px;
background-color: var(--highlight-bg);

View File

@ -193,8 +193,8 @@ function toIgnore(button) {
function downloadNow(button) {
let youtube_id = button.getAttribute('data-id');
let payload = JSON.stringify({ dlnow: youtube_id });
sendPost(payload);
let apiEndpoint = '/api/download/' + youtube_id + '/';
apiRequest(apiEndpoint, 'POST', { status: 'priority' });
document.getElementById(youtube_id).remove();
setTimeout(function () {
checkMessages();