diff --git a/docs/Video.md b/docs/Video.md index 72f74072..cd1f5b37 100644 --- a/docs/Video.md +++ b/docs/Video.md @@ -1,5 +1,5 @@ # Video Page -Every video downloaded gets a dedicated page accessible at `/video//` of your Tube Archivist. +Every video downloaded gets a dedicated page accessible at `/video//` of your Tube Archivist. Throughout the interface, click on a video title to access the video page. Clicking on the channel name or the channel icon will bring you to the dedicated channel detail [page](Channels#channel-detail). diff --git a/tubearchivist/api/README.md b/tubearchivist/api/README.md index 2328f685..00e929f3 100644 --- a/tubearchivist/api/README.md +++ b/tubearchivist/api/README.md @@ -155,6 +155,9 @@ Timestamps either *int* or *float*, end time can't be before start time. ## Channel List View /api/channel/ +Parameter: +- filter: subscribed + ### Subscribe to a list of channels POST /api/channel/ ```json diff --git a/tubearchivist/api/views.py b/tubearchivist/api/views.py index 7146de72..14f547f6 100644 --- a/tubearchivist/api/views.py +++ b/tubearchivist/api/views.py @@ -289,14 +289,27 @@ class ChannelApiListView(ApiBaseView): """ search_base = "ta_channel/_search/" + valid_filter = ["subscribed"] def get(self, request): """get request""" - self.get_document_list(request) self.data.update( {"sort": [{"channel_name.keyword": {"order": "asc"}}]} ) + query_filter = request.GET.get("filter", False) + must_list = [] + if query_filter: + if query_filter not in self.valid_filter: + message = f"invalid url query filder: {query_filter}" + print(message) + return Response({"message": message}, status=400) + + must_list.append({"term": {"channel_subscribed": {"value": True}}}) + + self.data["query"] = {"bool": {"must": must_list}} + self.get_document_list(request) + return Response(self.response) @staticmethod @@ -430,7 +443,7 @@ class DownloadApiView(ApiBaseView): # pylint: disable=unused-argument """delete single video from queue""" print(f"{video_id}: delete from queue") - PendingInteract(video_id=video_id).delete_item() + PendingInteract(video_id).delete_item() return Response({"success": True}) diff --git a/tubearchivist/home/src/download/queue.py b/tubearchivist/home/src/download/queue.py index ca7487c2..cd20c9b6 100644 --- a/tubearchivist/home/src/download/queue.py +++ b/tubearchivist/home/src/download/queue.py @@ -96,13 +96,13 @@ class PendingIndex: class PendingInteract: """interact with items in download queue""" - def __init__(self, video_id=False, status=False): - self.video_id = video_id + def __init__(self, youtube_id=False, status=False): + self.youtube_id = youtube_id self.status = status def delete_item(self): """delete single item from pending""" - path = f"ta_download/_doc/{self.video_id}" + path = f"ta_download/_doc/{self.youtube_id}" _, _ = ElasticWrap(path).delete(refresh=True) def delete_by_status(self): @@ -114,15 +114,35 @@ class PendingInteract: def update_status(self): """update status field of pending item""" data = {"doc": {"status": self.status}} - path = f"ta_download/_update/{self.video_id}" + path = f"ta_download/_update/{self.youtube_id}" _, _ = ElasticWrap(path).post(data=data) def get_item(self): """return pending item dict""" - path = f"ta_download/_doc/{self.video_id}" + path = f"ta_download/_doc/{self.youtube_id}" response, status_code = ElasticWrap(path).get() return response["_source"], status_code + def get_channel(self): + """ + get channel metadata from queue to not depend on channel to be indexed + """ + data = { + "size": 1, + "query": {"term": {"channel_id": {"value": self.youtube_id}}}, + } + response, _ = ElasticWrap("ta_download/_search").get(data=data) + hits = response["hits"]["hits"] + if not hits: + channel_name = "NA" + else: + channel_name = hits[0]["_source"].get("channel_name", "NA") + + return { + "channel_id": self.youtube_id, + "channel_name": channel_name, + } + class PendingList(PendingIndex): """manage the pending videos list""" @@ -264,6 +284,7 @@ class PendingList(PendingIndex): return False # stop if video is streaming live now if vid["live_status"] in ["is_upcoming", "is_live"]: + print(f"{youtube_id}: skip is_upcoming or is_live") return False if vid["live_status"] == "was_live": diff --git a/tubearchivist/home/src/download/thumbnails.py b/tubearchivist/home/src/download/thumbnails.py index e91a97db..49082aab 100644 --- a/tubearchivist/home/src/download/thumbnails.py +++ b/tubearchivist/home/src/download/thumbnails.py @@ -43,8 +43,12 @@ class ThumbManagerBase: response = requests.get(url, stream=True, timeout=5) if response.ok: try: - return Image.open(response.raw) - except UnidentifiedImageError: + img = Image.open(response.raw) + if isinstance(img, Image.Image): + return img + return self.get_fallback() + + except (UnidentifiedImageError, OSError): print(f"failed to open thumbnail: {url}") return self.get_fallback() @@ -59,6 +63,7 @@ class ThumbManagerBase: def get_fallback(self): """get fallback thumbnail if not available""" + print(f"{self.item_id}: failed to extract thumbnail, use fallback") if self.fallback: img_raw = Image.open(self.fallback) return img_raw diff --git a/tubearchivist/home/src/index/comments.py b/tubearchivist/home/src/index/comments.py index f3837087..e9ec0d9a 100644 --- a/tubearchivist/home/src/index/comments.py +++ b/tubearchivist/home/src/index/comments.py @@ -109,12 +109,20 @@ class Comments: if comments_raw: for comment in comments_raw: cleaned_comment = self.clean_comment(comment) + if not cleaned_comment: + continue + comments.append(cleaned_comment) self.comments_format = comments def clean_comment(self, comment): """parse metadata from comment for indexing""" + if not comment.get("text"): + # comment text can be empty + print(f"{self.youtube_id}: Failed to extract text, {comment}") + return False + time_text_datetime = datetime.utcfromtimestamp(comment["timestamp"]) if time_text_datetime.hour == 0 and time_text_datetime.minute == 0: diff --git a/tubearchivist/home/tasks.py b/tubearchivist/home/tasks.py index a2265fcd..4abc6cbf 100644 --- a/tubearchivist/home/tasks.py +++ b/tubearchivist/home/tasks.py @@ -111,9 +111,10 @@ def download_pending(): def download_single(pending_video): """start download single video now""" queue = RedisQueue(queue_name="dl_queue") + to_add = { "youtube_id": pending_video["youtube_id"], - "vid_type": pending_video["vid_type"], + "vid_type": pending_video.get("vid_type", VideoTypeEnum.VIDEOS.value), } queue.add_priority(json.dumps(to_add)) print(f"Added to queue with priority: {to_add}") diff --git a/tubearchivist/home/views.py b/tubearchivist/home/views.py index 4918aca4..db735d01 100644 --- a/tubearchivist/home/views.py +++ b/tubearchivist/home/views.py @@ -15,6 +15,7 @@ from django.contrib.auth.forms import AuthenticationForm from django.http import JsonResponse from django.shortcuts import redirect, render from django.views import View +from home.src.download.queue import PendingInteract from home.src.download.yt_dlp_base import CookieHandler from home.src.es.backup import ElasticBackup from home.src.es.connect import ElasticWrap @@ -32,7 +33,7 @@ from home.src.frontend.forms import ( UserSettingsForm, ) from home.src.frontend.searching import SearchHandler -from home.src.index.channel import YoutubeChannel, channel_overwrites +from home.src.index.channel import channel_overwrites from home.src.index.generic import Pagination from home.src.index.playlist import YoutubePlaylist from home.src.index.reindex import ReindexProgress @@ -375,13 +376,13 @@ class DownloadView(ArchivistResultsView): def get(self, request): """handle get request""" self.initiate_vars(request) - self._update_view_data(request) + filter_view = self._update_view_data(request) self.find_results() self.context.update( { "title": "Downloads", "add_form": AddToQueueForm(), - "channel_agg_list": self._get_channel_agg(), + "channel_agg_list": self._get_channel_agg(filter_view), } ) return render(request, "home/downloads.html", self.context) @@ -401,12 +402,11 @@ class DownloadView(ArchivistResultsView): {"term": {"channel_id": {"value": channel_filter}}} ) - channel = YoutubeChannel(channel_filter) - channel.get_from_es() + channel = PendingInteract(channel_filter).get_channel() self.context.update( { - "channel_filter_id": channel_filter, - "channel_filter_name": channel.json_data["channel_name"], + "channel_filter_id": channel.get("channel_id"), + "channel_filter_name": channel.get("channel_name"), } ) @@ -417,11 +417,13 @@ class DownloadView(ArchivistResultsView): } ) - def _get_channel_agg(self): + return filter_view + + def _get_channel_agg(self, filter_view): """get pending channel with count""" data = { "size": 0, - "query": {"term": {"status": {"value": "pending"}}}, + "query": {"term": {"status": {"value": filter_view}}}, "aggs": { "channel_downloads": { "multi_terms": { diff --git a/tubearchivist/requirements.txt b/tubearchivist/requirements.txt index b8a60b1b..8da89941 100644 --- a/tubearchivist/requirements.txt +++ b/tubearchivist/requirements.txt @@ -1,13 +1,13 @@ beautifulsoup4==4.11.2 celery==5.2.7 -Django==4.1.6 +Django==4.1.7 django-auth-ldap==4.1.0 django-cors-headers==3.13.0 djangorestframework==3.14.0 Pillow==9.4.0 -redis==4.4.2 +redis==4.5.1 requests==2.28.2 ryd-client==0.0.6 uWSGI==2.0.21 whitenoise==6.3.0 -yt_dlp==2023.1.6 +yt_dlp==2023.2.17 diff --git a/tubearchivist/static/script.js b/tubearchivist/static/script.js index 1028f84a..8dfff95b 100644 --- a/tubearchivist/static/script.js +++ b/tubearchivist/static/script.js @@ -142,7 +142,7 @@ function toggleCheckbox(checkbox) { let payload = JSON.stringify(payloadDict); sendPost(payload); setTimeout(function () { - let currPage = window.location.pathname + window.location.search; + let currPage = window.location.pathname; window.location.replace(currPage); }, 500); }