From 0fcd6c086b28453a9e273b6dc9bb9f0c11d90cec Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 10 Feb 2023 22:16:04 +0700 Subject: [PATCH 01/14] add skip log output for is_upcoming or is_live --- tubearchivist/home/src/download/queue.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubearchivist/home/src/download/queue.py b/tubearchivist/home/src/download/queue.py index ca7487c2..7c81ebf0 100644 --- a/tubearchivist/home/src/download/queue.py +++ b/tubearchivist/home/src/download/queue.py @@ -264,6 +264,7 @@ class PendingList(PendingIndex): return False # stop if video is streaming live now if vid["live_status"] in ["is_upcoming", "is_live"]: + print(f"{youtube_id}: skip is_upcoming or is_live") return False if vid["live_status"] == "was_live": From 4936f2fdf245343c69b6a3a8fb8d1445e862c9ea Mon Sep 17 00:00:00 2001 From: lamusmaser <1940060+lamusmaser@users.noreply.github.com> Date: Sat, 11 Feb 2023 03:07:36 -0700 Subject: [PATCH 02/14] 412 fix `vid_type` with downloads (single and bulk) (#413) * update TA_VERSION v0.3.2 * Update `single_download` task to request proper `vid_type`. * Modify `single_download` to match bulk download. 
* pr 413: undo TA_VERSION update --------- Co-authored-by: simon --- tubearchivist/home/tasks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tubearchivist/home/tasks.py b/tubearchivist/home/tasks.py index a2265fcd..4abc6cbf 100644 --- a/tubearchivist/home/tasks.py +++ b/tubearchivist/home/tasks.py @@ -111,9 +111,10 @@ def download_pending(): def download_single(pending_video): """start download single video now""" queue = RedisQueue(queue_name="dl_queue") + to_add = { "youtube_id": pending_video["youtube_id"], - "vid_type": pending_video["vid_type"], + "vid_type": pending_video.get("vid_type", VideoTypeEnum.VIDEOS.value), } queue.add_priority(json.dumps(to_add)) print(f"Added to queue with priority: {to_add}") From df1018e70ce73bfecbefcf65366ab77620fbaf2f Mon Sep 17 00:00:00 2001 From: simon Date: Sat, 11 Feb 2023 18:24:41 +0700 Subject: [PATCH 03/14] bump redis --- tubearchivist/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubearchivist/requirements.txt b/tubearchivist/requirements.txt index b8a60b1b..e180a311 100644 --- a/tubearchivist/requirements.txt +++ b/tubearchivist/requirements.txt @@ -5,7 +5,7 @@ django-auth-ldap==4.1.0 django-cors-headers==3.13.0 djangorestframework==3.14.0 Pillow==9.4.0 -redis==4.4.2 +redis==4.5.1 requests==2.28.2 ryd-client==0.0.6 uWSGI==2.0.21 From ef685ecb42023d2fe477a62d85aad0c4343c928c Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 13 Feb 2023 11:55:47 +0700 Subject: [PATCH 04/14] handle PIL error empty thumbnail image, #425 --- tubearchivist/home/src/download/thumbnails.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tubearchivist/home/src/download/thumbnails.py b/tubearchivist/home/src/download/thumbnails.py index e91a97db..7b71f915 100644 --- a/tubearchivist/home/src/download/thumbnails.py +++ b/tubearchivist/home/src/download/thumbnails.py @@ -43,7 +43,11 @@ class ThumbManagerBase: response = requests.get(url, stream=True, timeout=5) if 
response.ok: try: - return Image.open(response.raw) + img = Image.open(response.raw) + if isinstance(img, Image.Image): + return img + return self.get_fallback() + except UnidentifiedImageError: print(f"failed to open thumbnail: {url}") return self.get_fallback() From 77c26134d1cb36f58c74ee4af1b9a17e0bb70ecb Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 13 Feb 2023 12:15:12 +0700 Subject: [PATCH 05/14] add print for thumbnail fallback, #425 --- tubearchivist/home/src/download/thumbnails.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tubearchivist/home/src/download/thumbnails.py b/tubearchivist/home/src/download/thumbnails.py index 7b71f915..3f1975d2 100644 --- a/tubearchivist/home/src/download/thumbnails.py +++ b/tubearchivist/home/src/download/thumbnails.py @@ -63,6 +63,7 @@ class ThumbManagerBase: def get_fallback(self): """get fallback thumbnail if not available""" + print(f"{self.item_id}: failed to extract thumbnail, use fallback") if self.fallback: img_raw = Image.open(self.fallback) return img_raw From 6641db3e7e5b3a96b8d974d5ce455592dc8ea8dc Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 13 Feb 2023 12:42:55 +0700 Subject: [PATCH 06/14] skip empty comment, #429 --- tubearchivist/home/src/index/comments.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tubearchivist/home/src/index/comments.py b/tubearchivist/home/src/index/comments.py index f3837087..e9ec0d9a 100644 --- a/tubearchivist/home/src/index/comments.py +++ b/tubearchivist/home/src/index/comments.py @@ -109,12 +109,20 @@ class Comments: if comments_raw: for comment in comments_raw: cleaned_comment = self.clean_comment(comment) + if not cleaned_comment: + continue + comments.append(cleaned_comment) self.comments_format = comments def clean_comment(self, comment): """parse metadata from comment for indexing""" + if not comment.get("text"): + # comment text can be empty + print(f"{self.youtube_id}: Failed to extract text, {comment}") + return False + time_text_datetime = 
datetime.utcfromtimestamp(comment["timestamp"]) if time_text_datetime.hour == 0 and time_text_datetime.minute == 0: From 5ec0636807187c9c35e8983925a7330689ab4cb1 Mon Sep 17 00:00:00 2001 From: simon Date: Tue, 14 Feb 2023 11:12:58 +0700 Subject: [PATCH 07/14] [API] add filter subscribed to channel list --- tubearchivist/api/README.md | 3 +++ tubearchivist/api/views.py | 15 ++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/tubearchivist/api/README.md b/tubearchivist/api/README.md index 2328f685..00e929f3 100644 --- a/tubearchivist/api/README.md +++ b/tubearchivist/api/README.md @@ -155,6 +155,9 @@ Timestamps either *int* or *float*, end time can't be before start time. ## Channel List View /api/channel/ +Parameter: +- filter: subscribed + ### Subscribe to a list of channels POST /api/channel/ ```json diff --git a/tubearchivist/api/views.py b/tubearchivist/api/views.py index 7146de72..a872d504 100644 --- a/tubearchivist/api/views.py +++ b/tubearchivist/api/views.py @@ -289,14 +289,27 @@ class ChannelApiListView(ApiBaseView): """ search_base = "ta_channel/_search/" + valid_filter = ["subscribed"] def get(self, request): """get request""" - self.get_document_list(request) self.data.update( {"sort": [{"channel_name.keyword": {"order": "asc"}}]} ) + query_filter = request.GET.get("filter", False) + must_list = [] + if query_filter: + if query_filter not in self.valid_filter: + message = f"invalid url query filder: {query_filter}" + print(message) + return Response({"message": message}, status=400) + + must_list.append({"term": {"channel_subscribed": {"value": True}}}) + + self.data["query"] = {"bool": {"must": must_list}} + self.get_document_list(request) + return Response(self.response) @staticmethod From 0e5421a558f760ee77ff762196044465ca616243 Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 17 Feb 2023 08:37:38 +0700 Subject: [PATCH 08/14] bump django --- tubearchivist/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/tubearchivist/requirements.txt b/tubearchivist/requirements.txt index e180a311..91e7a20a 100644 --- a/tubearchivist/requirements.txt +++ b/tubearchivist/requirements.txt @@ -1,6 +1,6 @@ beautifulsoup4==4.11.2 celery==5.2.7 -Django==4.1.6 +Django==4.1.7 django-auth-ldap==4.1.0 django-cors-headers==3.13.0 djangorestframework==3.14.0 From 1125c0c4bf7eb8fc6f0cfc00033e650e118bad6c Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 17 Feb 2023 09:17:04 +0700 Subject: [PATCH 09/14] handle malformed thumbnail download, #430 --- tubearchivist/home/src/download/thumbnails.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubearchivist/home/src/download/thumbnails.py b/tubearchivist/home/src/download/thumbnails.py index 3f1975d2..49082aab 100644 --- a/tubearchivist/home/src/download/thumbnails.py +++ b/tubearchivist/home/src/download/thumbnails.py @@ -48,7 +48,7 @@ class ThumbManagerBase: return img return self.get_fallback() - except UnidentifiedImageError: + except (UnidentifiedImageError, OSError): print(f"failed to open thumbnail: {url}") return self.get_fallback() From 162c05628c948e8bc5bea14e7438beeec40aa16c Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 17 Feb 2023 10:26:10 +0700 Subject: [PATCH 10/14] rename PendingInteract youtube_id kwargs --- tubearchivist/api/views.py | 2 +- tubearchivist/home/src/download/queue.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tubearchivist/api/views.py b/tubearchivist/api/views.py index a872d504..14f547f6 100644 --- a/tubearchivist/api/views.py +++ b/tubearchivist/api/views.py @@ -443,7 +443,7 @@ class DownloadApiView(ApiBaseView): # pylint: disable=unused-argument """delete single video from queue""" print(f"{video_id}: delete from queue") - PendingInteract(video_id=video_id).delete_item() + PendingInteract(video_id).delete_item() return Response({"success": True}) diff --git a/tubearchivist/home/src/download/queue.py b/tubearchivist/home/src/download/queue.py index 
7c81ebf0..ee7ba462 100644 --- a/tubearchivist/home/src/download/queue.py +++ b/tubearchivist/home/src/download/queue.py @@ -96,13 +96,13 @@ class PendingIndex: class PendingInteract: """interact with items in download queue""" - def __init__(self, video_id=False, status=False): - self.video_id = video_id + def __init__(self, youtube_id=False, status=False): + self.youtube_id = youtube_id self.status = status def delete_item(self): """delete single item from pending""" - path = f"ta_download/_doc/{self.video_id}" + path = f"ta_download/_doc/{self.youtube_id}" _, _ = ElasticWrap(path).delete(refresh=True) def delete_by_status(self): @@ -114,12 +114,12 @@ class PendingInteract: def update_status(self): """update status field of pending item""" data = {"doc": {"status": self.status}} - path = f"ta_download/_update/{self.video_id}" + path = f"ta_download/_update/{self.youtube_id}" _, _ = ElasticWrap(path).post(data=data) def get_item(self): """return pending item dict""" - path = f"ta_download/_doc/{self.video_id}" + path = f"ta_download/_doc/{self.youtube_id}" response, status_code = ElasticWrap(path).get() return response["_source"], status_code From a34c9479f29a96571280703e75987aec8a1a713b Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 17 Feb 2023 12:21:00 +0700 Subject: [PATCH 11/14] use channel from queue for download query filtering, #431 --- tubearchivist/home/src/download/queue.py | 20 ++++++++++++++++++++ tubearchivist/home/views.py | 10 +++++----- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/tubearchivist/home/src/download/queue.py b/tubearchivist/home/src/download/queue.py index ee7ba462..cd20c9b6 100644 --- a/tubearchivist/home/src/download/queue.py +++ b/tubearchivist/home/src/download/queue.py @@ -123,6 +123,26 @@ class PendingInteract: response, status_code = ElasticWrap(path).get() return response["_source"], status_code + def get_channel(self): + """ + get channel metadata from queue to not depend on channel to be indexed + """ + data 
= { + "size": 1, + "query": {"term": {"channel_id": {"value": self.youtube_id}}}, + } + response, _ = ElasticWrap("ta_download/_search").get(data=data) + hits = response["hits"]["hits"] + if not hits: + channel_name = "NA" + else: + channel_name = hits[0]["_source"].get("channel_name", "NA") + + return { + "channel_id": self.youtube_id, + "channel_name": channel_name, + } + class PendingList(PendingIndex): """manage the pending videos list""" diff --git a/tubearchivist/home/views.py b/tubearchivist/home/views.py index 4918aca4..3e443733 100644 --- a/tubearchivist/home/views.py +++ b/tubearchivist/home/views.py @@ -15,6 +15,7 @@ from django.contrib.auth.forms import AuthenticationForm from django.http import JsonResponse from django.shortcuts import redirect, render from django.views import View +from home.src.download.queue import PendingInteract from home.src.download.yt_dlp_base import CookieHandler from home.src.es.backup import ElasticBackup from home.src.es.connect import ElasticWrap @@ -32,7 +33,7 @@ from home.src.frontend.forms import ( UserSettingsForm, ) from home.src.frontend.searching import SearchHandler -from home.src.index.channel import YoutubeChannel, channel_overwrites +from home.src.index.channel import channel_overwrites from home.src.index.generic import Pagination from home.src.index.playlist import YoutubePlaylist from home.src.index.reindex import ReindexProgress @@ -401,12 +402,11 @@ class DownloadView(ArchivistResultsView): {"term": {"channel_id": {"value": channel_filter}}} ) - channel = YoutubeChannel(channel_filter) - channel.get_from_es() + channel = PendingInteract(channel_filter).get_channel() self.context.update( { - "channel_filter_id": channel_filter, - "channel_filter_name": channel.json_data["channel_name"], + "channel_filter_id": channel.get("channel_id"), + "channel_filter_name": channel.get("channel_name"), } ) From fdf5b2d802cf6c9042f29828ff87dbcb00eb76ce Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 17 Feb 2023 13:48:00 
+0700 Subject: [PATCH 12/14] clarify video page access from title, #432 --- docs/Video.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Video.md b/docs/Video.md index 72f74072..cd1f5b37 100644 --- a/docs/Video.md +++ b/docs/Video.md @@ -1,5 +1,5 @@ # Video Page -Every video downloaded gets a dedicated page accessible at `/video//` of your Tube Archivist. +Every video downloaded gets a dedicated page accessible at `/video//` of your Tube Archivist. Throughout the interface, click on a video title to access the video page. Clicking on the channel name or the channel icon will bring you to the dedicated channel detail [page](Channels#channel-detail). From 72f091b85e13f7ca87fa10f644079a3713379ab0 Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 17 Feb 2023 14:04:52 +0700 Subject: [PATCH 13/14] fix download filter for ignored videos, #401 --- tubearchivist/home/views.py | 10 ++++++---- tubearchivist/static/script.js | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/tubearchivist/home/views.py b/tubearchivist/home/views.py index 3e443733..db735d01 100644 --- a/tubearchivist/home/views.py +++ b/tubearchivist/home/views.py @@ -376,13 +376,13 @@ class DownloadView(ArchivistResultsView): def get(self, request): """handle get request""" self.initiate_vars(request) - self._update_view_data(request) + filter_view = self._update_view_data(request) self.find_results() self.context.update( { "title": "Downloads", "add_form": AddToQueueForm(), - "channel_agg_list": self._get_channel_agg(), + "channel_agg_list": self._get_channel_agg(filter_view), } ) return render(request, "home/downloads.html", self.context) @@ -417,11 +417,13 @@ class DownloadView(ArchivistResultsView): } ) - def _get_channel_agg(self): + return filter_view + + def _get_channel_agg(self, filter_view): """get pending channel with count""" data = { "size": 0, - "query": {"term": {"status": {"value": "pending"}}}, + "query": {"term": {"status": {"value": filter_view}}}, 
"aggs": { "channel_downloads": { "multi_terms": { diff --git a/tubearchivist/static/script.js b/tubearchivist/static/script.js index 1028f84a..8dfff95b 100644 --- a/tubearchivist/static/script.js +++ b/tubearchivist/static/script.js @@ -142,7 +142,7 @@ function toggleCheckbox(checkbox) { let payload = JSON.stringify(payloadDict); sendPost(payload); setTimeout(function () { - let currPage = window.location.pathname + window.location.search; + let currPage = window.location.pathname; window.location.replace(currPage); }, 500); } From 72924fab66c82789756dd270fae72f5bf5ebe305 Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 17 Feb 2023 20:25:21 +0700 Subject: [PATCH 14/14] bump yt-dlp, #433 --- tubearchivist/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubearchivist/requirements.txt b/tubearchivist/requirements.txt index 91e7a20a..8da89941 100644 --- a/tubearchivist/requirements.txt +++ b/tubearchivist/requirements.txt @@ -10,4 +10,4 @@ requests==2.28.2 ryd-client==0.0.6 uWSGI==2.0.21 whitenoise==6.3.0 -yt_dlp==2023.1.6 +yt_dlp==2023.2.17