mirror of
https://github.com/tubearchivist/tubearchivist
synced 2024-11-17 21:25:49 +00:00
Reindex shorts and streams, #build
Changed: - Match existing videos with new video type - Randomize version check schedule per installation
This commit is contained in:
commit
1471fce2c3
@ -52,6 +52,6 @@
|
|||||||
"thumbnail_check": {"minute": "0", "hour": "17", "day_of_week": "*"},
|
"thumbnail_check": {"minute": "0", "hour": "17", "day_of_week": "*"},
|
||||||
"run_backup": {"minute": "0", "hour": "8", "day_of_week": "0"},
|
"run_backup": {"minute": "0", "hour": "8", "day_of_week": "0"},
|
||||||
"run_backup_rotate": 5,
|
"run_backup_rotate": 5,
|
||||||
"version_check": {"minute": "0", "hour": "11", "day_of_week": "*"}
|
"version_check": "rand-d"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -381,6 +381,17 @@ class YoutubeChannel(YouTubeItem):
|
|||||||
|
|
||||||
return all_youtube_ids
|
return all_youtube_ids
|
||||||
|
|
||||||
|
def get_channel_videos(self):
    """get all videos from channel"""
    # match every indexed video belonging to this channel id,
    # fetching only the fields the callers need
    term_filter = {"channel.channel_id": {"value": self.youtube_id}}
    query = {
        "query": {"term": term_filter},
        "_source": ["youtube_id", "vid_type"],
    }
    return IndexPaginate("ta_video", query).get_results()
|
||||||
|
|
||||||
def get_all_playlists(self):
|
def get_all_playlists(self):
|
||||||
"""get all playlists owned by this channel"""
|
"""get all playlists owned by this channel"""
|
||||||
url = (
|
url = (
|
||||||
|
@ -4,12 +4,14 @@ functionality:
|
|||||||
- index and update in es
|
- index and update in es
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from time import sleep
|
from time import sleep
|
||||||
|
|
||||||
from home.src.download.queue import PendingList
|
from home.src.download.queue import PendingList
|
||||||
|
from home.src.download.subscriptions import ChannelSubscription
|
||||||
from home.src.download.thumbnails import ThumbManager
|
from home.src.download.thumbnails import ThumbManager
|
||||||
from home.src.download.yt_dlp_base import CookieHandler
|
from home.src.download.yt_dlp_base import CookieHandler
|
||||||
from home.src.download.yt_dlp_handler import VideoDownloader
|
from home.src.download.yt_dlp_handler import VideoDownloader
|
||||||
@ -307,6 +309,8 @@ class Reindex(ReindexBase):
|
|||||||
channel.upload_to_es()
|
channel.upload_to_es()
|
||||||
channel.sync_to_videos()
|
channel.sync_to_videos()
|
||||||
|
|
||||||
|
ChannelFullScan(channel_id).scan()
|
||||||
|
|
||||||
def _reindex_single_playlist(self, playlist_id):
|
def _reindex_single_playlist(self, playlist_id):
|
||||||
"""refresh playlist data"""
|
"""refresh playlist data"""
|
||||||
self._get_all_videos()
|
self._get_all_videos()
|
||||||
@ -473,3 +477,74 @@ class ChannelUrlFixer:
|
|||||||
shutil.move(video_path_is, new_path, copy_function=shutil.copyfile)
|
shutil.move(video_path_is, new_path, copy_function=shutil.copyfile)
|
||||||
VideoDownloader().move_to_archive(self.video.json_data)
|
VideoDownloader().move_to_archive(self.video.json_data)
|
||||||
self.video.update_media_url()
|
self.video.update_media_url()
|
||||||
|
|
||||||
|
|
||||||
|
class ChannelFullScan:
    """
    update from v0.3.0 to v0.3.1
    full scan of channel to fix vid_type mismatch
    """

    def __init__(self, channel_id):
        # channel to scan
        self.channel_id = channel_id
        # False until scan() replaces it with the list of mismatches
        self.to_update = False

    def scan(self):
        """match local with remote"""
        print(f"{self.channel_id}: start full scan")
        local_videos = self._get_all_local()
        remote_videos = self._get_all_remote()
        self.to_update = []
        for local in local_videos:
            video_id = local["youtube_id"]
            matches = [entry for entry in remote_videos if entry[0] == video_id]
            if not matches:
                print(f"{video_id}: no remote match found")
                continue

            # remote entries carry the vid_type enum as their last element
            expected_type = matches[0][-1].value
            if local["vid_type"] == expected_type:
                continue

            self.to_update.append(
                {
                    "video_id": video_id,
                    "vid_type": expected_type,
                }
            )

        self.update()

    def _get_all_remote(self):
        """get all channel videos"""
        return ChannelSubscription().get_last_youtube_videos(
            self.channel_id, limit=False
        )

    def _get_all_local(self):
        """get all local indexed channel_videos"""
        return YoutubeChannel(self.channel_id).get_channel_videos()

    def update(self):
        """build bulk query for updates"""
        if not self.to_update:
            print(f"{self.channel_id}: nothing to update")
            return

        print(f"{self.channel_id}: fixing {len(self.to_update)} videos")
        bulk_lines = []
        for entry in self.to_update:
            bulk_lines.append(
                json.dumps(
                    {
                        "update": {
                            "_id": entry.get("video_id"),
                            "_index": "ta_video",
                        }
                    }
                )
            )
            bulk_lines.append(
                json.dumps({"doc": {"vid_type": entry.get("vid_type")}})
            )

        # the bulk API requires a trailing newline after the last action
        bulk_lines.append("\n")
        _, _ = ElasticWrap("_bulk").post(data="\n".join(bulk_lines), ndjson=True)
|
||||||
|
@ -7,6 +7,7 @@ Functionality:
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
from random import randint
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from celery.schedules import crontab
|
from celery.schedules import crontab
|
||||||
@ -117,6 +118,15 @@ class AppConfig:
|
|||||||
self.config["application"]["colors"] = colors
|
self.config["application"]["colors"] = colors
|
||||||
return colors
|
return colors
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _build_rand_daily():
|
||||||
|
"""build random daily schedule per installation"""
|
||||||
|
return {
|
||||||
|
"minute": randint(0, 59),
|
||||||
|
"hour": randint(0, 23),
|
||||||
|
"day_of_week": "*",
|
||||||
|
}
|
||||||
|
|
||||||
def load_new_defaults(self):
|
def load_new_defaults(self):
|
||||||
"""check config.json for missing defaults"""
|
"""check config.json for missing defaults"""
|
||||||
default_config = self.get_config_file()
|
default_config = self.get_config_file()
|
||||||
@ -140,6 +150,9 @@ class AppConfig:
|
|||||||
# missing nested values
|
# missing nested values
|
||||||
for sub_key, sub_value in value.items():
|
for sub_key, sub_value in value.items():
|
||||||
if sub_key not in redis_config[key].keys():
|
if sub_key not in redis_config[key].keys():
|
||||||
|
if sub_value == "rand-d":
|
||||||
|
sub_value = self._build_rand_daily()
|
||||||
|
|
||||||
redis_config[key].update({sub_key: sub_value})
|
redis_config[key].update({sub_key: sub_value})
|
||||||
needs_update = True
|
needs_update = True
|
||||||
|
|
||||||
@ -256,19 +269,18 @@ class ScheduleBuilder:
|
|||||||
if not item_conf:
|
if not item_conf:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
minute = item_conf["minute"]
|
schedule_dict.update(
|
||||||
hour = item_conf["hour"]
|
{
|
||||||
day_of_week = item_conf["day_of_week"]
|
f"schedule_{schedule_item}": {
|
||||||
schedule_name = f"schedule_{schedule_item}"
|
"task": schedule_item,
|
||||||
to_add = {
|
"schedule": crontab(
|
||||||
schedule_name: {
|
minute=item_conf["minute"],
|
||||||
"task": schedule_item,
|
hour=item_conf["hour"],
|
||||||
"schedule": crontab(
|
day_of_week=item_conf["day_of_week"],
|
||||||
minute=minute, hour=hour, day_of_week=day_of_week
|
),
|
||||||
),
|
}
|
||||||
}
|
}
|
||||||
}
|
)
|
||||||
schedule_dict.update(to_add)
|
|
||||||
|
|
||||||
return schedule_dict
|
return schedule_dict
|
||||||
|
|
||||||
|
@ -5,9 +5,9 @@ django-auth-ldap==4.1.0
|
|||||||
django-cors-headers==3.13.0
|
django-cors-headers==3.13.0
|
||||||
djangorestframework==3.14.0
|
djangorestframework==3.14.0
|
||||||
Pillow==9.4.0
|
Pillow==9.4.0
|
||||||
redis==4.4.0
|
redis==4.4.2
|
||||||
requests==2.28.1
|
requests==2.28.1
|
||||||
ryd-client==0.0.6
|
ryd-client==0.0.6
|
||||||
uWSGI==2.0.21
|
uWSGI==2.0.21
|
||||||
whitenoise==6.3.0
|
whitenoise==6.3.0
|
||||||
yt_dlp==2023.1.2
|
yt_dlp==2023.1.6
|
||||||
|
Loading…
Reference in New Issue
Block a user