mirror of
https://github.com/tubearchivist/tubearchivist
synced 2024-11-17 21:25:49 +00:00
Reindex shorts and streams, #build
Changed: - Match existing videos with new video type - Randomize version check schedule per installation
This commit is contained in:
commit
1471fce2c3
@ -52,6 +52,6 @@
|
||||
"thumbnail_check": {"minute": "0", "hour": "17", "day_of_week": "*"},
|
||||
"run_backup": {"minute": "0", "hour": "8", "day_of_week": "0"},
|
||||
"run_backup_rotate": 5,
|
||||
"version_check": {"minute": "0", "hour": "11", "day_of_week": "*"}
|
||||
"version_check": "rand-d"
|
||||
}
|
||||
}
|
||||
|
@ -381,6 +381,17 @@ class YoutubeChannel(YouTubeItem):
|
||||
|
||||
return all_youtube_ids
|
||||
|
||||
def get_channel_videos(self):
    """query ta_video for youtube_id and vid_type of this channel's videos"""
    # term filter on the channel id of the current instance
    channel_filter = {"channel.channel_id": {"value": self.youtube_id}}
    query = {
        "query": {"term": channel_filter},
        "_source": ["youtube_id", "vid_type"],
    }
    return IndexPaginate("ta_video", query).get_results()
|
||||
|
||||
def get_all_playlists(self):
|
||||
"""get all playlists owned by this channel"""
|
||||
url = (
|
||||
|
@ -4,12 +4,14 @@ functionality:
|
||||
- index and update in es
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
from time import sleep
|
||||
|
||||
from home.src.download.queue import PendingList
|
||||
from home.src.download.subscriptions import ChannelSubscription
|
||||
from home.src.download.thumbnails import ThumbManager
|
||||
from home.src.download.yt_dlp_base import CookieHandler
|
||||
from home.src.download.yt_dlp_handler import VideoDownloader
|
||||
@ -307,6 +309,8 @@ class Reindex(ReindexBase):
|
||||
channel.upload_to_es()
|
||||
channel.sync_to_videos()
|
||||
|
||||
ChannelFullScan(channel_id).scan()
|
||||
|
||||
def _reindex_single_playlist(self, playlist_id):
|
||||
"""refresh playlist data"""
|
||||
self._get_all_videos()
|
||||
@ -473,3 +477,74 @@ class ChannelUrlFixer:
|
||||
shutil.move(video_path_is, new_path, copy_function=shutil.copyfile)
|
||||
VideoDownloader().move_to_archive(self.video.json_data)
|
||||
self.video.update_media_url()
|
||||
|
||||
|
||||
class ChannelFullScan:
    """
    update from v0.3.0 to v0.3.1
    full scan of channel to fix vid_type mismatch

    Compares the vid_type of every locally indexed video of a channel
    with the type reported by the remote channel listing and sends a
    bulk update for the entries in ta_video that differ.
    """

    def __init__(self, channel_id):
        self.channel_id = channel_id
        # list of {"video_id": ..., "vid_type": ...} dicts, filled by scan()
        self.to_update = []

    def scan(self):
        """match local with remote"""
        print(f"{self.channel_id}: start full scan")
        all_local_videos = self._get_all_local()
        all_remote_videos = self._get_all_remote()

        # index the remote entries by video id once instead of walking the
        # whole remote list for every local video; setdefault keeps the
        # first entry when an id is listed more than once
        remote_by_id = {}
        for remote_video in all_remote_videos:
            remote_by_id.setdefault(remote_video[0], remote_video)

        self.to_update = []
        for video in all_local_videos:
            video_id = video["youtube_id"]
            remote_match = remote_by_id.get(video_id)
            if not remote_match:
                print(f"{video_id}: no remote match found")
                continue

            # last item of a remote entry carries the type, read via .value
            expected_type = remote_match[-1].value
            # .get: a local document without vid_type counts as a mismatch
            # instead of aborting the whole scan with a KeyError
            if video.get("vid_type") != expected_type:
                self.to_update.append(
                    {
                        "video_id": video_id,
                        "vid_type": expected_type,
                    }
                )

        self.update()

    def _get_all_remote(self):
        """get all channel videos"""
        sub = ChannelSubscription()
        all_remote_videos = sub.get_last_youtube_videos(
            self.channel_id, limit=False
        )

        return all_remote_videos

    def _get_all_local(self):
        """get all local indexed channel_videos"""
        channel = YoutubeChannel(self.channel_id)
        all_local_videos = channel.get_channel_videos()

        return all_local_videos

    def update(self):
        """build bulk query for updates"""
        if not self.to_update:
            print(f"{self.channel_id}: nothing to update")
            return

        print(f"{self.channel_id}: fixing {len(self.to_update)} videos")
        bulk_list = []
        for video in self.to_update:
            # every update is two ndjson lines: action header, partial doc
            action = {
                "update": {"_id": video.get("video_id"), "_index": "ta_video"}
            }
            source = {"doc": {"vid_type": video.get("vid_type")}}
            bulk_list.append(json.dumps(action))
            bulk_list.append(json.dumps(source))

        # add last newline
        bulk_list.append("\n")
        data = "\n".join(bulk_list)
        _, _ = ElasticWrap("_bulk").post(data=data, ndjson=True)
|
||||
|
@ -7,6 +7,7 @@ Functionality:
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from random import randint
|
||||
|
||||
import requests
|
||||
from celery.schedules import crontab
|
||||
@ -117,6 +118,15 @@ class AppConfig:
|
||||
self.config["application"]["colors"] = colors
|
||||
return colors
|
||||
|
||||
@staticmethod
|
||||
def _build_rand_daily():
|
||||
"""build random daily schedule per installation"""
|
||||
return {
|
||||
"minute": randint(0, 59),
|
||||
"hour": randint(0, 23),
|
||||
"day_of_week": "*",
|
||||
}
|
||||
|
||||
def load_new_defaults(self):
|
||||
"""check config.json for missing defaults"""
|
||||
default_config = self.get_config_file()
|
||||
@ -140,6 +150,9 @@ class AppConfig:
|
||||
# missing nested values
|
||||
for sub_key, sub_value in value.items():
|
||||
if sub_key not in redis_config[key].keys():
|
||||
if sub_value == "rand-d":
|
||||
sub_value = self._build_rand_daily()
|
||||
|
||||
redis_config[key].update({sub_key: sub_value})
|
||||
needs_update = True
|
||||
|
||||
@ -256,19 +269,18 @@ class ScheduleBuilder:
|
||||
if not item_conf:
|
||||
continue
|
||||
|
||||
minute = item_conf["minute"]
|
||||
hour = item_conf["hour"]
|
||||
day_of_week = item_conf["day_of_week"]
|
||||
schedule_name = f"schedule_{schedule_item}"
|
||||
to_add = {
|
||||
schedule_name: {
|
||||
"task": schedule_item,
|
||||
"schedule": crontab(
|
||||
minute=minute, hour=hour, day_of_week=day_of_week
|
||||
),
|
||||
schedule_dict.update(
|
||||
{
|
||||
f"schedule_{schedule_item}": {
|
||||
"task": schedule_item,
|
||||
"schedule": crontab(
|
||||
minute=item_conf["minute"],
|
||||
hour=item_conf["hour"],
|
||||
day_of_week=item_conf["day_of_week"],
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
schedule_dict.update(to_add)
|
||||
)
|
||||
|
||||
return schedule_dict
|
||||
|
||||
|
@ -5,9 +5,9 @@ django-auth-ldap==4.1.0
|
||||
django-cors-headers==3.13.0
|
||||
djangorestframework==3.14.0
|
||||
Pillow==9.4.0
|
||||
redis==4.4.0
|
||||
redis==4.4.2
|
||||
requests==2.28.1
|
||||
ryd-client==0.0.6
|
||||
uWSGI==2.0.21
|
||||
whitenoise==6.3.0
|
||||
yt_dlp==2023.1.2
|
||||
yt_dlp==2023.1.6
|
||||
|
Loading…
Reference in New Issue
Block a user