class ManualImport:
    """ import and index existing video files from the import folder

    Scans <cache_dir>/import for media files, pairs each with an
    optional metadata json file, extracts the youtube id and hands
    the file over to the regular indexing and archiving pipeline.
    """

    def __init__(self):
        # read config per instance instead of at class definition time,
        # so importing this module has no side effects
        self.CONFIG = AppConfig().config
        self.CACHE_DIR = self.CONFIG['application']['cache_dir']
        self.IMPORT_DIR = os.path.join(self.CACHE_DIR, 'import')
        # list of dicts, one per detected media file
        self.identified = self.import_folder_parser()

    def import_folder_parser(self):
        """ detect media files and optional json metadata files in the
        import folder, return list of dicts with keys
        video_file, json_file (or False) and youtube_id """
        to_import = sorted(os.listdir(self.IMPORT_DIR))
        video_files = [i for i in to_import if not i.endswith('.json')]

        identified = []
        for file_path in video_files:
            file_dict = {'video_file': file_path}
            file_name, _ = os.path.splitext(file_path)

            # a json metadata file shares the media file's base name,
            # e.g. "clip.info.json" for "clip.mkv"
            matching_json = [
                i for i in to_import
                if i.startswith(file_name) and i.endswith('.json')
            ]
            if matching_json:
                json_file = matching_json[0]
                youtube_id = self.extract_id_from_json(json_file)
                file_dict.update({'json_file': json_file})
            else:
                youtube_id = self.extract_id_from_filename(file_name)
                file_dict.update({'json_file': False})

            file_dict.update({'youtube_id': youtube_id})
            identified.append(file_dict)

        return identified

    @staticmethod
    def extract_id_from_filename(file_name):
        """ look at the file name for the youtube id,
        expects filename ending in [<11-char-youtube-id>] """
        id_search = re.search(r'\[([a-zA-Z0-9_-]{11})\]$', file_name)
        if id_search:
            return id_search.group(1)

        print('failed to extract youtube id for: ' + file_name)
        # raise a specific exception instead of a bare Exception;
        # still caught by callers handling Exception
        raise ValueError('failed to extract youtube id: ' + file_name)

    def extract_id_from_json(self, json_file):
        """ open json metadata file and extract the id key """
        json_path = os.path.join(self.IMPORT_DIR, json_file)
        with open(json_path, 'r', encoding='utf-8') as f:
            json_content = f.read()

        return json.loads(json_content)['id']

    def process_import(self):
        """ go through identified media files: move to download cache,
        index and archive, then clean up the import folder """
        for media_file in self.identified:
            json_file = media_file['json_file']
            video_file = media_file['video_file']
            youtube_id = media_file['youtube_id']

            video_path = os.path.join(self.IMPORT_DIR, video_file)
            self.move_to_cache(video_path, youtube_id)

            # identify and archive
            vid_dict = index_new_video(youtube_id)
            VideoDownloader([youtube_id]).move_to_archive(vid_dict)

            # cleanup leftover files in the import folder
            if os.path.exists(video_path):
                os.remove(video_path)
            if json_file:
                os.remove(os.path.join(self.IMPORT_DIR, json_file))

    def move_to_cache(self, video_path, youtube_id):
        """ move identified video file to the download cache,
        converting to mp4 with ffmpeg when needed """
        file_name = os.path.split(video_path)[-1]
        video_file, ext = os.path.splitext(file_name)

        # bug fix: previously the id-suffixed name was computed but
        # immediately overwritten - make sure the youtube id actually
        # ends up in the cached file name
        if youtube_id not in video_file:
            video_file = f'{video_file}_{youtube_id}'

        if ext == '.mp4':
            # already mp4, just move
            new_path = os.path.join(
                self.CACHE_DIR, 'download', video_file + ext
            )
            shutil.move(video_path, new_path)
        else:
            # needs conversion; ffmpeg options are positional and must
            # come before the output file, not after it
            new_path = os.path.join(
                self.CACHE_DIR, 'download', video_file + '.mp4'
            )
            print(f'processing with ffmpeg: {video_file}')
            subprocess.run(
                ["ffmpeg", "-loglevel", "warning", "-stats",
                 "-i", video_path, new_path], check=True
            )