archive vtt files for offline import

This commit is contained in:
simon 2022-08-16 15:42:54 +07:00
parent a4932b163b
commit 0210a97b48
No known key found for this signature in database
GPG Key ID: 2C15AA5E89985DD4
2 changed files with 32 additions and 2 deletions

View File

@ -511,7 +511,7 @@ class ManualImport:
print(f"{video_id}: manual import failed, and no metadata found.")
raise ValueError
video.check_subtitles()
video.check_subtitles(subtitle_files=self.current_video["subtitle"])
video.upload_to_es()
if video.offline_import and self.current_video["thumb"]:
@ -547,6 +547,12 @@ class ManualImport:
new_path = os.path.join(channel_folder, file)
shutil.move(old_path, new_path, copy_function=shutil.copyfile)
base_name, _ = os.path.splitext(new_path)
for old_path in self.current_video["subtitle"]:
lang = old_path.split(".")[-2]
new_path = f"{base_name}.{lang}.vtt"
shutil.move(old_path, new_path, copy_function=shutil.copyfile)
def _cleanup(self, json_data):
"""cleanup leftover files"""
if os.path.exists(self.current_video["metadata"]):

View File

@ -344,14 +344,38 @@ class YoutubeVideo(YouTubeItem, YoutubeSubtitle):
if sponsorblock:
self.json_data["sponsorblock"] = sponsorblock
def check_subtitles(self):
def check_subtitles(self, subtitle_files=False):
    """add subtitles to json_data, from local files or the subtitle handler

    subtitle_files: iterable of subtitle file paths, only used when
    offline_import is set, any falsy value skips the local file branch
    """
    use_local_files = self.offline_import and subtitle_files
    if use_local_files:
        # offline import with files at hand, index them and stop here
        self.json_data["subtitles"] = self._offline_subtitles(subtitle_files)
        return

    subtitle_handler = YoutubeSubtitle(self)
    relevant = subtitle_handler.get_subtitles()
    if not relevant:
        # nothing reported by the handler, json_data stays untouched
        return

    self.json_data["subtitles"] = subtitle_handler.download_subtitles(
        relevant_subtitles=relevant
    )
def _offline_subtitles(self, subtitle_files):
    """build the subtitle index entries for locally provided vtt files

    returns one dict per item in subtitle_files, in the same order
    """
    # subtitle media_url reuses the video media_url without its extension
    media_stem = os.path.splitext(self.json_data["media_url"])[0]
    # language is the second to last dot separated part of the given path
    lang_codes = [file_path.split(".")[-2] for file_path in subtitle_files]

    return [
        {
            "ext": "vtt",
            "url": False,
            "name": lang_code,
            "lang": lang_code,
            "source": "file",
            "media_url": f"{media_stem}.{lang_code}.vtt",
        }
        for lang_code in lang_codes
    ]
def update_media_url(self):
"""update only media_url in es for reindex channel rename"""
data = {"doc": {"media_url": self.json_data["media_url"]}}