mirror of
https://github.com/tubearchivist/tubearchivist
synced 2024-11-02 09:41:07 +00:00
private methods for YoutubeSubtitle and SubtitleParser
This commit is contained in:
parent
6e3df21f8c
commit
d3e9646fb6
@ -23,7 +23,7 @@ class YoutubeSubtitle:
|
|||||||
self.video = video
|
self.video = video
|
||||||
self.languages = False
|
self.languages = False
|
||||||
|
|
||||||
def sub_conf_parse(self):
|
def _sub_conf_parse(self):
|
||||||
"""add additional conf values to self"""
|
"""add additional conf values to self"""
|
||||||
languages_raw = self.video.config["downloads"]["subtitle"]
|
languages_raw = self.video.config["downloads"]["subtitle"]
|
||||||
if languages_raw:
|
if languages_raw:
|
||||||
@ -31,26 +31,26 @@ class YoutubeSubtitle:
|
|||||||
|
|
||||||
def get_subtitles(self):
|
def get_subtitles(self):
|
||||||
"""check what to do"""
|
"""check what to do"""
|
||||||
self.sub_conf_parse()
|
self._sub_conf_parse()
|
||||||
if not self.languages:
|
if not self.languages:
|
||||||
# no subtitles
|
# no subtitles
|
||||||
return False
|
return False
|
||||||
|
|
||||||
relevant_subtitles = []
|
relevant_subtitles = []
|
||||||
for lang in self.languages:
|
for lang in self.languages:
|
||||||
user_sub = self.get_user_subtitles(lang)
|
user_sub = self._get_user_subtitles(lang)
|
||||||
if user_sub:
|
if user_sub:
|
||||||
relevant_subtitles.append(user_sub)
|
relevant_subtitles.append(user_sub)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if self.video.config["downloads"]["subtitle_source"] == "auto":
|
if self.video.config["downloads"]["subtitle_source"] == "auto":
|
||||||
auto_cap = self.get_auto_caption(lang)
|
auto_cap = self._get_auto_caption(lang)
|
||||||
if auto_cap:
|
if auto_cap:
|
||||||
relevant_subtitles.append(auto_cap)
|
relevant_subtitles.append(auto_cap)
|
||||||
|
|
||||||
return relevant_subtitles
|
return relevant_subtitles
|
||||||
|
|
||||||
def get_auto_caption(self, lang):
|
def _get_auto_caption(self, lang):
|
||||||
"""get auto_caption subtitles"""
|
"""get auto_caption subtitles"""
|
||||||
print(f"{self.video.youtube_id}-{lang}: get auto generated subtitles")
|
print(f"{self.video.youtube_id}-{lang}: get auto generated subtitles")
|
||||||
all_subtitles = self.video.youtube_meta.get("automatic_captions")
|
all_subtitles = self.video.youtube_meta.get("automatic_captions")
|
||||||
@ -87,7 +87,7 @@ class YoutubeSubtitle:
|
|||||||
|
|
||||||
return all_subtitles
|
return all_subtitles
|
||||||
|
|
||||||
def get_user_subtitles(self, lang):
|
def _get_user_subtitles(self, lang):
|
||||||
"""get subtitles uploaded from channel owner"""
|
"""get subtitles uploaded from channel owner"""
|
||||||
print(f"{self.video.youtube_id}-{lang}: get user uploaded subtitles")
|
print(f"{self.video.youtube_id}-{lang}: get user uploaded subtitles")
|
||||||
all_subtitles = self._normalize_lang()
|
all_subtitles = self._normalize_lang()
|
||||||
@ -160,8 +160,8 @@ class SubtitleParser:
|
|||||||
self.all_cues = []
|
self.all_cues = []
|
||||||
for idx, event in enumerate(all_events):
|
for idx, event in enumerate(all_events):
|
||||||
cue = {
|
cue = {
|
||||||
"start": self.ms_conv(event["tStartMs"]),
|
"start": self._ms_conv(event["tStartMs"]),
|
||||||
"end": self.ms_conv(event["tStartMs"] + event["dDurationMs"]),
|
"end": self._ms_conv(event["tStartMs"] + event["dDurationMs"]),
|
||||||
"text": "".join([i.get("utf8") for i in event["segs"]]),
|
"text": "".join([i.get("utf8") for i in event["segs"]]),
|
||||||
"idx": idx + 1,
|
"idx": idx + 1,
|
||||||
}
|
}
|
||||||
@ -184,7 +184,7 @@ class SubtitleParser:
|
|||||||
return flatten
|
return flatten
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def ms_conv(ms):
|
def _ms_conv(ms):
|
||||||
"""convert ms to timestamp"""
|
"""convert ms to timestamp"""
|
||||||
hours = str((ms // (1000 * 60 * 60)) % 24).zfill(2)
|
hours = str((ms // (1000 * 60 * 60)) % 24).zfill(2)
|
||||||
minutes = str((ms // (1000 * 60)) % 60).zfill(2)
|
minutes = str((ms // (1000 * 60)) % 60).zfill(2)
|
||||||
@ -206,7 +206,7 @@ class SubtitleParser:
|
|||||||
|
|
||||||
def create_bulk_import(self, video, source):
|
def create_bulk_import(self, video, source):
|
||||||
"""subtitle lines for es import"""
|
"""subtitle lines for es import"""
|
||||||
documents = self.create_documents(video, source)
|
documents = self._create_documents(video, source)
|
||||||
bulk_list = []
|
bulk_list = []
|
||||||
|
|
||||||
for document in documents:
|
for document in documents:
|
||||||
@ -220,9 +220,9 @@ class SubtitleParser:
|
|||||||
|
|
||||||
return query_str
|
return query_str
|
||||||
|
|
||||||
def create_documents(self, video, source):
|
def _create_documents(self, video, source):
|
||||||
"""process documents"""
|
"""process documents"""
|
||||||
documents = self.chunk_list(video.youtube_id)
|
documents = self._chunk_list(video.youtube_id)
|
||||||
channel = video.json_data.get("channel")
|
channel = video.json_data.get("channel")
|
||||||
meta_dict = {
|
meta_dict = {
|
||||||
"youtube_id": video.youtube_id,
|
"youtube_id": video.youtube_id,
|
||||||
@ -238,7 +238,7 @@ class SubtitleParser:
|
|||||||
|
|
||||||
return documents
|
return documents
|
||||||
|
|
||||||
def chunk_list(self, youtube_id):
|
def _chunk_list(self, youtube_id):
|
||||||
"""join cues for bulk import"""
|
"""join cues for bulk import"""
|
||||||
chunk_list = []
|
chunk_list = []
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user