mirror of
https://github.com/tubearchivist/tubearchivist
synced 2024-11-02 09:41:07 +00:00
cover edge cases where end timestamp is after start timestamp of new cue
This commit is contained in:
parent
d99ce0d98e
commit
b494fc10af
@ -159,6 +159,7 @@ class SubtitleParser:
|
||||
self._parse_cues()
|
||||
self._match_text_lines()
|
||||
self._add_id()
|
||||
self._timestamp_check()
|
||||
|
||||
def _parse_cues(self):
|
||||
"""split into cues"""
|
||||
@ -181,7 +182,8 @@ class SubtitleParser:
|
||||
clean = re.sub(self.stamp_reg, "", line)
|
||||
clean = re.sub(self.tag_reg, "", clean)
|
||||
cue_dict["lines"].append(clean)
|
||||
if clean and clean not in self.all_text_lines:
|
||||
if clean.strip() and clean not in self.all_text_lines[-4:]:
|
||||
# remove immediate duplicates
|
||||
self.all_text_lines.append(clean)
|
||||
|
||||
return cue_dict
|
||||
@ -205,6 +207,21 @@ class SubtitleParser:
|
||||
|
||||
self.matched.append(new_cue)
|
||||
|
||||
def _timestamp_check(self):
    """clamp overlapping cues so none ends after the next one starts

    Walks self.matched as consecutive (cue, next cue) pairs. Whenever the
    "end" timestamp of a cue is numerically bigger than the "start"
    timestamp of the cue that follows it, the "end" value is overwritten
    in place with that following "start" string. The last cue has no
    successor and is left untouched. Returns None.
    """

    def as_number(stamp):
        """drop every non-digit character and read the rest as an int"""
        # NOTE(review): comparing these ints only orders timestamps
        # correctly if both use the same field layout - confirm upstream
        return int(re.sub("[^0-9]", "", stamp))

    # pair every cue with its successor; zip stops at the shorter input,
    # so an empty or single-cue list simply yields no pairs
    for cue, next_cue in zip(self.matched, self.matched[1:]):
        next_start = next_cue.get("start")
        if as_number(cue.get("end")) > as_number(next_start):
            cue["end"] = next_start
|
||||
|
||||
def _add_id(self):
|
||||
"""add id to matched cues"""
|
||||
for idx, _ in enumerate(self.matched):
|
||||
|
Loading…
Reference in New Issue
Block a user