fix: Make YoutubeLoader support on-demand language translation (#13583)

**Description:**
Enhance the functionality of YoutubeLoader to enable the translation of
available transcripts by refining the existing logic.

**Issue:**
Fixes a problem with YoutubeLoader (#13523) where the translation
feature does not function as expected.

Tag maintainers/contributors who might be interested:
@eyurtsev

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
rafly lesmana 2023-11-20 08:34:48 +07:00 committed by GitHub
parent cc50e023d1
commit 420a17542d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 6 additions and 4 deletions

View File

@ -99,7 +99,7 @@
"\n", "\n",
"Language param : It's a list of language codes in a descending priority, `en` by default.\n", "Language param : It's a list of language codes in a descending priority, `en` by default.\n",
"\n", "\n",
"translation param : It's a translate preference when the youtube does'nt have your select language, `en` by default." "translation param : It's a translate preference, you can translate available transcript to your preferred language."
] ]
}, },
{ {

View File

@ -146,7 +146,7 @@ class YoutubeLoader(BaseLoader):
video_id: str, video_id: str,
add_video_info: bool = False, add_video_info: bool = False,
language: Union[str, Sequence[str]] = "en", language: Union[str, Sequence[str]] = "en",
translation: str = "en", translation: Optional[str] = None,
continue_on_failure: bool = False, continue_on_failure: bool = False,
): ):
"""Initialize with YouTube video ID.""" """Initialize with YouTube video ID."""
@ -206,8 +206,10 @@ class YoutubeLoader(BaseLoader):
try: try:
transcript = transcript_list.find_transcript(self.language) transcript = transcript_list.find_transcript(self.language)
except NoTranscriptFound: except NoTranscriptFound:
en_transcript = transcript_list.find_transcript(["en"]) transcript = transcript_list.find_transcript(["en"])
transcript = en_transcript.translate(self.translation)
if self.translation is not None:
transcript = transcript.translate(self.translation)
transcript_pieces = transcript.fetch() transcript_pieces = transcript.fetch()