mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
fix: Make YoutubeLoader support on-demand language translation (#13583)
**Description:** Refine the existing YoutubeLoader logic so that available transcripts can be translated on demand. **Issue:** Fixes a problem with YoutubeLoader (#13523) where the translation feature was not functioning as expected. **Tag maintainers/contributors who might be interested:** @eyurtsev --------- Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
cc50e023d1
commit
420a17542d
@ -99,7 +99,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"Language param : It's a list of language codes in a descending priority, `en` by default.\n",
|
"Language param : It's a list of language codes in a descending priority, `en` by default.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"translation param : It's a translate preference when the youtube does'nt have your select language, `en` by default."
|
"translation param : It's a translate preference, you can translate available transcript to your preferred language."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -146,7 +146,7 @@ class YoutubeLoader(BaseLoader):
|
|||||||
video_id: str,
|
video_id: str,
|
||||||
add_video_info: bool = False,
|
add_video_info: bool = False,
|
||||||
language: Union[str, Sequence[str]] = "en",
|
language: Union[str, Sequence[str]] = "en",
|
||||||
translation: str = "en",
|
translation: Optional[str] = None,
|
||||||
continue_on_failure: bool = False,
|
continue_on_failure: bool = False,
|
||||||
):
|
):
|
||||||
"""Initialize with YouTube video ID."""
|
"""Initialize with YouTube video ID."""
|
||||||
@ -206,8 +206,10 @@ class YoutubeLoader(BaseLoader):
|
|||||||
try:
|
try:
|
||||||
transcript = transcript_list.find_transcript(self.language)
|
transcript = transcript_list.find_transcript(self.language)
|
||||||
except NoTranscriptFound:
|
except NoTranscriptFound:
|
||||||
en_transcript = transcript_list.find_transcript(["en"])
|
transcript = transcript_list.find_transcript(["en"])
|
||||||
transcript = en_transcript.translate(self.translation)
|
|
||||||
|
if self.translation is not None:
|
||||||
|
transcript = transcript.translate(self.translation)
|
||||||
|
|
||||||
transcript_pieces = transcript.fetch()
|
transcript_pieces = transcript.fetch()
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user