forked from Archives/langchain
Harrison/youtube multi language (#5758)
Co-authored-by: rafly lesmana <raflylesmana111@gmail.com>
This commit is contained in:
parent
2dcda8a8ac
commit
25487fa5ee
@ -1,6 +1,7 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "df770c72",
|
||||
"metadata": {},
|
||||
@ -55,11 +56,12 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "6b278a1b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Add video info"
|
||||
"### Add video info"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -79,20 +81,36 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = YoutubeLoader.from_youtube_url(\"https://www.youtube.com/watch?v=QsYGlZkevEg\", add_video_info=True)"
|
||||
"loader = YoutubeLoader.from_youtube_url(\"https://www.youtube.com/watch?v=QsYGlZkevEg\", add_video_info=True)\n",
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "fc417e31",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Add language preferences\n",
|
||||
"\n",
|
||||
"`language` param: a list of language codes in descending order of priority; defaults to `en`.\n",
|
||||
"\n",
|
||||
"`translation` param: the language the transcript is translated into when YouTube doesn't have a transcript in any of your selected languages; defaults to `en`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "97b98e92",
|
||||
"id": "08510625",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = YoutubeLoader.from_youtube_url(\"https://www.youtube.com/watch?v=QsYGlZkevEg\", add_video_info=True, language=['en','id'], translation='en')\n",
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "65796cc5",
|
||||
"metadata": {},
|
||||
|
@ -3,7 +3,7 @@ from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Dict, List, Optional, Sequence, Union
|
||||
from urllib.parse import parse_qs, urlparse
|
||||
|
||||
from pydantic import root_validator
|
||||
@ -146,13 +146,19 @@ class YoutubeLoader(BaseLoader):
|
||||
self,
|
||||
video_id: str,
|
||||
add_video_info: bool = False,
|
||||
language: str = "en",
|
||||
language: Union[str, Sequence[str]] = "en",
|
||||
translation: str = "en",
|
||||
continue_on_failure: bool = False,
|
||||
):
|
||||
"""Initialize with YouTube video ID."""
|
||||
self.video_id = video_id
|
||||
self.add_video_info = add_video_info
|
||||
self.language = language
|
||||
if isinstance(language, str):
|
||||
self.language = [language]
|
||||
else:
|
||||
self.language = language
|
||||
self.translation = translation
|
||||
self.continue_on_failure = continue_on_failure
|
||||
|
||||
@staticmethod
|
||||
@ -199,10 +205,10 @@ class YoutubeLoader(BaseLoader):
|
||||
return []
|
||||
|
||||
try:
|
||||
transcript = transcript_list.find_transcript([self.language])
|
||||
transcript = transcript_list.find_transcript(self.language)
|
||||
except NoTranscriptFound:
|
||||
en_transcript = transcript_list.find_transcript(["en"])
|
||||
transcript = en_transcript.translate(self.language)
|
||||
transcript = en_transcript.translate(self.translation)
|
||||
|
||||
transcript_pieces = transcript.fetch()
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user