forked from Archives/langchain
Harrison/youtube multi language (#5758)
Co-authored-by: rafly lesmana <raflylesmana111@gmail.com>
This commit is contained in:
parent
2dcda8a8ac
commit
25487fa5ee
@ -1,6 +1,7 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "df770c72",
|
"id": "df770c72",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@ -55,11 +56,12 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "6b278a1b",
|
"id": "6b278a1b",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"## Add video info"
|
"### Add video info"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -79,20 +81,36 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"loader = YoutubeLoader.from_youtube_url(\"https://www.youtube.com/watch?v=QsYGlZkevEg\", add_video_info=True)"
|
"loader = YoutubeLoader.from_youtube_url(\"https://www.youtube.com/watch?v=QsYGlZkevEg\", add_video_info=True)\n",
|
||||||
|
"loader.load()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"attachments": {},
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "fc417e31",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Add language preferences\n",
|
||||||
|
"\n",
|
||||||
|
"Language param : A list of language codes in descending order of priority, `en` by default.\n",
|
||||||
|
"\n",
|
||||||
|
"translation param : The language to translate the transcript into when YouTube doesn't have a transcript in any of your selected languages, `en` by default."
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"id": "97b98e92",
|
"id": "08510625",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"loader = YoutubeLoader.from_youtube_url(\"https://www.youtube.com/watch?v=QsYGlZkevEg\", add_video_info=True, language=['en','id'], translation='en')\n",
|
||||||
"loader.load()"
|
"loader.load()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "65796cc5",
|
"id": "65796cc5",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
@ -3,7 +3,7 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import logging
|
import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional, Sequence, Union
|
||||||
from urllib.parse import parse_qs, urlparse
|
from urllib.parse import parse_qs, urlparse
|
||||||
|
|
||||||
from pydantic import root_validator
|
from pydantic import root_validator
|
||||||
@ -146,13 +146,19 @@ class YoutubeLoader(BaseLoader):
|
|||||||
self,
|
self,
|
||||||
video_id: str,
|
video_id: str,
|
||||||
add_video_info: bool = False,
|
add_video_info: bool = False,
|
||||||
language: str = "en",
|
language: Union[str, Sequence[str]] = "en",
|
||||||
|
translation: str = "en",
|
||||||
continue_on_failure: bool = False,
|
continue_on_failure: bool = False,
|
||||||
):
|
):
|
||||||
"""Initialize with YouTube video ID."""
|
"""Initialize with YouTube video ID."""
|
||||||
self.video_id = video_id
|
self.video_id = video_id
|
||||||
self.add_video_info = add_video_info
|
self.add_video_info = add_video_info
|
||||||
self.language = language
|
self.language = language
|
||||||
|
if isinstance(language, str):
|
||||||
|
self.language = [language]
|
||||||
|
else:
|
||||||
|
self.language = language
|
||||||
|
self.translation = translation
|
||||||
self.continue_on_failure = continue_on_failure
|
self.continue_on_failure = continue_on_failure
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -199,10 +205,10 @@ class YoutubeLoader(BaseLoader):
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
try:
|
try:
|
||||||
transcript = transcript_list.find_transcript([self.language])
|
transcript = transcript_list.find_transcript(self.language)
|
||||||
except NoTranscriptFound:
|
except NoTranscriptFound:
|
||||||
en_transcript = transcript_list.find_transcript(["en"])
|
en_transcript = transcript_list.find_transcript(["en"])
|
||||||
transcript = en_transcript.translate(self.language)
|
transcript = en_transcript.translate(self.translation)
|
||||||
|
|
||||||
transcript_pieces = transcript.fetch()
|
transcript_pieces = transcript.fetch()
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user