diff --git a/docs/modules/indexes/document_loaders/examples/youtube_transcript.ipynb b/docs/modules/indexes/document_loaders/examples/youtube_transcript.ipynb index 70d5be06cb..9264bf5434 100644 --- a/docs/modules/indexes/document_loaders/examples/youtube_transcript.ipynb +++ b/docs/modules/indexes/document_loaders/examples/youtube_transcript.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "id": "df770c72", "metadata": {}, @@ -55,11 +56,12 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "6b278a1b", "metadata": {}, "source": [ - "## Add video info" + "### Add video info" ] }, { @@ -79,20 +81,36 @@ "metadata": {}, "outputs": [], "source": [ - "loader = YoutubeLoader.from_youtube_url(\"https://www.youtube.com/watch?v=QsYGlZkevEg\", add_video_info=True)" + "loader = YoutubeLoader.from_youtube_url(\"https://www.youtube.com/watch?v=QsYGlZkevEg\", add_video_info=True)\n", + "loader.load()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "fc417e31", + "metadata": {}, + "source": [ + "### Add language preferences\n", + "\n", + "`language` param: a list of language codes in descending priority, `en` by default.\n", + "\n", + "`translation` param: the language to translate the transcript into when YouTube doesn't have a transcript in any of your selected languages, `en` by default." 
] }, { "cell_type": "code", "execution_count": null, - "id": "97b98e92", + "id": "08510625", "metadata": {}, "outputs": [], "source": [ + "loader = YoutubeLoader.from_youtube_url(\"https://www.youtube.com/watch?v=QsYGlZkevEg\", add_video_info=True, language=['en','id'], translation='en')\n", "loader.load()" ] }, { + "attachments": {}, "cell_type": "markdown", "id": "65796cc5", "metadata": {}, diff --git a/langchain/document_loaders/youtube.py b/langchain/document_loaders/youtube.py index 9960411cd6..b828c3b0b7 100644 --- a/langchain/document_loaders/youtube.py +++ b/langchain/document_loaders/youtube.py @@ -3,7 +3,7 @@ from __future__ import annotations import logging from pathlib import Path -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Sequence, Union from urllib.parse import parse_qs, urlparse from pydantic import root_validator @@ -146,13 +146,19 @@ class YoutubeLoader(BaseLoader): self, video_id: str, add_video_info: bool = False, - language: str = "en", + language: Union[str, Sequence[str]] = "en", + translation: str = "en", continue_on_failure: bool = False, ): """Initialize with YouTube video ID.""" self.video_id = video_id self.add_video_info = add_video_info self.language = language + if isinstance(language, str): + self.language = [language] + else: + self.language = language + self.translation = translation self.continue_on_failure = continue_on_failure @staticmethod @@ -199,10 +205,10 @@ class YoutubeLoader(BaseLoader): return [] try: - transcript = transcript_list.find_transcript([self.language]) + transcript = transcript_list.find_transcript(self.language) except NoTranscriptFound: en_transcript = transcript_list.find_transcript(["en"]) - transcript = en_transcript.translate(self.language) + transcript = en_transcript.translate(self.translation) transcript_pieces = transcript.fetch()