From 1e91266a8a74367a99042325379185b62255d20a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Serta=C3=A7=20=C3=96zercan?= <852750+sozercan@users.noreply.github.com> Date: Fri, 21 Apr 2023 01:27:42 -0700 Subject: [PATCH] fix: handle youtube TranscriptsDisabled (#3276) Handle the `TranscriptsDisabled` error that youtube_transcript_api raises when a YouTube video has transcripts disabled: the loader now catches it and returns an empty list of documents instead of propagating the exception below. ``` youtube_transcript_api._errors.TranscriptsDisabled: Could not retrieve a transcript for the video https://www.youtube.com/watch?v= This is most likely caused by: Subtitles are disabled for this video If you are sure that the described cause is not responsible for this error and that a transcript should be retrievable, please create an issue at https://github.com/jdepoix/youtube-transcript-api/issues. Please add which version of youtube_transcript_api you are using and provide the information needed to replicate the error. Also make sure that there are no open issues which already describe your problem! ``` Signed-off-by: Sertac Ozercan --- langchain/document_loaders/youtube.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/langchain/document_loaders/youtube.py b/langchain/document_loaders/youtube.py index a3401df4..5ad1cd12 100644 --- a/langchain/document_loaders/youtube.py +++ b/langchain/document_loaders/youtube.py @@ -114,7 +114,11 @@ class YoutubeLoader(BaseLoader): def load(self) -> List[Document]: """Load documents.""" try: - from youtube_transcript_api import NoTranscriptFound, YouTubeTranscriptApi + from youtube_transcript_api import ( + NoTranscriptFound, + TranscriptsDisabled, + YouTubeTranscriptApi, + ) except ImportError: raise ImportError( "Could not import youtube_transcript_api python package. 
" @@ -129,7 +133,11 @@ class YoutubeLoader(BaseLoader): video_info = self._get_video_info() metadata.update(video_info) - transcript_list = YouTubeTranscriptApi.list_transcripts(self.video_id) + try: + transcript_list = YouTubeTranscriptApi.list_transcripts(self.video_id) + except TranscriptsDisabled: + return [] + try: transcript = transcript_list.find_transcript([self.language]) except NoTranscriptFound: