mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
fix: handle youtube TranscriptsDisabled (#3276)
Handles the error raised when a YouTube video has transcripts disabled:

```
youtube_transcript_api._errors.TranscriptsDisabled:
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=<URL> This is most likely caused by:

Subtitles are disabled for this video

If you are sure that the described cause is not responsible for this error and that a transcript should be retrievable, please create an issue at https://github.com/jdepoix/youtube-transcript-api/issues. Please add which version of youtube_transcript_api you are using and provide the information needed to replicate the error. Also make sure that there are no open issues which already describe your problem!
```

Signed-off-by: Sertac Ozercan <sozercan@gmail.com>
This commit is contained in:
parent
04e1d6c699
commit
1e91266a8a
@ -114,7 +114,11 @@ class YoutubeLoader(BaseLoader):
|
|||||||
def load(self) -> List[Document]:
|
def load(self) -> List[Document]:
|
||||||
"""Load documents."""
|
"""Load documents."""
|
||||||
try:
|
try:
|
||||||
from youtube_transcript_api import NoTranscriptFound, YouTubeTranscriptApi
|
from youtube_transcript_api import (
|
||||||
|
NoTranscriptFound,
|
||||||
|
TranscriptsDisabled,
|
||||||
|
YouTubeTranscriptApi,
|
||||||
|
)
|
||||||
except ImportError:
|
except ImportError:
|
||||||
raise ImportError(
|
raise ImportError(
|
||||||
"Could not import youtube_transcript_api python package. "
|
"Could not import youtube_transcript_api python package. "
|
||||||
@ -129,7 +133,11 @@ class YoutubeLoader(BaseLoader):
|
|||||||
video_info = self._get_video_info()
|
video_info = self._get_video_info()
|
||||||
metadata.update(video_info)
|
metadata.update(video_info)
|
||||||
|
|
||||||
transcript_list = YouTubeTranscriptApi.list_transcripts(self.video_id)
|
try:
|
||||||
|
transcript_list = YouTubeTranscriptApi.list_transcripts(self.video_id)
|
||||||
|
except TranscriptsDisabled:
|
||||||
|
return []
|
||||||
|
|
||||||
try:
|
try:
|
||||||
transcript = transcript_list.find_transcript([self.language])
|
transcript = transcript_list.find_transcript([self.language])
|
||||||
except NoTranscriptFound:
|
except NoTranscriptFound:
|
||||||
|
Loading…
Reference in New Issue
Block a user