fix: handle youtube TranscriptsDisabled (#3276)

Handles the error raised when a YouTube video has transcripts disabled by returning an empty list of documents instead of propagating the exception:

```
youtube_transcript_api._errors.TranscriptsDisabled: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=<URL> This is most likely caused by:

Subtitles are disabled for this video

If you are sure that the described cause is not responsible for this error and that a transcript should be retrievable, please create an issue at https://github.com/jdepoix/youtube-transcript-api/issues. Please add which version of youtube_transcript_api you are using and provide the information needed to replicate the error. Also make sure that there are no open issues which already describe your problem!
```

Signed-off-by: Sertac Ozercan <sozercan@gmail.com>
This commit is contained in:
Sertaç Özercan 2023-04-21 01:27:42 -07:00 committed by GitHub
parent 04e1d6c699
commit 1e91266a8a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -114,7 +114,11 @@ class YoutubeLoader(BaseLoader):
def load(self) -> List[Document]: def load(self) -> List[Document]:
"""Load documents.""" """Load documents."""
try: try:
from youtube_transcript_api import NoTranscriptFound, YouTubeTranscriptApi from youtube_transcript_api import (
NoTranscriptFound,
TranscriptsDisabled,
YouTubeTranscriptApi,
)
except ImportError: except ImportError:
raise ImportError( raise ImportError(
"Could not import youtube_transcript_api python package. " "Could not import youtube_transcript_api python package. "
@@ -129,7 +133,11 @@ class YoutubeLoader(BaseLoader):
video_info = self._get_video_info() video_info = self._get_video_info()
metadata.update(video_info) metadata.update(video_info)
try:
transcript_list = YouTubeTranscriptApi.list_transcripts(self.video_id) transcript_list = YouTubeTranscriptApi.list_transcripts(self.video_id)
except TranscriptsDisabled:
return []
try: try:
transcript = transcript_list.find_transcript([self.language]) transcript = transcript_list.find_transcript([self.language])
except NoTranscriptFound: except NoTranscriptFound: