|
|
@ -1,4 +1,4 @@
|
|
|
|
from typing import Iterator
|
|
|
|
from typing import Iterator, Optional
|
|
|
|
|
|
|
|
|
|
|
|
from langchain.document_loaders.base import BaseBlobParser
|
|
|
|
from langchain.document_loaders.base import BaseBlobParser
|
|
|
|
from langchain.document_loaders.blob_loaders import Blob
|
|
|
|
from langchain.document_loaders.blob_loaders import Blob
|
|
|
@ -9,6 +9,9 @@ class OpenAIWhisperParser(BaseBlobParser):
|
|
|
|
"""Transcribe and parse audio files.
|
|
|
|
"""Transcribe and parse audio files.
|
|
|
|
Audio transcription is with OpenAI Whisper model."""
|
|
|
|
Audio transcription is with OpenAI Whisper model."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, api_key: Optional[str] = None):
|
|
|
|
|
|
|
|
self.api_key = api_key
|
|
|
|
|
|
|
|
|
|
|
|
def lazy_parse(self, blob: Blob) -> Iterator[Document]:
|
|
|
|
def lazy_parse(self, blob: Blob) -> Iterator[Document]:
|
|
|
|
"""Lazily parse the blob."""
|
|
|
|
"""Lazily parse the blob."""
|
|
|
|
|
|
|
|
|
|
|
@ -28,6 +31,10 @@ class OpenAIWhisperParser(BaseBlobParser):
|
|
|
|
"pydub package not found, please install it with " "`pip install pydub`"
|
|
|
|
"pydub package not found, please install it with " "`pip install pydub`"
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Set the API key if provided
|
|
|
|
|
|
|
|
if self.api_key:
|
|
|
|
|
|
|
|
openai.api_key = self.api_key
|
|
|
|
|
|
|
|
|
|
|
|
# Audio file from disk
|
|
|
|
# Audio file from disk
|
|
|
|
audio = AudioSegment.from_file(blob.path)
|
|
|
|
audio = AudioSegment.from_file(blob.path)
|
|
|
|
|
|
|
|
|
|
|
|