forked from Archives/langchain
Create OpenAIWhisperParser for generating Documents from audio files (#5580)
# OpenAIWhisperParser This PR creates a new parser, `OpenAIWhisperParser`, that uses the [OpenAI Whisper model](https://platform.openai.com/docs/guides/speech-to-text/quickstart) to perform transcription of audio files to text (`Documents`). Please see the notebook for usage.
This commit is contained in:
parent
a4c9053d40
commit
aea090045b
@ -30,6 +30,7 @@ For detailed instructions on how to get set up with Unstructured, see installati
|
||||
:maxdepth: 1
|
||||
:glob:
|
||||
|
||||
./document_loaders/examples/audio.ipynb
|
||||
./document_loaders/examples/conll-u.ipynb
|
||||
./document_loaders/examples/copypaste.ipynb
|
||||
./document_loaders/examples/csv.ipynb
|
||||
|
97
docs/modules/indexes/document_loaders/examples/audio.ipynb
Normal file
97
docs/modules/indexes/document_loaders/examples/audio.ipynb
Normal file
File diff suppressed because one or more lines are too long
@ -1,3 +1,4 @@
|
||||
from langchain.document_loaders.parsers.audio import OpenAIWhisperParser
|
||||
from langchain.document_loaders.parsers.html import BS4HTMLParser
|
||||
from langchain.document_loaders.parsers.pdf import (
|
||||
PDFMinerParser,
|
||||
@ -9,6 +10,7 @@ from langchain.document_loaders.parsers.pdf import (
|
||||
|
||||
__all__ = [
|
||||
"BS4HTMLParser",
|
||||
"OpenAIWhisperParser",
|
||||
"PDFMinerParser",
|
||||
"PDFPlumberParser",
|
||||
"PyMuPDFParser",
|
||||
|
21
langchain/document_loaders/parsers/audio.py
Normal file
21
langchain/document_loaders/parsers/audio.py
Normal file
@ -0,0 +1,21 @@
|
||||
from typing import Iterator
|
||||
|
||||
from langchain.document_loaders.base import BaseBlobParser
|
||||
from langchain.document_loaders.blob_loaders import Blob
|
||||
from langchain.schema import Document
|
||||
|
||||
|
||||
class OpenAIWhisperParser(BaseBlobParser):
    """Transcribe audio blobs into documents with the OpenAI Whisper model.

    Each blob is passed to ``openai.Audio.transcribe`` using the
    ``whisper-1`` model, and the resulting text is wrapped in a single
    ``Document``.
    """

    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
        """Lazily transcribe ``blob`` and yield the result.

        Args:
            blob: Audio content to transcribe.

        Yields:
            One ``Document`` whose ``page_content`` is the transcript text
            and whose metadata stores the blob's ``source``.

        Raises:
            ImportError: If the optional ``openai`` package is not installed.
        """
        # Imported lazily so that ``openai`` stays an optional dependency;
        # re-raise with an actionable hint while keeping the exception type.
        try:
            import openai
        except ImportError as exc:
            raise ImportError(
                "openai package not found, please install it with "
                "`pip install openai`"
            ) from exc

        # NOTE(review): the OpenAI client presumably infers the audio format
        # from the stream's name; in-memory blobs may not carry one -- confirm.
        with blob.as_bytes_io() as f:
            transcript = openai.Audio.transcribe("whisper-1", f)

        # Yield only after the stream is closed, so a suspended (or abandoned)
        # generator does not keep the underlying audio handle open.
        yield Document(
            page_content=transcript.text, metadata={"source": blob.source}
        )
|
@ -5,6 +5,7 @@ def test_parsers_public_api_correct() -> None:
|
||||
"""Test public API of parsers for breaking changes."""
|
||||
assert set(__all__) == {
|
||||
"BS4HTMLParser",
|
||||
"OpenAIWhisperParser",
|
||||
"PyPDFParser",
|
||||
"PDFMinerParser",
|
||||
"PyMuPDFParser",
|
||||
|
Loading…
Reference in New Issue
Block a user