forked from Archives/langchain
Create OpenAIWhisperParser for generating Documents from audio files (#5580)
OpenAIWhisperParser: This PR creates a new parser, `OpenAIWhisperParser`, that uses the [OpenAI Whisper model](https://platform.openai.com/docs/guides/speech-to-text/quickstart) to transcribe audio files into text (`Documents`). Please see the notebook for usage.
This commit is contained in:
parent
a4c9053d40
commit
aea090045b
@ -30,6 +30,7 @@ For detailed instructions on how to get set up with Unstructured, see installati
|
|||||||
:maxdepth: 1
|
:maxdepth: 1
|
||||||
:glob:
|
:glob:
|
||||||
|
|
||||||
|
./document_loaders/examples/audio.ipynb
|
||||||
./document_loaders/examples/conll-u.ipynb
|
./document_loaders/examples/conll-u.ipynb
|
||||||
./document_loaders/examples/copypaste.ipynb
|
./document_loaders/examples/copypaste.ipynb
|
||||||
./document_loaders/examples/csv.ipynb
|
./document_loaders/examples/csv.ipynb
|
||||||
|
97
docs/modules/indexes/document_loaders/examples/audio.ipynb
Normal file
97
docs/modules/indexes/document_loaders/examples/audio.ipynb
Normal file
File diff suppressed because one or more lines are too long
@ -1,3 +1,4 @@
|
|||||||
|
from langchain.document_loaders.parsers.audio import OpenAIWhisperParser
|
||||||
from langchain.document_loaders.parsers.html import BS4HTMLParser
|
from langchain.document_loaders.parsers.html import BS4HTMLParser
|
||||||
from langchain.document_loaders.parsers.pdf import (
|
from langchain.document_loaders.parsers.pdf import (
|
||||||
PDFMinerParser,
|
PDFMinerParser,
|
||||||
@ -9,6 +10,7 @@ from langchain.document_loaders.parsers.pdf import (
|
|||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"BS4HTMLParser",
|
"BS4HTMLParser",
|
||||||
|
"OpenAIWhisperParser",
|
||||||
"PDFMinerParser",
|
"PDFMinerParser",
|
||||||
"PDFPlumberParser",
|
"PDFPlumberParser",
|
||||||
"PyMuPDFParser",
|
"PyMuPDFParser",
|
||||||
|
21
langchain/document_loaders/parsers/audio.py
Normal file
21
langchain/document_loaders/parsers/audio.py
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
from typing import Iterator
|
||||||
|
|
||||||
|
from langchain.document_loaders.base import BaseBlobParser
|
||||||
|
from langchain.document_loaders.blob_loaders import Blob
|
||||||
|
from langchain.schema import Document
|
||||||
|
|
||||||
|
|
||||||
|
class OpenAIWhisperParser(BaseBlobParser):
    """Blob parser that turns an audio file into a transcript ``Document``.

    The audio is handed to the OpenAI ``whisper-1`` model through the
    ``openai`` package, which is imported inside ``lazy_parse``.
    """

    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
        """Transcribe ``blob`` and yield the result as a single document.

        Args:
            blob: Audio blob to transcribe; it is read through
                ``blob.as_bytes_io()``.

        Yields:
            One ``Document`` whose ``page_content`` is the transcript text
            and whose metadata stores ``blob.source`` under ``"source"``.
        """
        # Imported at call time, so ``openai`` is only required once a blob
        # is actually parsed.
        import openai

        with blob.as_bytes_io() as audio_stream:
            # NOTE(review): the OpenAI client presumably needs a named stream
            # to work out the audio format -- confirm for in-memory blobs.
            whisper_result = openai.Audio.transcribe("whisper-1", audio_stream)
            transcript_text = whisper_result.text
            doc_metadata = {"source": blob.source}
            # Yield while the stream is still open, so it is released only
            # after the consumer resumes or closes the generator.
            yield Document(page_content=transcript_text, metadata=doc_metadata)
|
@ -5,6 +5,7 @@ def test_parsers_public_api_correct() -> None:
|
|||||||
"""Test public API of parsers for breaking changes."""
|
"""Test public API of parsers for breaking changes."""
|
||||||
assert set(__all__) == {
|
assert set(__all__) == {
|
||||||
"BS4HTMLParser",
|
"BS4HTMLParser",
|
||||||
|
"OpenAIWhisperParser",
|
||||||
"PyPDFParser",
|
"PyPDFParser",
|
||||||
"PDFMinerParser",
|
"PDFMinerParser",
|
||||||
"PyMuPDFParser",
|
"PyMuPDFParser",
|
||||||
|
Loading…
Reference in New Issue
Block a user