langchain/libs/community/tests/unit_tests/document_loaders/test_assemblyai.py

import pytest
from pytest_mock import MockerFixture

from langchain_community.document_loaders import AssemblyAIAudioTranscriptLoader
from langchain_community.document_loaders.assemblyai import TranscriptFormat


@pytest.mark.requires("assemblyai")
def test_initialization() -> None:
    """Check the attributes set on a loader built with only a path and API key.

    The file path must be stored unchanged, and the transcript format, which
    is not passed here, is expected to be ``TranscriptFormat.TEXT``.
    """
    audio_path = "./testfile.mp3"
    transcript_loader = AssemblyAIAudioTranscriptLoader(
        file_path=audio_path, api_key="api_key"
    )

    assert transcript_loader.file_path == audio_path
    assert transcript_loader.transcript_format == TranscriptFormat.TEXT


@pytest.mark.requires("assemblyai")
def test_load(mocker: MockerFixture) -> None:
    """Check that a successful transcription is returned as a single document.

    ``assemblyai.Transcriber.transcribe`` is patched to return a mock
    transcript with no error; the loader is expected to expose the mock's
    ``text`` as page content and its ``json_response`` as metadata.
    """
    fake_transcript = mocker.MagicMock(
        text="Test transcription text", json_response={"id": "1"}, error=None
    )
    mocker.patch("assemblyai.Transcriber.transcribe", return_value=fake_transcript)

    transcript_loader = AssemblyAIAudioTranscriptLoader(
        file_path="./testfile.mp3", api_key="api_key"
    )
    documents = transcript_loader.load()

    assert len(documents) == 1
    first_document = documents[0]
    assert first_document.page_content == "Test transcription text"
    assert first_document.metadata == {"id": "1"}


@pytest.mark.requires("assemblyai")
def test_transcription_error(mocker: MockerFixture) -> None:
    """Check that a transcript carrying an error makes ``load`` raise ValueError.

    ``assemblyai.Transcriber.transcribe`` is patched to return a mock whose
    ``error`` attribute is set; the raised message must contain that error
    text after the "Could not transcribe file: " prefix.
    """
    failed_transcript = mocker.MagicMock(error="Test error")
    mocker.patch("assemblyai.Transcriber.transcribe", return_value=failed_transcript)

    transcript_loader = AssemblyAIAudioTranscriptLoader(
        file_path="./testfile.mp3", api_key="api_key"
    )

    # ``match`` is applied as a regex search against the exception message.
    with pytest.raises(ValueError, match="Could not transcribe file: Test error"):
        transcript_loader.load()
Add new document_loader: AssemblyAIAudioTranscriptLoader (#9667) This PR adds a new document loader `AssemblyAIAudioTranscriptLoader` that allows to transcribe audio files with the [AssemblyAI API](https://www.assemblyai.com) and loads the transcribed text into documents. - Add new document_loader with class `AssemblyAIAudioTranscriptLoader` - Add optional dependency `assemblyai` - Add unit tests (using a Mock client) - Add docs notebook This is the equivalent to the JS integration already available in LangChain.js. See the [LangChain JS docs AssemblyAI page](https://js.langchain.com/docs/modules/data_connection/document_loaders/integrations/web_loaders/assemblyai_audio_transcription). At its simplest, you can use the loader to get a transcript back from an audio file like this: ```python from langchain.document_loaders.assemblyai import AssemblyAIAudioTranscriptLoader loader = AssemblyAIAudioTranscriptLoader(file_path="./testfile.mp3") docs = loader.load() ``` To use it, it needs the `assemblyai` python package installed, and the environment variable `ASSEMBLYAI_API_KEY` set with your API key. Alternatively, the API key can also be passed as an argument. Twitter handles to shout out if so kindly 🙇 [@AssemblyAI](https://twitter.com/AssemblyAI) and [@patloeber](https://twitter.com/patloeber) --------- Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com> Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com> 2023-08-24 05:51:19 +00:00			`import pytest`
			`from pytest_mock import MockerFixture`

community[major], core[patch], langchain[patch], experimental[patch]: Create langchain-community (#14463) Moved the following modules to new package langchain-community in a backwards compatible fashion: ``` mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv 
langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community ``` Moved the following to core ``` mv langchain/langchain/utils/json_schema.py core/langchain_core/utils mv langchain/langchain/utils/html.py core/langchain_core/utils mv langchain/langchain/utils/strings.py core/langchain_core/utils cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py rm langchain/langchain/utils/env.py ``` See .scripts/community_split/script_integrations.sh for all changes 2023-12-11 21:53:30 +00:00			`from langchain_community.document_loaders import AssemblyAIAudioTranscriptLoader`
			`from langchain_community.document_loaders.assemblyai import TranscriptFormat`
Add new document_loader: AssemblyAIAudioTranscriptLoader (#9667) This PR adds a new document loader `AssemblyAIAudioTranscriptLoader` that transcribes audio files with the [AssemblyAI API](https://www.assemblyai.com) and loads the transcribed text into documents. - Add new document_loader with class `AssemblyAIAudioTranscriptLoader` - Add optional dependency `assemblyai` - Add unit tests (using a Mock client) - Add docs notebook This is the equivalent of the JS integration already available in LangChain.js. See the [LangChain JS docs AssemblyAI page](https://js.langchain.com/docs/modules/data_connection/document_loaders/integrations/web_loaders/assemblyai_audio_transcription). At its simplest, you can use the loader to get a transcript back from an audio file like this: ```python from langchain.document_loaders.assemblyai import AssemblyAIAudioTranscriptLoader loader = AssemblyAIAudioTranscriptLoader(file_path="./testfile.mp3") docs = loader.load() ``` To use it, you need the `assemblyai` Python package installed and the environment variable `ASSEMBLYAI_API_KEY` set to your API key. Alternatively, the API key can also be passed as an argument. Twitter handles to shout out if so kindly 🙇 [@AssemblyAI](https://twitter.com/AssemblyAI) and [@patloeber](https://twitter.com/patloeber) --------- Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com> Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com> 2023-08-24 05:51:19 +00:00

			`@pytest.mark.requires("assemblyai")`
			`def test_initialization() -> None:`
			`loader = AssemblyAIAudioTranscriptLoader(`
			`file_path="./testfile.mp3", api_key="api_key"`
			`)`
			`assert loader.file_path == "./testfile.mp3"`
			`assert loader.transcript_format == TranscriptFormat.TEXT`


			`@pytest.mark.requires("assemblyai")`
			`def test_load(mocker: MockerFixture) -> None:`
			`mocker.patch(`
			`"assemblyai.Transcriber.transcribe",`
			`return_value=mocker.MagicMock(`
			`text="Test transcription text", json_response={"id": "1"}, error=None`
			`),`
			`)`

			`loader = AssemblyAIAudioTranscriptLoader(`
			`file_path="./testfile.mp3", api_key="api_key"`
			`)`
			`docs = loader.load()`
			`assert len(docs) == 1`
			`assert docs[0].page_content == "Test transcription text"`
			`assert docs[0].metadata == {"id": "1"}`


			`@pytest.mark.requires("assemblyai")`
			`def test_transcription_error(mocker: MockerFixture) -> None:`
			`mocker.patch(`
			`"assemblyai.Transcriber.transcribe",`
			`return_value=mocker.MagicMock(error="Test error"),`
			`)`

			`loader = AssemblyAIAudioTranscriptLoader(`
			`file_path="./testfile.mp3", api_key="api_key"`
			`)`

			`expected_error = "Could not transcribe file: Test error"`
			`with pytest.raises(ValueError, match=expected_error):`
			`loader.load()`