Add yandex stt parser (#11435)

Description: Adds the ability to load the transcription of an audio file as a document using [Yandex SpeechKit](https://cloud.yandex.com/en-ru/services/speechkit). Issue: None. Dependencies: yandex-speechkit. Tag maintainer: @rlancemartin, @eyurtsev
12 months ago · ead9d5b55c
parent 15687a28d5
commit ead9d5b55c
1 changed files with 78 additions and 0 deletions
--- a/libs/langchain/langchain/document_loaders/parsers/audio.py
+++ b/libs/langchain/langchain/document_loaders/parsers/audio.py
@ -219,3 +219,81 @@ class OpenAIWhisperParserLocal(BaseBlobParser):
            page_content=prediction,
            metadata={"source": blob.source},
        )
+
+
+class YandexSTTParser(BaseBlobParser):
+    """Transcribe and parse audio files.
+    Audio transcription is with OpenAI Whisper model."""
+
+    def __init__(
+        self,
+        *,
+        api_key: Optional[str] = None,
+        iam_token: Optional[str] = None,
+        model: str = "general",
+        language: str = "auto",
+    ):
+        """Initialize the parser.
+
+        Args:
+            api_key: API key for a service account
+            with the `ai.speechkit-stt.user` role.
+            iam_token: IAM token for a service account
+            with the `ai.speechkit-stt.user` role.
+            model: Recognition model name.
+              Defaults to general.
+            language: The language in ISO 639-1 format.
+              Defaults to automatic language recognition.
+        Either `api_key` or `iam_token` must be provided, but not both.
+        """
+        if (api_key is None) == (iam_token is None):
+            raise ValueError(
+                "Either 'api_key' or 'iam_token' must be provided, but not both."
+            )
+        self.api_key = api_key
+        self.iam_token = iam_token
+        self.model = model
+        self.language = language
+
+    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
+        """Lazily parse the blob."""
+
+        try:
+            from speechkit import configure_credentials, creds, model_repository
+            from speechkit.stt import AudioProcessingType
+        except ImportError:
+            raise ImportError(
+                "yandex-speechkit package not found, please install it with "
+                "`pip install yandex-speechkit`"
+            )
+        try:
+            from pydub import AudioSegment
+        except ImportError:
+            raise ImportError(
+                "pydub package not found, please install it with " "`pip install pydub`"
+            )
+
+        if self.api_key:
+            configure_credentials(
+                yandex_credentials=creds.YandexCredentials(api_key=self.api_key)
+            )
+        else:
+            configure_credentials(
+                yandex_credentials=creds.YandexCredentials(iam_token=self.iam_token)
+            )
+
+        audio = AudioSegment.from_file(blob.path)
+
+        model = model_repository.recognition_model()
+
+        model.model = self.model
+        model.language = self.language
+        model.audio_processing_type = AudioProcessingType.Full
+
+        result = model.transcribe(audio)
+
+        for res in result:
+            yield Document(
+                page_content=res.normalized_text,
+                metadata={"source": blob.source},
+            )