Add yandex stt parser (#11435)

Description: Introduces the ability to load the transcription of an
audio file as a document using [Yandex
SpeechKit](https://cloud.yandex.com/en-ru/services/speechkit).
Issue: None
Dependencies: yandex-speechkit
Tag maintainer: @rlancemartin, @eyurtsev
pull/11718/head
Dmitry Tyumentsev 12 months ago committed by GitHub
parent 15687a28d5
commit ead9d5b55c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -219,3 +219,81 @@ class OpenAIWhisperParserLocal(BaseBlobParser):
page_content=prediction,
metadata={"source": blob.source},
)
class YandexSTTParser(BaseBlobParser):
    """Transcribe and parse audio files.

    Audio transcription is performed with the Yandex SpeechKit
    speech-to-text API (via the `yandex-speechkit` package).
    """

    def __init__(
        self,
        *,
        api_key: Optional[str] = None,
        iam_token: Optional[str] = None,
        model: str = "general",
        language: str = "auto",
    ) -> None:
        """Initialize the parser.

        Args:
            api_key: API key for a service account
                with the `ai.speechkit-stt.user` role.
            iam_token: IAM token for a service account
                with the `ai.speechkit-stt.user` role.
            model: Recognition model name.
                Defaults to general.
            language: The language in ISO 639-1 format.
                Defaults to automatic language recognition.

        Either `api_key` or `iam_token` must be provided, but not both.

        Raises:
            ValueError: If both or neither of `api_key` and `iam_token`
                are provided.
        """
        # True when both credentials are missing or both are supplied.
        if (api_key is None) == (iam_token is None):
            raise ValueError(
                "Either 'api_key' or 'iam_token' must be provided, but not both."
            )
        self.api_key = api_key
        self.iam_token = iam_token
        self.model = model
        self.language = language

    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
        """Lazily parse the blob.

        The audio is loaded from `blob.path` with pydub and passed to a
        SpeechKit recognition model; one `Document` is yielded per item of
        the transcription result.

        Args:
            blob: Blob whose `path` points at the audio file to transcribe.

        Yields:
            Documents whose `page_content` is the normalized text of a
            recognition result and whose metadata holds `blob.source`.

        Raises:
            ImportError: If `yandex-speechkit` or `pydub` is not installed.
        """
        # Optional dependencies are imported lazily so the module can be
        # imported without them installed.
        try:
            from speechkit import configure_credentials, creds, model_repository
            from speechkit.stt import AudioProcessingType
        except ImportError as err:
            raise ImportError(
                "yandex-speechkit package not found, please install it with "
                "`pip install yandex-speechkit`"
            ) from err
        try:
            from pydub import AudioSegment
        except ImportError as err:
            raise ImportError(
                "pydub package not found, please install it with `pip install pydub`"
            ) from err

        # Mirror the `is None` validation done in `__init__` so that an
        # explicitly supplied key is never silently replaced by a missing
        # IAM token.
        if self.api_key is not None:
            credentials = creds.YandexCredentials(api_key=self.api_key)
        else:
            credentials = creds.YandexCredentials(iam_token=self.iam_token)
        configure_credentials(yandex_credentials=credentials)

        audio = AudioSegment.from_file(blob.path)

        model = model_repository.recognition_model()
        model.model = self.model
        model.language = self.language
        model.audio_processing_type = AudioProcessingType.Full

        for res in model.transcribe(audio):
            yield Document(
                page_content=res.normalized_text,
                metadata={"source": blob.source},
            )

Loading…
Cancel
Save