@ -219,3 +219,81 @@ class OpenAIWhisperParserLocal(BaseBlobParser):
page_content = prediction ,
metadata = { " source " : blob . source } ,
)
class YandexSTTParser(BaseBlobParser):
    """Transcribe and parse audio files.

    Audio transcription is performed with the Yandex SpeechKit
    speech-to-text service (through the `yandex-speechkit` package).
    """

    def __init__(
        self,
        *,
        api_key: Optional[str] = None,
        iam_token: Optional[str] = None,
        model: str = "general",
        language: str = "auto",
    ) -> None:
        """Initialize the parser.

        Exactly one of `api_key` or `iam_token` must be provided.

        Args:
            api_key: API key for a service account
                with the `ai.speechkit-stt.user` role.
            iam_token: IAM token for a service account
                with the `ai.speechkit-stt.user` role.
            model: Recognition model name.
                Defaults to general.
            language: The language in ISO 639-1 format.
                Defaults to automatic language recognition.

        Raises:
            ValueError: If both or neither of `api_key` and `iam_token`
                are provided.
        """
        # The two flags are equal only when both credentials are missing or
        # both are given; either case is rejected.
        if (api_key is None) == (iam_token is None):
            raise ValueError(
                "Either 'api_key' or 'iam_token' must be provided, but not both."
            )
        self.api_key = api_key
        self.iam_token = iam_token
        self.model = model
        self.language = language

    def lazy_parse(self, blob: Blob) -> Iterator[Document]:
        """Lazily parse the blob.

        The audio is loaded from `blob.path`, transcribed with the configured
        recognition model, and every recognition result is emitted as a
        separate document.

        Args:
            blob: The audio blob to transcribe.

        Yields:
            One `Document` per recognition result, with the normalized text
            as `page_content` and `blob.source` stored under the `source`
            metadata key.

        Raises:
            ImportError: If `yandex-speechkit` or `pydub` is not installed.
        """
        # Optional dependencies are imported lazily so that the module can be
        # imported without them; the original error is chained for debugging.
        try:
            from speechkit import configure_credentials, creds, model_repository
            from speechkit.stt import AudioProcessingType
        except ImportError as e:
            raise ImportError(
                "yandex-speechkit package not found, please install it with "
                "`pip install yandex-speechkit`"
            ) from e
        try:
            from pydub import AudioSegment
        except ImportError as e:
            raise ImportError(
                "pydub package not found, please install it with "
                "`pip install pydub`"
            ) from e

        # Branch on the same "is not None" predicate that __init__ validates,
        # so the credential that was actually supplied is the one used.
        if self.api_key is not None:
            credentials = creds.YandexCredentials(api_key=self.api_key)
        else:
            credentials = creds.YandexCredentials(iam_token=self.iam_token)
        configure_credentials(yandex_credentials=credentials)

        # NOTE(review): the audio is read from the blob's filesystem path;
        # presumably blobs without a path are not supported here - confirm.
        audio = AudioSegment.from_file(blob.path)

        recognizer = model_repository.recognition_model()
        recognizer.model = self.model
        recognizer.language = self.language
        recognizer.audio_processing_type = AudioProcessingType.Full

        for res in recognizer.transcribe(audio):
            yield Document(
                page_content=res.normalized_text,
                metadata={"source": blob.source},
            )