You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
langchain/libs/community/langchain_community/tools/edenai/audio_speech_to_text.py

104 lines
3.4 KiB
Python

from __future__ import annotations
import json
import logging
import time
from typing import List, Optional
import requests
from langchain_core.callbacks import CallbackManagerForToolRun
from langchain_core.pydantic_v1 import validator
from langchain_community.tools.edenai.edenai_base_tool import EdenaiTool
logger = logging.getLogger(__name__)
class EdenAiSpeechToTextTool(EdenaiTool):
"""Tool that queries the Eden AI Speech To Text API.
for api reference check edenai documentation:
https://app.edenai.run/bricks/speech/asynchronous-speech-to-text.
To use, you should have
the environment variable ``EDENAI_API_KEY`` set with your API token.
You can find your token here: https://app.edenai.run/admin/account/settings
"""
edenai_api_key: Optional[str] = None
name = "edenai_speech_to_text"
description = (
"A wrapper around edenai Services speech to text "
"Useful for when you have to convert audio to text."
"Input should be a url to an audio file."
)
is_async = True
language: Optional[str] = "en"
speakers: Optional[int]
profanity_filter: bool = False
custom_vocabulary: Optional[List[str]]
feature: str = "audio"
subfeature: str = "speech_to_text_async"
base_url = "https://api.edenai.run/v2/audio/speech_to_text_async/"
@validator("providers")
def check_only_one_provider_selected(cls, v: List[str]) -> List[str]:
"""
This tool has no feature to combine providers results.
Therefore we only allow one provider
"""
if len(v) > 1:
raise ValueError(
"Please select only one provider. "
"The feature to combine providers results is not available "
"for this tool."
)
return v
def _wait_processing(self, url: str) -> requests.Response:
for _ in range(10):
time.sleep(1)
audio_analysis_result = self._get_edenai(url)
temp = audio_analysis_result.json()
if temp["status"] == "finished":
if temp["results"][self.providers[0]]["error"] is not None:
raise Exception(
f"""EdenAI returned an unexpected response
{temp['results'][self.providers[0]]['error']}"""
)
else:
return audio_analysis_result
raise Exception("Edenai speech to text job id processing Timed out")
def _parse_response(self, response: dict) -> str:
return response["public_id"]
def _run(
self,
query: str,
run_manager: Optional[CallbackManagerForToolRun] = None,
) -> str:
"""Use the tool."""
all_params = {
"file_url": query,
"language": self.language,
"speakers": self.speakers,
"profanity_filter": self.profanity_filter,
"custom_vocabulary": self.custom_vocabulary,
}
# filter so we don't send val to api when val is `None
query_params = {k: v for k, v in all_params.items() if v is not None}
job_id = self._call_eden_ai(query_params)
url = self.base_url + job_id
audio_analysis_result = self._wait_processing(url)
result = audio_analysis_result.text
formatted_text = json.loads(result)
return formatted_text["results"][self.providers[0]]["text"]