mirror of
https://github.com/hwchase17/langchain
synced 2024-11-02 09:40:22 +00:00
feat: Add Google Cloud Text-to-Speech Tool (#12572)
- Add Tool for [Google Cloud Text-to-Speech](https://cloud.google.com/text-to-speech) - Follows similar structure to [Eleven Labs Text2Speech](https://python.langchain.com/docs/integrations/tools/eleven_labs_tts) --------- Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
parent
1f2c672d4a
commit
e53b9ccd70
94
docs/docs/integrations/tools/google_cloud_texttospeech.ipynb
Normal file
94
docs/docs/integrations/tools/google_cloud_texttospeech.ipynb
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "a991a6f8-1897-4f49-a191-ae3bdaeda856",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Google Cloud Text-to-Speech\n",
|
||||||
|
"\n",
|
||||||
|
"This notebook shows how to interact with the `Google Cloud Text-to-Speech API` to achieve speech synthesis capabilities."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "9eeb311e-e1bd-4959-8536-4d267f302eb3",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"First, you need to set up a Google Cloud project. You can follow the instructions [here](https://cloud.google.com/text-to-speech/docs/before-you-begin)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"id": "0a309c0e-5310-4eaa-8af9-bcbc252e45da",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# !pip install google-cloud-texttospeech"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "434b2454-2bff-484d-822c-4026a9dc1383",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Usage"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "2f57a647-9214-4562-a8cf-f263a15d1f40",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from langchain.tools import GoogleCloudTextToSpeechTool\n",
|
||||||
|
"\n",
|
||||||
|
"text_to_speak = \"Hello world!\"\n",
|
||||||
|
"\n",
|
||||||
|
"tts = GoogleCloudTextToSpeechTool()\n",
|
||||||
|
"tts.name"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "d4613fed-66f0-47c6-be50-7e7670654427",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"We can generate audio, save it to a temporary file, and then play it."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"id": "f1984844-aa75-4f83-9d42-1c8052d87cc0",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"speech_file = tts.run(text_to_speak)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
@ -33,6 +33,7 @@ from langchain.tools.pubmed.tool import PubmedQueryRun
|
|||||||
from langchain.tools.base import BaseTool
|
from langchain.tools.base import BaseTool
|
||||||
from langchain.tools.bing_search.tool import BingSearchRun
|
from langchain.tools.bing_search.tool import BingSearchRun
|
||||||
from langchain.tools.ddg_search.tool import DuckDuckGoSearchRun
|
from langchain.tools.ddg_search.tool import DuckDuckGoSearchRun
|
||||||
|
from langchain.tools.google_cloud.texttospeech import GoogleCloudTextToSpeechTool
|
||||||
from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun
|
from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun
|
||||||
from langchain.tools.google_scholar.tool import GoogleScholarQueryRun
|
from langchain.tools.google_scholar.tool import GoogleScholarQueryRun
|
||||||
from langchain.tools.metaphor_search.tool import MetaphorSearchResults
|
from langchain.tools.metaphor_search.tool import MetaphorSearchResults
|
||||||
@ -326,6 +327,10 @@ def _get_eleven_labs_text2speech(**kwargs: Any) -> BaseTool:
|
|||||||
return ElevenLabsText2SpeechTool(**kwargs)
|
return ElevenLabsText2SpeechTool(**kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_google_cloud_texttospeech(**kwargs: Any) -> BaseTool:
    """Build a ``GoogleCloudTextToSpeechTool`` from the given keyword arguments.

    Args:
        **kwargs: Forwarded unchanged to the ``GoogleCloudTextToSpeechTool``
            constructor.

    Returns:
        The newly constructed tool instance.
    """
    tool = GoogleCloudTextToSpeechTool(**kwargs)
    return tool
|
||||||
|
|
||||||
|
|
||||||
_EXTRA_LLM_TOOLS: Dict[
|
_EXTRA_LLM_TOOLS: Dict[
|
||||||
str,
|
str,
|
||||||
Tuple[Callable[[Arg(BaseLanguageModel, "llm"), KwArg(Any)], BaseTool], List[str]],
|
Tuple[Callable[[Arg(BaseLanguageModel, "llm"), KwArg(Any)], BaseTool], List[str]],
|
||||||
@ -390,6 +395,7 @@ _EXTRA_OPTIONAL_TOOLS: Dict[str, Tuple[Callable[[KwArg(Any)], BaseTool], List[st
|
|||||||
["api_login", "api_password", "aiosession"],
|
["api_login", "api_password", "aiosession"],
|
||||||
),
|
),
|
||||||
"eleven_labs_text2speech": (_get_eleven_labs_text2speech, ["eleven_api_key"]),
|
"eleven_labs_text2speech": (_get_eleven_labs_text2speech, ["eleven_api_key"]),
|
||||||
|
"google_cloud_texttospeech": (_get_google_cloud_texttospeech, []),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -240,6 +240,12 @@ def _import_gmail_GmailSendMessage() -> Any:
|
|||||||
return GmailSendMessage
|
return GmailSendMessage
|
||||||
|
|
||||||
|
|
||||||
|
def _import_google_cloud_texttospeech() -> Any:
    """Import ``GoogleCloudTextToSpeechTool`` on demand and return the class.

    The import is performed inside the function body, so the
    ``langchain.tools.google_cloud.texttospeech`` module is only loaded when
    this helper is actually called.
    """
    from langchain.tools.google_cloud.texttospeech import (
        GoogleCloudTextToSpeechTool as tool_cls,
    )

    return tool_cls
|
||||||
|
|
||||||
|
|
||||||
def _import_google_places_tool() -> Any:
|
def _import_google_places_tool() -> Any:
|
||||||
from langchain.tools.google_places.tool import GooglePlacesTool
|
from langchain.tools.google_places.tool import GooglePlacesTool
|
||||||
|
|
||||||
@ -731,6 +737,8 @@ def __getattr__(name: str) -> Any:
|
|||||||
return _import_gmail_GmailSearch()
|
return _import_gmail_GmailSearch()
|
||||||
elif name == "GmailSendMessage":
|
elif name == "GmailSendMessage":
|
||||||
return _import_gmail_GmailSendMessage()
|
return _import_gmail_GmailSendMessage()
|
||||||
|
elif name == "GoogleCloudTextToSpeechTool":
|
||||||
|
return _import_google_cloud_texttospeech()
|
||||||
elif name == "GooglePlacesTool":
|
elif name == "GooglePlacesTool":
|
||||||
return _import_google_places_tool()
|
return _import_google_places_tool()
|
||||||
elif name == "GoogleSearchResults":
|
elif name == "GoogleSearchResults":
|
||||||
@ -916,6 +924,7 @@ __all__ = [
|
|||||||
"GmailGetThread",
|
"GmailGetThread",
|
||||||
"GmailSearch",
|
"GmailSearch",
|
||||||
"GmailSendMessage",
|
"GmailSendMessage",
|
||||||
|
"GoogleCloudTextToSpeechTool",
|
||||||
"GooglePlacesTool",
|
"GooglePlacesTool",
|
||||||
"GoogleSearchResults",
|
"GoogleSearchResults",
|
||||||
"GoogleSearchRun",
|
"GoogleSearchRun",
|
||||||
|
5
libs/langchain/langchain/tools/google_cloud/__init__.py
Normal file
5
libs/langchain/langchain/tools/google_cloud/__init__.py
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
"""Google Cloud Tools."""
|
||||||
|
|
||||||
|
from langchain.tools.google_cloud.texttospeech import GoogleCloudTextToSpeechTool
|
||||||
|
|
||||||
|
__all__ = ["GoogleCloudTextToSpeechTool"]
|
90
libs/langchain/langchain/tools/google_cloud/texttospeech.py
Normal file
90
libs/langchain/langchain/tools/google_cloud/texttospeech.py
Normal file
@ -0,0 +1,90 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import tempfile
|
||||||
|
from typing import TYPE_CHECKING, Any, Optional
|
||||||
|
|
||||||
|
from langchain.callbacks.manager import CallbackManagerForToolRun
|
||||||
|
from langchain.tools.base import BaseTool
|
||||||
|
from langchain.utilities.vertexai import get_client_info
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from google.cloud import texttospeech
|
||||||
|
|
||||||
|
|
||||||
|
def _import_google_cloud_texttospeech() -> Any:
    """Import ``google.cloud.texttospeech`` lazily and return the module.

    Returns:
        The imported ``google.cloud.texttospeech`` module object.

    Raises:
        ImportError: If the module cannot be imported. The original import
            failure is chained as the cause.
    """
    try:
        from google.cloud import texttospeech as tts_module
    except ImportError as import_error:
        message = (
            "Cannot import google.cloud.texttospeech, please install "
            "`pip install google-cloud-texttospeech`."
        )
        raise ImportError(message) from import_error
    else:
        return tts_module
|
||||||
|
|
||||||
|
|
||||||
|
def _encoding_file_extension_map(encoding: texttospeech.AudioEncoding) -> Optional[str]:
    """Look up the file extension used for a given audio encoding.

    Args:
        encoding: A ``texttospeech.AudioEncoding`` member.

    Returns:
        ``".wav"`` for LINEAR16, MULAW and ALAW, ``".mp3"`` for MP3,
        ``".ogg"`` for OGG_OPUS, and ``None`` for any other value.
    """
    tts = _import_google_cloud_texttospeech()
    audio = tts.AudioEncoding

    # Three encodings share the ".wav" extension; seed those first and then
    # add the two encodings that have an extension of their own.
    extension_by_encoding = {
        member: ".wav" for member in (audio.LINEAR16, audio.MULAW, audio.ALAW)
    }
    extension_by_encoding[audio.MP3] = ".mp3"
    extension_by_encoding[audio.OGG_OPUS] = ".ogg"

    return extension_by_encoding.get(encoding)
|
||||||
|
|
||||||
|
|
||||||
|
class GoogleCloudTextToSpeechTool(BaseTool):
    """Tool that synthesizes speech with the Google Cloud Text-to-Speech API.

    Setup instructions for the Google Cloud side are available at:
    https://cloud.google.com/text-to-speech/docs/before-you-begin
    """

    name: str = "google_cloud_texttospeech"
    description: str = (
        "A wrapper around Google Cloud Text-to-Speech. "
        "Useful for when you need to synthesize audio from text. "
        "It supports multiple languages, including English, German, Polish, "
        "Spanish, Italian, French, Portuguese, and Hindi. "
    )

    # Text-to-Speech API client; assigned in ``__init__``.
    _client: Any

    def __init__(self, **kwargs: Any) -> None:
        """Construct the tool and attach a ``TextToSpeechClient``.

        Args:
            **kwargs: Forwarded to the ``BaseTool`` constructor.

        Raises:
            ImportError: If ``google.cloud.texttospeech`` cannot be imported.
        """
        # Resolve the optional dependency first so a missing package fails
        # before any base-class initialization happens.
        tts = _import_google_cloud_texttospeech()

        super().__init__(**kwargs)

        client_info = get_client_info(module="text-to-speech")
        self._client = tts.TextToSpeechClient(client_info=client_info)

    def _run(
        self,
        input_text: str,
        language_code: str = "en-US",
        ssml_gender: Optional[texttospeech.SsmlVoiceGender] = None,
        audio_encoding: Optional[texttospeech.AudioEncoding] = None,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Synthesize ``input_text`` and write the audio to a temporary file.

        Args:
            input_text: Text to convert to speech.
            language_code: Language code passed to the voice selection
                parameters. Defaults to ``"en-US"``.
            ssml_gender: Voice gender; any falsy value is replaced by
                ``SsmlVoiceGender.NEUTRAL``.
            audio_encoding: Output encoding; any falsy value is replaced by
                ``AudioEncoding.MP3``.
            run_manager: Optional callback manager; not used by this method.

        Returns:
            The path of the temporary file holding the synthesized audio.
        """
        tts = _import_google_cloud_texttospeech()

        if not ssml_gender:
            ssml_gender = tts.SsmlVoiceGender.NEUTRAL
        if not audio_encoding:
            audio_encoding = tts.AudioEncoding.MP3

        synthesis_input = tts.SynthesisInput(text=input_text)
        voice_params = tts.VoiceSelectionParams(
            language_code=language_code, ssml_gender=ssml_gender
        )
        audio_config = tts.AudioConfig(audio_encoding=audio_encoding)

        response = self._client.synthesize_speech(
            input=synthesis_input,
            voice=voice_params,
            audio_config=audio_config,
        )

        extension = _encoding_file_extension_map(audio_encoding)

        # delete=False keeps the file on disk after the handle is closed, so
        # the returned path stays usable by the caller.
        with tempfile.NamedTemporaryFile(
            mode="bx", suffix=extension, delete=False
        ) as audio_file:
            audio_file.write(response.audio_content)
        return audio_file.name
|
@ -46,6 +46,7 @@ _EXPECTED = [
|
|||||||
"GmailGetThread",
|
"GmailGetThread",
|
||||||
"GmailSearch",
|
"GmailSearch",
|
||||||
"GmailSendMessage",
|
"GmailSendMessage",
|
||||||
|
"GoogleCloudTextToSpeechTool",
|
||||||
"GooglePlacesTool",
|
"GooglePlacesTool",
|
||||||
"GoogleSearchResults",
|
"GoogleSearchResults",
|
||||||
"GoogleSearchRun",
|
"GoogleSearchRun",
|
||||||
|
Loading…
Reference in New Issue
Block a user