diff --git a/docs/docs/integrations/tools/google_cloud_texttospeech.ipynb b/docs/docs/integrations/tools/google_cloud_texttospeech.ipynb new file mode 100644 index 0000000000..7a72b87310 --- /dev/null +++ b/docs/docs/integrations/tools/google_cloud_texttospeech.ipynb @@ -0,0 +1,94 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a991a6f8-1897-4f49-a191-ae3bdaeda856", + "metadata": {}, + "source": [ + "# Google Cloud Text-to-Speech\n", + "\n", + "This notebook shows how to interact with the `Google Cloud Text-to-Speech API` to achieve speech synthesis capabilities." + ] + }, + { + "cell_type": "markdown", + "id": "9eeb311e-e1bd-4959-8536-4d267f302eb3", + "metadata": {}, + "source": [ + "First, you need to set up a Google Cloud project. You can follow the instructions [here](https://cloud.google.com/text-to-speech/docs/before-you-begin)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "0a309c0e-5310-4eaa-8af9-bcbc252e45da", + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install google-cloud-texttospeech" + ] + }, + { + "cell_type": "markdown", + "id": "434b2454-2bff-484d-822c-4026a9dc1383", + "metadata": {}, + "source": [ + "## Usage" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f57a647-9214-4562-a8cf-f263a15d1f40", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.tools import GoogleCloudTextToSpeechTool\n", + "\n", + "text_to_speak = \"Hello world!\"\n", + "\n", + "tts = GoogleCloudTextToSpeechTool()\n", + "tts.name" + ] + }, + { + "cell_type": "markdown", + "id": "d4613fed-66f0-47c6-be50-7e7670654427", + "metadata": {}, + "source": [ + "We can generate audio, save it to a temporary file and then play it."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "f1984844-aa75-4f83-9d42-1c8052d87cc0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "speech_file = tts.run(text_to_speak)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/libs/langchain/langchain/agents/load_tools.py b/libs/langchain/langchain/agents/load_tools.py
index a4be206bd5..fe30320eb9 100644
--- a/libs/langchain/langchain/agents/load_tools.py
+++ b/libs/langchain/langchain/agents/load_tools.py
@@ -33,6 +33,7 @@ from langchain.tools.pubmed.tool import PubmedQueryRun
 from langchain.tools.base import BaseTool
 from langchain.tools.bing_search.tool import BingSearchRun
 from langchain.tools.ddg_search.tool import DuckDuckGoSearchRun
+from langchain.tools.google_cloud.texttospeech import GoogleCloudTextToSpeechTool
 from langchain.tools.google_search.tool import GoogleSearchResults, GoogleSearchRun
 from langchain.tools.google_scholar.tool import GoogleScholarQueryRun
 from langchain.tools.metaphor_search.tool import MetaphorSearchResults
@@ -326,6 +327,10 @@ def _get_eleven_labs_text2speech(**kwargs: Any) -> BaseTool:
     return ElevenLabsText2SpeechTool(**kwargs)
 
 
+def _get_google_cloud_texttospeech(**kwargs: Any) -> BaseTool:  # optional-tool factory
+    return GoogleCloudTextToSpeechTool(**kwargs)  # kwargs go to the tool constructor
+
+
 _EXTRA_LLM_TOOLS: Dict[
     str,
     Tuple[Callable[[Arg(BaseLanguageModel, "llm"), KwArg(Any)], BaseTool], List[str]],
@@ -390,6 +395,7 @@ _EXTRA_OPTIONAL_TOOLS: Dict[str, Tuple[Callable[[KwArg(Any)], BaseTool], List[st
         ["api_login", "api_password", "aiosession"],
     ),
     "eleven_labs_text2speech": (_get_eleven_labs_text2speech,
["eleven_api_key"]),
+    "google_cloud_texttospeech": (_get_google_cloud_texttospeech, []),
 }
 
 
diff --git a/libs/langchain/langchain/tools/__init__.py b/libs/langchain/langchain/tools/__init__.py
index 0b63e6d990..848a7e915c 100644
--- a/libs/langchain/langchain/tools/__init__.py
+++ b/libs/langchain/langchain/tools/__init__.py
@@ -240,6 +240,12 @@ def _import_gmail_GmailSendMessage() -> Any:
     return GmailSendMessage
 
 
+def _import_google_cloud_texttospeech() -> Any:  # deferred import used by __getattr__
+    from langchain.tools.google_cloud.texttospeech import GoogleCloudTextToSpeechTool
+
+    return GoogleCloudTextToSpeechTool
+
+
 def _import_google_places_tool() -> Any:
     from langchain.tools.google_places.tool import GooglePlacesTool
 
@@ -731,6 +737,8 @@ def __getattr__(name: str) -> Any:
         return _import_gmail_GmailSearch()
     elif name == "GmailSendMessage":
         return _import_gmail_GmailSendMessage()
+    elif name == "GoogleCloudTextToSpeechTool":
+        return _import_google_cloud_texttospeech()
     elif name == "GooglePlacesTool":
         return _import_google_places_tool()
     elif name == "GoogleSearchResults":
@@ -916,6 +924,7 @@ __all__ = [
     "GmailGetThread",
     "GmailSearch",
     "GmailSendMessage",
+    "GoogleCloudTextToSpeechTool",
     "GooglePlacesTool",
     "GoogleSearchResults",
     "GoogleSearchRun",
diff --git a/libs/langchain/langchain/tools/google_cloud/__init__.py b/libs/langchain/langchain/tools/google_cloud/__init__.py
new file mode 100644
index 0000000000..b3bcf980a0
--- /dev/null
+++ b/libs/langchain/langchain/tools/google_cloud/__init__.py
@@ -0,0 +1,5 @@
+"""Google Cloud Tools."""
+
+from langchain.tools.google_cloud.texttospeech import GoogleCloudTextToSpeechTool
+
+__all__ = ["GoogleCloudTextToSpeechTool"]
diff --git a/libs/langchain/langchain/tools/google_cloud/texttospeech.py b/libs/langchain/langchain/tools/google_cloud/texttospeech.py
new file mode 100644
index 0000000000..f6bf8e6328
--- /dev/null
+++ b/libs/langchain/langchain/tools/google_cloud/texttospeech.py
@@ -0,0 +1,90 @@
+from __future__ import annotations
+
+import tempfile
+from typing import TYPE_CHECKING, Any, Optional
+
+from langchain.callbacks.manager import CallbackManagerForToolRun
+from langchain.tools.base import BaseTool
+from langchain.utilities.vertexai import get_client_info
+
+# Imported for annotations only; at runtime the module is loaded on demand by
+# _import_google_cloud_texttospeech().
+if TYPE_CHECKING:
+    from google.cloud import texttospeech
+
+
+def _import_google_cloud_texttospeech() -> Any:
+    """Import and return the ``google.cloud.texttospeech`` module.
+
+    Raises:
+        ImportError: If the module cannot be imported; the message names the
+            pip package to install and chains the original error.
+    """
+    try:
+        from google.cloud import texttospeech
+    except ImportError as e:
+        raise ImportError(
+            "Cannot import google.cloud.texttospeech, please install "
+            "`pip install google-cloud-texttospeech`."
+        ) from e
+    return texttospeech
+
+
+def _encoding_file_extension_map(encoding: texttospeech.AudioEncoding) -> Optional[str]:
+    """Return the file extension used for ``encoding``.
+
+    Args:
+        encoding: An ``AudioEncoding`` value.
+
+    Returns:
+        The extension including the leading dot (for example ``".mp3"``), or
+        ``None`` when ``encoding`` is not one of the five keys listed below.
+    """
+    texttospeech = _import_google_cloud_texttospeech()
+
+    # LINEAR16, MULAW and ALAW are all given the ".wav" extension.
+    ENCODING_FILE_EXTENSION_MAP = {
+        texttospeech.AudioEncoding.LINEAR16: ".wav",
+        texttospeech.AudioEncoding.MP3: ".mp3",
+        texttospeech.AudioEncoding.OGG_OPUS: ".ogg",
+        texttospeech.AudioEncoding.MULAW: ".wav",
+        texttospeech.AudioEncoding.ALAW: ".wav",
+    }
+    return ENCODING_FILE_EXTENSION_MAP.get(encoding)
+
+
+class GoogleCloudTextToSpeechTool(BaseTool):
+    """Tool that queries the Google Cloud Text to Speech API.
+
+    Each run sends the text to the API, writes the returned audio to a
+    temporary file and returns that file's path.
+
+    In order to set this up, follow instructions at:
+    https://cloud.google.com/text-to-speech/docs/before-you-begin
+    """
+
+    name: str = "google_cloud_texttospeech"
+    description: str = (
+        "A wrapper around Google Cloud Text-to-Speech. "
+        "Useful for when you need to synthesize audio from text. "
+        "It supports multiple languages, including English, German, Polish, "
+        "Spanish, Italian, French, Portuguese, and Hindi. "
+    )
+
+    # Text-to-Speech API client; assigned in __init__.
+    _client: Any
+
+    def __init__(self, **kwargs: Any) -> None:
+        """Create the tool and its ``TextToSpeechClient``.
+
+        Args:
+            **kwargs: Forwarded unchanged to ``BaseTool.__init__``.
+
+        Raises:
+            ImportError: If ``google.cloud.texttospeech`` cannot be imported.
+        """
+        # Resolved first, so a missing package raises before the base class
+        # initializer runs.
+        texttospeech = _import_google_cloud_texttospeech()
+
+        super().__init__(**kwargs)
+
+        self._client = texttospeech.TextToSpeechClient(
+            client_info=get_client_info(module="text-to-speech")
+        )
+
+    def _run(
+        self,
+        input_text: str,
+        language_code: str = "en-US",
+        ssml_gender: Optional[texttospeech.SsmlVoiceGender] = None,
+        audio_encoding: Optional[texttospeech.AudioEncoding] = None,
+        run_manager: Optional[CallbackManagerForToolRun] = None,
+    ) -> str:
+        """Synthesize ``input_text`` and save the audio to a temporary file.
+
+        Args:
+            input_text: Text passed to the API as the synthesis input.
+            language_code: Language code for the voice selection.
+            ssml_gender: Voice gender; ``SsmlVoiceGender.NEUTRAL`` is used
+                when this is ``None`` (or otherwise falsy).
+            audio_encoding: Output encoding; ``AudioEncoding.MP3`` is used
+                when this is ``None`` (or otherwise falsy).
+            run_manager: Accepted but not used by this method.
+
+        Returns:
+            Path of the temporary file holding the audio. The file is created
+            with ``delete=False``, so it is not removed when it is closed.
+        """
+        texttospeech = _import_google_cloud_texttospeech()
+        ssml_gender = ssml_gender or texttospeech.SsmlVoiceGender.NEUTRAL
+        audio_encoding = audio_encoding or texttospeech.AudioEncoding.MP3
+
+        response = self._client.synthesize_speech(
+            input=texttospeech.SynthesisInput(text=input_text),
+            voice=texttospeech.VoiceSelectionParams(
+                language_code=language_code, ssml_gender=ssml_gender
+            ),
+            audio_config=texttospeech.AudioConfig(audio_encoding=audio_encoding),
+        )
+
+        # None for an encoding missing from the extension map.
+        suffix = _encoding_file_extension_map(audio_encoding)
+
+        # "bx" opens the file in binary, exclusive-creation mode.
+        with tempfile.NamedTemporaryFile(mode="bx", suffix=suffix, delete=False) as f:
+            f.write(response.audio_content)
+        return f.name
diff --git a/libs/langchain/tests/unit_tests/tools/test_public_api.py b/libs/langchain/tests/unit_tests/tools/test_public_api.py
index f5b6c329da..ca675d591a 100644
--- a/libs/langchain/tests/unit_tests/tools/test_public_api.py
+++ b/libs/langchain/tests/unit_tests/tools/test_public_api.py
@@ -46,6 +46,7 @@ _EXPECTED = [
     "GmailGetThread",
     "GmailSearch",
     "GmailSendMessage",
+    "GoogleCloudTextToSpeechTool",
     "GooglePlacesTool",
     "GoogleSearchResults",
     "GoogleSearchRun",