diff --git a/docs/extras/integrations/tools/eleven_labs_tts.ipynb b/docs/extras/integrations/tools/eleven_labs_tts.ipynb new file mode 100644 index 0000000000..cfc5f22217 --- /dev/null +++ b/docs/extras/integrations/tools/eleven_labs_tts.ipynb @@ -0,0 +1,243 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "c8871563-02fe-49f2-901e-c0f05d655a6b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from dotenv import load_dotenv\n", + "load_dotenv()" + ] + }, + { + "cell_type": "markdown", + "id": "a991a6f8-1897-4f49-a191-ae3bdaeda856", + "metadata": {}, + "source": [ + "# Eleven Labs Text2Speech\n", + "\n", + "This notebook shows how to interact with the `ElevenLabs API` to achieve text-to-speech capabilities." + ] + }, + { + "cell_type": "markdown", + "id": "9eeb311e-e1bd-4959-8536-4d267f302eb3", + "metadata": {}, + "source": [ + "First, you need to set up an ElevenLabs account. You can follow the instructions [here](https://docs.elevenlabs.io/welcome/introduction)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "0a309c0e-5310-4eaa-8af9-bcbc252e45da", + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install elevenlabs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f097c3b1-f761-43cb-aad0-8ba2e93e5f5f", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"ELEVEN_API_KEY\"] = \"\"" + ] + }, + { + "cell_type": "markdown", + "id": "434b2454-2bff-484d-822c-4026a9dc1383", + "metadata": {}, + "source": [ + "## Usage" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "2f57a647-9214-4562-a8cf-f263a15d1f40", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.tools import ElevenLabsText2SpeechTool\n", + "tts = ElevenLabsText2SpeechTool()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "2ff86b69-de9d-4922-ada9-88f98b5c7569", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'eleven_labs_text2speech'" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tts.name" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "f1984844-aa75-4f83-9d42-1c8052d87cc0", + "metadata": {}, + "outputs": [], + "source": [ + "speech_file = tts.run(\"Hello world! 
I am real slim shady\")\n", + "tts.play(speech_file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3bcced62-4e7c-40ca-95ed-0680baca3082", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "a152766d-5f06-48b1-ac89-b4e8d88d3c9f", + "metadata": {}, + "source": [ + "## Use within an Agent" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "37626aea-0cf0-4849-9c00-c0f40515ffe0", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain import OpenAI\n", + "from langchain.agents import initialize_agent, AgentType, load_tools" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "c168f28e-d5b7-4c93-bed8-0ab317b4a44b", + "metadata": {}, + "outputs": [], + "source": [ + "llm = OpenAI(temperature=0)\n", + "tools = load_tools(['eleven_labs_text2speech'])\n", + "agent = initialize_agent(\n", + " tools=tools,\n", + " llm=llm,\n", + " agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,\n", + " verbose=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "336bf95a-3ccb-4963-aac3-638a4df2ed78", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3mAction:\n", + "```\n", + "{\n", + " \"action\": \"eleven_labs_text2speech\",\n", + " \"action_input\": {\n", + " \"query\": \"Why did the chicken cross the playground? 
To get to the other slide!\"\n", + " }\n", + "}\n", + "```\n", + "\n", + "\u001b[0m\n", + "Observation: \u001b[36;1m\u001b[1;3m/tmp/tmp8z9e6xf6.wav\u001b[0m\n", + "Thought:\u001b[32;1m\u001b[1;3m I have the audio file ready to be played\n", + "Action:\n", + "```\n", + "{\n", + " \"action\": \"Final Answer\",\n", + " \"action_input\": \"/tmp/tmp8z9e6xf6.wav\"\n", + "}\n", + "```\n", + "\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + } + ], + "source": [ + "audio_file = agent.run(\"Tell me a joke and read it out for me.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "f0aa7aa9-4682-4599-8cae-59347d9e5210", + "metadata": {}, + "outputs": [], + "source": [ + "tts.play(audio_file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "caffa8af-6d12-40c4-a25c-bdb28c204a09", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/libs/langchain/langchain/agents/load_tools.py b/libs/langchain/langchain/agents/load_tools.py index 8fc93e45ae..0329552289 100644 --- a/libs/langchain/langchain/agents/load_tools.py +++ b/libs/langchain/langchain/agents/load_tools.py @@ -32,6 +32,7 @@ from langchain.tools.requests.tool import ( RequestsPostTool, RequestsPutTool, ) +from langchain.tools.eleven_labs.text2speech import ElevenLabsText2SpeechTool from langchain.tools.scenexplain.tool import SceneXplainTool from langchain.tools.searx_search.tool import SearxSearchResults, SearxSearchRun from langchain.tools.shell.tool import ShellTool @@ -284,6 +285,9 @@ def 
def _get_eleven_labs_text2speech(**kwargs: Any) -> BaseTool:
    """Build the Eleven Labs text-to-speech tool.

    This factory is registered in ``_EXTRA_OPTIONAL_TOOLS`` under the name
    ``"eleven_labs_text2speech"`` with ``eleven_api_key`` listed as its
    optional key. Values in that registry are typed as callables taking
    keyword arguments, so the factory accepts ``**kwargs`` like the
    neighbouring ``_get_*`` factories rather than failing with a
    ``TypeError`` as soon as any keyword is supplied.

    Args:
        **kwargs: Keyword arguments forwarded unchanged to
            ``ElevenLabsText2SpeechTool``.

    Returns:
        A new ``ElevenLabsText2SpeechTool`` instance.
    """
    # NOTE(review): whether the tool itself accepts ``eleven_api_key`` as a
    # constructor keyword is decided by the tool class -- confirm there.
    return ElevenLabsText2SpeechTool(**kwargs)
class ElevenLabsText2SpeechTool(BaseTool):
    """Tool that queries the Eleven Labs Text2Speech API.

    In order to set this up, follow instructions at:
    https://docs.elevenlabs.io/welcome/introduction

    ``_run`` synthesizes the given text and returns the path of a temporary
    file holding the audio; ``play`` plays such a file back and ``stream``
    synthesizes and plays text without going through a file.
    """

    name: str = "eleven_labs_text2speech"
    description: str = (
        "A wrapper around Eleven Labs Text2Speech. "
        "Useful for when you need to convert text to speech. "
        "It supports multiple languages, including English, German, Polish, "
        "Spanish, Italian, French, Portuguese, and Hindi. "
    )
    # Model identifier handed to ``elevenlabs.generate``. The default is the
    # value that used to be hard-coded in ``_text2speech`` and ``stream``.
    model: str = "eleven_multilingual_v1"

    @root_validator(pre=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key exists in environment.

        Looks up ``eleven_api_key`` in ``values`` or ``ELEVEN_API_KEY`` in the
        environment via ``get_from_dict_or_env``. The resolved key is not
        stored; the lookup is performed only for its validation effect.
        """
        # NOTE(review): presumably get_from_dict_or_env raises when the key is
        # found in neither place -- confirm against langchain.utils.
        get_from_dict_or_env(values, "eleven_api_key", "ELEVEN_API_KEY")
        return values

    def _text2speech(self, text: str) -> str:
        """Synthesize ``text`` and return the path of the written audio file.

        Raises:
            ImportError: If the ``elevenlabs`` package is not installed.
        """
        try:
            from elevenlabs import generate
        except ImportError as e:
            raise ImportError(
                "elevenlabs is not installed. "
                "Run `pip install elevenlabs` to install."
            ) from e

        speech = generate(text=text, model=self.model)
        # delete=False keeps the file on disk after the ``with`` block so the
        # returned path stays usable; nothing in this class removes it later.
        # NOTE(review): the suffix is ".wav" -- confirm this matches the audio
        # format actually returned by ``generate``.
        with tempfile.NamedTemporaryFile(
            mode="bx", suffix=".wav", delete=False
        ) as f:
            f.write(speech)
        return f.name

    def _run(self, query: str) -> str:
        """Use the tool.

        Args:
            query: Text to convert to speech.

        Returns:
            Path of the temporary file containing the generated audio.

        Raises:
            RuntimeError: If anything fails while generating or saving the
                speech; the original exception is attached as the cause.
        """
        try:
            return self._text2speech(query)
        except Exception as e:
            raise RuntimeError(
                f"Error while running ElevenLabsText2SpeechTool: {e}"
            ) from e

    def play(self, speech_file: str) -> None:
        """Play the text as speech.

        Args:
            speech_file: Path to an audio file, e.g. one returned by ``run``.

        Raises:
            ImportError: If the ``elevenlabs`` package is not installed.
        """
        try:
            from elevenlabs import play
        except ImportError as e:
            raise ImportError(
                "elevenlabs is not installed. "
                "Run `pip install elevenlabs` to install."
            ) from e

        with open(speech_file, mode="rb") as f:
            speech = f.read()

        play(speech)

    def stream(self, query: str) -> None:
        """Stream the text as speech.

        Args:
            query: Text to synthesize and play while it is being generated.

        Raises:
            ImportError: If the ``elevenlabs`` package is not installed.
        """
        try:
            from elevenlabs import generate, stream
        except ImportError as e:
            raise ImportError(
                "elevenlabs is not installed. "
                "Run `pip install elevenlabs` to install."
            ) from e

        speech_stream = generate(text=query, model=self.model, stream=True)
        stream(speech_stream)