Integration with eleven labs

1 year ago · 800fe4a73f
parent 27944cb611
commit 800fe4a73f
6 changed files with 347 additions and 0 deletions
--- a/docs/extras/integrations/tools/eleven_labs_tts.ipynb
+++ b/docs/extras/integrations/tools/eleven_labs_tts.ipynb
@ -0,0 +1,243 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "c8871563-02fe-49f2-901e-c0f05d655a6b",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from dotenv import load_dotenv\n",
+    "load_dotenv()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a991a6f8-1897-4f49-a191-ae3bdaeda856",
+   "metadata": {},
+   "source": [
+    "# Eleven Labs Text2Speech\n",
+    "\n",
+    "This notebook shows how to interact with the `ElevenLabs API` to achieve text-to-speech capabilities."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9eeb311e-e1bd-4959-8536-4d267f302eb3",
+   "metadata": {},
+   "source": [
+    "First, you need to set up an ElevenLabs account. You can follow the instructions [here](https://docs.elevenlabs.io/welcome/introduction)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "0a309c0e-5310-4eaa-8af9-bcbc252e45da",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# !pip install elevenlabs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f097c3b1-f761-43cb-aad0-8ba2e93e5f5f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "os.environ[\"ELEVEN_API_KEY\"] = \"\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "434b2454-2bff-484d-822c-4026a9dc1383",
+   "metadata": {},
+   "source": [
+    "## Usage"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "2f57a647-9214-4562-a8cf-f263a15d1f40",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.tools import ElevenLabsText2SpeechTool\n",
+    "tts = ElevenLabsText2SpeechTool()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "2ff86b69-de9d-4922-ada9-88f98b5c7569",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'eleven_labs_text2speech'"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tts.name"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "f1984844-aa75-4f83-9d42-1c8052d87cc0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "speech_file = tts.run(\"Hello world! I am real slim shady\")\n",
+    "tts.play(speech_file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3bcced62-4e7c-40ca-95ed-0680baca3082",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a152766d-5f06-48b1-ac89-b4e8d88d3c9f",
+   "metadata": {},
+   "source": [
+    "## Use within an Agent"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "37626aea-0cf0-4849-9c00-c0f40515ffe0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain import OpenAI\n",
+    "from langchain.agents import initialize_agent, AgentType, load_tools"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "c168f28e-d5b7-4c93-bed8-0ab317b4a44b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "llm = OpenAI(temperature=0)\n",
+    "tools = load_tools(['eleven_labs_text2speech'])\n",
+    "agent = initialize_agent(\n",
+    "    tools=tools,\n",
+    "    llm=llm,\n",
+    "    agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,\n",
+    "    verbose=True,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "336bf95a-3ccb-4963-aac3-638a4df2ed78",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "\n",
+      "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
+      "\u001b[32;1m\u001b[1;3mAction:\n",
+      "```\n",
+      "{\n",
+      "  \"action\": \"eleven_labs_text2speech\",\n",
+      "  \"action_input\": {\n",
+      "    \"query\": \"Why did the chicken cross the playground? To get to the other slide!\"\n",
+      "  }\n",
+      "}\n",
+      "```\n",
+      "\n",
+      "\u001b[0m\n",
+      "Observation: \u001b[36;1m\u001b[1;3m/tmp/tmp8z9e6xf6.wav\u001b[0m\n",
+      "Thought:\u001b[32;1m\u001b[1;3m I have the audio file ready to be played\n",
+      "Action:\n",
+      "```\n",
+      "{\n",
+      "  \"action\": \"Final Answer\",\n",
+      "  \"action_input\": \"/tmp/tmp8z9e6xf6.wav\"\n",
+      "}\n",
+      "```\n",
+      "\u001b[0m\n",
+      "\n",
+      "\u001b[1m> Finished chain.\u001b[0m\n"
+     ]
+    }
+   ],
+   "source": [
+    "audio_file = agent.run(\"Tell me a joke and read it out for me.\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "f0aa7aa9-4682-4599-8cae-59347d9e5210",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tts.play(audio_file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "caffa8af-6d12-40c4-a25c-bdb28c204a09",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.16"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/libs/langchain/langchain/agents/load_tools.py
+++ b/libs/langchain/langchain/agents/load_tools.py
@ -32,6 +32,7 @@ from langchain.tools.requests.tool import (
    RequestsPostTool,
    RequestsPutTool,
 )
+from langchain.tools.eleven_labs.text2speech import ElevenLabsText2SpeechTool
 from langchain.tools.scenexplain.tool import SceneXplainTool
 from langchain.tools.searx_search.tool import SearxSearchResults, SearxSearchRun
 from langchain.tools.shell.tool import ShellTool
@ -284,6 +285,9 @@ def _get_dataforseo_api_search(**kwargs: Any) -> BaseTool:
 def _get_dataforseo_api_search_json(**kwargs: Any) -> BaseTool:
    return DataForSeoAPISearchResults(api_wrapper=DataForSeoAPIWrapper(**kwargs))

+def _get_eleven_labs_text2speech(**kwargs: Any) -> BaseTool:
+    """Build the Eleven Labs text-to-speech tool.
+
+    Accepts keyword arguments like the other optional-tool factories so that
+    the keys registered for this tool (``eleven_api_key``) can be supplied by
+    the caller without raising ``TypeError``.
+
+    NOTE(review): the kwargs are forwarded to the tool constructor unchanged;
+    confirm that the tool accepts ``eleven_api_key`` as an init argument.
+    """
+    return ElevenLabsText2SpeechTool(**kwargs)
+

 _EXTRA_LLM_TOOLS: Dict[
    str,
@ -340,6 +344,9 @@ _EXTRA_OPTIONAL_TOOLS: Dict[str, Tuple[Callable[[KwArg(Any)], BaseTool], List[st
        _get_dataforseo_api_search_json,
        ["api_login", "api_password", "aiosession"],
    ),
+    "eleven_labs_text2speech": (
+        _get_eleven_labs_text2speech, ["eleven_api_key"]
+    )
 }


--- a/libs/langchain/langchain/tools/__init__.py
+++ b/libs/langchain/langchain/tools/__init__.py
@ -44,6 +44,7 @@ from langchain.tools.edenai import (
    EdenAiTextToSpeechTool,
    EdenaiTool,
 )
+from langchain.tools.eleven_labs.text2speech import ElevenLabsText2SpeechTool
 from langchain.tools.file_management import (
    CopyFileTool,
    DeleteFileTool,
@ -167,6 +168,7 @@ __all__ = [
    "EdenAiSpeechToTextTool",
    "EdenAiTextModerationTool",
    "EdenaiTool",
+    "ElevenLabsText2SpeechTool",
    "ExtractHyperlinksTool",
    "ExtractTextTool",
    "FileSearchTool",
--- a/libs/langchain/langchain/tools/eleven_labs/__init__.py
+++ b/libs/langchain/langchain/tools/eleven_labs/__init__.py
@ -0,0 +1,8 @@
+"""Eleven Labs Services Tools."""
+
+from langchain.tools.eleven_labs.text2speech import ElevenLabsText2SpeechTool
+
+# ``__all__`` must contain names as strings; listing the class object itself
+# makes ``from langchain.tools.eleven_labs import *`` fail with a TypeError.
+__all__ = ["ElevenLabsText2SpeechTool"]
--- a/libs/langchain/langchain/tools/eleven_labs/text2speech.py
+++ b/libs/langchain/langchain/tools/eleven_labs/text2speech.py
@ -0,0 +1,86 @@
+import tempfile
+from typing import Dict
+
+from langchain.pydantic_v1 import root_validator
+from langchain.tools.base import BaseTool
+from langchain.utils import get_from_dict_or_env
+
+
+class ElevenLabsText2SpeechTool(BaseTool):
+    """Tool that queries the Eleven Labs Text2Speech API.
+
+    In order to set this up, follow instructions at:
+    https://docs.elevenlabs.io/welcome/introduction
+
+    The ``elevenlabs`` package is imported lazily inside each method, so it is
+    only needed once speech is actually generated, played or streamed.
+    """
+
+    name: str = "eleven_labs_text2speech"
+    description: str = (
+        "A wrapper around Eleven Labs Text2Speech. "
+        "Useful for when you need to convert text to speech. "
+        "It supports multiple languages, including English, German, Polish, "
+        "Spanish, Italian, French, Portuguese, and Hindi. "
+    )
+    # Model identifier handed to ``elevenlabs.generate``; the default keeps the
+    # value that was previously hard-coded in ``_text2speech`` and ``stream``.
+    model: str = "eleven_multilingual_v1"
+
+    @root_validator(pre=True)
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that api key exists in environment.
+
+        Looks up ``eleven_api_key`` in ``values`` or ``ELEVEN_API_KEY`` in the
+        environment via ``get_from_dict_or_env``. The looked-up value is
+        discarded and ``values`` is returned unchanged.
+        """
+        _ = get_from_dict_or_env(values, "eleven_api_key", "ELEVEN_API_KEY")
+        return values
+
+    def _text2speech(self, text: str) -> str:
+        """Generate speech for ``text`` and return the path of the audio file.
+
+        Raises:
+            ImportError: if the ``elevenlabs`` package is not installed.
+        """
+        try:
+            from elevenlabs import generate
+        except ImportError as e:
+            raise ImportError(
+                "elevenlabs is not installed. "
+                "Run `pip install elevenlabs` to install."
+            ) from e
+
+        speech = generate(text=text, model=self.model)
+        # delete=False: the file has to outlive this call so that the returned
+        # path can still be opened later (e.g. by ``play``).
+        # NOTE(review): confirm that the generated audio really is WAV; the
+        # suffix is kept as-is to preserve existing behavior.
+        with tempfile.NamedTemporaryFile(
+            mode="bx", suffix=".wav", delete=False
+        ) as f:
+            f.write(speech)
+        return f.name
+
+    def _run(self, query: str) -> str:
+        """Use the tool.
+
+        Args:
+            query: text to convert to speech.
+
+        Returns:
+            Path of the temporary file holding the generated audio.
+
+        Raises:
+            RuntimeError: if generation fails for any reason; the original
+                exception is attached as ``__cause__``.
+        """
+        try:
+            return self._text2speech(query)
+        except Exception as e:
+            raise RuntimeError(
+                f"Error while running ElevenLabsText2SpeechTool: {e}"
+            ) from e
+
+    def play(self, speech_file: str) -> None:
+        """Play the audio stored in ``speech_file``.
+
+        Raises:
+            ImportError: if the ``elevenlabs`` package is not installed.
+        """
+        try:
+            from elevenlabs import play
+        except ImportError as e:
+            raise ImportError(
+                "elevenlabs is not installed. "
+                "Run `pip install elevenlabs` to install."
+            ) from e
+
+        with open(speech_file, mode="rb") as f:
+            speech = f.read()
+
+        play(speech)
+
+    def stream(self, query: str) -> None:
+        """Stream the text as speech.
+
+        Raises:
+            ImportError: if the ``elevenlabs`` package is not installed.
+        """
+        try:
+            from elevenlabs import generate, stream
+        except ImportError as e:
+            raise ImportError(
+                "elevenlabs is not installed. "
+                "Run `pip install elevenlabs` to install."
+            ) from e
+
+        speech_stream = generate(text=query, model=self.model, stream=True)
+        stream(speech_stream)
+    
--- a/libs/langchain/tests/unit_tests/tools/test_public_api.py
+++ b/libs/langchain/tests/unit_tests/tools/test_public_api.py
@ -36,6 +36,7 @@ _EXPECTED = [
    "EdenAiTextModerationTool",
    "EdenAiTextToSpeechTool",
    "EdenaiTool",
+    "ElevenLabsText2SpeechTool",
    "ExtractHyperlinksTool",
    "ExtractTextTool",
    "FileSearchTool",