Integration with eleven labs

This commit is contained in:
mateusz.wosinski 2023-08-31 16:02:24 +02:00
parent 27944cb611
commit 800fe4a73f
6 changed files with 347 additions and 0 deletions

View File

@ -0,0 +1,243 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "c8871563-02fe-49f2-901e-c0f05d655a6b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from dotenv import load_dotenv\n",
"load_dotenv()"
]
},
{
"cell_type": "markdown",
"id": "a991a6f8-1897-4f49-a191-ae3bdaeda856",
"metadata": {},
"source": [
"# Eleven Labs Text2Speech\n",
"\n",
"This notebook shows how to interact with the `ElevenLabs API` to achieve text-to-speech capabilities."
]
},
{
"cell_type": "markdown",
"id": "9eeb311e-e1bd-4959-8536-4d267f302eb3",
"metadata": {},
"source": [
"First, you need to set up an ElevenLabs account. You can follow the instructions [here](https://docs.elevenlabs.io/welcome/introduction)."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "0a309c0e-5310-4eaa-8af9-bcbc252e45da",
"metadata": {},
"outputs": [],
"source": [
"# !pip install elevenlabs"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f097c3b1-f761-43cb-aad0-8ba2e93e5f5f",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"ELEVEN_API_KEY\"] = \"\""
]
},
{
"cell_type": "markdown",
"id": "434b2454-2bff-484d-822c-4026a9dc1383",
"metadata": {},
"source": [
"## Usage"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "2f57a647-9214-4562-a8cf-f263a15d1f40",
"metadata": {},
"outputs": [],
"source": [
"from langchain.tools import ElevenLabsText2SpeechTool\n",
"tts = ElevenLabsText2SpeechTool()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "2ff86b69-de9d-4922-ada9-88f98b5c7569",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'eleven_labs_text2speech'"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"tts.name"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "f1984844-aa75-4f83-9d42-1c8052d87cc0",
"metadata": {},
"outputs": [],
"source": [
"speech_file = tts.run(\"Hello world! I am real slim shady\")\n",
"tts.play(speech_file)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3bcced62-4e7c-40ca-95ed-0680baca3082",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "a152766d-5f06-48b1-ac89-b4e8d88d3c9f",
"metadata": {},
"source": [
"## Use within an Agent"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "37626aea-0cf0-4849-9c00-c0f40515ffe0",
"metadata": {},
"outputs": [],
"source": [
"from langchain import OpenAI\n",
"from langchain.agents import initialize_agent, AgentType, load_tools"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "c168f28e-d5b7-4c93-bed8-0ab317b4a44b",
"metadata": {},
"outputs": [],
"source": [
"llm = OpenAI(temperature=0)\n",
"tools = load_tools(['eleven_labs_text2speech'])\n",
"agent = initialize_agent(\n",
" tools=tools,\n",
" llm=llm,\n",
" agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,\n",
" verbose=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "336bf95a-3ccb-4963-aac3-638a4df2ed78",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3mAction:\n",
"```\n",
"{\n",
" \"action\": \"eleven_labs_text2speech\",\n",
" \"action_input\": {\n",
" \"query\": \"Why did the chicken cross the playground? To get to the other slide!\"\n",
" }\n",
"}\n",
"```\n",
"\n",
"\u001b[0m\n",
"Observation: \u001b[36;1m\u001b[1;3m/tmp/tmp8z9e6xf6.wav\u001b[0m\n",
"Thought:\u001b[32;1m\u001b[1;3m I have the audio file ready to be played\n",
"Action:\n",
"```\n",
"{\n",
" \"action\": \"Final Answer\",\n",
" \"action_input\": \"/tmp/tmp8z9e6xf6.wav\"\n",
"}\n",
"```\n",
"\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n"
]
}
],
"source": [
"audio_file = agent.run(\"Tell me a joke and read it out for me.\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "f0aa7aa9-4682-4599-8cae-59347d9e5210",
"metadata": {},
"outputs": [],
"source": [
"tts.play(audio_file)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "caffa8af-6d12-40c4-a25c-bdb28c204a09",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -32,6 +32,7 @@ from langchain.tools.requests.tool import (
RequestsPostTool,
RequestsPutTool,
)
from langchain.tools.eleven_labs.text2speech import ElevenLabsText2SpeechTool
from langchain.tools.scenexplain.tool import SceneXplainTool
from langchain.tools.searx_search.tool import SearxSearchResults, SearxSearchRun
from langchain.tools.shell.tool import ShellTool
@ -284,6 +285,9 @@ def _get_dataforseo_api_search(**kwargs: Any) -> BaseTool:
def _get_dataforseo_api_search_json(**kwargs: Any) -> BaseTool:
    """Build the DataForSeo JSON search-results tool.

    All keyword arguments are forwarded unchanged to ``DataForSeoAPIWrapper``;
    the resulting wrapper is handed to ``DataForSeoAPISearchResults``.
    """
    wrapper = DataForSeoAPIWrapper(**kwargs)
    return DataForSeoAPISearchResults(api_wrapper=wrapper)
def _get_eleven_labs_text2speech(**kwargs: Any) -> BaseTool:
    """Build the Eleven Labs text-to-speech tool.

    The factory is registered in ``_EXTRA_OPTIONAL_TOOLS`` with the optional
    key ``"eleven_api_key"``, so it has to accept keyword arguments like the
    other optional-tool factories; a zero-argument signature raises
    ``TypeError`` as soon as that key is supplied.

    Args:
        **kwargs: Optional settings forwarded unchanged to
            ``ElevenLabsText2SpeechTool`` (e.g. ``eleven_api_key``).

    Returns:
        A configured ``ElevenLabsText2SpeechTool`` instance.
    """
    return ElevenLabsText2SpeechTool(**kwargs)
_EXTRA_LLM_TOOLS: Dict[
str,
@ -340,6 +344,9 @@ _EXTRA_OPTIONAL_TOOLS: Dict[str, Tuple[Callable[[KwArg(Any)], BaseTool], List[st
_get_dataforseo_api_search_json,
["api_login", "api_password", "aiosession"],
),
"eleven_labs_text2speech": (
_get_eleven_labs_text2speech, ["eleven_api_key"]
)
}

View File

@ -44,6 +44,7 @@ from langchain.tools.edenai import (
EdenAiTextToSpeechTool,
EdenaiTool,
)
from langchain.tools.eleven_labs.text2speech import ElevenLabsText2SpeechTool
from langchain.tools.file_management import (
CopyFileTool,
DeleteFileTool,
@ -167,6 +168,7 @@ __all__ = [
"EdenAiSpeechToTextTool",
"EdenAiTextModerationTool",
"EdenaiTool",
"ElevenLabsText2SpeechTool",
"ExtractHyperlinksTool",
"ExtractTextTool",
"FileSearchTool",

View File

@ -0,0 +1,8 @@
"""Eleven Labs Services Tools."""
from langchain.tools.eleven_labs.text2speech import (
ElevenLabsText2SpeechTool
)
# ``__all__`` must list public names as strings; putting the class object
# itself here makes ``from langchain.tools.eleven_labs import *`` raise
# ``TypeError``.
__all__ = ["ElevenLabsText2SpeechTool"]

View File

@ -0,0 +1,86 @@
import tempfile
from typing import Dict
from langchain.pydantic_v1 import root_validator
from langchain.tools.base import BaseTool
from langchain.utils import get_from_dict_or_env
class ElevenLabsText2SpeechTool(BaseTool):
    """Tool that queries the Eleven Labs Text2Speech API.

    In order to set this up, follow instructions at:
    https://docs.elevenlabs.io/welcome/introduction

    The API key is taken from the ``eleven_api_key`` constructor argument or,
    failing that, from the ``ELEVEN_API_KEY`` environment variable.
    """

    name: str = "eleven_labs_text2speech"
    description: str = (
        "A wrapper around Eleven Labs Text2Speech. "
        "Useful for when you need to convert text to speech. "
        "It supports multiple languages, including English, German, Polish, "
        "Spanish, Italian, French, Portuguese, and Hindi. "
    )
    # Identifier of the Eleven Labs model passed to ``generate``. Exposed as a
    # field so callers can pick another model; the default is the value that
    # used to be hard-coded in both ``_text2speech`` and ``stream``.
    model: str = "eleven_multilingual_v1"
    # Populated by ``validate_environment``; declared so that the key may be
    # passed as a constructor argument and is actually used for requests.
    eleven_api_key: str = ""

    @root_validator(pre=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that the api key exists and store it on the tool.

        Raises whatever ``get_from_dict_or_env`` raises when the key is found
        neither in ``values`` nor in the ``ELEVEN_API_KEY`` environment
        variable.
        """
        values["eleven_api_key"] = get_from_dict_or_env(
            values, "eleven_api_key", "ELEVEN_API_KEY"
        )
        return values

    def _text2speech(self, text: str) -> str:
        """Synthesize ``text`` and return the path of the written audio file.

        The file is created with ``delete=False``, so it outlives this call
        and the caller is responsible for removing it.

        Raises:
            ImportError: If the ``elevenlabs`` package is not installed.
        """
        try:
            from elevenlabs import generate
        except ImportError as e:
            raise ImportError(
                "elevenlabs is not installed. "
                "Run `pip install elevenlabs` to install."
            ) from e

        speech = generate(text=text, api_key=self.eleven_api_key, model=self.model)
        # NOTE(review): the suffix is always ".wav" regardless of the encoding
        # that ``generate`` returns -- confirm the actual audio format.
        with tempfile.NamedTemporaryFile(
            mode="wb", suffix=".wav", delete=False
        ) as f:
            f.write(speech)
        return f.name

    def _run(self, query: str) -> str:
        """Use the tool.

        Args:
            query: Text to convert to speech.

        Returns:
            Path of the temporary file holding the generated audio.

        Raises:
            RuntimeError: If synthesis fails for any reason; the original
                exception is attached as ``__cause__``.
        """
        try:
            speech_file = self._text2speech(query)
            return speech_file
        except Exception as e:
            raise RuntimeError(
                f"Error while running ElevenLabsText2SpeechTool: {e}"
            ) from e

    def play(self, speech_file: str) -> None:
        """Play the audio stored in ``speech_file``.

        Raises:
            ImportError: If the ``elevenlabs`` package is not installed.
        """
        try:
            from elevenlabs import play
        except ImportError as e:
            raise ImportError(
                "elevenlabs is not installed. "
                "Run `pip install elevenlabs` to install."
            ) from e

        with open(speech_file, mode="rb") as f:
            speech = f.read()

        play(speech)

    def stream(self, query: str) -> None:
        """Stream the text as speech.

        Raises:
            ImportError: If the ``elevenlabs`` package is not installed.
        """
        try:
            from elevenlabs import stream, generate
        except ImportError as e:
            raise ImportError(
                "elevenlabs is not installed. "
                "Run `pip install elevenlabs` to install."
            ) from e

        speech_stream = generate(
            text=query,
            api_key=self.eleven_api_key,
            model=self.model,
            stream=True,
        )
        stream(speech_stream)

View File

@ -36,6 +36,7 @@ _EXPECTED = [
"EdenAiTextModerationTool",
"EdenAiTextToSpeechTool",
"EdenaiTool",
"ElevenLabsText2SpeechTool",
"ExtractHyperlinksTool",
"ExtractTextTool",
"FileSearchTool",