mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
Add ElevenLabs text to speech tool (#10525)
This commit is contained in:
commit
303724980c
226
docs/extras/integrations/tools/eleven_labs_tts.ipynb
Normal file
226
docs/extras/integrations/tools/eleven_labs_tts.ipynb
Normal file
@ -0,0 +1,226 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "a991a6f8-1897-4f49-a191-ae3bdaeda856",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Eleven Labs Text2Speech\n",
|
||||||
|
"\n",
|
||||||
|
"This notebook shows how to interact with the `ElevenLabs API` to achieve text-to-speech capabilities."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "9eeb311e-e1bd-4959-8536-4d267f302eb3",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"First, you need to set up an ElevenLabs account. You can follow the instructions [here](https://docs.elevenlabs.io/welcome/introduction)."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"id": "0a309c0e-5310-4eaa-8af9-bcbc252e45da",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# !pip install elevenlabs"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "f097c3b1-f761-43cb-aad0-8ba2e93e5f5f",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import os\n",
|
||||||
|
"\n",
|
||||||
|
"os.environ[\"ELEVEN_API_KEY\"] = \"\""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "434b2454-2bff-484d-822c-4026a9dc1383",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Usage"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"id": "2f57a647-9214-4562-a8cf-f263a15d1f40",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"'eleven_labs_text2speech'"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"from langchain.tools import ElevenLabsText2SpeechTool\n",
|
||||||
|
"\n",
|
||||||
|
"text_to_speak = \"Hello world! I am the real slim shady\"\n",
|
||||||
|
"\n",
|
||||||
|
"tts = ElevenLabsText2SpeechTool()\n",
|
||||||
|
"tts.name"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "d4613fed-66f0-47c6-be50-7e7670654427",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"We can generate audio, save it to a temporary file, and then play it."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"id": "f1984844-aa75-4f83-9d42-1c8052d87cc0",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"speech_file = tts.run(text_to_speak)\n",
|
||||||
|
"tts.play(speech_file)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "42d89cd4-ac2a-4857-9787-c9018b4a8782",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Or stream audio directly."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 9,
|
||||||
|
"id": "d72822f8-3223-47e2-8d2e-6ff46b8c8645",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"tts.stream_speech(text_to_speak)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "a152766d-5f06-48b1-ac89-b4e8d88d3c9f",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Use within an Agent"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 12,
|
||||||
|
"id": "37626aea-0cf0-4849-9c00-c0f40515ffe0",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from langchain import OpenAI\n",
|
||||||
|
"from langchain.agents import initialize_agent, AgentType, load_tools"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 13,
|
||||||
|
"id": "c168f28e-d5b7-4c93-bed8-0ab317b4a44b",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"llm = OpenAI(temperature=0)\n",
|
||||||
|
"tools = load_tools([\"eleven_labs_text2speech\"])\n",
|
||||||
|
"agent = initialize_agent(\n",
|
||||||
|
" tools=tools,\n",
|
||||||
|
" llm=llm,\n",
|
||||||
|
" agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,\n",
|
||||||
|
" verbose=True,\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 14,
|
||||||
|
"id": "336bf95a-3ccb-4963-aac3-638a4df2ed78",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||||
|
"\u001b[32;1m\u001b[1;3mAction:\n",
|
||||||
|
"```\n",
|
||||||
|
"{\n",
|
||||||
|
" \"action\": \"eleven_labs_text2speech\",\n",
|
||||||
|
" \"action_input\": {\n",
|
||||||
|
" \"query\": \"Why did the chicken cross the playground? To get to the other slide!\"\n",
|
||||||
|
" }\n",
|
||||||
|
"}\n",
|
||||||
|
"```\n",
|
||||||
|
"\n",
|
||||||
|
"\u001b[0m\n",
|
||||||
|
"Observation: \u001b[36;1m\u001b[1;3m/tmp/tmpsfg783f1.wav\u001b[0m\n",
|
||||||
|
"Thought:\u001b[32;1m\u001b[1;3m I have the audio file ready to be sent to the human\n",
|
||||||
|
"Action:\n",
|
||||||
|
"```\n",
|
||||||
|
"{\n",
|
||||||
|
" \"action\": \"Final Answer\",\n",
|
||||||
|
" \"action_input\": \"/tmp/tmpsfg783f1.wav\"\n",
|
||||||
|
"}\n",
|
||||||
|
"```\n",
|
||||||
|
"\n",
|
||||||
|
"\u001b[0m\n",
|
||||||
|
"\n",
|
||||||
|
"\u001b[1m> Finished chain.\u001b[0m\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"audio_file = agent.run(\"Tell me a joke and read it out for me.\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 15,
|
||||||
|
"id": "f0aa7aa9-4682-4599-8cae-59347d9e5210",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"tts.play(audio_file)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.9.16"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
@ -32,6 +32,7 @@ from langchain.tools.requests.tool import (
|
|||||||
RequestsPostTool,
|
RequestsPostTool,
|
||||||
RequestsPutTool,
|
RequestsPutTool,
|
||||||
)
|
)
|
||||||
|
from langchain.tools.eleven_labs.text2speech import ElevenLabsText2SpeechTool
|
||||||
from langchain.tools.scenexplain.tool import SceneXplainTool
|
from langchain.tools.scenexplain.tool import SceneXplainTool
|
||||||
from langchain.tools.searx_search.tool import SearxSearchResults, SearxSearchRun
|
from langchain.tools.searx_search.tool import SearxSearchResults, SearxSearchRun
|
||||||
from langchain.tools.shell.tool import ShellTool
|
from langchain.tools.shell.tool import ShellTool
|
||||||
@ -285,6 +286,10 @@ def _get_dataforseo_api_search_json(**kwargs: Any) -> BaseTool:
|
|||||||
return DataForSeoAPISearchResults(api_wrapper=DataForSeoAPIWrapper(**kwargs))
|
return DataForSeoAPISearchResults(api_wrapper=DataForSeoAPIWrapper(**kwargs))
|
||||||
|
|
||||||
|
|
||||||
|
def _get_eleven_labs_text2speech(**kwargs: Any) -> BaseTool:
    """Build the Eleven Labs text-to-speech tool.

    All keyword arguments are forwarded unchanged to the
    ``ElevenLabsText2SpeechTool`` constructor.
    """
    tool = ElevenLabsText2SpeechTool(**kwargs)
    return tool
|
||||||
|
|
||||||
|
|
||||||
_EXTRA_LLM_TOOLS: Dict[
|
_EXTRA_LLM_TOOLS: Dict[
|
||||||
str,
|
str,
|
||||||
Tuple[Callable[[Arg(BaseLanguageModel, "llm"), KwArg(Any)], BaseTool], List[str]],
|
Tuple[Callable[[Arg(BaseLanguageModel, "llm"), KwArg(Any)], BaseTool], List[str]],
|
||||||
@ -340,6 +345,7 @@ _EXTRA_OPTIONAL_TOOLS: Dict[str, Tuple[Callable[[KwArg(Any)], BaseTool], List[st
|
|||||||
_get_dataforseo_api_search_json,
|
_get_dataforseo_api_search_json,
|
||||||
["api_login", "api_password", "aiosession"],
|
["api_login", "api_password", "aiosession"],
|
||||||
),
|
),
|
||||||
|
"eleven_labs_text2speech": (_get_eleven_labs_text2speech, ["eleven_api_key"]),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -44,6 +44,7 @@ from langchain.tools.edenai import (
|
|||||||
EdenAiTextToSpeechTool,
|
EdenAiTextToSpeechTool,
|
||||||
EdenaiTool,
|
EdenaiTool,
|
||||||
)
|
)
|
||||||
|
from langchain.tools.eleven_labs.text2speech import ElevenLabsText2SpeechTool
|
||||||
from langchain.tools.file_management import (
|
from langchain.tools.file_management import (
|
||||||
CopyFileTool,
|
CopyFileTool,
|
||||||
DeleteFileTool,
|
DeleteFileTool,
|
||||||
@ -167,6 +168,7 @@ __all__ = [
|
|||||||
"EdenAiSpeechToTextTool",
|
"EdenAiSpeechToTextTool",
|
||||||
"EdenAiTextModerationTool",
|
"EdenAiTextModerationTool",
|
||||||
"EdenaiTool",
|
"EdenaiTool",
|
||||||
|
"ElevenLabsText2SpeechTool",
|
||||||
"ExtractHyperlinksTool",
|
"ExtractHyperlinksTool",
|
||||||
"ExtractTextTool",
|
"ExtractTextTool",
|
||||||
"FileSearchTool",
|
"FileSearchTool",
|
||||||
|
5
libs/langchain/langchain/tools/eleven_labs/__init__.py
Normal file
5
libs/langchain/langchain/tools/eleven_labs/__init__.py
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
"""Eleven Labs Services Tools."""
|
||||||
|
|
||||||
|
from langchain.tools.eleven_labs.text2speech import ElevenLabsText2SpeechTool
|
||||||
|
|
||||||
|
__all__ = ["ElevenLabsText2SpeechTool"]
|
8
libs/langchain/langchain/tools/eleven_labs/models.py
Normal file
8
libs/langchain/langchain/tools/eleven_labs/models.py
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
|
||||||
|
class ElevenLabsModel(str, Enum):
    """Model identifiers selectable for Eleven Labs Text2Speech.

    Members are also ``str`` instances, so a member can be used wherever the
    plain identifier string is expected.
    """

    # Identifier string of the multilingual v1 model.
    MULTI_LINGUAL = "eleven_multilingual_v1"
    # Identifier string of the monolingual v1 model.
    MONO_LINGUAL = "eleven_monolingual_v1"
|
80
libs/langchain/langchain/tools/eleven_labs/text2speech.py
Normal file
80
libs/langchain/langchain/tools/eleven_labs/text2speech.py
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
import tempfile
|
||||||
|
from enum import Enum
|
||||||
|
from typing import Any, Dict, Optional, Union
|
||||||
|
|
||||||
|
from langchain.callbacks.manager import CallbackManagerForToolRun
|
||||||
|
from langchain.pydantic_v1 import root_validator
|
||||||
|
from langchain.tools.base import BaseTool
|
||||||
|
from langchain.utils import get_from_dict_or_env
|
||||||
|
|
||||||
|
|
||||||
|
def _import_elevenlabs() -> Any:
|
||||||
|
try:
|
||||||
|
import elevenlabs
|
||||||
|
except ImportError as e:
|
||||||
|
raise ImportError(
|
||||||
|
"Cannot import elevenlabs, please install `pip install elevenlabs`."
|
||||||
|
) from e
|
||||||
|
return elevenlabs
|
||||||
|
|
||||||
|
|
||||||
|
class ElevenLabsModel(str, Enum):
    """Model identifiers selectable for Eleven Labs Text2Speech.

    Members are also ``str`` instances, so a member can be used wherever the
    plain identifier string is expected.
    """

    # Identifier string of the multilingual v1 model.
    MULTI_LINGUAL = "eleven_multilingual_v1"
    # Identifier string of the monolingual v1 model.
    MONO_LINGUAL = "eleven_monolingual_v1"
|
||||||
|
|
||||||
|
|
||||||
|
class ElevenLabsText2SpeechTool(BaseTool):
    """Tool that queries the Eleven Labs Text2Speech API.

    In order to set this up, follow instructions at:
    https://docs.elevenlabs.io/welcome/introduction
    """

    # Model identifier forwarded to ``elevenlabs.generate``; either an
    # ``ElevenLabsModel`` member or a raw identifier string.
    model: Union[ElevenLabsModel, str] = ElevenLabsModel.MULTI_LINGUAL

    name: str = "eleven_labs_text2speech"
    description: str = (
        "A wrapper around Eleven Labs Text2Speech. "
        "Useful for when you need to convert text to speech. "
        "It supports multiple languages, including English, German, Polish, "
        "Spanish, Italian, French, Portuguese, and Hindi. "
    )

    @root_validator(pre=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key exists in environment."""
        # The looked-up key is discarded: the call is made only for its check.
        # NOTE(review): presumably get_from_dict_or_env raises when the key is
        # in neither `values` nor the ELEVEN_API_KEY variable -- confirm.
        _ = get_from_dict_or_env(values, "eleven_api_key", "ELEVEN_API_KEY")

        return values

    def _run(
        self, query: str, run_manager: Optional[CallbackManagerForToolRun] = None
    ) -> str:
        """Convert ``query`` to speech and return the path of the audio file.

        Args:
            query: Text to synthesize.
            run_manager: Optional callback manager; not used by this method.

        Returns:
            Path of a newly created temporary file holding the generated audio.

        Raises:
            RuntimeError: if generation or writing the file fails. The
                underlying exception is chained as ``__cause__``.
        """
        elevenlabs = _import_elevenlabs()
        try:
            speech = elevenlabs.generate(text=query, model=self.model)
            # delete=False keeps the file on disk after it is closed so the
            # returned path stays usable; removing it is left to the caller.
            # NOTE(review): the suffix is ".wav" -- confirm it matches the
            # audio format the API actually returns.
            with tempfile.NamedTemporaryFile(
                mode="bx", suffix=".wav", delete=False
            ) as f:
                f.write(speech)
            return f.name
        except Exception as e:
            # Chain explicitly so the original failure is kept as the cause.
            raise RuntimeError(
                f"Error while running ElevenLabsText2SpeechTool: {e}"
            ) from e

    def play(self, speech_file: str) -> None:
        """Play the audio stored in ``speech_file``.

        Args:
            speech_file: Path of an audio file, e.g. the value returned by
                running this tool.
        """
        elevenlabs = _import_elevenlabs()
        with open(speech_file, mode="rb") as f:
            speech = f.read()

        elevenlabs.play(speech)

    def stream_speech(self, query: str) -> None:
        """Stream the text as speech as it is generated.

        Play the text in your speakers.

        Args:
            query: Text to synthesize and stream.
        """
        elevenlabs = _import_elevenlabs()
        speech_stream = elevenlabs.generate(text=query, model=self.model, stream=True)
        elevenlabs.stream(speech_stream)
|
@ -36,6 +36,7 @@ _EXPECTED = [
|
|||||||
"EdenAiTextModerationTool",
|
"EdenAiTextModerationTool",
|
||||||
"EdenAiTextToSpeechTool",
|
"EdenAiTextToSpeechTool",
|
||||||
"EdenaiTool",
|
"EdenaiTool",
|
||||||
|
"ElevenLabsText2SpeechTool",
|
||||||
"ExtractHyperlinksTool",
|
"ExtractHyperlinksTool",
|
||||||
"ExtractTextTool",
|
"ExtractTextTool",
|
||||||
"FileSearchTool",
|
"FileSearchTool",
|
||||||
|
Loading…
Reference in New Issue
Block a user