community[minor]: add hugging face text-to-speech inference API (#18880)

Description: I implemented a tool to use Hugging Face text-to-speech inference API. Issue: n/a Dependencies: n/a Twitter handle: No Twitter, but do have [LinkedIn](https://www.linkedin.com/in/robby-horvath/) lol. --------- Co-authored-by: Robby <h0rv@users.noreply.github.com> Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
6 months ago · f7e8a382cc
parent 73eb3f8fd9
commit f7e8a382cc
4 changed files with 212 additions and 0 deletions
--- a/libs/community/langchain_community/tools/audio/init.py
+++ b/libs/community/langchain_community/tools/audio/init.py
@ -0,0 +1,7 @@
 from langchain_community.tools.audio.huggingface_text_to_speech_inference import (
    HuggingFaceTextToSpeechModelInference,
 )
 __all__ = [
    "HuggingFaceTextToSpeechModelInference",
 ]
--- a/libs/community/langchain_community/tools/audio/huggingface_text_to_speech_inference.py
+++ b/libs/community/langchain_community/tools/audio/huggingface_text_to_speech_inference.py
@ -0,0 +1,118 @@
 import logging
 import os
 import uuid
 from datetime import datetime
 from typing import Callable, Literal, Optional
 import requests
 from langchain_core.callbacks import CallbackManagerForToolRun
 from langchain_core.pydantic_v1 import SecretStr
 from langchain_core.tools import BaseTool
 logger = logging.getLogger(__name__)
 class HuggingFaceTextToSpeechModelInference(BaseTool):
    """HuggingFace Text-to-Speech Model Inference.
    Requirements:
        - Environment variable ``HUGGINGFACE_API_KEY`` must be set,
          or passed as a named parameter to the constructor.
    """
    name: str = "openai_text_to_speech"
    """Name of the tool."""
    description: str = "A wrapper around OpenAI Text-to-Speech API. "
    """Description of the tool."""
    model: str
    """Model name."""
    file_extension: str
    """File extension of the output audio file."""
    destination_dir: str
    """Directory to save the output audio file."""
    file_namer: Callable[[], str]
    """Function to generate unique file names."""
    api_url: str
    huggingface_api_key: SecretStr
    _HUGGINGFACE_API_KEY_ENV_NAME = "HUGGINGFACE_API_KEY"
    _HUGGINGFACE_API_URL_ROOT = "https://api-inference.huggingface.co/models"
    def __init__(
        self,
        model: str,
        file_extension: str,
        *,
        destination_dir: str = "./tts",
        file_naming_func: Literal["uuid", "timestamp"] = "uuid",
        huggingface_api_key: Optional[SecretStr] = None,
    ) -> None:
        if not huggingface_api_key:
            huggingface_api_key = SecretStr(
                os.getenv(self._HUGGINGFACE_API_KEY_ENV_NAME, "")
            )
        if (
            not huggingface_api_key
            or not huggingface_api_key.get_secret_value()
            or huggingface_api_key.get_secret_value() == ""
        ):
            raise ValueError(
                f"'{self._HUGGINGFACE_API_KEY_ENV_NAME}' must be or set or passed"
            )
        if file_naming_func == "uuid":
            file_namer = lambda: str(uuid.uuid4())  # noqa: E731
        elif file_naming_func == "timestamp":
            file_namer = lambda: str(int(datetime.now().timestamp()))  # noqa: E731
        else:
            raise ValueError(
                f"Invalid value for 'file_naming_func': {file_naming_func}"
            )
        super().__init__(
            model=model,
            file_extension=file_extension,
            api_url=f"{self._HUGGINGFACE_API_URL_ROOT}/{model}",
            destination_dir=destination_dir,
            file_namer=file_namer,
            huggingface_api_key=huggingface_api_key,
        )
    def _run(
        self,
        query: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        response = requests.post(
            self.api_url,
            headers={
                "Authorization": f"Bearer {self.huggingface_api_key.get_secret_value()}"
            },
            json={"inputs": query},
        )
        audio_bytes = response.content
        try:
            os.makedirs(self.destination_dir, exist_ok=True)
        except Exception as e:
            logger.error(f"Error creating directory '{self.destination_dir}': {e}")
            raise
        output_file = os.path.join(
            self.destination_dir,
            f"{str(self.file_namer())}.{self.file_extension}",
        )
        try:
            with open(output_file, mode="xb") as f:
                f.write(audio_bytes)
        except FileExistsError:
            raise ValueError("Output name must be unique")
        except Exception as e:
            logger.error(f"Error occurred while creating file: {e}")
            raise
        return output_file
--- a/libs/community/tests/unit_tests/tools/audio/init.py
+++ b/libs/community/tests/unit_tests/tools/audio/init.py
--- a/libs/community/tests/unit_tests/tools/audio/test_tools.py
+++ b/libs/community/tests/unit_tests/tools/audio/test_tools.py
@ -0,0 +1,87 @@
 """Test Audio Tools."""
 import os
 import tempfile
 import uuid
 from unittest.mock import Mock, mock_open, patch
 import pytest
 from langchain_core.pydantic_v1 import SecretStr
 from langchain_community.tools.audio import HuggingFaceTextToSpeechModelInference
 AUDIO_FORMAT_EXT = "wav"
 def test_huggingface_tts_constructor() -> None:
    with pytest.raises(ValueError):
        os.environ.pop("HUGGINGFACE_API_KEY", None)
        HuggingFaceTextToSpeechModelInference(
            model="test/model",
            file_extension=AUDIO_FORMAT_EXT,
        )
    with pytest.raises(ValueError):
        HuggingFaceTextToSpeechModelInference(
            model="test/model",
            file_extension=AUDIO_FORMAT_EXT,
            huggingface_api_key=SecretStr(""),
        )
    HuggingFaceTextToSpeechModelInference(
        model="test/model",
        file_extension=AUDIO_FORMAT_EXT,
        huggingface_api_key=SecretStr("foo"),
    )
    os.environ["HUGGINGFACE_API_KEY"] = "foo"
    HuggingFaceTextToSpeechModelInference(
        model="test/model",
        file_extension=AUDIO_FORMAT_EXT,
    )
 def test_huggingface_tts_run_with_requests_mock() -> None:
    os.environ["HUGGINGFACE_API_KEY"] = "foo"
    with tempfile.TemporaryDirectory() as tmp_dir, patch(
        "uuid.uuid4"
    ) as mock_uuid, patch("requests.post") as mock_inference, patch(
        "builtins.open", mock_open()
    ) as mock_file:
        input_query = "Dummy input"
        mock_uuid_value = uuid.UUID("00000000-0000-0000-0000-000000000000")
        mock_uuid.return_value = mock_uuid_value
        expected_output_file_base_name = os.path.join(tmp_dir, str(mock_uuid_value))
        expected_output_file = f"{expected_output_file_base_name}.{AUDIO_FORMAT_EXT}"
        test_audio_content = b"test_audio_bytes"
        tts = HuggingFaceTextToSpeechModelInference(
            model="test/model",
            file_extension=AUDIO_FORMAT_EXT,
            destination_dir=tmp_dir,
            file_naming_func="uuid",
        )
        # Mock the requests.post response
        mock_response = Mock()
        mock_response.content = test_audio_content
        mock_inference.return_value = mock_response
        output_path = tts._run(input_query)
        assert output_path == expected_output_file
        mock_inference.assert_called_once_with(
            tts.api_url,
            headers={
                "Authorization": f"Bearer {tts.huggingface_api_key.get_secret_value()}"
            },
            json={"inputs": input_query},
        )
        mock_file.assert_called_once_with(expected_output_file, mode="xb")
        mock_file.return_value.write.assert_called_once_with(test_audio_content)