diff --git a/docs/docs/integrations/llms/titan_takeoff.ipynb b/docs/docs/integrations/llms/titan_takeoff.ipynb index fe4cdc8a64..e1b9a2f0fa 100644 --- a/docs/docs/integrations/llms/titan_takeoff.ipynb +++ b/docs/docs/integrations/llms/titan_takeoff.ipynb @@ -108,7 +108,7 @@ "from langchain.llms import TitanTakeoff\n", "\n", "llm = TitanTakeoff(\n", - " baseURL=\"http://localhost:8000\",\n", + " base_url=\"http://localhost:8000\",\n", " generate_max_length=128,\n", " temperature=1.0\n", ")\n", diff --git a/docs/docs/integrations/llms/titan_takeoff_pro.ipynb b/docs/docs/integrations/llms/titan_takeoff_pro.ipynb new file mode 100644 index 0000000000..12eb6c3094 --- /dev/null +++ b/docs/docs/integrations/llms/titan_takeoff_pro.ipynb @@ -0,0 +1,100 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Titan Takeoff Pro\n", + "\n", + "`TitanML` helps businesses build and deploy better, smaller, cheaper, and faster NLP models through our training, compression, and inference optimization platform.\n", + "\n", + ">Note: These docs are for the Pro version of Titan Takeoff. For the community version, see the page for Titan Takeoff.\n", + "\n", + "Our inference server, [Titan Takeoff (Pro Version)](https://docs.titanml.co/docs/titan-takeoff/pro-features/feature-comparison), enables deployment of LLMs locally on your hardware in a single command. Most generative model architectures are supported, such as Falcon, Llama 2, GPT2, T5 and many more." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example usage\n", + "Here are some helpful examples to get started using the Pro version of Titan Takeoff Server.\n", + "No parameters are needed by default, but a `base_url` pointing to where your Takeoff server is running can be specified, and generation parameters can be supplied." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.llms import TitanTakeoffPro\n", + "from langchain.prompts import PromptTemplate\n", + "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", + "from langchain.callbacks.manager import CallbackManager\n", + "\n", + "# Example 1: Basic use\n", + "llm = TitanTakeoffPro()\n", + "output = llm(\"What is the weather in London in August?\")\n", + "print(output)\n", + "\n", + "\n", + "# Example 2: Specifying a port and other generation parameters\n", + "llm = TitanTakeoffPro(\n", + " base_url=\"http://localhost:3000\",\n", + " min_new_tokens=128,\n", + " max_new_tokens=512,\n", + " no_repeat_ngram_size=2,\n", + " sampling_topk= 1,\n", + " sampling_topp= 1.0,\n", + " sampling_temperature= 1.0,\n", + " repetition_penalty= 1.0,\n", + " regex_string= \"\",\n", + ")\n", + "output = llm(\"What is the largest rainforest in the world?\")\n", + "print(output)\n", + "\n", + "\n", + "# Example 3: Using generate for multiple inputs\n", + "llm = TitanTakeoffPro()\n", + "rich_output = llm.generate([\"What is Deep Learning?\", \"What is Machine Learning?\"])\n", + "print(rich_output.generations)\n", + "\n", + "\n", + "# Example 4: Streaming output\n", + "llm = TitanTakeoffPro(streaming=True, callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))\n", + "prompt = \"What is the capital of France?\"\n", + "llm(prompt)\n", + "\n", + "# Example 5: Using LCEL\n", + "llm = TitanTakeoffPro()\n", + "prompt = PromptTemplate.from_template(\"Tell me about {topic}\")\n", + "chain = prompt | llm\n", + "chain.invoke({\"topic\": \"the universe\"})" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/libs/langchain/langchain/llms/__init__.py b/libs/langchain/langchain/llms/__init__.py index 88b4e79d6d..351a75f378 100644 --- a/libs/langchain/langchain/llms/__init__.py +++ b/libs/langchain/langchain/llms/__init__.py @@ -444,6 +444,12 @@ def _import_titan_takeoff() -> Any: return TitanTakeoff +def _import_titan_takeoff_pro() -> Any: + from langchain.llms.titan_takeoff_pro import TitanTakeoffPro + + return TitanTakeoffPro + + def _import_together() -> Any: from langchain.llms.together import Together @@ -639,6 +645,8 @@ def __getattr__(name: str) -> Any: return _import_textgen() elif name == "TitanTakeoff": return _import_titan_takeoff() + elif name == "TitanTakeoffPro": + return _import_titan_takeoff_pro() elif name == "Together": return _import_together() elif name == "Tongyi": @@ -735,6 +743,7 @@ __all__ = [ "SelfHostedPipeline", "StochasticAI", "TitanTakeoff", + "TitanTakeoffPro", "Tongyi", "VertexAI", "VertexAIModelGarden", @@ -813,6 +822,7 @@ def get_type_to_cls_dict() -> Dict[str, Callable[[], Type[BaseLLM]]]: "together": _import_together, "tongyi": _import_tongyi, "titan_takeoff": _import_titan_takeoff, + "titan_takeoff_pro": _import_titan_takeoff_pro, "vertexai": _import_vertex, "vertexai_model_garden": _import_vertex_model_garden, "openllm": _import_openllm, diff --git a/libs/langchain/langchain/llms/titan_takeoff_pro.py 
b/libs/langchain/langchain/llms/titan_takeoff_pro.py new file mode 100644 index 0000000000..0a8dc78f3e --- /dev/null +++ b/libs/langchain/langchain/llms/titan_takeoff_pro.py @@ -0,0 +1,215 @@ +from typing import Any, Iterator, List, Mapping, Optional + +import requests +from requests.exceptions import ConnectionError + +from langchain.callbacks.manager import CallbackManagerForLLMRun +from langchain.llms.base import LLM +from langchain.llms.utils import enforce_stop_tokens +from langchain.schema.output import GenerationChunk + + +class TitanTakeoffPro(LLM): + base_url: Optional[str] = "http://localhost:3000" + """Specifies the baseURL to use for the Titan Takeoff Pro API. + Default = http://localhost:3000. + """ + + max_new_tokens: Optional[int] = None + """Maximum tokens generated.""" + + min_new_tokens: Optional[int] = None + """Minimum tokens generated.""" + + sampling_topk: Optional[int] = None + """Sample predictions from the top K most probable candidates.""" + + sampling_topp: Optional[float] = None + """Sample from predictions whose cumulative probability exceeds this value. + """ + + sampling_temperature: Optional[float] = None + """Sample with randomness. Bigger temperatures are associated with + more randomness and 'creativity'. + """ + + repetition_penalty: Optional[float] = None + """Penalise the generation of tokens that have been generated before. + Set to > 1 to penalize. + """ + + regex_string: Optional[str] = None + """A regex string for constrained generation.""" + + no_repeat_ngram_size: Optional[int] = None + """Prevent repetitions of ngrams of this size. Default = 0 (turned off).""" + + streaming: bool = False + """Whether to stream the output. Default = False.""" + + @property + def _default_params(self) -> Mapping[str, Any]: + """Get the default parameters for calling Titan Takeoff Server (Pro).""" + return { + **( + {"regex_string": self.regex_string} + if self.regex_string is not None + else {} + ), + **( + {"sampling_temperature": self.sampling_temperature} + if self.sampling_temperature is not None + else {} + ), + **( + {"sampling_topp": self.sampling_topp} + if self.sampling_topp is not None + else {} + ), + **( + {"repetition_penalty": self.repetition_penalty} + if self.repetition_penalty is not None + else {} + ), + **( + {"max_new_tokens": self.max_new_tokens} + if self.max_new_tokens is not None + else {} + ), + **( + {"min_new_tokens": self.min_new_tokens} + if self.min_new_tokens is not None + else {} + ), + **( + {"sampling_topk": self.sampling_topk} + if self.sampling_topk is not None + else {} + ), + **( + {"no_repeat_ngram_size": self.no_repeat_ngram_size} + if self.no_repeat_ngram_size is not None + else {} + ), + } + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "titan_takeoff_pro" + + def _call( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> str: + """Call out to Titan Takeoff (Pro) generate endpoint. + + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + + Returns: + The string generated by the model. + + Example: + .. code-block:: python + + prompt = "What is the capital of the United Kingdom?" 
+ response = model(prompt) + + """ + try: + if self.streaming: + text_output = "" + for chunk in self._stream( + prompt=prompt, + stop=stop, + run_manager=run_manager, + ): + text_output += chunk.text + return text_output + url = f"{self.base_url}/generate" + params = {"text": prompt, **self._default_params} + + response = requests.post(url, json=params) + response.raise_for_status() + response.encoding = "utf-8" + + text = "" + if "text" in response.json(): + text = response.json()["text"] + text = text.replace("</s>", "") + else: + raise ValueError("Something went wrong.") + if stop is not None: + text = enforce_stop_tokens(text, stop) + return text + except ConnectionError: + raise ConnectionError( + "Could not connect to Titan Takeoff (Pro) server. \ + Please make sure that the server is running." + ) + + def _stream( + self, + prompt: str, + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> Iterator[GenerationChunk]: + """Call out to Titan Takeoff (Pro) stream endpoint. + + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + + Returns: + The string generated by the model. + + Yields: + A GenerationChunk containing a string token. + + Example: + .. code-block:: python + + prompt = "What is the capital of the United Kingdom?" + response = model(prompt) + + """ + url = f"{self.base_url}/generate_stream" + params = {"text": prompt, **self._default_params} + + response = requests.post(url, json=params, stream=True) + response.encoding = "utf-8" + buffer = "" + for text in response.iter_content(chunk_size=1, decode_unicode=True): + buffer += text + if "data:" in buffer: + # Remove the first instance of "data:" from the buffer. + if buffer.startswith("data:"): + buffer = "" + if len(buffer.split("data:", 1)) == 2: + content, _ = buffer.split("data:", 1) + buffer = content.rstrip("\n") + # Trim the buffer to only have content after the "data:" part. + if buffer: # Ensure that there's content to process. + chunk = GenerationChunk(text=buffer) + buffer = "" # Reset buffer for the next set of data. + yield chunk + if run_manager: + run_manager.on_llm_new_token(token=chunk.text) + + # Yield any remaining content in the buffer. + if buffer: + chunk = GenerationChunk(text=buffer.replace("</s>", "")) + yield chunk + if run_manager: + run_manager.on_llm_new_token(token=chunk.text) + + @property + def _identifying_params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + return {"base_url": self.base_url, **{}, **self._default_params} diff --git a/libs/langchain/tests/integration_tests/llms/test_titan_takeoff_pro.py b/libs/langchain/tests/integration_tests/llms/test_titan_takeoff_pro.py new file mode 100644 index 0000000000..333d22b80f --- /dev/null +++ b/libs/langchain/tests/integration_tests/llms/test_titan_takeoff_pro.py @@ -0,0 +1,18 @@ +"""Test Titan Takeoff Pro wrapper.""" + + +import responses + +from langchain.llms.titan_takeoff_pro import TitanTakeoffPro + + +@responses.activate +def test_titan_takeoff_pro_call() -> None: + """Test valid call to Titan Takeoff Pro.""" + url = "http://localhost:3000/generate" + responses.add(responses.POST, url, json={"message": "2 + 2 is 4"}, status=200) + + # response = requests.post(url) + llm = TitanTakeoffPro() + output = llm("What is 2 + 2?") + assert isinstance(output, str)
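A minimal sketch, not part of the diff above, of how the streaming path could be covered with the same `responses` mocking approach used in the integration test. The payload format assumed here (tokens separated by "data:" markers) is inferred only from the buffer-splitting logic in `_stream`, not from the Takeoff Pro API documentation, and the test name and assertions are illustrative rather than definitive.

"""Hypothetical sketch: exercise TitanTakeoffPro streaming against a mocked endpoint.

The "data:"-separated body below is an assumption based on how _stream splits its
buffer; it is not taken from the Takeoff Pro API docs.
"""
import responses

from langchain.llms.titan_takeoff_pro import TitanTakeoffPro


@responses.activate
def test_titan_takeoff_pro_stream_sketch() -> None:
    """Mock /generate_stream and check that streamed chunks are joined into a string."""
    url = "http://localhost:3000/generate_stream"
    # Assumed server payload: three tokens, each introduced by a "data:" marker.
    responses.add(responses.POST, url, body="data:Paris data:is data:nice", status=200)

    llm = TitanTakeoffPro(streaming=True)
    output = llm("What is the capital of France?")

    # With streaming=True, _call concatenates the GenerationChunk texts yielded by _stream.
    assert isinstance(output, str)
    assert "Paris" in output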