diff --git a/docs/modules/models/llms/integrations/mosaicml.ipynb b/docs/modules/models/llms/integrations/mosaicml.ipynb new file mode 100644 index 00000000..50bdfb91 --- /dev/null +++ b/docs/modules/models/llms/integrations/mosaicml.ipynb @@ -0,0 +1,105 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MosaicML\n", + "\n", + "[MosaicML](https://docs.mosaicml.com/en/latest/inference.html) offers a managed inference service. You can either use a variety of open source models, or deploy your own.\n", + "\n", + "This example goes over how to use LangChain to interact with MosaicML Inference for text completion." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# sign up for an account: https://forms.mosaicml.com/demo?utm_source=langchain\n", + "\n", + "from getpass import getpass\n", + "\n", + "MOSAICML_API_TOKEN = getpass()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"MOSAICML_API_TOKEN\"] = MOSAICML_API_TOKEN" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.llms import MosaicML\n", + "from langchain import PromptTemplate, LLMChain" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "template = \"\"\"Question: {question}\"\"\"\n", + "\n", + "prompt = PromptTemplate(template=template, input_variables=[\"question\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm = MosaicML(inject_instruction_format=True, model_kwargs={'do_sample': False})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "llm_chain = LLMChain(prompt=prompt, llm=llm)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "question = \"What is one good reason why you should train a large language model on domain specific data?\"\n", + "\n", + "llm_chain.run(question)" + ] + } + ], + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/modules/models/text_embedding/examples/mosaicml.ipynb b/docs/modules/models/text_embedding/examples/mosaicml.ipynb new file mode 100644 index 00000000..1bbf5cff --- /dev/null +++ b/docs/modules/models/text_embedding/examples/mosaicml.ipynb @@ -0,0 +1,109 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MosaicML embeddings\n", + "\n", + "[MosaicML](https://docs.mosaicml.com/en/latest/inference.html) offers a managed inference service. You can either use a variety of open source models, or deploy your own.\n", + "\n", + "This example goes over how to use LangChain to interact with MosaicML Inference for text embedding." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# sign up for an account: https://forms.mosaicml.com/demo?utm_source=langchain\n", + "\n", + "from getpass import getpass\n", + "\n", + "MOSAICML_API_TOKEN = getpass()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"MOSAICML_API_TOKEN\"] = MOSAICML_API_TOKEN" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.embeddings import MosaicMLInstructorEmbeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "embeddings = MosaicMLInstructorEmbeddings(\n", + " query_instruction=\"Represent the query for retrieval: \"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "query_text = \"This is a test query.\"\n", + "query_result = embeddings.embed_query(query_text)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "document_text = \"This is a test document.\"\n", + "document_result = embeddings.embed_documents([document_text])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "query_numpy = np.array(query_result)\n", + "document_numpy = np.array(document_result[0])\n", + "similarity = np.dot(query_numpy, document_numpy) / (np.linalg.norm(query_numpy)*np.linalg.norm(document_numpy))\n", + "print(f\"Cosine similarity between document and query: {similarity}\")" + ] + } + ], + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/langchain/embeddings/__init__.py b/langchain/embeddings/__init__.py index 5ba96520..5c6ae21f 100644 --- a/langchain/embeddings/__init__.py +++ b/langchain/embeddings/__init__.py @@ -17,6 +17,7 @@ from langchain.embeddings.huggingface import ( from langchain.embeddings.huggingface_hub import HuggingFaceHubEmbeddings from langchain.embeddings.jina import JinaEmbeddings from langchain.embeddings.llamacpp import LlamaCppEmbeddings +from langchain.embeddings.mosaicml import MosaicMLInstructorEmbeddings from langchain.embeddings.openai import OpenAIEmbeddings from langchain.embeddings.sagemaker_endpoint import SagemakerEndpointEmbeddings from langchain.embeddings.self_hosted import SelfHostedEmbeddings @@ -40,6 +41,7 @@ __all__ = [ "TensorflowHubEmbeddings", "SagemakerEndpointEmbeddings", "HuggingFaceInstructEmbeddings", + "MosaicMLInstructorEmbeddings", "SelfHostedEmbeddings", "SelfHostedHuggingFaceEmbeddings", "SelfHostedHuggingFaceInstructEmbeddings", diff --git a/langchain/embeddings/mosaicml.py b/langchain/embeddings/mosaicml.py new file mode 100644 index 00000000..8c01bfaa --- /dev/null +++ b/langchain/embeddings/mosaicml.py @@ -0,0 +1,137 @@ +"""Wrapper around MosaicML APIs.""" +from typing import Any, Dict, List, Mapping, Optional, Tuple + +import requests +from pydantic import BaseModel, Extra, root_validator + +from langchain.embeddings.base import Embeddings +from langchain.utils import get_from_dict_or_env + + +class 
MosaicMLInstructorEmbeddings(BaseModel, Embeddings):
+    """Wrapper around MosaicML's embedding inference service.
+
+    To use, you should have the
+    environment variable ``MOSAICML_API_TOKEN`` set with your API token, or pass
+    it as a named parameter to the constructor.
+
+    Example:
+        .. code-block:: python
+
+            from langchain.embeddings import MosaicMLInstructorEmbeddings
+            endpoint_url = (
+                "https://models.hosted-on.mosaicml.hosting/instructor-large/v1/predict"
+            )
+            mosaic_embeddings = MosaicMLInstructorEmbeddings(
+                endpoint_url=endpoint_url,
+                mosaicml_api_token="my-api-key"
+            )
+    """
+
+    endpoint_url: str = (
+        "https://models.hosted-on.mosaicml.hosting/instructor-large/v1/predict"
+    )
+    """Endpoint URL to use."""
+    embed_instruction: str = "Represent the document for retrieval: "
+    """Instruction used to embed documents."""
+    query_instruction: str = (
+        "Represent the question for retrieving supporting documents: "
+    )
+    """Instruction used to embed the query."""
+    retry_sleep: float = 1.0
+    """How long to sleep if a rate limit is encountered."""
+
+    mosaicml_api_token: Optional[str] = None
+
+    class Config:
+        """Configuration for this pydantic object."""
+
+        extra = Extra.forbid
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that the API token exists in the environment."""
+        mosaicml_api_token = get_from_dict_or_env(
+            values, "mosaicml_api_token", "MOSAICML_API_TOKEN"
+        )
+        values["mosaicml_api_token"] = mosaicml_api_token
+        return values
+
+    @property
+    def _identifying_params(self) -> Mapping[str, Any]:
+        """Get the identifying parameters."""
+        return {"endpoint_url": self.endpoint_url}
+
+    def _embed(
+        self, input: List[Tuple[str, str]], is_retry: bool = False
+    ) -> List[List[float]]:
+        payload = {"input_strings": input}
+
+        # HTTP headers for authorization
+        headers = {
+            "Authorization": f"{self.mosaicml_api_token}",
+            "Content-Type": "application/json",
+        }
+
+        # send request
+        try:
+            response = requests.post(self.endpoint_url, headers=headers, json=payload)
+        except requests.exceptions.RequestException as e:
+            raise ValueError(f"Error raised by inference endpoint: {e}")
+
+        try:
+            parsed_response = response.json()
+
+            if "error" in parsed_response:
+                # if we get rate limited, try sleeping for 1 second
+                if (
+                    not is_retry
+                    and "rate limit exceeded" in parsed_response["error"].lower()
+                ):
+                    import time
+
+                    time.sleep(self.retry_sleep)
+
+                    return self._embed(input, is_retry=True)
+
+                raise ValueError(
+                    f"Error raised by inference API: {parsed_response['error']}"
+                )
+
+            if "data" not in parsed_response:
+                raise ValueError(
+                    f"Error raised by inference API, no key data: {parsed_response}"
+                )
+            embeddings = parsed_response["data"]
+        except requests.exceptions.JSONDecodeError as e:
+            raise ValueError(
+                f"Error raised by inference API: {e}.\nResponse: {response.text}"
+            )
+
+        return embeddings
+
+    def embed_documents(self, texts: List[str]) -> List[List[float]]:
+        """Embed documents using a MosaicML deployed instructor embedding model.
+
+        Args:
+            texts: The list of texts to embed.
+
+        Returns:
+            List of embeddings, one for each text.
+        """
+        instruction_pairs = [(self.embed_instruction, text) for text in texts]
+        embeddings = self._embed(instruction_pairs)
+        return embeddings
+
+    def embed_query(self, text: str) -> List[float]:
+        """Embed a query using a MosaicML deployed instructor embedding model.
+
+        Args:
+            text: The text to embed.
+
+        Returns:
+            Embedding for the text.
+ """ + instruction_pair = (self.query_instruction, text) + embedding = self._embed([instruction_pair])[0] + return embedding diff --git a/langchain/llms/__init__.py b/langchain/llms/__init__.py index d5fd54da..4eb92e4a 100644 --- a/langchain/llms/__init__.py +++ b/langchain/llms/__init__.py @@ -22,6 +22,7 @@ from langchain.llms.huggingface_text_gen_inference import HuggingFaceTextGenInfe from langchain.llms.human import HumanInputLLM from langchain.llms.llamacpp import LlamaCpp from langchain.llms.modal import Modal +from langchain.llms.mosaicml import MosaicML from langchain.llms.nlpcloud import NLPCloud from langchain.llms.openai import AzureOpenAI, OpenAI, OpenAIChat from langchain.llms.openlm import OpenLM @@ -51,6 +52,7 @@ __all__ = [ "GPT4All", "LlamaCpp", "Modal", + "MosaicML", "NLPCloud", "OpenAI", "OpenAIChat", @@ -94,6 +96,7 @@ type_to_cls_dict: Dict[str, Type[BaseLLM]] = { "huggingface_endpoint": HuggingFaceEndpoint, "llamacpp": LlamaCpp, "modal": Modal, + "mosaic": MosaicML, "sagemaker_endpoint": SagemakerEndpoint, "nlpcloud": NLPCloud, "human-input": HumanInputLLM, diff --git a/langchain/llms/mosaicml.py b/langchain/llms/mosaicml.py new file mode 100644 index 00000000..0a8b8561 --- /dev/null +++ b/langchain/llms/mosaicml.py @@ -0,0 +1,173 @@ +"""Wrapper around MosaicML APIs.""" +from typing import Any, Dict, List, Mapping, Optional + +import requests +from pydantic import Extra, root_validator + +from langchain.callbacks.manager import CallbackManagerForLLMRun +from langchain.llms.base import LLM +from langchain.llms.utils import enforce_stop_tokens +from langchain.utils import get_from_dict_or_env + +INSTRUCTION_KEY = "### Instruction:" +RESPONSE_KEY = "### Response:" +INTRO_BLURB = ( + "Below is an instruction that describes a task. " + "Write a response that appropriately completes the request." +) +PROMPT_FOR_GENERATION_FORMAT = """{intro} +{instruction_key} +{instruction} +{response_key} +""".format( + intro=INTRO_BLURB, + instruction_key=INSTRUCTION_KEY, + instruction="{instruction}", + response_key=RESPONSE_KEY, +) + + +class MosaicML(LLM): + """Wrapper around MosaicML's LLM inference service. + + To use, you should have the + environment variable ``MOSAICML_API_TOKEN`` set with your API token, or pass + it as a named parameter to the constructor. + + Example: + .. 
code-block:: python
+
+            from langchain.llms import MosaicML
+            endpoint_url = (
+                "https://models.hosted-on.mosaicml.hosting/mpt-7b-instruct/v1/predict"
+            )
+            mosaic_llm = MosaicML(
+                endpoint_url=endpoint_url,
+                mosaicml_api_token="my-api-key"
+            )
+    """
+
+    endpoint_url: str = (
+        "https://models.hosted-on.mosaicml.hosting/mpt-7b-instruct/v1/predict"
+    )
+    """Endpoint URL to use."""
+    inject_instruction_format: bool = False
+    """Whether to inject the instruction format into the prompt."""
+    model_kwargs: Optional[dict] = None
+    """Keyword arguments to pass to the model."""
+    retry_sleep: float = 1.0
+    """How long to sleep if a rate limit is encountered."""
+
+    mosaicml_api_token: Optional[str] = None
+
+    class Config:
+        """Configuration for this pydantic object."""
+
+        extra = Extra.forbid
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that the API token exists in the environment."""
+        mosaicml_api_token = get_from_dict_or_env(
+            values, "mosaicml_api_token", "MOSAICML_API_TOKEN"
+        )
+        values["mosaicml_api_token"] = mosaicml_api_token
+        return values
+
+    @property
+    def _identifying_params(self) -> Mapping[str, Any]:
+        """Get the identifying parameters."""
+        _model_kwargs = self.model_kwargs or {}
+        return {
+            **{"endpoint_url": self.endpoint_url},
+            **{"model_kwargs": _model_kwargs},
+        }
+
+    @property
+    def _llm_type(self) -> str:
+        """Return type of llm."""
+        return "mosaicml"
+
+    def _transform_prompt(self, prompt: str) -> str:
+        """Transform prompt."""
+        if self.inject_instruction_format:
+            prompt = PROMPT_FOR_GENERATION_FORMAT.format(
+                instruction=prompt,
+            )
+        return prompt
+
+    def _call(
+        self,
+        prompt: str,
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        is_retry: bool = False,
+    ) -> str:
+        """Call out to a MosaicML LLM inference endpoint.
+
+        Args:
+            prompt: The prompt to pass into the model.
+            stop: Optional list of stop words to use when generating.
+
+        Returns:
+            The string generated by the model.
+
+        Example:
+            ..
code-block:: python + + response = mosaic_llm("Tell me a joke.") + """ + _model_kwargs = self.model_kwargs or {} + + prompt = self._transform_prompt(prompt) + + payload = {"input_strings": [prompt]} + payload.update(_model_kwargs) + + # HTTP headers for authorization + headers = { + "Authorization": f"{self.mosaicml_api_token}", + "Content-Type": "application/json", + } + + # send request + try: + response = requests.post(self.endpoint_url, headers=headers, json=payload) + except requests.exceptions.RequestException as e: + raise ValueError(f"Error raised by inference endpoint: {e}") + + try: + parsed_response = response.json() + + if "error" in parsed_response: + # if we get rate limited, try sleeping for 1 second + if ( + not is_retry + and "rate limit exceeded" in parsed_response["error"].lower() + ): + import time + + time.sleep(self.retry_sleep) + + return self._call(prompt, stop, run_manager, is_retry=True) + + raise ValueError( + f"Error raised by inference API: {parsed_response['error']}" + ) + + if "data" not in parsed_response: + raise ValueError( + f"Error raised by inference API, no key data: {parsed_response}" + ) + generated_text = parsed_response["data"] + except requests.exceptions.JSONDecodeError as e: + raise ValueError( + f"Error raised by inference API: {e}.\nResponse: {response.text}" + ) + + text = generated_text[0][len(prompt) :] + + # TODO: replace when MosaicML supports custom stop tokens natively + if stop is not None: + text = enforce_stop_tokens(text, stop) + return text diff --git a/tests/integration_tests/embeddings/test_mosaicml.py b/tests/integration_tests/embeddings/test_mosaicml.py new file mode 100644 index 00000000..a04c6f2c --- /dev/null +++ b/tests/integration_tests/embeddings/test_mosaicml.py @@ -0,0 +1,58 @@ +"""Test mosaicml embeddings.""" +from langchain.embeddings.mosaicml import MosaicMLInstructorEmbeddings + + +def test_mosaicml_embedding_documents() -> None: + """Test MosaicML embeddings.""" + documents = ["foo bar"] + embedding = MosaicMLInstructorEmbeddings() + output = embedding.embed_documents(documents) + assert len(output) == 1 + assert len(output[0]) == 768 + + +def test_mosaicml_embedding_documents_multiple() -> None: + """Test MosaicML embeddings with multiple documents.""" + documents = ["foo bar", "bar foo", "foo"] + embedding = MosaicMLInstructorEmbeddings() + output = embedding.embed_documents(documents) + assert len(output) == 3 + assert len(output[0]) == 768 + assert len(output[1]) == 768 + assert len(output[2]) == 768 + + +def test_mosaicml_embedding_query() -> None: + """Test MosaicML embeddings of queries.""" + document = "foo bar" + embedding = MosaicMLInstructorEmbeddings() + output = embedding.embed_query(document) + assert len(output) == 768 + + +def test_mosaicml_embedding_endpoint() -> None: + """Test MosaicML embeddings with a different endpoint""" + documents = ["foo bar"] + embedding = MosaicMLInstructorEmbeddings( + endpoint_url="https://models.hosted-on.mosaicml.hosting/instructor-xl/v1/predict" + ) + output = embedding.embed_documents(documents) + assert len(output) == 1 + assert len(output[0]) == 768 + + +def test_mosaicml_embedding_query_instruction() -> None: + """Test MosaicML embeddings with a different query instruction.""" + document = "foo bar" + embedding = MosaicMLInstructorEmbeddings(query_instruction="Embed this query:") + output = embedding.embed_query(document) + assert len(output) == 768 + + +def test_mosaicml_embedding_document_instruction() -> None: + """Test MosaicML embeddings with a different 
document instruction."""
+    documents = ["foo bar"]
+    embedding = MosaicMLInstructorEmbeddings(embed_instruction="Embed this document:")
+    output = embedding.embed_documents(documents)
+    assert len(output) == 1
+    assert len(output[0]) == 768
diff --git a/tests/integration_tests/llms/test_mosaicml.py b/tests/integration_tests/llms/test_mosaicml.py
new file mode 100644
index 00000000..2b532ab6
--- /dev/null
+++ b/tests/integration_tests/llms/test_mosaicml.py
@@ -0,0 +1,78 @@
+"""Test MosaicML API wrapper."""
+import pytest
+
+from langchain.llms.mosaicml import PROMPT_FOR_GENERATION_FORMAT, MosaicML
+
+
+def test_mosaicml_llm_call() -> None:
+    """Test valid call to MosaicML."""
+    llm = MosaicML(model_kwargs={})
+    output = llm("Say foo:")
+    assert isinstance(output, str)
+
+
+def test_mosaicml_endpoint_change() -> None:
+    """Test that a different endpoint URL can be used."""
+    new_url = "https://models.hosted-on.mosaicml.hosting/dolly-12b/v1/predict"
+    llm = MosaicML(endpoint_url=new_url)
+    assert llm.endpoint_url == new_url
+    output = llm("Say foo:")
+    assert isinstance(output, str)
+
+
+def test_mosaicml_extra_kwargs() -> None:
+    llm = MosaicML(model_kwargs={"max_new_tokens": 1})
+    assert llm.model_kwargs == {"max_new_tokens": 1}
+
+    output = llm("Say foo:")
+
+    assert isinstance(output, str)
+
+    # should only generate one new token (which might be a new line or whitespace token)
+    assert len(output.split()) <= 1
+
+
+def test_instruct_prompt() -> None:
+    """Test instruct prompt."""
+    llm = MosaicML(inject_instruction_format=True, model_kwargs={"do_sample": False})
+    instruction = "Repeat the word foo"
+    prompt = llm._transform_prompt(instruction)
+    expected_prompt = PROMPT_FOR_GENERATION_FORMAT.format(instruction=instruction)
+    assert prompt == expected_prompt
+    output = llm(prompt)
+    assert isinstance(output, str)
+
+
+def test_retry_logic() -> None:
+    """Test that two back-to-back queries (usually rate limited) both succeed."""
+    llm = MosaicML(inject_instruction_format=True, model_kwargs={"do_sample": False})
+    instruction = "Repeat the word foo"
+    prompt = llm._transform_prompt(instruction)
+    expected_prompt = PROMPT_FOR_GENERATION_FORMAT.format(instruction=instruction)
+    assert prompt == expected_prompt
+    output = llm(prompt)
+    assert isinstance(output, str)
+    output = llm(prompt)
+    assert isinstance(output, str)
+
+
+def test_short_retry_does_not_loop() -> None:
+    """Test that a short retry sleep raises instead of looping forever."""
+    llm = MosaicML(
+        inject_instruction_format=True,
+        model_kwargs={"do_sample": False},
+        retry_sleep=0.1,
+    )
+    instruction = "Repeat the word foo"
+    prompt = llm._transform_prompt(instruction)
+    expected_prompt = PROMPT_FOR_GENERATION_FORMAT.format(instruction=instruction)
+    assert prompt == expected_prompt
+
+    with pytest.raises(
+        ValueError,
+        match="Error raised by inference API: Rate limit exceeded: 1 per 1 second",
+    ):
+        output = llm(prompt)
+        assert isinstance(output, str)
+        output = llm(prompt)
+        assert isinstance(output, str)
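As a quick end-to-end check of the two integrations added above, the sketch below chains them together: it embeds a few documents with `MosaicMLInstructorEmbeddings`, picks the document closest to a query by cosine similarity (the same computation as the embeddings notebook), and asks the `MosaicML` LLM to answer with that document as context. This is a minimal sketch, not part of the diff itself; it assumes `MOSAICML_API_TOKEN` is set in the environment and the default `instructor-large` and `mpt-7b-instruct` endpoints are reachable, and the example documents and prompt wording are illustrative only.

```python
import numpy as np

from langchain.embeddings import MosaicMLInstructorEmbeddings
from langchain.llms import MosaicML

# Embed a small corpus and a query with the new embeddings wrapper.
embeddings = MosaicMLInstructorEmbeddings()  # assumes MOSAICML_API_TOKEN is set
docs = [
    "MosaicML offers a managed inference service.",
    "LangChain provides prompt templates and chains.",
]
doc_vectors = [np.array(v) for v in embeddings.embed_documents(docs)]
query = "Who offers a managed inference service?"
query_vector = np.array(embeddings.embed_query(query))


def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    # Same formula as the cosine-similarity cell in the embeddings notebook.
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))


# Keep the document most similar to the query.
scores = [cosine_similarity(query_vector, v) for v in doc_vectors]
best_doc = docs[int(np.argmax(scores))]

# Answer with the new LLM wrapper, using the instruction-format injection
# and greedy decoding shown in the LLM notebook.
llm = MosaicML(inject_instruction_format=True, model_kwargs={"do_sample": False})
answer = llm(
    f"Answer the question using the context.\nContext: {best_doc}\nQuestion: {query}"
)
print(answer)
```

If the endpoint rate limit is hit between the embedding and generation calls, both wrappers sleep for `retry_sleep` seconds and retry once before raising, as implemented in the diff.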