diff --git a/docs/extras/integrations/llms/deepsparse.ipynb b/docs/extras/integrations/llms/deepsparse.ipynb
new file mode 100644
index 0000000000..05ac4be716
--- /dev/null
+++ b/docs/extras/integrations/llms/deepsparse.ipynb
@@ -0,0 +1,78 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "15d7ce70-8879-42a0-86d9-a3d604a3ec83",
+   "metadata": {},
+   "source": [
+    "# DeepSparse\n",
+    "\n",
+    "This page covers how to use the [DeepSparse](https://github.com/neuralmagic/deepsparse) inference runtime within LangChain.\n",
+    "It is broken into two parts: installation and setup, followed by examples of DeepSparse usage.\n",
+    "\n",
+    "## Installation and Setup\n",
+    "\n",
+    "- Install the Python package with `pip install deepsparse`\n",
+    "- Choose a [SparseZoo model](https://sparsezoo.neuralmagic.com/?useCase=text_generation) or export a supported model to ONNX [using Optimum](https://github.com/neuralmagic/notebooks/blob/main/notebooks/opt-text-generation-deepsparse-quickstart/OPT_Text_Generation_DeepSparse_Quickstart.ipynb)\n",
+    "\n",
+    "\n",
+    "There exists a DeepSparse LLM wrapper, which provides a unified interface for all models:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "79d24d37-737a-428c-b6c5-84c1633070d7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.llms import DeepSparse\n",
+    "\n",
+    "llm = DeepSparse(model='zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base-none')\n",
+    "\n",
+    "print(llm('def fib():'))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ea7ea674-d6b0-49d9-9c2b-014032973be6",
+   "metadata": {},
+   "source": [
+    "Additional parameters can be passed using the `config` parameter:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ff61b845-41e6-4457-8625-6e21a11bfe7c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "config = {'max_generated_tokens': 256}\n",
+    "\n",
+    "llm = DeepSparse(model='zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base-none', config=config)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/extras/integrations/providers/deepsparse.mdx b/docs/extras/integrations/providers/deepsparse.mdx
new file mode 100644
index 0000000000..1682f757fe
--- /dev/null
+++ b/docs/extras/integrations/providers/deepsparse.mdx
@@ -0,0 +1,50 @@
+# DeepSparse
+
+This page covers how to use the [DeepSparse](https://github.com/neuralmagic/deepsparse) inference runtime within LangChain.
+It is broken into two parts: installation and setup, followed by examples of DeepSparse usage.
+
+## Installation and Setup
+
+- Install the Python package with `pip install deepsparse`
+- Choose a [SparseZoo model](https://sparsezoo.neuralmagic.com/?useCase=text_generation) or export a supported model to ONNX [using Optimum](https://github.com/neuralmagic/notebooks/blob/main/notebooks/opt-text-generation-deepsparse-quickstart/OPT_Text_Generation_DeepSparse_Quickstart.ipynb)
+
+## Wrappers
+
+### LLM
+
+There exists a DeepSparse LLM wrapper, which you can access with:
+
+```python
+from langchain.llms import DeepSparse
+```
+
+It provides a unified interface for all models:
+
+```python
+llm = DeepSparse(model='zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base-none')
+
+print(llm('def fib():'))
+```
+
+Additional parameters can be passed using the `config` parameter:
+
+```python
+config = {'max_generated_tokens': 256}
+
+llm = DeepSparse(model='zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base-none', config=config)
+```
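+
+The wrapper is a standard LangChain `LLM`, so it composes with prompts and chains. Below is a minimal sketch (an illustration, not part of the DeepSparse integration itself) that reuses the `llm` object above inside an `LLMChain`:
+
+```python
+from langchain.chains import LLMChain
+from langchain.prompts import PromptTemplate
+
+prompt = PromptTemplate(
+    template="Write a Python function that {task}.",
+    input_variables=["task"],
+)
+
+chain = LLMChain(prompt=prompt, llm=llm)
+print(chain.run("computes the first n Fibonacci numbers"))
+```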
diff --git a/libs/langchain/langchain/llms/__init__.py b/libs/langchain/langchain/llms/__init__.py
index 256ea95858..5eae603caf 100644
--- a/libs/langchain/langchain/llms/__init__.py
+++ b/libs/langchain/langchain/llms/__init__.py
@@ -38,6 +38,7 @@ from langchain.llms.cohere import Cohere
 from langchain.llms.ctransformers import CTransformers
 from langchain.llms.databricks import Databricks
 from langchain.llms.deepinfra import DeepInfra
+from langchain.llms.deepsparse import DeepSparse
 from langchain.llms.edenai import EdenAI
 from langchain.llms.fake import FakeListLLM
 from langchain.llms.fireworks import Fireworks, FireworksChat
@@ -103,6 +104,7 @@ __all__ = [
     "Cohere",
     "Databricks",
     "DeepInfra",
+    "DeepSparse",
     "EdenAI",
     "FakeListLLM",
     "Fireworks",
@@ -172,6 +174,7 @@ type_to_cls_dict: Dict[str, Type[BaseLLM]] = {
     "ctransformers": CTransformers,
     "databricks": Databricks,
     "deepinfra": DeepInfra,
+    "deepsparse": DeepSparse,
     "edenai": EdenAI,
     "fake-list": FakeListLLM,
     "forefrontai": ForefrontAI,
diff --git a/libs/langchain/langchain/llms/deepsparse.py b/libs/langchain/langchain/llms/deepsparse.py
new file mode 100644
index 0000000000..e53a701f6c
--- /dev/null
+++ b/libs/langchain/langchain/llms/deepsparse.py
@@ -0,0 +1,87 @@
+# flake8: noqa
+from typing import Any, Dict, Optional, List
+
+from pydantic import root_validator
+
+from langchain.callbacks.manager import CallbackManagerForLLMRun
+from langchain.llms.base import LLM
+from langchain.llms.utils import enforce_stop_tokens
+
+
+class DeepSparse(LLM):
+    """Neural Magic DeepSparse LLM interface.
+
+    To use, you should have the ``deepsparse`` or ``deepsparse-nightly``
+    Python package installed. See https://github.com/neuralmagic/deepsparse
+
+    This interface lets you deploy optimized LLMs straight from the
+    [SparseZoo](https://sparsezoo.neuralmagic.com/?useCase=text_generation)
+    Example:
+        .. code-block:: python
+            from langchain.llms import DeepSparse
+            llm = DeepSparse(model="zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base-none")
+    """  # noqa: E501
+
+    pipeline: Any  #: :meta private:
+
+    model: str
+    """The path to a model file or directory or the name of a SparseZoo model stub."""
+
+    config: Optional[Dict[str, Any]] = None
+    """Keyword arguments passed to the pipeline."""
+
+    @property
+    def _identifying_params(self) -> Dict[str, Any]:
+        """Get the identifying parameters."""
+        return {
+            "model": self.model,
+            "config": self.config,
+        }
+
+    @property
+    def _llm_type(self) -> str:
+        """Return type of llm."""
+        return "deepsparse"
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that the ``deepsparse`` package is installed."""
+        try:
+            from deepsparse import Pipeline
+        except ImportError:
+            raise ImportError(
+                "Could not import `deepsparse` package. "
+                "Please install it with `pip install deepsparse`"
+            )
+
+        config = values["config"] or {}
+
+        values["pipeline"] = Pipeline.create(
+            task="text_generation",
+            model_path=values["model"],
+            **config,
+        )
+        return values
+
+    def _call(
+        self,
+        prompt: str,
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> str:
+        """Generate text from a prompt.
+        Args:
+            prompt: The prompt to generate text from.
+            stop: A list of strings to stop generation when encountered.
+        Returns:
+            The generated text.
+        Example:
+            .. code-block:: python
+                response = llm("Tell me a joke.")
+        """
+        # The pipeline returns a list of generated sequences; take the first.
+        text = self.pipeline(sequences=prompt).sequences[0]
+
+        if stop is not None:
+            text = enforce_stop_tokens(text, stop)
+        return text
diff --git a/libs/langchain/tests/integration_tests/llms/test_deepsparse.py b/libs/langchain/tests/integration_tests/llms/test_deepsparse.py
new file mode 100644
index 0000000000..3b6480cef8
--- /dev/null
+++ b/libs/langchain/tests/integration_tests/llms/test_deepsparse.py
@@ -0,0 +1,34 @@
+"""Test DeepSparse wrapper."""
+from langchain.llms import DeepSparse
+
+
+def test_deepsparse_call() -> None:
+    """Test valid call to DeepSparse."""
+    config = {"max_generated_tokens": 5, "use_deepsparse_cache": False}
+
+    llm = DeepSparse(
+        model="zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base-none",
+        config=config,
+    )
+
+    output = llm("def ")
+    assert isinstance(output, str)
+    assert len(output) > 1
+    assert output == "ids_to_names"
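+
+
+# A hypothetical additional check (an editorial sketch, not part of the
+# original test file): it assumes the same SparseZoo stub and exercises the
+# `stop` argument, which `_call` applies via `enforce_stop_tokens`.
+def test_deepsparse_call_with_stop() -> None:
+    """Test that generation is truncated at a stop sequence."""
+    config = {"max_generated_tokens": 32, "use_deepsparse_cache": False}
+
+    llm = DeepSparse(
+        model="zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base-none",
+        config=config,
+    )
+
+    output = llm("def fib():", stop=["\n"])
+    assert isinstance(output, str)
+    assert "\n" not in output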