mirror of https://github.com/hwchase17/langchain
Adds DeepSparse as an LLM (#9184)
Adds [DeepSparse](https://github.com/neuralmagic/deepsparse) as an LLM backend. DeepSparse supports running various open-source sparsified models hosted on [SparseZoo](https://sparsezoo.neuralmagic.com/) for performance gains on CPUs. Twitter handles: @mgoin_ @neuralmagic --------- Co-authored-by: Bagatur <baskaryan@gmail.com>pull/9192/head
parent
0fa69d8988
commit
621da3c164
@ -0,0 +1,78 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "15d7ce70-8879-42a0-86d9-a3d604a3ec83",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# DeepSparse\n",
|
||||
"\n",
|
||||
"This page covers how to use the [DeepSparse](https://github.com/neuralmagic/deepsparse) inference runtime within LangChain.\n",
|
||||
"It is broken into two parts: installation and setup, and then examples of DeepSparse usage.\n",
|
||||
"\n",
|
||||
"## Installation and Setup\n",
|
||||
"\n",
|
||||
"- Install the Python package with `pip install deepsparse`\n",
|
||||
    "- Choose a [SparseZoo model](https://sparsezoo.neuralmagic.com/?useCase=text_generation) or export a supported model to ONNX [using Optimum](https://github.com/neuralmagic/notebooks/blob/main/notebooks/opt-text-generation-deepsparse-quickstart/OPT_Text_Generation_DeepSparse_Quickstart.ipynb)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"There exists a DeepSparse LLM wrapper, that provides a unified interface for all models:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "79d24d37-737a-428c-b6c5-84c1633070d7",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.llms import DeepSparse\n",
|
||||
"\n",
|
||||
"llm = DeepSparse(model='zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base-none')\n",
|
||||
"\n",
|
||||
"print(llm('def fib():'))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "ea7ea674-d6b0-49d9-9c2b-014032973be6",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Additional parameters can be passed using the `config` parameter:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ff61b845-41e6-4457-8625-6e21a11bfe7c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"config = {'max_generated_tokens': 256}\n",
|
||||
"\n",
|
||||
"llm = DeepSparse(model='zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base-none', config=config)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
@ -0,0 +1,35 @@
|
||||
# DeepSparse
|
||||
|
||||
This page covers how to use the [DeepSparse](https://github.com/neuralmagic/deepsparse) inference runtime within LangChain.
|
||||
It is broken into two parts: installation and setup, and then examples of DeepSparse usage.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
- Install the Python package with `pip install deepsparse`
|
||||
- Choose a [SparseZoo model](https://sparsezoo.neuralmagic.com/?useCase=text_generation) or export a supported model to ONNX [using Optimum](https://github.com/neuralmagic/notebooks/blob/main/notebooks/opt-text-generation-deepsparse-quickstart/OPT_Text_Generation_DeepSparse_Quickstart.ipynb)
|
||||
|
||||
## Wrappers
|
||||
|
||||
### LLM
|
||||
|
||||
There exists a DeepSparse LLM wrapper, which you can access with:
|
||||
|
||||
```python
|
||||
from langchain.llms import DeepSparse
|
||||
```
|
||||
|
||||
It provides a unified interface for all models:
|
||||
|
||||
```python
|
||||
llm = DeepSparse(model='zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base-none')
|
||||
|
||||
print(llm('def fib():'))
|
||||
```
|
||||
|
||||
Additional parameters can be passed using the `config` parameter:
|
||||
|
||||
```python
|
||||
config = {'max_generated_tokens': 256}
|
||||
|
||||
llm = DeepSparse(model='zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base-none', config=config)
|
||||
```
|
@ -0,0 +1,87 @@
|
||||
# flake8: noqa
|
||||
from typing import Any, Dict, Optional, List
|
||||
|
||||
from pydantic import root_validator
|
||||
|
||||
from langchain.callbacks.manager import CallbackManagerForLLMRun
|
||||
from langchain.llms.base import LLM
|
||||
from langchain.llms.utils import enforce_stop_tokens
|
||||
|
||||
|
||||
class DeepSparse(LLM):
    """Neural Magic DeepSparse LLM interface.

    To use, you should have the ``deepsparse`` or ``deepsparse-nightly``
    python package installed. See https://github.com/neuralmagic/deepsparse

    This interface lets you deploy optimized LLMs straight from the
    [SparseZoo](https://sparsezoo.neuralmagic.com/?useCase=text_generation)

    Example:
        .. code-block:: python

            from langchain.llms import DeepSparse
            llm = DeepSparse(model="zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base-none")
    """  # noqa: E501

    # Populated by ``validate_environment``; never set directly by callers.
    pipeline: Any  #: :meta private:

    model: str
    """The path to a model file or directory or the name of a SparseZoo model stub."""

    config: Optional[Dict[str, Any]] = None
    """Key word arguments passed to the pipeline."""

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        """Get the identifying parameters."""
        return {"model": self.model, "config": self.config}

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "deepsparse"

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that ``deepsparse`` package is installed."""
        # Import lazily so the class can be declared without deepsparse present.
        try:
            from deepsparse import Pipeline
        except ImportError:
            raise ImportError(
                "Could not import `deepsparse` package. "
                "Please install it with `pip install deepsparse`"
            )

        # ``config`` may be None; forward it as extra pipeline keyword args.
        pipeline_kwargs = values["config"] or {}
        values["pipeline"] = Pipeline.create(
            task="text_generation",
            model_path=values["model"],
            **pipeline_kwargs,
        )
        return values

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Generate text from a prompt.

        Args:
            prompt: The prompt to generate text from.
            stop: A list of strings to stop generation when encountered.

        Returns:
            The generated text.

        Example:
            .. code-block:: python

                response = llm("Tell me a joke.")
        """
        # The pipeline returns a batch of sequences; take the first (only) one.
        generated = self.pipeline(sequences=prompt).sequences[0]
        if stop is None:
            return "".join(generated)
        # Truncate the output at the first occurrence of any stop token.
        return "".join(enforce_stop_tokens(generated, stop))
|
@ -0,0 +1,17 @@
|
||||
"""Test DeepSparse wrapper."""
|
||||
from langchain.llms import DeepSparse
|
||||
|
||||
|
||||
def test_deepsparse_call() -> None:
    """Test valid call to DeepSparse."""
    # Keep generation short and deterministic for the assertion below.
    generation_config = {"max_generated_tokens": 5, "use_deepsparse_cache": False}
    llm = DeepSparse(
        model="zoo:nlg/text_generation/codegen_mono-350m/pytorch/huggingface/bigpython_bigquery_thepile/base-none",
        config=generation_config,
    )

    output = llm("def ")

    assert isinstance(output, str)
    assert len(output) > 1
    assert output == "ids_to_names"
|
Loading…
Reference in New Issue