From efbc03bda8dfe04a2ff00d4966d9abdc9f2a375c Mon Sep 17 00:00:00 2001
From: Samantha Whitmore
Date: Tue, 8 Nov 2022 06:24:23 -0800
Subject: [PATCH] NLPCloud client integration (#81)

lots of kwargs! generation docs here:
https://docs.nlpcloud.com/#generation

This somewhat breaks the paradigm introduced in LLM base class as the
stop sequence isn't a list, and should rightfully be introduced at the
time of initialization of the class, along with the other kwargs that
depend on its presence (e.g. remove_end_sequence, etc.) curious if you'd
want to refactor LLM base class to take out stop as a specific named
kwarg?
---
 langchain/llms/__init__.py                    |   3 +-
 langchain/llms/nlpcloud.py                    | 143 ++++++++++++++++++
 requirements.txt                              |   1 +
 tests/integration_tests/llms/test_nlpcloud.py |  10 ++
 4 files changed, 156 insertions(+), 1 deletion(-)
 create mode 100644 langchain/llms/nlpcloud.py
 create mode 100644 tests/integration_tests/llms/test_nlpcloud.py

diff --git a/langchain/llms/__init__.py b/langchain/llms/__init__.py
index 3d83e5e733..fab84d41d8 100644
--- a/langchain/llms/__init__.py
+++ b/langchain/llms/__init__.py
@@ -1,6 +1,7 @@
 """Wrappers on top of large language models APIs."""
 from langchain.llms.cohere import Cohere
 from langchain.llms.huggingface_hub import HuggingFaceHub
+from langchain.llms.nlpcloud import NLPCloud
 from langchain.llms.openai import OpenAI
 
-__all__ = ["Cohere", "OpenAI", "HuggingFaceHub"]
+__all__ = ["Cohere", "NLPCloud", "OpenAI", "HuggingFaceHub"]
diff --git a/langchain/llms/nlpcloud.py b/langchain/llms/nlpcloud.py
new file mode 100644
index 0000000000..9d28bb2407
--- /dev/null
+++ b/langchain/llms/nlpcloud.py
@@ -0,0 +1,143 @@
+"""Wrapper around NLPCloud APIs."""
+import os
+from typing import Any, Dict, List, Mapping, Optional
+
+from pydantic import BaseModel, Extra, root_validator
+
+from langchain.llms.base import LLM
+
+
+class NLPCloud(BaseModel, LLM):
+    """Wrapper around NLPCloud large language models.
+
+    To use, you should have the ``nlpcloud`` python package installed, and the
+    environment variable ``NLPCLOUD_API_KEY`` set with your API key.
+
+    Example:
+        .. code-block:: python
+
+            from langchain.llms import NLPCloud
+            nlpcloud = NLPCloud(model_name="gpt-neox-20b")
+    """
+
+    client: Any  #: :meta private:
+    model_name: str = "gpt-neox-20b"
+    """Model name to use."""
+    temperature: float = 0.7
+    """What sampling temperature to use."""
+    min_length: int = 1
+    """The minimum number of tokens to generate in the completion."""
+    max_length: int = 256
+    """The maximum number of tokens to generate in the completion."""
+    length_no_input: bool = True
+    """Whether min_length and max_length should include the length of the input."""
+    remove_input: bool = True
+    """Remove input text from API response"""
+    remove_end_sequence: bool = True
+    """Whether or not to remove the end sequence token."""
+    bad_words: List[str] = []
+    """List of tokens not allowed to be generated."""
+    top_p: float = 1.0
+    """Total probability mass of tokens to consider at each step."""
+    top_k: int = 50
+    """The number of highest probability tokens to keep for top-k filtering."""
+    repetition_penalty: float = 1.0
+    """Penalizes repeated tokens. 1.0 means no penalty."""
+    length_penalty: float = 1.0
+    """Exponential penalty to the length."""
+    do_sample: bool = True
+    """Whether to use sampling (True) or greedy decoding."""
+    num_beams: int = 1
+    """Number of beams for beam search."""
+    early_stopping: bool = False
+    """Whether to stop beam search at num_beams sentences."""
+    num_return_sequences: int = 1
+    """How many completions to generate for each prompt."""
+
+    nlpcloud_api_key: Optional[str] = None
+    """API key; falls back to the ``NLPCLOUD_API_KEY`` environment variable."""
+
+    class Config:
+        """Configuration for this pydantic object."""
+
+        extra = Extra.forbid
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that api key and python package exists in environment."""
+        # Read the environment at validation time rather than at class
+        # definition, so a key exported after import is still picked up.
+        nlpcloud_api_key = values.get("nlpcloud_api_key") or os.environ.get(
+            "NLPCLOUD_API_KEY"
+        )
+        if not nlpcloud_api_key:
+            raise ValueError(
+                "Did not find NLPCloud API key, please add an environment variable"
+                " `NLPCLOUD_API_KEY` which contains it, or pass `nlpcloud_api_key`"
+                " as a named parameter."
+            )
+        values["nlpcloud_api_key"] = nlpcloud_api_key
+        # Keep the try body to the import alone so that only a missing
+        # package produces the "pip install" hint.
+        try:
+            import nlpcloud
+        except ImportError as err:
+            raise ValueError(
+                "Could not import nlpcloud python package. "
+                "Please install it with `pip install nlpcloud`."
+            ) from err
+        values["client"] = nlpcloud.Client(
+            values["model_name"], nlpcloud_api_key, gpu=True, lang="en"
+        )
+        return values
+
+    @property
+    def _default_params(self) -> Mapping[str, Any]:
+        """Get the default parameters for calling NLPCloud API."""
+        return {
+            "temperature": self.temperature,
+            "min_length": self.min_length,
+            "max_length": self.max_length,
+            "length_no_input": self.length_no_input,
+            "remove_input": self.remove_input,
+            "remove_end_sequence": self.remove_end_sequence,
+            "bad_words": self.bad_words,
+            "top_p": self.top_p,
+            "top_k": self.top_k,
+            "repetition_penalty": self.repetition_penalty,
+            "length_penalty": self.length_penalty,
+            "do_sample": self.do_sample,
+            "num_beams": self.num_beams,
+            "early_stopping": self.early_stopping,
+            "num_return_sequences": self.num_return_sequences,
+        }
+
+    def __call__(self, prompt: str, stop: Optional[List[str]] = None) -> str:
+        """Call out to NLPCloud's create endpoint.
+
+        Args:
+            prompt: The prompt to pass into the model.
+            stop: Optional list holding at most one stop sequence; it is
+                forwarded to the API as ``end_sequence``.
+
+        Returns:
+            The string generated by the model.
+
+        Raises:
+            ValueError: If more than one stop sequence is given.
+
+        Example:
+            .. code-block:: python
+
+                response = nlpcloud("Tell me a joke.")
+        """
+        if stop and len(stop) > 1:
+            raise ValueError(
+                "NLPCloud only supports a single stop sequence per generation. "
+                "Pass in a list of length 1."
+            )
+        end_sequence = stop[0] if stop else None
+        response = self.client.generation(
+            prompt, end_sequence=end_sequence, **self._default_params
+        )
+        return response["generated_text"]
diff --git a/requirements.txt b/requirements.txt
index cbb622111b..417c7ed65b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,6 +3,7 @@
 cohere
 openai
 google-search-results
+nlpcloud
 playwright
 wikipedia
 huggingface_hub
diff --git a/tests/integration_tests/llms/test_nlpcloud.py b/tests/integration_tests/llms/test_nlpcloud.py
new file mode 100644
index 0000000000..9e4664e4f5
--- /dev/null
+++ b/tests/integration_tests/llms/test_nlpcloud.py
@@ -0,0 +1,10 @@
+"""Test NLPCloud API wrapper."""
+
+from langchain.llms.nlpcloud import NLPCloud
+
+
+def test_nlpcloud_call() -> None:
+    """Test valid call to nlpcloud."""
+    llm = NLPCloud(max_length=10)
+    output = llm("Say foo:")
+    assert isinstance(output, str)