diff --git a/docs/ecosystem/gpt4all.md b/docs/ecosystem/gpt4all.md
new file mode 100644
index 00000000..ec8b1ce1
--- /dev/null
+++ b/docs/ecosystem/gpt4all.md
@@ -0,0 +1,37 @@
+# GPT4All
+
+This page covers how to use the `GPT4All` wrapper within LangChain.
+It is broken into two parts: installation and setup, and then usage with an example.
+
+## Installation and Setup
+- Install the Python package with `pip install pyllamacpp`
+- Download a [GPT4All model](https://github.com/nomic-ai/gpt4all) and place it in your desired directory
+
+## Usage
+
+### GPT4All
+
+To use the GPT4All wrapper, you need to provide the path to the pre-trained model file and the model's configuration.
+```python
+from langchain.llms import GPT4All
+
+# Instantiate the model
+model = GPT4All(model="./models/gpt4all-model.bin", n_ctx=512, n_threads=8)
+
+# Generate text
+response = model("Once upon a time, ")
+```
+
+You can also customize the generation parameters, such as `n_predict`, `temp`, `top_p`, `top_k`, and others.
+
+Example:
+
+```python
+model = GPT4All(model="./models/gpt4all-model.bin", n_predict=55, temp=0)
+response = model("Once upon a time, ")
+```
+## Model File
+
+You can find links to model file downloads in the [GPT4All](https://github.com/nomic-ai/gpt4all) repository. They will need to be converted to `ggml` format to work, as specified in the [pyllamacpp](https://github.com/nomic-ai/pyllamacpp) repository.
+
+For a more detailed walkthrough of this, see [this notebook](../modules/models/llms/integrations/gpt4all.ipynb).
\ No newline at end of file
diff --git a/docs/modules/models/llms/integrations/gpt4all.ipynb b/docs/modules/models/llms/integrations/gpt4all.ipynb
new file mode 100644
index 00000000..14d04961
--- /dev/null
+++ b/docs/modules/models/llms/integrations/gpt4all.ipynb
@@ -0,0 +1,85 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# GPT4All\n",
+    "\n",
+    "This example goes over how to use LangChain to interact with GPT4All models."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install pyllamacpp"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain.llms import GPT4All\n",
+    "from langchain import PromptTemplate, LLMChain"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "template = \"\"\"Question: {question}\n",
+    "\n",
+    "Answer: Let's think step by step.\"\"\"\n",
+    "\n",
+    "prompt = PromptTemplate(template=template, input_variables=[\"question\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# You'll need to download a compatible model and convert it to ggml.\n",
+    "# See: https://github.com/nomic-ai/gpt4all for more information.\n",
+    "llm = GPT4All(model=\"./models/gpt4all-model.bin\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "llm_chain = LLMChain(prompt=prompt, llm=llm)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "question = \"What NFL team won the Super Bowl in the year Justin Bieber was born?\"\n",
+    "\n",
+    "llm_chain.run(question)"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
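The ecosystem page above notes that downloaded GPT4All weights have to be converted to `ggml` format before `pyllamacpp` can load them. Below is a minimal sketch of that conversion step, modeled on the integration test added later in this diff; the download URLs, the `convert_gpt4all.py` script name, and its command-line arguments are taken from the nomic-ai repositories and should be treated as assumptions that may change.

```python
import os
from urllib.request import urlretrieve

# Assumed download locations, taken from the nomic-ai/gpt4all and pyllamacpp
# repositories; verify them before relying on this sketch.
MODEL_URL = "https://the-eye.eu/public/AI/models/nomic-ai/gpt4all/gpt4all-lora-quantized.bin"
TOKENIZER_URL = "https://huggingface.co/decapoda-research/llama-7b-hf/resolve/main/tokenizer.model"
CONVERT_SCRIPT_URL = "https://raw.githubusercontent.com/nomic-ai/pyllamacpp/main/pyllamacpp/scripts/convert_gpt4all.py"


def fetch_and_convert() -> str:
    """Download the quantized weights, tokenizer, and conversion script,
    then convert the weights to ggml format in the current directory."""
    model_path = MODEL_URL.split("/")[-1]
    urlretrieve(CONVERT_SCRIPT_URL, "convert_gpt4all.py")
    urlretrieve(TOKENIZER_URL, "tokenizer.model")
    urlretrieve(MODEL_URL, model_path)
    # The conversion script rewrites the weights in place so pyllamacpp can load them.
    os.system("python convert_gpt4all.py . tokenizer.model")
    return model_path
```

The resulting path can then be passed as the `model` argument of the `GPT4All` wrapper shown in the page above.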
diff --git a/langchain/llms/__init__.py b/langchain/llms/__init__.py
index b5acdc12..9e0a0b28 100644
--- a/langchain/llms/__init__.py
+++ b/langchain/llms/__init__.py
@@ -11,6 +11,7 @@ from langchain.llms.cohere import Cohere
 from langchain.llms.deepinfra import DeepInfra
 from langchain.llms.forefrontai import ForefrontAI
 from langchain.llms.gooseai import GooseAI
+from langchain.llms.gpt4all import GPT4All
 from langchain.llms.huggingface_endpoint import HuggingFaceEndpoint
 from langchain.llms.huggingface_hub import HuggingFaceHub
 from langchain.llms.huggingface_pipeline import HuggingFacePipeline
@@ -36,6 +37,7 @@ __all__ = [
     "DeepInfra",
     "ForefrontAI",
     "GooseAI",
+    "GPT4All",
     "LlamaCpp",
     "Modal",
     "NLPCloud",
@@ -67,6 +69,7 @@ type_to_cls_dict: Dict[str, Type[BaseLLM]] = {
     "deepinfra": DeepInfra,
     "forefrontai": ForefrontAI,
     "gooseai": GooseAI,
+    "gpt4all": GPT4All,
     "huggingface_hub": HuggingFaceHub,
     "huggingface_endpoint": HuggingFaceEndpoint,
     "llamacpp": LlamaCpp,
diff --git a/langchain/llms/gpt4all.py b/langchain/llms/gpt4all.py
new file mode 100644
index 00000000..ed927289
--- /dev/null
+++ b/langchain/llms/gpt4all.py
@@ -0,0 +1,183 @@
+"""Wrapper for the GPT4All model."""
+from typing import Any, Dict, List, Mapping, Optional, Set
+
+from pydantic import BaseModel, Extra, Field, root_validator
+
+from langchain.llms.base import LLM
+from langchain.llms.utils import enforce_stop_tokens
+
+
+class GPT4All(LLM, BaseModel):
+    r"""Wrapper around GPT4All language models.
+
+    To use, you should have the ``pyllamacpp`` python package installed, the
+    pre-trained model file, and the model's config information.
+
+    Example:
+        .. code-block:: python
+
+            from langchain.llms import GPT4All
+            model = GPT4All(model="./models/gpt4all-model.bin", n_ctx=512, n_threads=8)
+
+            # Simplest invocation
+            response = model("Once upon a time, ")
+    """
+
+    model: str
+    """Path to the pre-trained GPT4All model file."""
+
+    n_ctx: int = Field(512, alias="n_ctx")
+    """Token context window."""
+
+    n_parts: int = Field(-1, alias="n_parts")
+    """Number of parts to split the model into.
+    If -1, the number of parts is automatically determined."""
+
+    seed: int = Field(0, alias="seed")
+    """Seed. If -1, a random seed is used."""
+
+    f16_kv: bool = Field(False, alias="f16_kv")
+    """Use half-precision for key/value cache."""
+
+    logits_all: bool = Field(False, alias="logits_all")
+    """Return logits for all tokens, not just the last token."""
+
+    vocab_only: bool = Field(False, alias="vocab_only")
+    """Only load the vocabulary, no weights."""
+
+    use_mlock: bool = Field(False, alias="use_mlock")
+    """Force system to keep model in RAM."""
+
+    embedding: bool = Field(False, alias="embedding")
+    """Use embedding mode only."""
+
+    n_threads: Optional[int] = Field(4, alias="n_threads")
+    """Number of threads to use."""
+
+    n_predict: Optional[int] = 256
+    """The maximum number of tokens to generate."""
+
+    temp: Optional[float] = 0.8
+    """The temperature to use for sampling."""
+
+    top_p: Optional[float] = 0.95
+    """The top-p value to use for sampling."""
+
+    top_k: Optional[int] = 40
+    """The top-k value to use for sampling."""
+
+    echo: Optional[bool] = False
+    """Whether to echo the prompt."""
+
+    stop: Optional[List[str]] = []
+    """A list of strings to stop generation when encountered."""
+
+    repeat_last_n: Optional[int] = 64
+    """Last n tokens to penalize."""
+
+    repeat_penalty: Optional[float] = 1.3
+    """The penalty to apply to repeated tokens."""
+
+    n_batch: int = Field(1, alias="n_batch")
+    """Batch size for prompt processing."""
+
+    streaming: bool = False
+    """Whether to stream the results or not."""
+
+    client: Any = None  #: :meta private:
+
+    class Config:
+        """Configuration for this pydantic object."""
+
+        extra = Extra.forbid
+
+    @property
+    def _default_params(self) -> Dict[str, Any]:
+        """Get the default parameters passed to the model's generate call."""
+        return {
+            "seed": self.seed,
+            "n_predict": self.n_predict,
+            "n_threads": self.n_threads,
+            "n_batch": self.n_batch,
+            "repeat_last_n": self.repeat_last_n,
+            "repeat_penalty": self.repeat_penalty,
+            "top_k": self.top_k,
+            "top_p": self.top_p,
+            "temp": self.temp,
+        }
+
+    @staticmethod
+    def _llama_param_names() -> Set[str]:
+        """Get the names of the parameters forwarded to the llama model loader."""
+        return {
+            "seed",
+            "n_ctx",
+            "n_parts",
+            "f16_kv",
+            "logits_all",
+            "vocab_only",
+            "use_mlock",
+            "embedding",
+        }
+
+    @root_validator()
+    def validate_environment(cls, values: Dict) -> Dict:
+        """Validate that the python package exists in the environment."""
+        try:
+            from pyllamacpp.model import Model as GPT4AllModel
+
+            llama_keys = cls._llama_param_names()
+            model_kwargs = {k: v for k, v in values.items() if k in llama_keys}
+            values["client"] = GPT4AllModel(
+                ggml_model=values["model"],
+                **model_kwargs,
+            )
+
+        except ImportError:
+            raise ValueError(
+                "Could not import pyllamacpp python package. "
+                "Please install it with `pip install pyllamacpp`."
+            )
+        return values
+
+    @property
+    def _identifying_params(self) -> Mapping[str, Any]:
+        """Get the identifying parameters."""
+        return {
+            "model": self.model,
+            **self._default_params,
+            **{
+                k: v
+                for k, v in self.__dict__.items()
+                if k in GPT4All._llama_param_names()
+            },
+        }
+
+    @property
+    def _llm_type(self) -> str:
+        """Return the type of llm."""
+        return "gpt4all"
+
+    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
+        r"""Call out to GPT4All's generate method.
+
+        Args:
+            prompt: The prompt to pass into the model.
+            stop: A list of strings to stop generation when encountered.
+
+        Returns:
+            The string generated by the model.
+
+        Example:
+            .. code-block:: python
+
+                prompt = "Once upon a time, "
+                response = model(prompt)
+        """
+        text = self.client.generate(
+            prompt,
+            **self._default_params,
+        )
+        if stop is not None:
+            text = enforce_stop_tokens(text, stop)
+        return text
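To make the parameter split in the wrapper above concrete: the names in `_llama_param_names()` are forwarded to the `pyllamacpp` `Model` constructor when the object is validated, while `_default_params` is sent to `generate()` on every call, and any `stop` strings are applied afterwards through `enforce_stop_tokens`. A rough usage sketch under those assumptions follows; the model path is a placeholder.

```python
from langchain.llms import GPT4All

# Model-load parameters (n_ctx, seed, use_mlock, ...) are consumed by the
# pyllamacpp Model constructor inside validate_environment; sampling
# parameters (n_predict, temp, top_k, ...) are sent to generate() per call.
llm = GPT4All(
    model="./models/gpt4all-converted.bin",  # placeholder path to a ggml model
    n_ctx=512,
    n_threads=8,
    n_predict=128,
    temp=0.7,
)

# `stop` is not forwarded to pyllamacpp; the wrapper truncates the generated
# text at the first stop string via enforce_stop_tokens.
text = llm("Q: Name three colors.\nA:", stop=["\n\n"])
print(text)
```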
diff --git a/tests/integration_tests/llms/test_gpt4all.py b/tests/integration_tests/llms/test_gpt4all.py
new file mode 100644
index 00000000..f338355d
--- /dev/null
+++ b/tests/integration_tests/llms/test_gpt4all.py
@@ -0,0 +1,34 @@
+# flake8: noqa
+"""Test GPT4All wrapper."""
+import os
+from urllib.request import urlretrieve
+
+from langchain.llms import GPT4All
+
+
+def _download_model() -> str:
+    """Download model.
+    From https://the-eye.eu/public/AI/models/nomic-ai/gpt4all/gpt4all-lora-quantized.bin,
+    convert to new ggml format and return model path."""
+    model_url = "https://the-eye.eu/public/AI/models/nomic-ai/gpt4all/gpt4all-lora-quantized.bin"
+    tokenizer_url = "https://huggingface.co/decapoda-research/llama-7b-hf/resolve/main/tokenizer.model"
+    conversion_script = "https://raw.githubusercontent.com/nomic-ai/pyllamacpp/main/pyllamacpp/scripts/convert_gpt4all.py"
+    local_filename = model_url.split("/")[-1]
+
+    if not os.path.exists("convert_gpt4all.py"):
+        urlretrieve(conversion_script, "convert_gpt4all.py")
+    if not os.path.exists("tokenizer.model"):
+        urlretrieve(tokenizer_url, "tokenizer.model")
+    if not os.path.exists(local_filename):
+        urlretrieve(model_url, local_filename)
+        os.system("python convert_gpt4all.py . tokenizer.model")
+
+    return local_filename
+
+
+def test_gpt4all_inference() -> None:
+    """Test valid gpt4all inference."""
+    model_path = _download_model()
+    llm = GPT4All(model=model_path)
+    output = llm("Say foo:")
+    assert isinstance(output, str)
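Since the `__init__.py` change also registers `"gpt4all"` in `type_to_cls_dict`, the wrapper should be constructible from a serialized config as well. A tentative sketch, assuming the existing `load_llm_from_config` helper in `langchain.llms.loading` and a placeholder model path:

```python
from langchain.llms.loading import load_llm_from_config

# "_type" selects the class through type_to_cls_dict; the remaining keys are
# passed to the GPT4All constructor. The model path is a placeholder.
config = {
    "_type": "gpt4all",
    "model": "./models/gpt4all-converted.bin",
    "n_ctx": 512,
    "n_predict": 64,
}
llm = load_llm_from_config(config)
print(llm("Say foo:"))
```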