diff --git a/docs/ecosystem/gpt4all.md b/docs/ecosystem/gpt4all.md
index ec8b1ce1..81f073e3 100644
--- a/docs/ecosystem/gpt4all.md
+++ b/docs/ecosystem/gpt4all.md
@@ -1,21 +1,21 @@
 # GPT4All
 
-This page covers how to use the `GPT4All` wrapper within LangChain.
-It is broken into two parts: installation and setup, and then usage with an example.
+This page covers how to use the `GPT4All` wrapper within LangChain. It is divided into two parts: installation and setup, followed by usage with an example.
 
 ## Installation and Setup
 - Install the Python package with `pip install pyllamacpp`
-- Download a [GPT4All model](https://github.com/nomic-ai/gpt4all) and place it in your desired directory
+- Download a [GPT4All model](https://github.com/nomic-ai/pyllamacpp#supported-model) and place it in your desired directory
 
 ## Usage
 
 ### GPT4All
 
 To use the GPT4All wrapper, you need to provide the path to the pre-trained model file and the model's configuration.
+
 ```python
 from langchain.llms import GPT4All
 
-# Instantiate the model
+# Instantiate the model. Callbacks support token-wise streaming
 model = GPT4All(model="./models/gpt4all-model.bin", n_ctx=512, n_threads=8)
 
 # Generate text
@@ -24,14 +24,24 @@ response = model("Once upon a time, ")
 
 You can also customize the generation parameters, such as n_predict, temp, top_p, top_k, and others.
 
-Example:
+To stream the model's predictions, pass in a CallbackManager.
 
 ```python
-model = GPT4All(model="./models/gpt4all-model.bin", n_predict=55, temp=0)
-response = model("Once upon a time, ")
+from langchain.llms import GPT4All
+from langchain.callbacks.base import CallbackManager
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+# There are many CallbackHandlers supported, such as
+# from langchain.callbacks.streamlit import StreamlitCallbackHandler
+
+callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+model = GPT4All(model="./models/gpt4all-model.bin", n_ctx=512, n_threads=8, callback_manager=callback_manager, verbose=True)
+
+# Generate text. Tokens are streamed through the callback manager.
+model("Once upon a time, ")
 ```
+
 ## Model File
 
-You can find links to model file downloads at the [GPT4all](https://github.com/nomic-ai/gpt4all) repository. They will need to be converted to `ggml` format to work, as specified in the [pyllamacpp](https://github.com/nomic-ai/pyllamacpp) repository.
+You can find links to model file downloads in the [pyllamacpp](https://github.com/nomic-ai/pyllamacpp) repository.
 
 For a more detailed walkthrough of this, see [this notebook](../modules/models/llms/integrations/gpt4all.ipynb)
\ No newline at end of file
diff --git a/docs/modules/models/llms/integrations/gpt4all.ipynb b/docs/modules/models/llms/integrations/gpt4all.ipynb
index 07c6b0d5..987bb93f 100644
--- a/docs/modules/models/llms/integrations/gpt4all.ipynb
+++ b/docs/modules/models/llms/integrations/gpt4all.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# GPT4all\n",
+    "# GPT4All\n",
     "\n",
     "This example goes over how to use LangChain to interact with GPT4All models"
    ]
@@ -15,7 +15,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!pip install pyllamacpp"
+    "%pip install pyllamacpp > /dev/null"
    ]
   },
   {
@@ -24,8 +24,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "from langchain import PromptTemplate, LLMChain\n",
     "from langchain.llms import GPT4All\n",
-    "from langchain import PromptTemplate, LLMChain"
+    "from langchain.callbacks.base import CallbackManager\n",
+    "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler"
    ]
   },
   {
@@ -41,15 +43,70 @@
     "prompt = PromptTemplate(template=template, input_variables=[\"question\"])"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Specify Model\n",
+    "\n",
+    "To run locally, download a compatible ggml-formatted model. For more info, visit https://github.com/nomic-ai/pyllamacpp\n",
+    "\n",
+    "Note that new models are uploaded regularly - check the link above for the most recent `.bin` URL"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "local_path = './models/gpt4all-lora-quantized-ggml.bin' # replace with your desired local file path"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Uncomment the below block to download a model. You may want to update `url` to a new version."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# import requests\n",
+    "\n",
+    "# from pathlib import Path\n",
+    "# from tqdm import tqdm\n",
+    "\n",
+    "# Path(local_path).parent.mkdir(parents=True, exist_ok=True)\n",
+    "\n",
+    "# # Example model. Check https://github.com/nomic-ai/pyllamacpp for the latest models.\n",
+    "# url = 'https://the-eye.eu/public/AI/models/nomic-ai/gpt4all/gpt4all-lora-quantized-ggml.bin'\n",
+    "\n",
+    "# # send a GET request to the URL to download the file. Stream since it's large\n",
+    "# response = requests.get(url, stream=True)\n",
+    "\n",
+    "# # open the file in binary mode and write the contents of the response to it in chunks\n",
+    "# # This is a large file, so be prepared to wait.\n",
+    "# with open(local_path, 'wb') as f:\n",
+    "#     for chunk in tqdm(response.iter_content(chunk_size=8192)):\n",
+    "#         if chunk:\n",
+    "#             f.write(chunk)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# You'll need to download a compatible model and convert it to ggml.\n",
-    "# See: https://github.com/nomic-ai/gpt4all for more information.\n",
-    "llm = GPT4All(model=\"./models/gpt4all-model.bin\")"
+    "# Callbacks support token-wise streaming\n",
+    "callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])\n",
+    "# Verbose is required to pass to the callback manager\n",
+    "llm = GPT4All(model=local_path, callback_manager=callback_manager, verbose=True)"
    ]
   },
   {
@@ -89,7 +146,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.1"
+   "version": "3.11.2"
   }
  },
 "nbformat": 4,
diff --git a/langchain/llms/gpt4all.py b/langchain/llms/gpt4all.py
index fa6b5fd5..bf0300bb 100644
--- a/langchain/llms/gpt4all.py
+++ b/langchain/llms/gpt4all.py
@@ -1,4 +1,5 @@
 """Wrapper for the GPT4All model."""
+from functools import partial
 from typing import Any, Dict, List, Mapping, Optional, Set
 
 from pydantic import Extra, Field, root_validator
@@ -174,8 +175,12 @@ class GPT4All(LLM):
                 prompt = "Once upon a time, "
                 response = model(prompt, n_predict=55)
         """
+        text_callback = partial(
+            self.callback_manager.on_llm_new_token, verbose=self.verbose
+        )
         text = self.client.generate(
             prompt,
+            new_text_callback=text_callback,
             **self._default_params,
        )
         if stop is not None:
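
The sketch below (not part of the patch itself) pulls the pieces of this diff together into one runnable snippet: it wires a `CallbackManager` with `StreamingStdOutCallbackHandler` into the `GPT4All` wrapper so that the `new_text_callback` path added in `gpt4all.py` streams tokens to stdout. The model path is an assumption; point it at whatever ggml-converted GPT4All model you downloaded.

```python
from langchain.llms import GPT4All
from langchain.callbacks.base import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# StreamingStdOutCallbackHandler prints each token as it arrives; the patched
# GPT4All._call forwards tokens to on_llm_new_token via new_text_callback.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Assumed local path to a ggml-converted GPT4All model; adjust to your setup.
llm = GPT4All(
    model="./models/gpt4all-lora-quantized-ggml.bin",
    n_ctx=512,
    n_threads=8,
    callback_manager=callback_manager,
    verbose=True,  # verbose is required to pass tokens to the callback manager
)

# Tokens are written to stdout as they are generated.
response = llm("Once upon a time, ")
```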