[Breaking] Migrate GPT4All to use PyGPT4All (#3934)

It seems the pyllamacpp package is no longer the supported bindings for
gpt4all, so this migrates the wrapper and docs to pygpt4all. Tested that
this works locally.

Given that the older models weren't very performant, I think it's better
to migrate now rather than keep the old bindings working behind a lot of
try / except blocks.
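
For reference, the heart of the change is a new import path and constructor
keyword. A minimal sketch (module paths and keyword arguments are taken from
the diff below; the model file path is just an example):

# Old bindings (removed in this PR):
#   from pyllamacpp.model import Model as GPT4AllModel
#   client = GPT4AllModel(ggml_model="./models/ggml-gpt4all-l13b-snoozy.bin")

# New bindings (added in this PR):
from pygpt4all.models.gpt4all import GPT4All as GPT4AllModel

client = GPT4AllModel(model_path="./models/ggml-gpt4all-l13b-snoozy.bin")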

---------

Co-authored-by: Nissan Pow <npow@users.noreply.github.com>
Co-authored-by: Nissan Pow <pownissa@amazon.com>
Zander Chase 1 year ago committed by GitHub
parent f0a4bbb8e2
commit c4cb55a0c5

@@ -1,173 +1,173 @@
 {
  "cells": [
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "# GPT4All\n",
     "\n",
     "[GitHub:nomic-ai/gpt4all](https://github.com/nomic-ai/gpt4all) is an ecosystem of open-source chatbots trained on massive collections of clean assistant data, including code, stories and dialogue.\n",
     "\n",
     "This example goes over how to use LangChain to interact with `GPT4All` models."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 1,
    "metadata": {
     "tags": []
    },
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "Note: you may need to restart the kernel to use updated packages.\n"
      ]
     }
    ],
    "source": [
-    "%pip install pyllamacpp > /dev/null"
+    "%pip install pygpt4all > /dev/null"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 1,
    "metadata": {
     "tags": []
    },
    "outputs": [],
    "source": [
     "from langchain import PromptTemplate, LLMChain\n",
     "from langchain.llms import GPT4All\n",
     "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 2,
    "metadata": {
     "tags": []
    },
    "outputs": [],
    "source": [
     "template = \"\"\"Question: {question}\n",
     "\n",
     "Answer: Let's think step by step.\"\"\"\n",
     "\n",
     "prompt = PromptTemplate(template=template, input_variables=[\"question\"])"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "### Specify Model\n",
     "\n",
-    "To run locally, download a compatible ggml-formatted model. For more info, visit https://github.com/nomic-ai/pyllamacpp\n",
+    "To run locally, download a compatible ggml-formatted model. For more info, visit https://github.com/nomic-ai/pygpt4all\n",
     "\n",
     "For full installation instructions go [here](https://gpt4all.io/index.html).\n",
     "\n",
     "The GPT4All Chat installer needs to decompress a 3GB LLM model during the installation process!\n",
     "\n",
     "Note that new models are uploaded regularly - check the link above for the most recent `.bin` URL"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "local_path = './models/gpt4all-lora-quantized-ggml.bin'  # replace with your desired local file path"
+    "local_path = './models/ggml-gpt4all-l13b-snoozy.bin'  # replace with your desired local file path"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
     "Uncomment the below block to download a model. You may want to update `url` to a new version."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "# import requests\n",
     "\n",
     "# from pathlib import Path\n",
     "# from tqdm import tqdm\n",
     "\n",
     "# Path(local_path).parent.mkdir(parents=True, exist_ok=True)\n",
     "\n",
-    "# # Example model. Check https://github.com/nomic-ai/pyllamacpp for the latest models.\n",
-    "# url = 'https://the-eye.eu/public/AI/models/nomic-ai/gpt4all/gpt4all-lora-quantized-ggml.bin'\n",
+    "# # Example model. Check https://github.com/nomic-ai/pygpt4all for the latest models.\n",
+    "# url = 'http://gpt4all.io/models/ggml-gpt4all-l13b-snoozy.bin'\n",
     "\n",
     "# # send a GET request to the URL to download the file. Stream since it's large\n",
     "# response = requests.get(url, stream=True)\n",
     "\n",
     "# # open the file in binary mode and write the contents of the response to it in chunks\n",
     "# # This is a large file, so be prepared to wait.\n",
     "# with open(local_path, 'wb') as f:\n",
     "#     for chunk in tqdm(response.iter_content(chunk_size=8192)):\n",
     "#         if chunk:\n",
     "#             f.write(chunk)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "# Callbacks support token-wise streaming\n",
     "callbacks = [StreamingStdOutCallbackHandler()]\n",
     "# Verbose is required to pass to the callback manager\n",
     "llm = GPT4All(model=local_path, callbacks=callbacks, verbose=True)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "llm_chain = LLMChain(prompt=prompt, llm=llm)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "question = \"What NFL team won the Super Bowl in the year Justin Bieber was born?\"\n",
     "\n",
     "llm_chain.run(question)"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
    "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
     "version": 3
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-    "version": "3.10.6"
+    "version": "3.11.2"
   }
  },
  "nbformat": 4,
  "nbformat_minor": 4
 }
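
Condensed, the updated notebook's happy path as a plain script (a sketch; it
assumes the ggml-gpt4all-l13b-snoozy.bin model has already been downloaded to
local_path):

from langchain import PromptTemplate, LLMChain
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import GPT4All

local_path = "./models/ggml-gpt4all-l13b-snoozy.bin"

template = """Question: {question}

Answer: Let's think step by step."""
prompt = PromptTemplate(template=template, input_variables=["question"])

# Callbacks support token-wise streaming; verbose is required to pass to the
# callback manager.
llm = GPT4All(model=local_path, callbacks=[StreamingStdOutCallbackHandler()], verbose=True)

llm_chain = LLMChain(prompt=prompt, llm=llm)
llm_chain.run("What NFL team won the Super Bowl in the year Justin Bieber was born?")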

@@ -12,7 +12,7 @@ from langchain.llms.utils import enforce_stop_tokens
 class GPT4All(LLM):
     r"""Wrapper around GPT4All language models.
 
-    To use, you should have the ``pyllamacpp`` python package installed, the
+    To use, you should have the ``pygpt4all`` python package installed, the
     pre-trained model file, and the model's config information.
 
     Example:
@@ -126,19 +126,19 @@ class GPT4All(LLM):
     def validate_environment(cls, values: Dict) -> Dict:
         """Validate that the python package exists in the environment."""
         try:
-            from pyllamacpp.model import Model as GPT4AllModel
+            from pygpt4all.models.gpt4all import GPT4All as GPT4AllModel
 
             llama_keys = cls._llama_param_names()
             model_kwargs = {k: v for k, v in values.items() if k in llama_keys}
             values["client"] = GPT4AllModel(
-                ggml_model=values["model"],
+                model_path=values["model"],
                 **model_kwargs,
             )
         except ImportError:
             raise ValueError(
-                "Could not import pyllamacpp python package. "
-                "Please install it with `pip install pyllamacpp`."
+                "Could not import pygpt4all python package. "
+                "Please install it with `pip install pygpt4all`."
             )
         return values
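
The same lazy-import validation idiom, shown standalone (a sketch: ALLOWED
stands in for cls._llama_param_names(), and the field names listed are
illustrative, not the wrapper's exact set):

ALLOWED = {"n_threads", "n_predict", "temp"}  # illustrative stand-in

def build_client(values: dict):
    """Import the bindings lazily and construct a client, as validate_environment does."""
    try:
        from pygpt4all.models.gpt4all import GPT4All as GPT4AllModel
    except ImportError:
        raise ValueError(
            "Could not import pygpt4all python package. "
            "Please install it with `pip install pygpt4all`."
        )
    # Forward only the keys the underlying constructor accepts.
    model_kwargs = {k: v for k, v in values.items() if k in ALLOWED}
    return GPT4AllModel(model_path=values["model"], **model_kwargs)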

@@ -7,21 +7,12 @@ from langchain.llms import GPT4All
 def _download_model() -> str:
-    """Download model.
-    From https://the-eye.eu/public/AI/models/nomic-ai/gpt4all/gpt4all-lora-quantized.bin,
-    convert to new ggml format and return model path."""
-    model_url = "https://the-eye.eu/public/AI/models/nomic-ai/gpt4all/gpt4all-lora-quantized.bin"
-    tokenizer_url = "https://huggingface.co/decapoda-research/llama-7b-hf/resolve/main/tokenizer.model"
-    conversion_script = "https://github.com/nomic-ai/pyllamacpp/blob/main/pyllamacpp/scripts/convert_gpt4all.py"
+    """Download model."""
+    model_url = "http://gpt4all.io/models/ggml-gpt4all-l13b-snoozy.bin"
     local_filename = model_url.split("/")[-1]
-    if not os.path.exists("convert_gpt4all.py"):
-        urlretrieve(conversion_script, "convert_gpt4all.py")
-    if not os.path.exists("tokenizer.model"):
-        urlretrieve(tokenizer_url, "tokenizer.model")
     if not os.path.exists(local_filename):
         urlretrieve(model_url, local_filename)
-    os.system(f"python convert_gpt4all.py.py . tokenizer.model")
     return local_filename
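
Since the snoozy checkpoint is several GB, the test helper could also stream
the download in chunks, mirroring the notebook cell above; a sketch (the
function name is hypothetical):

import os

import requests

def _download_model_streamed(
    url: str = "http://gpt4all.io/models/ggml-gpt4all-l13b-snoozy.bin",
) -> str:
    """Chunked variant of _download_model that avoids buffering the whole file."""
    local_filename = url.split("/")[-1]
    if not os.path.exists(local_filename):
        response = requests.get(url, stream=True)
        with open(local_filename, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
    return local_filename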
