diff --git a/docs/ecosystem/gpt4all.md b/docs/ecosystem/gpt4all.md
index ec8b1ce1..81f073e3 100644
--- a/docs/ecosystem/gpt4all.md
+++ b/docs/ecosystem/gpt4all.md
@@ -1,21 +1,21 @@
 # GPT4All
 
-This page covers how to use the `GPT4All` wrapper within LangChain.
-It is broken into two parts: installation and setup, and then usage with an example.
+This page covers how to use the `GPT4All` wrapper within LangChain. It is divided into two parts: installation and setup, followed by usage with an example.
 
 ## Installation and Setup
 - Install the Python package with `pip install pyllamacpp`
-- Download a [GPT4All model](https://github.com/nomic-ai/gpt4all) and place it in your desired directory
+- Download a [GPT4All model](https://github.com/nomic-ai/pyllamacpp#supported-model) and place it in your desired directory
 
 ## Usage
 
 ### GPT4All
 
 To use the GPT4All wrapper, you need to provide the path to the pre-trained model file and the model's configuration.
+
 ```python
 from langchain.llms import GPT4All
 
-# Instantiate the model
+# Instantiate the model. Callbacks support token-wise streaming
 model = GPT4All(model="./models/gpt4all-model.bin", n_ctx=512, n_threads=8)
 
 # Generate text
@@ -24,14 +24,24 @@ response = model("Once upon a time, ")
 
 You can also customize the generation parameters, such as n_predict, temp, top_p, top_k, and others.
 
-Example:
+To stream the model's predictions, pass in a CallbackManager.
 
 ```python
-model = GPT4All(model="./models/gpt4all-model.bin", n_predict=55, temp=0)
-response = model("Once upon a time, ")
+from langchain.llms import GPT4All
+from langchain.callbacks.base import CallbackManager
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+# There are many CallbackHandlers supported, such as
+# from langchain.callbacks.streamlit import StreamlitCallbackHandler
+
+callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+model = GPT4All(model="./models/gpt4all-model.bin", n_ctx=512, n_threads=8, callback_manager=callback_manager, verbose=True)
+
+# Generate text. Tokens are streamed through the callback manager.
+model("Once upon a time, ")
 ```
+
 ## Model File
 
-You can find links to model file downloads at the [GPT4all](https://github.com/nomic-ai/gpt4all) repository. They will need to be converted to `ggml` format to work, as specified in the [pyllamacpp](https://github.com/nomic-ai/pyllamacpp) repository.
+You can find links to model file downloads in the [pyllamacpp](https://github.com/nomic-ai/pyllamacpp) repository.
 
 For a more detailed walkthrough of this, see [this notebook](../modules/models/llms/integrations/gpt4all.ipynb)
\ No newline at end of file
diff --git a/docs/modules/models/llms/integrations/gpt4all.ipynb b/docs/modules/models/llms/integrations/gpt4all.ipynb
index 07c6b0d5..987bb93f 100644
--- a/docs/modules/models/llms/integrations/gpt4all.ipynb
+++ b/docs/modules/models/llms/integrations/gpt4all.ipynb
@@ -4,7 +4,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# GPT4all\n",
+    "# GPT4All\n",
     "\n",
     "This example goes over how to use LangChain to interact with GPT4All models"
    ]
@@ -15,7 +15,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "!pip install pyllamacpp"
+    "%pip install pyllamacpp > /dev/null"
    ]
   },
   {
@@ -24,8 +24,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "from langchain import PromptTemplate, LLMChain\n",
     "from langchain.llms import GPT4All\n",
-    "from langchain import PromptTemplate, LLMChain"
+    "from langchain.callbacks.base import CallbackManager\n",
+    "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler"
    ]
   },
   {
@@ -41,15 +43,70 @@
     "prompt = PromptTemplate(template=template, input_variables=[\"question\"])"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Specify Model\n",
+    "\n",
+    "To run locally, download a compatible ggml-formatted model. For more info, visit https://github.com/nomic-ai/pyllamacpp\n",
+    "\n",
+    "Note that new models are uploaded regularly - check the link above for the most recent `.bin` URL"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "local_path = './models/gpt4all-lora-quantized-ggml.bin' # replace with your desired local file path"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Uncomment the below block to download a model. You may want to update `url` to a new version."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# import requests\n",
+    "\n",
+    "# from pathlib import Path\n",
+    "# from tqdm import tqdm\n",
+    "\n",
+    "# Path(local_path).parent.mkdir(parents=True, exist_ok=True)\n",
+    "\n",
+    "# # Example model. Check https://github.com/nomic-ai/pyllamacpp for the latest models.\n",
+    "# url = 'https://the-eye.eu/public/AI/models/nomic-ai/gpt4all/gpt4all-lora-quantized-ggml.bin'\n",
+    "\n",
+    "# # send a GET request to the URL to download the file. Stream since it's large\n",
+    "# response = requests.get(url, stream=True)\n",
+    "\n",
+    "# # open the file in binary mode and write the contents of the response to it in chunks\n",
+    "# # This is a large file, so be prepared to wait.\n",
+    "# with open(local_path, 'wb') as f:\n",
+    "#     for chunk in tqdm(response.iter_content(chunk_size=8192)):\n",
+    "#         if chunk:\n",
+    "#             f.write(chunk)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# You'll need to download a compatible model and convert it to ggml.\n",
-    "# See: https://github.com/nomic-ai/gpt4all for more information.\n",
-    "llm = GPT4All(model=\"./models/gpt4all-model.bin\")"
+    "# Callbacks support token-wise streaming\n",
+    "callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])\n",
+    "# Verbose is required to pass to the callback manager\n",
+    "llm = GPT4All(model=local_path, callback_manager=callback_manager, verbose=True)"
    ]
   },
   {
@@ -89,7 +146,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.1"
+   "version": "3.11.2"
   }
  },
 "nbformat": 4,
diff --git a/langchain/llms/gpt4all.py b/langchain/llms/gpt4all.py
index fa6b5fd5..bf0300bb 100644
--- a/langchain/llms/gpt4all.py
+++ b/langchain/llms/gpt4all.py
@@ -1,4 +1,5 @@
 """Wrapper for the GPT4All model."""
+from functools import partial
 from typing import Any, Dict, List, Mapping, Optional, Set
 
 from pydantic import Extra, Field, root_validator
@@ -174,8 +175,12 @@ class GPT4All(LLM):
                 prompt = "Once upon a time, "
                 response = model(prompt, n_predict=55)
         """
+        text_callback = partial(
+            self.callback_manager.on_llm_new_token, verbose=self.verbose
+        )
         text = self.client.generate(
             prompt,
+            new_text_callback=text_callback,
             **self._default_params,
        )
         if stop is not None:
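
The sketch below (not part of the patch itself) pulls the pieces of this diff together into one runnable snippet: it wires a `CallbackManager` with `StreamingStdOutCallbackHandler` into the `GPT4All` wrapper so that the `new_text_callback` path added in `gpt4all.py` streams tokens to stdout. The model path is an assumption; point it at whatever ggml-converted GPT4All model you downloaded.

```python
from langchain.llms import GPT4All
from langchain.callbacks.base import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# StreamingStdOutCallbackHandler prints each token as it arrives; the patched
# GPT4All._call forwards tokens to on_llm_new_token via new_text_callback.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Assumed local path to a ggml-converted GPT4All model; adjust to your setup.
llm = GPT4All(
    model="./models/gpt4all-lora-quantized-ggml.bin",
    n_ctx=512,
    n_threads=8,
    callback_manager=callback_manager,
    verbose=True,  # verbose is required to pass tokens to the callback manager
)

# Tokens are written to stdout as they are generated.
response = llm("Once upon a time, ")
```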