diff --git a/docs/extras/modules/model_io/models/llms/integrations/huggingface_textgen_inference.ipynb b/docs/extras/modules/model_io/models/llms/integrations/huggingface_textgen_inference.ipynb index 72c79cee7b..6aacfc8a31 100644 --- a/docs/extras/modules/model_io/models/llms/integrations/huggingface_textgen_inference.ipynb +++ b/docs/extras/modules/model_io/models/llms/integrations/huggingface_textgen_inference.ipynb @@ -48,6 +48,36 @@ ")\n", "llm(\"What did foo say about bar?\")" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Streaming" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.llms import HuggingFaceTextGenInference\n", + "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", + "\n", + "\n", + "llm = HuggingFaceTextGenInference(\n", + " inference_server_url=\"http://localhost:8010/\",\n", + " max_new_tokens=512,\n", + " top_k=10,\n", + " top_p=0.95,\n", + " typical_p=0.95,\n", + " temperature=0.01,\n", + " repetition_penalty=1.03,\n", + " stream=True\n", + ")\n", + "llm(\"What did foo say about bar?\", callbacks=[StreamingStdOutCallbackHandler()])" + ] } ], "metadata": { diff --git a/docs/snippets/modules/model_io/models/llms/how_to/streaming_llm.mdx b/docs/snippets/modules/model_io/models/llms/how_to/streaming_llm.mdx index f15474a7e9..88240bd1c1 100644 --- a/docs/snippets/modules/model_io/models/llms/how_to/streaming_llm.mdx +++ b/docs/snippets/modules/model_io/models/llms/how_to/streaming_llm.mdx @@ -1,5 +1,4 @@ -Currently, we support streaming for the `OpenAI`, `ChatOpenAI`, and `ChatAnthropic` implementations. To utilize streaming, use a [`CallbackHandler`](https://github.com/hwchase17/langchain/blob/master/langchain/callbacks/base.py) that implements `on_llm_new_token`. In this example, we are using `StreamingStdOutCallbackHandler`. 
- +Currently, we support streaming for a broad range of LLM implementations, including but not limited to `OpenAI`, `ChatOpenAI`, `ChatAnthropic`, `Hugging Face Text Generation Inference`, and `Replicate`. This feature has been expanded to accommodate most models. To utilize streaming, use a [`CallbackHandler`](https://github.com/hwchase17/langchain/blob/master/langchain/callbacks/base.py) that implements `on_llm_new_token`. In this example, we are using `StreamingStdOutCallbackHandler`. ```python from langchain.llms import OpenAI from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler