diff --git a/docs/docs/integrations/chat/google_vertex_ai_palm.ipynb b/docs/docs/integrations/chat/google_vertex_ai_palm.ipynb
index 436e2fd142..0858988f0d 100644
--- a/docs/docs/integrations/chat/google_vertex_ai_palm.ipynb
+++ b/docs/docs/integrations/chat/google_vertex_ai_palm.ipynb
@@ -34,13 +34,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
-    "#!pip install langchain google-cloud-aiplatform"
+    "!pip install -U google-cloud-aiplatform"
   ]
  },
  {
@@ -57,41 +57,27 @@
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
-   "outputs": [],
-   "source": [
-    "chat = ChatVertexAI()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "system = \"You are a helpful assistant who translate English to French\"\n",
-    "human = \"Translate this sentence from English to French. I love programming.\"\n",
-    "prompt = ChatPromptTemplate.from_messages([(\"system\", system), (\"human\", human)])\n",
-    "messages = prompt.format_messages()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "AIMessage(content=\" J'aime la programmation.\", additional_kwargs={}, example=False)"
+       "AIMessage(content=\" J'aime la programmation.\")"
      ]
     },
-     "execution_count": 9,
+     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
-    "chat(messages)"
+    "system = \"You are a helpful assistant who translates English to French\"\n",
+    "human = \"Translate this sentence from English to French. I love programming.\"\n",
+    "prompt = ChatPromptTemplate.from_messages([(\"system\", system), (\"human\", human)])\n",
+    "\n",
+    "chat = ChatVertexAI()\n",
+    "\n",
+    "chain = prompt | chat\n",
+    "chain.invoke({})"
   ]
  },
 {
@@ -103,35 +89,29 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "system = (\n",
-    "    \"You are a helpful assistant that translates {input_language} to {output_language}.\"\n",
-    ")\n",
-    "human = \"{text}\"\n",
-    "prompt = ChatPromptTemplate.from_messages([(\"system\", system), (\"human\", human)])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "AIMessage(content=' 私はプログラミングが大好きです。', additional_kwargs={}, example=False)"
+       "AIMessage(content=' プログラミングが大好きです')"
      ]
     },
-     "execution_count": 13,
+     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
+    "system = (\n",
+    "    \"You are a helpful assistant that translates {input_language} to {output_language}.\"\n",
+    ")\n",
+    "human = \"{text}\"\n",
+    "prompt = ChatPromptTemplate.from_messages([(\"system\", system), (\"human\", human)])\n",
+    "\n",
    "chain = prompt | chat\n",
+    "\n",
    "chain.invoke(\n",
    "    {\n",
    "        \"input_language\": \"English\",\n",
@@ -162,20 +142,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "chat = ChatVertexAI(\n",
-    "    model_name=\"codechat-bison\", max_output_tokens=1000, temperature=0.5\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 5,
   "metadata": {
    "tags": []
   },
@@ -185,20 +152,39 @@
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " ```python\n",
-      "def is_prime(x): \n",
-      "    if (x <= 1): \n",
+      "def is_prime(n):\n",
+      "    if n <= 1:\n",
      "        return False\n",
-      "    for i in range(2, x): \n",
-      "        if (x % i == 0): \n",
+      "    for i in range(2, n):\n",
+      "        if n % i == 0:\n",
      "            return False\n",
      "    return True\n",
+      "\n",
+      "def find_prime_numbers(n):\n",
+      "    prime_numbers = []\n",
+      "    for i in range(2, n + 1):\n",
+      "        if is_prime(i):\n",
+      "            prime_numbers.append(i)\n",
+      "    return prime_numbers\n",
+      "\n",
+      "print(find_prime_numbers(100))\n",
+      "```\n",
+      "\n",
+      "Output:\n",
+      "\n",
+      "```\n",
+      "[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97]\n",
      "```\n"
     ]
    }
   ],
   "source": [
-    "# For simple string in string out usage, we can use the `predict` method:\n",
-    "print(chat.predict(\"Write a Python function to identify all prime numbers\"))"
+    "chat = ChatVertexAI(\n",
+    "    model_name=\"codechat-bison\", max_output_tokens=1000, temperature=0.5\n",
+    ")\n",
+    "\n",
+    "message = chat.invoke(\"Write a Python function to identify all prime numbers\")\n",
+    "print(message.content)"
   ]
  },
 {
@@ -207,66 +193,42 @@
   "source": [
    "## Asynchronous calls\n",
    "\n",
-    "We can make asynchronous calls via the `agenerate` and `ainvoke` methods."
+    "We can make asynchronous calls via the Runnables [Async Interface](/docs/expression_language/interface)."
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
+    "# for running these examples in the notebook:\n",
    "import asyncio\n",
    "\n",
-    "# import nest_asyncio\n",
-    "# nest_asyncio.apply()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 35,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "LLMResult(generations=[[ChatGeneration(text=\" J'aime la programmation.\", generation_info=None, message=AIMessage(content=\" J'aime la programmation.\", additional_kwargs={}, example=False))]], llm_output={}, run=[RunInfo(run_id=UUID('223599ef-38f8-4c79-ac6d-a5013060eb9d'))])"
-      ]
-     },
-     "execution_count": 35,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "chat = ChatVertexAI(\n",
-    "    model_name=\"chat-bison\",\n",
-    "    max_output_tokens=1000,\n",
-    "    temperature=0.7,\n",
-    "    top_p=0.95,\n",
-    "    top_k=40,\n",
-    ")\n",
+    "import nest_asyncio\n",
    "\n",
-    "asyncio.run(chat.agenerate([messages]))"
+    "nest_asyncio.apply()"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "AIMessage(content=' अहं प्रोग्रामिंग प्रेमामि', additional_kwargs={}, example=False)"
+       "AIMessage(content=' Why do you love programming?')"
      ]
     },
-     "execution_count": 36,
+     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
+    "chain = prompt | chat\n",
+    "\n",
    "asyncio.run(\n",
    "    chain.ainvoke(\n",
    "        {\n",
@@ -289,56 +251,51 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import sys"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      " 1. China (1,444,216,107)\n",
-      "2. India (1,393,409,038)\n",
-      "3. United States (332,403,650)\n",
-      "4. Indonesia (273,523,615)\n",
-      "5. Pakistan (220,892,340)\n",
-      "6. Brazil (212,559,409)\n",
-      "7. Nigeria (206,139,589)\n",
-      "8. Bangladesh (164,689,383)\n",
-      "9. Russia (145,934,462)\n",
-      "10. Mexico (128,932,488)\n",
-      "11. Japan (126,476,461)\n",
-      "12. Ethiopia (115,063,982)\n",
-      "13. Philippines (109,581,078)\n",
-      "14. Egypt (102,334,404)\n",
-      "15. Vietnam (97,338,589)"
+      " The five most populous countries in the world are:\n",
+      "1. China (1.4 billion)\n",
+      "2. India (1.3 billion)\n",
+      "3. United States (331 million)\n",
+      "4. Indonesia (273 million)\n",
+      "5. Pakistan (220 million)"
     ]
    }
   ],
   "source": [
+    "import sys\n",
+    "\n",
    "prompt = ChatPromptTemplate.from_messages(\n",
-    "    [(\"human\", \"List out the 15 most populous countries in the world\")]\n",
+    "    [(\"human\", \"List out the 5 most populous countries in the world\")]\n",
    ")\n",
-    "messages = prompt.format_messages()\n",
-    "for chunk in chat.stream(messages):\n",
+    "\n",
+    "chat = ChatVertexAI()\n",
+    "\n",
+    "chain = prompt | chat\n",
+    "\n",
+    "for chunk in chain.stream({}):\n",
    "    sys.stdout.write(chunk.content)\n",
    "    sys.stdout.flush()"
   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "poetry-venv",
+   "display_name": "Python 3 (ipykernel)",
   "language": "python",
-   "name": "poetry-venv"
+   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
@@ -350,7 +307,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.9.1"
+   "version": "3.11.4"
  },
  "vscode": {
   "interpreter": {
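For reviewers who want to sanity-check the new notebook flow outside Jupyter, here is a minimal sketch of the LCEL pattern the updated cells now use: a prompt piped into the model with `|`, then `invoke`. It assumes Vertex AI credentials are already configured in the environment (e.g. via `gcloud auth application-default login`), and the import paths follow the library layout at the time of this PR; everything else mirrors the notebook cells above.

```python
# Minimal sketch of the updated notebook's LCEL pattern.
# Assumes Google Cloud credentials for Vertex AI are configured.
from langchain.chat_models import ChatVertexAI
from langchain.prompts import ChatPromptTemplate

system = (
    "You are a helpful assistant that translates {input_language} to {output_language}."
)
human = "{text}"
prompt = ChatPromptTemplate.from_messages([("system", system), ("human", human)])

chat = ChatVertexAI()

# `prompt | chat` composes two Runnables; the resulting chain exposes
# invoke / ainvoke / stream just like the model itself.
chain = prompt | chat
message = chain.invoke(
    {
        "input_language": "English",
        "output_language": "French",
        "text": "I love programming.",
    }
)
print(message.content)
```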
diff --git a/libs/langchain/langchain/chat_models/vertexai.py b/libs/langchain/langchain/chat_models/vertexai.py
index d6010e27fc..2902945cc2 100644
--- a/libs/langchain/langchain/chat_models/vertexai.py
+++ b/libs/langchain/langchain/chat_models/vertexai.py
@@ -242,7 +242,7 @@ class ChatVertexAI(_VertexAICommon, BaseChatModel):
     ) -> Iterator[ChatGenerationChunk]:
         question = _get_question(messages)
         history = _parse_chat_history(messages[:-1])
-        params = self._prepare_params(stop=stop, **kwargs)
+        params = self._prepare_params(stop=stop, stream=True, **kwargs)
         examples = kwargs.get("examples", None)
         if examples:
             params["examples"] = _parse_examples(examples)
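The one-line change above is the substance of the fix: `_stream` now tells parameter preparation that a streaming call is coming. A quick manual check of the streaming path, mirroring the integration test added below (assumes Vertex AI credentials are configured; the prompt text is arbitrary):

```python
from langchain.chat_models import ChatVertexAI
from langchain_core.messages import AIMessageChunk, HumanMessage

chat = ChatVertexAI(temperature=0)

# stream() yields AIMessageChunk objects as tokens arrive from the model.
for chunk in chat.stream([HumanMessage(content="Hello")]):
    assert isinstance(chunk, AIMessageChunk)
    print(chunk.content, end="", flush=True)
```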
diff --git a/libs/langchain/tests/integration_tests/chat_models/test_vertexai.py b/libs/langchain/tests/integration_tests/chat_models/test_vertexai.py
index 47cd280b8e..d8da1e31eb 100644
--- a/libs/langchain/tests/integration_tests/chat_models/test_vertexai.py
+++ b/libs/langchain/tests/integration_tests/chat_models/test_vertexai.py
@@ -11,7 +11,12 @@ from typing import Optional
 from unittest.mock import MagicMock, Mock, patch
 
 import pytest
-from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
+from langchain_core.messages import (
+    AIMessage,
+    AIMessageChunk,
+    HumanMessage,
+    SystemMessage,
+)
 from langchain_core.outputs import LLMResult
 
 from langchain.chat_models import ChatVertexAI
@@ -41,6 +46,7 @@ def test_vertexai_single_call(model_name: str) -> None:
     assert isinstance(response.content, str)
 
 
+@pytest.mark.scheduled
 def test_candidates() -> None:
     model = ChatVertexAI(model_name="chat-bison@001", temperature=0.3, n=2)
     message = HumanMessage(content="Hello")
@@ -62,6 +68,16 @@ async def test_vertexai_agenerate() -> None:
     assert response.generations[0][0] == sync_response.generations[0][0]
 
 
+@pytest.mark.scheduled
+async def test_vertexai_stream() -> None:
+    model = ChatVertexAI(temperature=0)
+    message = HumanMessage(content="Hello")
+
+    sync_response = model.stream([message])
+    for chunk in sync_response:
+        assert isinstance(chunk, AIMessageChunk)
+
+
 @pytest.mark.scheduled
 def test_vertexai_single_call_with_context() -> None:
     model = ChatVertexAI()
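The new `test_vertexai_stream` only covers the synchronous generator. A possible async companion, not part of this diff, could exercise `astream` as well. Note that it would rely on the default `astream` behavior `BaseChatModel` inherits (ChatVertexAI does not define `_astream` here), so the sketch asserts on chunk content rather than chunk type, which is safe under either code path:

```python
import pytest
from langchain_core.messages import HumanMessage

from langchain.chat_models import ChatVertexAI


@pytest.mark.scheduled
async def test_vertexai_astream() -> None:
    # Hypothetical async counterpart of test_vertexai_stream: consume the
    # async generator and check that each chunk carries string content.
    model = ChatVertexAI(temperature=0)
    message = HumanMessage(content="Hello")

    async for chunk in model.astream([message]):
        assert isinstance(chunk.content, str)
```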