Merge branch 'master' into fix_split_text_with_regex

pull/21665/head
ymh823680483 3 weeks ago committed by GitHub
commit f71359e4a2

@@ -0,0 +1,446 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "9d59582a-6473-4b34-929b-3e94cb443c3d",
"metadata": {},
"source": [
"# How to add scores to retriever results\n",
"\n",
"Retrievers will return sequences of [Document](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html) objects, which by default include no information about the process that retrieved them (e.g., a similarity score against a query). Here we demonstrate how to add retrieval scores to the `.metadata` of documents:\n",
"1. From [vectorstore retrievers](/docs/how_to/vectorstore_retriever);\n",
"2. From higher-order LangChain retrievers, such as [SelfQueryRetriever](/docs/how_to/self_query) or [MultiVectorRetriever](/docs/how_to/multi_vector).\n",
"\n",
"For (1), we will implement a short wrapper function around the corresponding vector store. For (2), we will update a method of the corresponding class.\n",
"\n",
"## Create vector store\n",
"\n",
"First we populate a vector store with some data. We will use a [PineconeVectorStore](https://api.python.langchain.com/en/latest/vectorstores/langchain_pinecone.vectorstores.PineconeVectorStore.html), but this guide is compatible with any LangChain vector store that implements a `.similarity_search_with_score` method."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "b8cfcb1b-64ee-4b91-8d82-ce7803834985",
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.documents import Document\n",
"from langchain_openai import OpenAIEmbeddings\n",
"from langchain_pinecone import PineconeVectorStore\n",
"\n",
"docs = [\n",
" Document(\n",
" page_content=\"A bunch of scientists bring back dinosaurs and mayhem breaks loose\",\n",
" metadata={\"year\": 1993, \"rating\": 7.7, \"genre\": \"science fiction\"},\n",
" ),\n",
" Document(\n",
" page_content=\"Leo DiCaprio gets lost in a dream within a dream within a dream within a ...\",\n",
" metadata={\"year\": 2010, \"director\": \"Christopher Nolan\", \"rating\": 8.2},\n",
" ),\n",
" Document(\n",
" page_content=\"A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea\",\n",
" metadata={\"year\": 2006, \"director\": \"Satoshi Kon\", \"rating\": 8.6},\n",
" ),\n",
" Document(\n",
" page_content=\"A bunch of normal-sized women are supremely wholesome and some men pine after them\",\n",
" metadata={\"year\": 2019, \"director\": \"Greta Gerwig\", \"rating\": 8.3},\n",
" ),\n",
" Document(\n",
" page_content=\"Toys come alive and have a blast doing so\",\n",
" metadata={\"year\": 1995, \"genre\": \"animated\"},\n",
" ),\n",
" Document(\n",
" page_content=\"Three men walk into the Zone, three men walk out of the Zone\",\n",
" metadata={\n",
" \"year\": 1979,\n",
" \"director\": \"Andrei Tarkovsky\",\n",
" \"genre\": \"thriller\",\n",
" \"rating\": 9.9,\n",
" },\n",
" ),\n",
"]\n",
"\n",
"vectorstore = PineconeVectorStore.from_documents(\n",
" docs, index_name=\"sample\", embedding=OpenAIEmbeddings()\n",
")"
]
},
{
"cell_type": "markdown",
"id": "22ac5ef6-ce18-427f-a91c-62b38a8b41e9",
"metadata": {},
"source": [
"## Retriever\n",
"\n",
"To obtain scores from a vector store retriever, we wrap the underlying vector store's `.similarity_search_with_score` method in a short function that packages scores into the associated document's metadata.\n",
"\n",
"We add a `@chain` decorator to the function to create a [Runnable](/docs/concepts/#langchain-expression-language) that can be used similarly to a typical retriever."
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "7e5677c3-f6ee-4974-ab5f-a0f50c199d45",
"metadata": {},
"outputs": [],
"source": [
"from typing import List\n",
"\n",
"from langchain_core.documents import Document\n",
"from langchain_core.runnables import chain\n",
"\n",
"\n",
"@chain\n",
"def retriever(query: str) -> List[Document]:\n",
" docs, scores = zip(*vectorstore.similarity_search_with_score(query))\n",
" for doc, score in zip(docs, scores):\n",
" doc.metadata[\"score\"] = score\n",
"\n",
" return docs"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c9cad75e-b955-4012-989c-3c1820b49ba9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'genre': 'science fiction', 'rating': 7.7, 'year': 1993.0, 'score': 0.84429127}),\n",
" Document(page_content='Toys come alive and have a blast doing so', metadata={'genre': 'animated', 'year': 1995.0, 'score': 0.792038262}),\n",
" Document(page_content='Three men walk into the Zone, three men walk out of the Zone', metadata={'director': 'Andrei Tarkovsky', 'genre': 'thriller', 'rating': 9.9, 'year': 1979.0, 'score': 0.751571238}),\n",
" Document(page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea', metadata={'director': 'Satoshi Kon', 'rating': 8.6, 'year': 2006.0, 'score': 0.747471571}))"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result = retriever.invoke(\"dinosaur\")\n",
"result"
]
},
{
"cell_type": "markdown",
"id": "6671308a-be8d-4c15-ae1f-5bd07b342560",
"metadata": {},
"source": [
"Note that similarity scores from the retrieval step are included in the metadata of the above documents."
]
},
{
"cell_type": "markdown",
"id": "af2e73a0-46a1-47e2-8103-68aaa637642a",
"metadata": {},
"source": [
"## SelfQueryRetriever\n",
"\n",
"`SelfQueryRetriever` will use an LLM to generate a query that is potentially structured; for example, it can construct filters for the retrieval on top of the usual semantic-similarity-driven selection. See [this guide](/docs/how_to/self_query) for more detail.\n",
"\n",
"`SelfQueryRetriever` includes a short (1-2 line) method `_get_docs_with_query` that executes the `vectorstore` search. We can subclass `SelfQueryRetriever` and override this method to propagate similarity scores.\n",
"\n",
"First, following the [how-to guide](/docs/how_to/self_query), we will need to establish some metadata on which to filter:"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "8280b829-2e81-4454-8adc-9a0930047fa2",
"metadata": {},
"outputs": [],
"source": [
"from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"metadata_field_info = [\n",
" AttributeInfo(\n",
" name=\"genre\",\n",
" description=\"The genre of the movie. One of ['science fiction', 'comedy', 'drama', 'thriller', 'romance', 'action', 'animated']\",\n",
" type=\"string\",\n",
" ),\n",
" AttributeInfo(\n",
" name=\"year\",\n",
" description=\"The year the movie was released\",\n",
" type=\"integer\",\n",
" ),\n",
" AttributeInfo(\n",
" name=\"director\",\n",
" description=\"The name of the movie director\",\n",
" type=\"string\",\n",
" ),\n",
" AttributeInfo(\n",
" name=\"rating\", description=\"A 1-10 rating for the movie\", type=\"float\"\n",
" ),\n",
"]\n",
"document_content_description = \"Brief summary of a movie\"\n",
"llm = ChatOpenAI(temperature=0)"
]
},
{
"cell_type": "markdown",
"id": "0a6c6fa8-1e2f-45ee-83e9-a6cbd82292d2",
"metadata": {},
"source": [
"We then override `_get_docs_with_query` to call the `similarity_search_with_score` method of the underlying vector store:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "62c8f3fa-8b64-4afb-87c4-ccbbf9a8bc54",
"metadata": {},
"outputs": [],
"source": [
"from typing import Any, Dict\n",
"\n",
"\n",
"class CustomSelfQueryRetriever(SelfQueryRetriever):\n",
" def _get_docs_with_query(\n",
" self, query: str, search_kwargs: Dict[str, Any]\n",
" ) -> List[Document]:\n",
" \"\"\"Get docs, adding score information.\"\"\"\n",
" docs, scores = zip(\n",
" *vectorstore.similarity_search_with_score(query, **search_kwargs)\n",
" )\n",
" for doc, score in zip(docs, scores):\n",
" doc.metadata[\"score\"] = score\n",
"\n",
" return docs"
]
},
{
"cell_type": "markdown",
"id": "56e40109-1db6-44c7-a6e6-6989175e267c",
"metadata": {},
"source": [
"Invoking this retriever will now include similarity scores in the document metadata. Note that the underlying structured-query capabilities of `SelfQueryRetriever` are retained."
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "3359a1ee-34ff-41b6-bded-64c05785b333",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(Document(page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose', metadata={'genre': 'science fiction', 'rating': 7.7, 'year': 1993.0, 'score': 0.84429127}),)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retriever = CustomSelfQueryRetriever.from_llm(\n",
" llm,\n",
" vectorstore,\n",
" document_content_description,\n",
" metadata_field_info,\n",
")\n",
"\n",
"\n",
"result = retriever.invoke(\"dinosaur movie with rating less than 8\")\n",
"result"
]
},
{
"cell_type": "markdown",
"id": "689ab3ba-3494-448b-836e-05fbe1ffd51c",
"metadata": {},
"source": [
"## MultiVectorRetriever\n",
"\n",
"`MultiVectorRetriever` allows you to associate multiple vectors with a single document. This can be useful in a number of applications. For example, we can index small chunks of a larger document and run the retrieval on the chunks, but return the larger \"parent\" document when invoking the retriever. [ParentDocumentRetriever](/docs/how_to/parent_document_retriever/), a subclass of `MultiVectorRetriever`, includes convenience methods for populating a vector store to support this. Further applications are detailed in this [how-to guide](/docs/how_to/multi_vector/).\n",
"\n",
"To propagate similarity scores through this retriever, we can again subclass `MultiVectorRetriever` and override a method. This time we will override `_get_relevant_documents`.\n",
"\n",
"First, we prepare some fake data. We generate fake \"whole documents\" and store them in a document store; here we will use a simple [InMemoryStore](https://api.python.langchain.com/en/latest/stores/langchain_core.stores.InMemoryBaseStore.html)."
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "a112e545-7b53-4fcd-9c4a-7a42a5cc646d",
"metadata": {},
"outputs": [],
"source": [
"from langchain.storage import InMemoryStore\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n",
"# The storage layer for the parent documents\n",
"docstore = InMemoryStore()\n",
"fake_whole_documents = [\n",
" (\"fake_id_1\", Document(page_content=\"fake whole document 1\")),\n",
" (\"fake_id_2\", Document(page_content=\"fake whole document 2\")),\n",
"]\n",
"docstore.mset(fake_whole_documents)"
]
},
{
"cell_type": "markdown",
"id": "453b7415-4a6d-45d4-a329-9c1d7271d1b2",
"metadata": {},
"source": [
"Next we will add some fake \"sub-documents\" to our vector store. We can link these sub-documents to the parent documents by populating the `\"doc_id\"` key in their metadata."
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "314519c0-dde4-41ea-a1ab-d3cf1c17c63f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['62a85353-41ff-4346-bff7-be6c8ec2ed89',\n",
" '5d4a0e83-4cc5-40f1-bc73-ed9cbad0ee15',\n",
" '8c1d9a56-120f-45e4-ba70-a19cd19a38f4']"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"docs = [\n",
" Document(\n",
" page_content=\"A snippet from a larger document discussing cats.\",\n",
" metadata={\"doc_id\": \"fake_id_1\"},\n",
" ),\n",
" Document(\n",
" page_content=\"A snippet from a larger document discussing discourse.\",\n",
" metadata={\"doc_id\": \"fake_id_1\"},\n",
" ),\n",
" Document(\n",
" page_content=\"A snippet from a larger document discussing chocolate.\",\n",
" metadata={\"doc_id\": \"fake_id_2\"},\n",
" ),\n",
"]\n",
"\n",
"vectorstore.add_documents(docs)"
]
},
{
"cell_type": "markdown",
"id": "e391f7f3-5a58-40fd-89fa-a0815c5146f7",
"metadata": {},
"source": [
"To propagate the scores, we subclass `MultiVectorRetriever` and override its `_get_relevant_documents` method. Here we will make two changes:\n",
"\n",
"1. We will add similarity scores to the metadata of the corresponding \"sub-documents\" using the `similarity_search_with_score` method of the underlying vector store as above;\n",
"2. We will include a list of these sub-documents in the metadata of the retrieved parent document. This surfaces what snippets of text were identified by the retrieval, together with their corresponding similarity scores."
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "1de61de7-1b58-41d6-9dea-939fef7d741d",
"metadata": {},
"outputs": [],
"source": [
"from collections import defaultdict\n",
"\n",
"from langchain.retrievers import MultiVectorRetriever\n",
"from langchain_core.callbacks import CallbackManagerForRetrieverRun\n",
"\n",
"\n",
"class CustomMultiVectorRetriever(MultiVectorRetriever):\n",
" def _get_relevant_documents(\n",
" self, query: str, *, run_manager: CallbackManagerForRetrieverRun\n",
" ) -> List[Document]:\n",
" \"\"\"Get documents relevant to a query.\n",
" Args:\n",
" query: String to find relevant documents for\n",
" run_manager: The callbacks handler to use\n",
" Returns:\n",
" List of relevant documents\n",
" \"\"\"\n",
" results = self.vectorstore.similarity_search_with_score(\n",
" query, **self.search_kwargs\n",
" )\n",
"\n",
" # Map doc_ids to list of sub-documents, adding scores to metadata\n",
" id_to_doc = defaultdict(list)\n",
" for doc, score in results:\n",
" doc_id = doc.metadata.get(\"doc_id\")\n",
" if doc_id:\n",
" doc.metadata[\"score\"] = score\n",
" id_to_doc[doc_id].append(doc)\n",
"\n",
" # Fetch documents corresponding to doc_ids, retaining sub_docs in metadata\n",
" docs = []\n",
" for _id, sub_docs in id_to_doc.items():\n",
" docstore_docs = self.docstore.mget([_id])\n",
" if docstore_docs:\n",
" if doc := docstore_docs[0]:\n",
" doc.metadata[\"sub_docs\"] = sub_docs\n",
" docs.append(doc)\n",
"\n",
" return docs"
]
},
{
"cell_type": "markdown",
"id": "7af27b38-631c-463f-9d66-bcc985f06a4f",
"metadata": {},
"source": [
"Invoking this retriever, we can see that it identifies the correct parent document, including the relevant snippet from the sub-document together with its similarity score."
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "dc42a1be-22e1-4ade-b1bd-bafb85f2424f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='fake whole document 1', metadata={'sub_docs': [Document(page_content='A snippet from a larger document discussing cats.', metadata={'doc_id': 'fake_id_1', 'score': 0.831276655})]})]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"retriever = CustomMultiVectorRetriever(vectorstore=vectorstore, docstore=docstore)\n",
"\n",
"retriever.invoke(\"cat\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
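Outside of LangChain itself, the score-propagation pattern used in the notebook above is small enough to sketch in plain Python. In this sketch, `FakeVectorStore` and its lexical-overlap scoring are hypothetical stand-ins for any vector store exposing a `similarity_search_with_score(query)` method that returns `(document, score)` pairs:

```python
from dataclasses import dataclass, field
from typing import Any, Dict, List, Tuple


@dataclass
class Document:
    page_content: str
    metadata: Dict[str, Any] = field(default_factory=dict)


class FakeVectorStore:
    """Hypothetical stand-in for a vector store with similarity_search_with_score."""

    def __init__(self, docs: List[Document]):
        self.docs = docs

    def similarity_search_with_score(self, query: str) -> List[Tuple[Document, float]]:
        # Illustrative "similarity": fraction of query words found in the document.
        words = query.lower().split()
        scored = [
            (doc, sum(w in doc.page_content.lower() for w in words) / len(words))
            for doc in self.docs
        ]
        return sorted(scored, key=lambda pair: pair[1], reverse=True)


def retriever(vectorstore: FakeVectorStore, query: str) -> List[Document]:
    """Run the search, then package each score into its document's metadata."""
    docs, scores = zip(*vectorstore.similarity_search_with_score(query))
    for doc, score in zip(docs, scores):
        doc.metadata["score"] = score
    return list(docs)


store = FakeVectorStore(
    [
        Document("Toys come alive and have a blast doing so"),
        Document("A bunch of scientists bring back dinosaurs"),
    ]
)
results = retriever(store, "dinosaurs")
print(results[0].metadata["score"])  # -> 1.0 for the matching document
```

As in the notebook, the wrapper leaves the search itself untouched and only annotates each hit's metadata, so downstream code can treat the result like any other list of documents.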

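Similarly, the grouping step inside `CustomMultiVectorRetriever._get_relevant_documents` is ordinary dictionary bookkeeping. A minimal sketch, with plain dicts standing in for `Document` objects and for the docstore (all ids, scores, and snippets below are illustrative):

```python
from collections import defaultdict

# (doc_id, score, snippet) triples standing in for scored sub-document hits.
hits = [
    ("fake_id_1", 0.83, "A snippet from a larger document discussing cats."),
    ("fake_id_2", 0.41, "A snippet from a larger document discussing chocolate."),
    ("fake_id_1", 0.37, "A snippet from a larger document discussing discourse."),
]

# Stand-in for the docstore holding the "whole" parent documents.
docstore = {
    "fake_id_1": {"page_content": "fake whole document 1", "metadata": {}},
    "fake_id_2": {"page_content": "fake whole document 2", "metadata": {}},
}

# Group scored sub-documents under their parent id...
id_to_subdocs = defaultdict(list)
for doc_id, score, snippet in hits:
    id_to_subdocs[doc_id].append({"page_content": snippet, "score": score})

# ...then fetch each parent and attach its scored sub-documents to metadata.
parents = []
for doc_id, sub_docs in id_to_subdocs.items():
    parent = docstore.get(doc_id)
    if parent:
        parent["metadata"]["sub_docs"] = sub_docs
        parents.append(parent)
```

Each parent document now surfaces exactly which snippets matched the query and with what scores, which is the behavior the overridden method adds on top of `MultiVectorRetriever`.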
@@ -696,7 +696,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
"version": "3.9.1"
}
},
"nbformat": 4,

@@ -143,6 +143,7 @@ Retrievers are responsible for taking a query and returning relevant documents.
- [How to: generate multiple queries to retrieve data for](/docs/how_to/MultiQueryRetriever)
- [How to: use contextual compression to compress the data retrieved](/docs/how_to/contextual_compression)
- [How to: write a custom retriever class](/docs/how_to/custom_retriever)
- [How to: add similarity scores to retriever results](/docs/how_to/add_scores_retriever)
- [How to: combine the results from multiple retrievers](/docs/how_to/ensemble_retriever)
- [How to: reorder retrieved results to put most relevant documents not in the middle](/docs/how_to/long_context_reorder)
- [How to: generate multiple embeddings per document](/docs/how_to/multi_vector)
@@ -171,6 +172,7 @@ LangChain Tools contain a description of the tool (to pass to the language model
- [How to: add a human in the loop to tool usage](/docs/how_to/tools_human)
- [How to: do parallel tool use](/docs/how_to/tools_parallel)
- [How to: handle errors when calling tools](/docs/how_to/tools_error)
- [How to: call tools using multi-modal data](/docs/how_to/tool_calls_multi_modal)
### Agents

@@ -24,18 +24,21 @@
"- [Function/tool calling](/docs/concepts/#functiontool-calling)\n",
":::\n",
"\n",
"It is often useful to have a model return output that matches some specific schema. One common use-case is extracting data from arbitrary text to insert into a traditional database or use with some other downstrem system. This guide will show you a few different strategies you can use to do this.\n",
"\n",
"It is often useful to have a model return output that matches a specific schema. One common use-case is extracting data from text to insert into a database or use with some other downstream system. This guide covers a few strategies for getting structured outputs from a model.\n",
"\n",
"## The `.with_structured_output()` method\n",
"\n",
"There are several strategies that models can use under the hood. For some of the most popular model providers, including [OpenAI](/docs/integrations/platforms/openai/), [Anthropic](/docs/integrations/platforms/anthropic/), and [Mistral](/docs/integrations/providers/mistralai/), LangChain implements a common interface, called `.with_structured_output`, that abstracts away these strategies.\n",
":::info Supported models\n",
"\n",
"You can find a [list of models that support this method here](/docs/integrations/chat/).\n",
"\n",
"By invoking this method (and passing in [JSON schema](https://json-schema.org/) or a [Pydantic](https://docs.pydantic.dev/latest/) model) the model will add whatever model parameters + output parsers are necessary to get back structured output matching the requested schema. If the model supports more than one way to do this (e.g., function calling vs JSON mode) - you can configure which method to use by passing into that method.\n",
":::\n",
"\n",
"You can find the [current list of models that support this method here](/docs/integrations/chat/).\n",
"This is the easiest and most reliable way to get structured outputs. `with_structured_output()` is implemented for models that provide native APIs for structuring outputs, like tool/function calling or JSON mode, and makes use of these capabilities under the hood.\n",
"\n",
"Let's look at some examples of this in action! We'll use Pydantic to create a simple response schema.\n",
"This method takes a schema as input which specifies the names, types, and descriptions of the desired output attributes. The method returns a model-like Runnable, except that instead of outputting strings or Messages it outputs objects corresponding to the given schema. The schema can be specified as a [JSON Schema](https://json-schema.org/) or a Pydantic class. If JSON Schema is used then a dictionary will be returned by the Runnable, and if a Pydantic class is used then Pydantic objects will be returned.\n",
"\n",
"As an example, let's get a model to generate a joke and separate the setup from the punchline:\n",
"\n",
"```{=mdx}\n",
"import ChatModelTabs from \"@theme/ChatModelTabs\";\n",
@@ -58,25 +61,30 @@
"\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"llm = ChatOpenAI(\n",
" model=\"gpt-4-0125-preview\",\n",
" temperature=0,\n",
")"
"llm = ChatOpenAI(model=\"gpt-4-0125-preview\", temperature=0)"
]
},
{
"cell_type": "markdown",
"id": "a808a401-be1f-49f9-ad13-58dd68f7db5f",
"metadata": {},
"source": [
"If we want the model to return a Pydantic object, we just need to pass in the desired Pydantic class:"
]
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 38,
"id": "070bf702",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Joke(setup='Why was the cat sitting on the computer?', punchline='Because it wanted to keep an eye on the mouse!', rating=None)"
"Joke(setup='Why was the cat sitting on the computer?', punchline='To keep an eye on the mouse!', rating=None)"
]
},
"execution_count": 13,
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
@@ -88,6 +96,8 @@
"\n",
"\n",
"class Joke(BaseModel):\n",
" \"\"\"Joke to tell user.\"\"\"\n",
"\n",
" setup: str = Field(description=\"The setup of the joke\")\n",
" punchline: str = Field(description=\"The punchline to the joke\")\n",
" rating: Optional[int] = Field(description=\"How funny the joke is, from 1 to 10\")\n",
@@ -98,25 +108,27 @@
"structured_llm.invoke(\"Tell me a joke about cats\")"
]
},
{
"cell_type": "markdown",
"id": "00890a47-3cdf-4805-b8f1-6d110f0633d3",
"metadata": {},
"source": [
":::tip\n",
"Beyond just the structure of the Pydantic class, the name of the Pydantic class, the docstring, and the names and provided descriptions of parameters are very important. Most of the time `with_structured_output` is using a model's function/tool calling API, and you can effectively think of all of this information as being added to the model prompt.\n",
":::"
]
},
{
"cell_type": "markdown",
"id": "deddb6d3",
"metadata": {},
"source": [
"The result is a Pydantic model. Note that the name of the model and the names and provided descriptions of parameters are very important, as they help guide the model's output.\n",
"\n",
"We can also pass in an OpenAI-style JSON schema dict if you prefer not to use Pydantic. This dict should contain three properties:\n",
"\n",
"- `name`: The name of the schema to output.\n",
"- `description`: A high level description of the schema to output.\n",
"- `parameters`: The nested details of the schema you want to extract, formatted as a [JSON schema](https://json-schema.org/) dict.\n",
"\n",
"In this case, the response is also a dict:"
"We can also pass in a [JSON Schema](https://json-schema.org/) dict if you prefer not to use Pydantic. In this case, the response is also a dict:"
]
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 8,
"id": "6700994a",
"metadata": {},
"outputs": [
@@ -124,30 +136,37 @@
"data": {
"text/plain": [
"{'setup': 'Why was the cat sitting on the computer?',\n",
" 'punchline': 'To keep an eye on the mouse!'}"
" 'punchline': 'Because it wanted to keep an eye on the mouse!',\n",
" 'rating': 8}"
]
},
"execution_count": 3,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"structured_llm = llm.with_structured_output(\n",
" {\n",
" \"name\": \"joke\",\n",
" \"description\": \"Joke to tell user.\",\n",
" \"parameters\": {\n",
" \"title\": \"Joke\",\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"setup\": {\"type\": \"string\", \"description\": \"The setup for the joke\"},\n",
" \"punchline\": {\"type\": \"string\", \"description\": \"The joke's punchline\"},\n",
" },\n",
" \"required\": [\"setup\", \"punchline\"],\n",
"json_schema = {\n",
" \"title\": \"joke\",\n",
" \"description\": \"Joke to tell user.\",\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"setup\": {\n",
" \"type\": \"string\",\n",
" \"description\": \"The setup of the joke\",\n",
" },\n",
" }\n",
")\n",
" \"punchline\": {\n",
" \"type\": \"string\",\n",
" \"description\": \"The punchline to the joke\",\n",
" },\n",
" \"rating\": {\n",
" \"type\": \"integer\",\n",
" \"description\": \"How funny the joke is, from 1 to 10\",\n",
" },\n",
" },\n",
" \"required\": [\"setup\", \"punchline\"],\n",
"}\n",
"structured_llm = llm.with_structured_output(json_schema)\n",
"\n",
"structured_llm.invoke(\"Tell me a joke about cats\")"
]
@@ -159,7 +178,7 @@
"source": [
"### Choosing between multiple schemas\n",
"\n",
"If you have multiple schemas that are valid outputs for the model, you can use Pydantic's `Union` type:"
"The simplest way to let the model choose from multiple schemas is to create a parent Pydantic class that has a Union-typed attribute:"
]
},
{
@@ -171,7 +190,7 @@
{
"data": {
"text/plain": [
"Response(output=Joke(setup='Why was the cat sitting on the computer?', punchline='Because it wanted to keep an eye on the mouse!'))"
"Response(output=Joke(setup='Why was the cat sitting on the computer?', punchline='To keep an eye on the mouse!', rating=8))"
]
},
"execution_count": 4,
@@ -182,15 +201,10 @@
"source": [
"from typing import Union\n",
"\n",
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
"\n",
"\n",
"class Joke(BaseModel):\n",
" setup: str = Field(description=\"The setup of the joke\")\n",
" punchline: str = Field(description=\"The punchline to the joke\")\n",
"\n",
"\n",
"class ConversationalResponse(BaseModel):\n",
" \"\"\"Respond in a conversational manner. Be kind and helpful.\"\"\"\n",
"\n",
" response: str = Field(description=\"A conversational response to the user's query\")\n",
"\n",
"\n",
@@ -212,7 +226,7 @@
{
"data": {
"text/plain": [
"Response(output=ConversationalResponse(response=\"I'm just a collection of code, so I don't have feelings, but thanks for asking! How can I assist you today?\"))"
"Response(output=ConversationalResponse(response=\"I'm just a digital assistant, so I don't have feelings, but I'm here and ready to help you. How can I assist you today?\"))"
]
},
"execution_count": 5,
@@ -229,9 +243,225 @@
"id": "e28c14d3",
"metadata": {},
"source": [
"If you are using JSON Schema, you can take advantage of other more complex schema descriptions to create a similar effect.\n",
"Alternatively, you can use tool calling directly to allow the model to choose between options, if your [chosen model supports it](/docs/integrations/chat/). This involves a bit more parsing and setup but in some instances leads to better performance because you don't have to use nested schemas. See [this how-to guide](/docs/how_to/tool_calling/) for more details."
]
},
{
"cell_type": "markdown",
"id": "9a40f703-7fd2-4fe0-ab2a-fa2d711ba009",
"metadata": {},
"source": [
"### Streaming\n",
"\n",
"We can stream outputs from our structured model when the output type is a dict (i.e., when the schema is specified as a JSON Schema dict). \n",
"\n",
":::info\n",
"\n",
"Note that what's yielded is already aggregated chunks, not deltas.\n",
"\n",
":::"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "aff89877-28a3-472f-a1aa-eff893fe7736",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{}\n",
"{'setup': ''}\n",
"{'setup': 'Why'}\n",
"{'setup': 'Why was'}\n",
"{'setup': 'Why was the'}\n",
"{'setup': 'Why was the cat'}\n",
"{'setup': 'Why was the cat sitting'}\n",
"{'setup': 'Why was the cat sitting on'}\n",
"{'setup': 'Why was the cat sitting on the'}\n",
"{'setup': 'Why was the cat sitting on the computer'}\n",
"{'setup': 'Why was the cat sitting on the computer?'}\n",
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': ''}\n",
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because'}\n",
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it'}\n",
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted'}\n",
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to'}\n",
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep'}\n",
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an'}\n",
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an eye'}\n",
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an eye on'}\n",
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an eye on the'}\n",
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an eye on the mouse'}\n",
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an eye on the mouse!'}\n",
"{'setup': 'Why was the cat sitting on the computer?', 'punchline': 'Because it wanted to keep an eye on the mouse!', 'rating': 8}\n"
]
}
],
"source": [
"structured_llm = llm.with_structured_output(json_schema)\n",
"\n",
"for chunk in structured_llm.stream(\"Tell me a joke about cats\"):\n",
" print(chunk)"
]
},
{
"cell_type": "markdown",
"id": "0a526cdf-e736-451b-96be-22e8986d3863",
"metadata": {},
"source": [
"### Few-shot prompting\n",
"\n",
"For more complex schemas it's very useful to add few-shot examples to the prompt. This can be done in a few ways.\n",
"\n",
"The simplest and most universal way is to add examples to a system message in the prompt:"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "283ba784-2072-47ee-9b2c-1119e3c69e8e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'setup': 'Woodpecker',\n",
" 'punchline': \"Woodpecker goes 'knock knock', but don't worry, they never expect you to answer the door!\",\n",
" 'rating': 8}"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_core.prompts import ChatPromptTemplate\n",
"\n",
"system = \"\"\"You are a hilarious comedian. Your specialty is knock-knock jokes. \\\n",
"Return a joke which has the setup (the response to \"Who's there?\") and the final punchline (the response to \"<setup> who?\").\n",
"\n",
"Here are some examples of jokes:\n",
"\n",
"example_user: Tell me a joke about planes\n",
"example_assistant: {{\"setup\": \"Why don't planes ever get tired?\", \"punchline\": \"Because they have rest wings!\", \"rating\": 2}}\n",
"\n",
"example_user: Tell me another joke about planes\n",
"example_assistant: {{\"setup\": \"Cargo\", \"punchline\": \"Cargo 'vroom vroom', but planes go 'zoom zoom'!\", \"rating\": 10}}\n",
"\n",
"example_user: Now about caterpillars\n",
"example_assistant: {{\"setup\": \"Caterpillar\", \"punchline\": \"Caterpillar really slow, but watch me turn into a butterfly and steal the show!\", \"rating\": 5}}\"\"\"\n",
"\n",
"prompt = ChatPromptTemplate.from_messages([(\"system\", system), (\"human\", \"{input}\")])\n",
"\n",
"few_shot_structured_llm = prompt | structured_llm\n",
"few_shot_structured_llm.invoke(\"what's something funny about woodpeckers\")"
]
},
{
"cell_type": "markdown",
"id": "3c12b389-153d-44d1-af34-37e5b926d3db",
"metadata": {},
"source": [
"When the underlying method for structuring outputs is tool calling, we can pass in our examples as explicit tool calls. You can check whether the model you're using supports tool calling in its API reference."
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "d7381cb0-b2c3-4302-a319-ed72d0b9e43f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'setup': 'Crocodile',\n",
" 'punchline': \"Crocodile 'see you later', but in a while, it becomes an alligator!\",\n",
" 'rating': 7}"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from langchain_core.messages import AIMessage, HumanMessage, ToolMessage\n",
"\n",
"examples = [\n",
" HumanMessage(\"Tell me a joke about planes\", name=\"example_user\"),\n",
" AIMessage(\n",
" \"\",\n",
" name=\"example_assistant\",\n",
" tool_calls=[\n",
" {\n",
" \"name\": \"joke\",\n",
" \"args\": {\n",
" \"setup\": \"Why don't planes ever get tired?\",\n",
" \"punchline\": \"Because they have rest wings!\",\n",
" \"rating\": 2,\n",
" },\n",
" \"id\": \"1\",\n",
" }\n",
" ],\n",
" ),\n",
" # Most tool-calling models expect ToolMessage(s) to follow an AIMessage with tool calls.\n",
" ToolMessage(\"\", tool_call_id=\"1\"),\n",
" # Some models also expect an AIMessage to follow any ToolMessages,\n",
" # so you may need to add an AIMessage here.\n",
" HumanMessage(\"Tell me another joke about planes\", name=\"example_user\"),\n",
" AIMessage(\n",
" \"\",\n",
" name=\"example_assistant\",\n",
" tool_calls=[\n",
" {\n",
" \"name\": \"joke\",\n",
" \"args\": {\n",
" \"setup\": \"Cargo\",\n",
" \"punchline\": \"Cargo 'vroom vroom', but planes go 'zoom zoom'!\",\n",
" \"rating\": 10,\n",
" },\n",
" \"id\": \"2\",\n",
" }\n",
" ],\n",
" ),\n",
" ToolMessage(\"\", tool_call_id=\"2\"),\n",
" HumanMessage(\"Now about caterpillars\", name=\"example_user\"),\n",
" AIMessage(\n",
" \"\",\n",
" tool_calls=[\n",
" {\n",
" \"name\": \"joke\",\n",
" \"args\": {\n",
" \"setup\": \"Caterpillar\",\n",
" \"punchline\": \"Caterpillar really slow, but watch me turn into a butterfly and steal the show!\",\n",
" \"rating\": 5,\n",
" },\n",
" \"id\": \"3\",\n",
" }\n",
" ],\n",
" ),\n",
" ToolMessage(\"\", tool_call_id=\"3\"),\n",
"]\n",
"system = \"\"\"You are a hilarious comedian. Your specialty is knock-knock jokes. \\\n",
"Return a joke which has the setup (the response to \"Who's there?\") \\\n",
"and the final punchline (the response to \"<setup> who?\").\"\"\"\n",
"\n",
"prompt = ChatPromptTemplate.from_messages(\n",
" [(\"system\", system), (\"placeholder\", \"{examples}\"), (\"human\", \"{input}\")]\n",
")\n",
"few_shot_structured_llm = prompt | structured_llm\n",
"few_shot_structured_llm.invoke({\"input\": \"crocodiles\", \"examples\": examples})"
]
},
{
"cell_type": "markdown",
"id": "498d893b-ceaa-47ff-a9d8-4faa60702715",
"metadata": {},
"source": [
"For more on few-shot prompting when using tool calling, see [here](/docs/how_to/function_calling/#Few-shot-prompting)."
]
},
{
@ -239,9 +469,17 @@
"id": "39d7a555",
"metadata": {},
"source": [
"### Specifying the output method (Advanced)\n",
"### (Advanced) Specifying the method for structuring outputs\n",
"\n",
"For models that support more than one means of outputting data, you can specify the preferred one like this:"
"For models that support more than one means of structuring outputs (i.e., they support both tool calling and JSON mode), you can specify which method to use with the `method=` argument.\n",
"\n",
":::info JSON mode\n",
"\n",
"If using JSON mode you'll still have to specify the desired schema in the model prompt. The schema you pass to `with_structured_output` will only be used for parsing the model outputs; it will not be passed to the model the way it is with tool calling.\n",
"\n",
"To see if the model you're using supports JSON mode, check its entry in the [API reference](https://api.python.langchain.com/en/latest/langchain_api_reference.html).\n",
"\n",
":::"
]
},
{
@ -253,7 +491,7 @@
{
"data": {
"text/plain": [
"Joke(setup='Why was the cat sitting on the computer?', punchline='Because it wanted to keep an eye on the mouse!')"
"Joke(setup='Why was the cat sitting on the computer?', punchline='Because it wanted to keep an eye on the mouse!', rating=None)"
]
},
"execution_count": 6,
@ -274,13 +512,9 @@
"id": "5e92a98a",
"metadata": {},
"source": [
"In the above example, we use OpenAI's alternate JSON mode capability along with a more specific prompt.\n",
"## Prompting and parsing model directly\n",
"\n",
"For specifics about the model you choose, peruse its entry in the [API reference pages](https://api.python.langchain.com/en/latest/langchain_api_reference.html).\n",
"\n",
"## Prompting techniques\n",
"\n",
"You can also prompt models to output information in a given format. This approach relies on designing good prompts and then parsing the output of the models. This is the only option for models that don't support `.with_structured_output()` or other built-in approaches.\n",
"Not all models support `.with_structured_output()`, since not all models have tool calling or JSON mode support. For such models you'll need to directly prompt the model to use a specific format, and use an output parser to extract the structured response from the raw model output.\n",
"\n",
"### Using `PydanticOutputParser`\n",
"\n",
@ -289,7 +523,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 31,
"id": "6e514455",
"metadata": {},
"outputs": [],
@ -341,7 +575,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 37,
"id": "3d73d33d",
"metadata": {},
"outputs": [
@ -366,7 +600,7 @@
"source": [
"query = \"Anna is 23 years old and she is 6 feet tall\"\n",
"\n",
"print(prompt.format_prompt(query=query).to_string())"
"print(prompt.invoke(query).to_string())"
]
},
{
@ -542,25 +776,13 @@
"\n",
"chain.invoke({\"query\": query})"
]
},
{
"cell_type": "markdown",
"id": "7a39221a",
"metadata": {},
"source": [
"## Next steps\n",
"\n",
"Now you've learned a few methods to make a model output structured data.\n",
"\n",
"To learn more, check out the other how-to guides in this section, or the conceptual guide on tool calling."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "poetry-venv-2",
"language": "python",
"name": "python3"
"name": "poetry-venv-2"
},
"language_info": {
"codemirror_mode": {

@ -0,0 +1,160 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "4facdf7f-680e-4d28-908b-2b8408e2a741",
"metadata": {},
"source": [
"# How to call tools with multi-modal data\n",
"\n",
"Here we demonstrate how to call tools with multi-modal data, such as images.\n",
"\n",
"Some multi-modal models, such as those that can reason over images or audio, support [tool calling](/docs/concepts/#functiontool-calling) features as well.\n",
"\n",
"To call tools using such models, simply bind tools to them in the [usual way](/docs/how_to/tool_calling), and invoke the model using content blocks of the desired type (e.g., containing image data).\n",
"\n",
"Below, we demonstrate examples using [OpenAI](/docs/integrations/platforms/openai) and [Anthropic](/docs/integrations/platforms/anthropic). We will use the same image and tool in all cases. Let's first select an image, and build a placeholder tool that expects as input the string \"sunny\", \"cloudy\", or \"rainy\". We will ask the models to describe the weather in the image."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "0d9fd81a-b7f0-445a-8e3d-cfc2d31fdd59",
"metadata": {},
"outputs": [],
"source": [
"from typing import Literal\n",
"\n",
"from langchain_core.tools import tool\n",
"\n",
"image_url = \"https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg\"\n",
"\n",
"\n",
"@tool\n",
"def weather_tool(weather: Literal[\"sunny\", \"cloudy\", \"rainy\"]) -> None:\n",
" \"\"\"Describe the weather\"\"\"\n",
" pass"
]
},
{
"cell_type": "markdown",
"id": "8656018e-c56d-47d2-b2be-71e87827f90a",
"metadata": {},
"source": [
"## OpenAI\n",
"\n",
"For OpenAI, we can feed the image URL directly in a content block of type \"image_url\":"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "a8819cf3-5ddc-44f0-889a-19ca7b7fe77e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'name': 'weather_tool', 'args': {'weather': 'sunny'}, 'id': 'call_mRYL50MtHdeNuNIjSCm5UPmB'}]\n"
]
}
],
"source": [
"from langchain_core.messages import HumanMessage\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"model = ChatOpenAI(model=\"gpt-4o\").bind_tools([weather_tool])\n",
"\n",
"message = HumanMessage(\n",
" content=[\n",
" {\"type\": \"text\", \"text\": \"describe the weather in this image\"},\n",
" {\"type\": \"image_url\", \"image_url\": {\"url\": image_url}},\n",
" ],\n",
")\n",
"response = model.invoke([message])\n",
"print(response.tool_calls)"
]
},
{
"cell_type": "markdown",
"id": "e5738224-1109-4bf8-8976-ff1570dd1d46",
"metadata": {},
"source": [
"Note that we recover tool calls with parsed arguments in LangChain's [standard format](/docs/how_to/tool_calling) in the model response."
]
},
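The standard tool-call format referenced above can be sketched as a plain Python structure, matching the printed output in the cell. This is illustrative only; the `id` value below is a made-up placeholder, not a real call id:

```python
# LangChain's standard tool-call format: a list of dicts with
# "name", "args", and "id" keys, as shown in the output above.
tool_calls = [
    {
        "name": "weather_tool",
        "args": {"weather": "sunny"},
        "id": "call_123",  # hypothetical id
    }
]

# Extracting the parsed arguments works the same regardless of provider:
for call in tool_calls:
    print(call["name"], call["args"]["weather"])
```

Because the format is provider-agnostic, downstream code that reads `response.tool_calls` does not need to change when switching between OpenAI and Anthropic.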
{
"cell_type": "markdown",
"id": "0cee63ff-e09f-4dd8-8323-912edbde94f6",
"metadata": {},
"source": [
"## Anthropic\n",
"\n",
"For Anthropic, we can format a base64-encoded image into a content block of type \"image\", as below:"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "d90c4590-71c8-42b1-99ff-03a9eca8082e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'name': 'weather_tool', 'args': {'weather': 'sunny'}, 'id': 'toolu_016m9KfknJqx5fVRYk4tkF6s'}]\n"
]
}
],
"source": [
"import base64\n",
"\n",
"import httpx\n",
"from langchain_anthropic import ChatAnthropic\n",
"\n",
"image_data = base64.b64encode(httpx.get(image_url).content).decode(\"utf-8\")\n",
"\n",
"model = ChatAnthropic(model=\"claude-3-sonnet-20240229\").bind_tools([weather_tool])\n",
"\n",
"message = HumanMessage(\n",
" content=[\n",
" {\"type\": \"text\", \"text\": \"describe the weather in this image\"},\n",
" {\n",
" \"type\": \"image\",\n",
" \"source\": {\n",
" \"type\": \"base64\",\n",
" \"media_type\": \"image/jpeg\",\n",
" \"data\": image_data,\n",
" },\n",
" },\n",
" ],\n",
")\n",
"response = model.invoke([message])\n",
"print(response.tool_calls)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -124,7 +124,7 @@ const config = {
/** @type {import('@docusaurus/preset-classic').ThemeConfig} */
({
announcementBar: {
content: 'You are viewing the <strong>preview</strong> LangChain v0.2 docs. Note that 0.2 Search features are currently unstable and in progress. View the <a href="/v0.1/docs/get_started/introduction/">stable 0.1 docs here</a>.',
content: 'You are viewing the <strong>preview</strong> LangChain v0.2 docs. View the <a href="/v0.1/docs/get_started/introduction/">stable 0.1 docs here</a>.',
isCloseable: true,
},
docs: {
@ -310,9 +310,9 @@ const config = {
// this is linked to erick@langchain.dev currently
apiKey: "6c01842d6a88772ed2236b9c85806441",
indexName: "python-langchain",
indexName: "python-langchain-0.2",
contextualSearch: true,
contextualSearch: false,
},
}),

@ -84,12 +84,8 @@ class CustomRegexRemovePreprocessor(Preprocessor):
pattern = re.compile(r"(?s)(?:\s*\Z)|(?:.*#\s*\|\s*output:\s*false.*)")
rtn = not pattern.match(cell.source)
if not rtn:
print("--remove--")
print(cell.source)
return False
else:
print("--keep--")
print(cell.source)
return True
def preprocess(self, nb, resources):

@ -22,7 +22,7 @@ class _VectorStoreExampleSelector(BaseExampleSelector, BaseModel, ABC):
"""Example selector that selects examples based on SemanticSimilarity."""
vectorstore: VectorStore
"""VectorStore than contains information about examples."""
"""VectorStore that contains information about examples."""
k: int = 4
"""Number of examples to select."""
example_keys: Optional[List[str]] = None

@ -204,7 +204,9 @@ class BaseLanguageModel(
def with_structured_output(
self, schema: Union[Dict, Type[BaseModel]], **kwargs: Any
) -> Runnable[LanguageModelInput, Union[Dict, BaseModel]]:
"""Implement this if there is a way of steering the model to generate responses that match a given schema.""" # noqa: E501
"""Not implemented on this class."""
# Implement this on child class if there is a way of steering the model to
# generate responses that match a given schema.
raise NotImplementedError()
@deprecated("0.1.7", alternative="invoke", removal="0.3.0")

@ -80,6 +80,11 @@ select = [
disallow_untyped_defs = "True"
exclude = ["notebooks", "examples", "example_data", "langchain_core/pydantic"]
[[tool.mypy.overrides]]
# conditional dependencies introduced by langsmith-sdk
module = ["numpy", "pytest"]
ignore_missing_imports = true
[tool.coverage.run]
omit = ["tests/*"]

@ -1220,6 +1220,108 @@ async def test_event_stream_on_chain_with_tool() -> None:
]
@pytest.mark.xfail(reason="Fix order of callback invocations in RunnableSequence")
async def test_chain_ordering() -> None:
"""Test the ordering of events emitted by a chain."""
def foo(a: str) -> str:
return a
def bar(a: str) -> str:
return a
chain = RunnableLambda(foo) | RunnableLambda(bar)
iterable = chain.astream_events("q", version="v1")
events = []
for _ in range(10):
try:
next_chunk = await iterable.__anext__()
events.append(next_chunk)
except Exception:
break
events = _with_nulled_run_id(events)
for event in events:
event["tags"] = sorted(event["tags"])
assert events == [
{
"data": {"input": "q"},
"event": "on_chain_start",
"metadata": {},
"name": "RunnableSequence",
"run_id": "",
"tags": [],
},
{
"data": {},
"event": "on_chain_start",
"metadata": {},
"name": "foo",
"run_id": "",
"tags": ["seq:step:1"],
},
{
"data": {"chunk": "q"},
"event": "on_chain_stream",
"metadata": {},
"name": "foo",
"run_id": "",
"tags": ["seq:step:1"],
},
{
"data": {"input": "q", "output": "q"},
"event": "on_chain_end",
"metadata": {},
"name": "foo",
"run_id": "",
"tags": ["seq:step:1"],
},
{
"data": {},
"event": "on_chain_start",
"metadata": {},
"name": "bar",
"run_id": "",
"tags": ["seq:step:2"],
},
{
"data": {"chunk": "q"},
"event": "on_chain_stream",
"metadata": {},
"name": "bar",
"run_id": "",
"tags": ["seq:step:2"],
},
{
"data": {"chunk": "q"},
"event": "on_chain_stream",
"metadata": {},
"name": "RunnableSequence",
"run_id": "",
"tags": [],
},
{
"data": {"input": "q", "output": "q"},
"event": "on_chain_end",
"metadata": {},
"name": "bar",
"run_id": "",
"tags": ["seq:step:2"],
},
{
"data": {"output": "q"},
"event": "on_chain_end",
"metadata": {},
"name": "RunnableSequence",
"run_id": "",
"tags": [],
},
]
async def test_event_stream_with_retry() -> None:
"""Test the event stream with a retry."""
@ -1552,3 +1654,124 @@ async def test_runnable_with_message_history() -> None:
AIMessage(content="world", id="ai4"),
]
}
EXPECTED_EVENTS = [
{
"data": {"input": 1},
"event": "on_chain_start",
"metadata": {},
"name": "add_one_proxy",
"run_id": "",
"tags": [],
},
{
"data": {},
"event": "on_chain_start",
"metadata": {},
"name": "add_one",
"run_id": "",
"tags": [],
},
{
"data": {"chunk": 2},
"event": "on_chain_stream",
"metadata": {},
"name": "add_one",
"run_id": "",
"tags": [],
},
{
"data": {"input": 1, "output": 2},
"event": "on_chain_end",
"metadata": {},
"name": "add_one",
"run_id": "",
"tags": [],
},
{
"data": {"chunk": 2},
"event": "on_chain_stream",
"metadata": {},
"name": "add_one_proxy",
"run_id": "",
"tags": [],
},
{
"data": {"output": 2},
"event": "on_chain_end",
"metadata": {},
"name": "add_one_proxy",
"run_id": "",
"tags": [],
},
]
@pytest.mark.xfail(
reason="This test is failing due to missing functionality. "
"Need to implement logic in _transform_stream_with_config that mimics the async "
"variant that uses tap_output_iter"
)
async def test_sync_in_async_stream_lambdas() -> None:
"""Test invoking nested runnable lambda."""
def add_one_(x: int) -> int:
return x + 1
add_one = RunnableLambda(add_one_)
async def add_one_proxy_(x: int, config: RunnableConfig) -> int:
streaming = add_one.stream(x, config)
results = [result for result in streaming]
return results[0]
add_one_proxy = RunnableLambda(add_one_proxy_) # type: ignore
events = await _collect_events(add_one_proxy.astream_events(1, version="v1"))
assert events == EXPECTED_EVENTS
async def test_async_in_async_stream_lambdas() -> None:
"""Test invoking nested runnable lambda."""
async def add_one(x: int) -> int:
return x + 1
add_one_ = RunnableLambda(add_one) # type: ignore
async def add_one_proxy(x: int, config: RunnableConfig) -> int:
# Use async streaming
streaming = add_one_.astream(x, config)
results = [result async for result in streaming]
return results[0]
add_one_proxy_ = RunnableLambda(add_one_proxy) # type: ignore
events = await _collect_events(add_one_proxy_.astream_events(1, version="v1"))
assert events == EXPECTED_EVENTS
@pytest.mark.xfail(
reason="This test is failing due to missing functionality. "
"Need to implement logic in _transform_stream_with_config that mimics the async "
"variant that uses tap_output_iter"
)
async def test_sync_in_sync_lambdas() -> None:
"""Test invoking nested runnable lambda."""
def add_one(x: int) -> int:
return x + 1
add_one_ = RunnableLambda(add_one)
def add_one_proxy(x: int, config: RunnableConfig) -> int:
# Use sync streaming
streaming = add_one_.stream(x, config)
results = [result for result in streaming]
return results[0]
add_one_proxy_ = RunnableLambda(add_one_proxy)
events = await _collect_events(add_one_proxy_.astream_events(1, version="v1"))
assert events == EXPECTED_EVENTS

@ -16,6 +16,7 @@ from typing import (
)
import numpy as np
from bson import ObjectId, json_util
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.runnables.config import run_in_executor
@ -31,7 +32,7 @@ VST = TypeVar("VST", bound=VectorStore)
logger = logging.getLogger(__name__)
DEFAULT_INSERT_BATCH_SIZE = 100
DEFAULT_INSERT_BATCH_SIZE = 100_000
class MongoDBAtlasVectorSearch(VectorStore):
@ -150,18 +151,24 @@ class MongoDBAtlasVectorSearch(VectorStore):
"""
batch_size = kwargs.get("batch_size", DEFAULT_INSERT_BATCH_SIZE)
_metadatas: Union[List, Generator] = metadatas or ({} for _ in texts)
texts_batch = []
metadatas_batch = []
texts_batch = texts
metadatas_batch = _metadatas
result_ids = []
for i, (text, metadata) in enumerate(zip(texts, _metadatas)):
texts_batch.append(text)
metadatas_batch.append(metadata)
if (i + 1) % batch_size == 0:
result_ids.extend(self._insert_texts(texts_batch, metadatas_batch))
texts_batch = []
metadatas_batch = []
if batch_size:
texts_batch = []
metadatas_batch = []
size = 0
for i, (text, metadata) in enumerate(zip(texts, _metadatas)):
size += len(text) + len(metadata)
texts_batch.append(text)
metadatas_batch.append(metadata)
if (i + 1) % batch_size == 0 or size >= 47_000_000:
result_ids.extend(self._insert_texts(texts_batch, metadatas_batch))
texts_batch = []
metadatas_batch = []
size = 0
if texts_batch:
result_ids.extend(self._insert_texts(texts_batch, metadatas_batch))
result_ids.extend(self._insert_texts(texts_batch, metadatas_batch)) # type: ignore
return result_ids
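The flushing policy in the updated `add_texts` can be sketched in isolation as a pure-Python generator (the function name here is illustrative, not part of the `langchain-mongodb` API): a batch is emitted when it reaches `batch_size` items or when the accumulated payload estimate crosses a byte budget near MongoDB's bulk-write limit.

```python
def batch_texts(texts, metadatas, batch_size=100_000, size_limit=47_000_000):
    """Yield (texts, metadatas) batches, flushing on count or payload size."""
    texts_batch, metadatas_batch, size = [], [], 0
    for i, (text, metadata) in enumerate(zip(texts, metadatas)):
        # Approximate payload size as text length plus metadata key count,
        # mirroring the heuristic in the diff above.
        size += len(text) + len(metadata)
        texts_batch.append(text)
        metadatas_batch.append(metadata)
        if (i + 1) % batch_size == 0 or size >= size_limit:
            yield texts_batch, metadatas_batch
            texts_batch, metadatas_batch, size = [], [], 0
    if texts_batch:
        yield texts_batch, metadatas_batch
```

For example, five texts with `batch_size=2` yield batches of 2, 2, and 1 items, while a small `size_limit` forces a flush even before the count threshold is reached.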
def _insert_texts(self, texts: List[str], metadatas: List[Dict[str, Any]]) -> List:
@ -210,9 +217,23 @@ class MongoDBAtlasVectorSearch(VectorStore):
pipeline.extend(post_filter_pipeline)
cursor = self._collection.aggregate(pipeline) # type: ignore[arg-type]
docs = []
def _make_serializable(obj: Dict[str, Any]) -> None:
for k, v in obj.items():
if isinstance(v, dict):
_make_serializable(v)
elif isinstance(v, list) and v and isinstance(v[0], ObjectId):
obj[k] = [json_util.default(item) for item in v]
elif isinstance(v, ObjectId):
obj[k] = json_util.default(v)
for res in cursor:
text = res.pop(self._text_key)
score = res.pop("score")
# Make every ObjectId found JSON-Serializable
# following format used in bson.json_util.loads
# e.g. loads('{"_id": {"$oid": "664..."}}') == {'_id': ObjectId('664..')} # noqa: E501
_make_serializable(res)
docs.append((Document(page_content=text, metadata=res), score))
return docs
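The recursive `_make_serializable` pass above can be exercised without a MongoDB connection. This stand-alone sketch uses a stand-in `FakeObjectId` class and a `to_extended_json` helper that mimic `bson.ObjectId` and `json_util.default` (both stand-ins are assumptions for illustration, not the bson API):

```python
import json

class FakeObjectId:
    """Stand-in for bson.ObjectId; json.dumps cannot serialize it directly."""
    def __init__(self, hex_str="664f0dd4f1f4e9b6a3a1c2d3"):
        self.hex = hex_str

def to_extended_json(oid):
    """Mimic the {"$oid": ...} extended-JSON form of bson.json_util.default."""
    return {"$oid": oid.hex}

def make_serializable(obj):
    """Recursively replace ObjectId-like values in place, as in the diff."""
    for k, v in obj.items():
        if isinstance(v, dict):
            make_serializable(v)
        elif isinstance(v, list) and v and isinstance(v[0], FakeObjectId):
            obj[k] = [to_extended_json(item) for item in v]
        elif isinstance(v, FakeObjectId):
            obj[k] = to_extended_json(v)

doc = {"_id": FakeObjectId(), "nested": {"ref": FakeObjectId("abc123")}}
make_serializable(doc)
serialized = json.dumps(doc)  # no longer raises TypeError
```

Mutating the metadata in place keeps the round-trip property the tests check: `json_util.loads` can reconstruct an `ObjectId` from the `{"$oid": ...}` form that plain `json.dumps` emits.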

@ -1,6 +1,6 @@
[tool.poetry]
name = "langchain-mongodb"
version = "0.1.3"
version = "0.1.4"
description = "An integration package connecting MongoDB and LangChain"
authors = []
readme = "README.md"
@ -28,7 +28,7 @@ pytest-watcher = "^0.3.4"
pytest-asyncio = "^0.21.1"
langchain = { path = "../../langchain", develop = true }
langchain-core = { path = "../../core", develop = true }
langchain-text-splitters = {path = "../../text-splitters", develop = true}
langchain-text-splitters = { path = "../../text-splitters", develop = true }
[tool.poetry.group.codespell]
optional = true

@ -1,6 +1,8 @@
from json import dumps, loads
from typing import Any, Optional
import pytest
from bson import ObjectId, json_util
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from pymongo.collection import Collection
@ -75,6 +77,11 @@ class TestMongoDBAtlasVectorSearch:
output = vectorstore.similarity_search("", k=1)
assert output[0].page_content == page_content
assert output[0].metadata.get("c") == metadata
# Validate the ObjectId provided is json serializable
assert loads(dumps(output[0].page_content)) == output[0].page_content
assert loads(dumps(output[0].metadata)) == output[0].metadata
json_metadata = dumps(output[0].metadata) # normal json.dumps
assert isinstance(json_util.loads(json_metadata)["_id"], ObjectId)
def test_from_documents(
self, embedding_openai: Embeddings, collection: MockCollection

@ -1,9 +1,9 @@
from __future__ import annotations
import uuid
from copy import deepcopy
from typing import Any, Dict, List, Mapping, Optional, cast
from bson import ObjectId
from langchain_core.callbacks.manager import (
AsyncCallbackManagerForLLMRun,
CallbackManagerForLLMRun,
@ -162,7 +162,7 @@ class MockCollection(Collection):
def insert_many(self, to_insert: List[Any], *args, **kwargs) -> InsertManyResult: # type: ignore
mongodb_inserts = [
{"_id": str(uuid.uuid4()), "score": 1, **insert} for insert in to_insert
{"_id": ObjectId(), "score": 1, **insert} for insert in to_insert
]
self._data.extend(mongodb_inserts)
return self._insert_result or InsertManyResult(
