docs[patch]: Adds multimodal column to chat models table, moves multimodal section up in concepts (#22837)

CC @hwchase17 @baskaryan
4 months ago · cb654a3245
parent 45b394268c
commit cb654a3245
2 changed files with 18 additions and 8 deletions
--- a/docs/docs/concepts.mdx
+++ b/docs/docs/concepts.mdx
@@ -155,6 +155,16 @@ Please see the [tool calling section](/docs/concepts/#functiontool-calling) for

 For specifics on how to use chat models, see the [relevant how-to guides here](/docs/how_to/#chat-models).

+#### Multimodality
+
+Some chat models are multimodal, accepting images, audio and even video as inputs. These are still less common, meaning model providers haven't standardized on the "best" way to define the API. Multimodal **outputs** are even less common. As such, we've kept our multimodal abstractions fairly lightweight and plan to further solidify the multimodal APIs and interaction patterns as the field matures.
+
+In LangChain, most chat models that support multimodal inputs also accept those values in OpenAI's content blocks format. So far this is restricted to image inputs. For models like Gemini, which support video and other byte inputs, the APIs also support the native, model-specific representations.
+
+For specifics on how to use multimodal models, see the [relevant how-to guides here](/docs/how_to/#multimodal).
+
+For a full list of LangChain model providers with multimodal models, [check out this table](/docs/integrations/chat/#advanced-features).
+
 ### LLMs
 <span data-heading-keywords="llm,llms"></span>

@@ -514,14 +524,6 @@ If you are still using AgentExecutor, do not fear: we still have a guide on [how
 It is recommended, however, that you start to transition to LangGraph.
 In order to assist in this we have put together a [transition guide on how to do so](/docs/how_to/migrate_agent).

-### Multimodal
-
-Some models are multimodal, accepting images, audio and even video as inputs. These are still less common, meaning model providers haven't standardized on the "best" way to define the API. Multimodal **outputs** are even less common. As such, we've kept our multimodal abstractions fairly light weight and plan to further solidify the multimodal APIs and interaction patterns as the field matures.
-
-In LangChain, most chat models that support multimodal inputs also accept those values in OpenAI's content blocks format. So far this is restricted to image inputs. For models like Gemini which support video and other bytes input, the APIs also support the native, model-specific representations.
-
-For specifics on how to use multimodal models, see the [relevant how-to guides here](/docs/how_to/#multimodal).
-
 ### Callbacks

 LangChain provides a callbacks system that allows you to hook into the various stages of your LLM application. This is useful for logging, monitoring, streaming, and other tasks.
--- a/docs/scripts/model_feat_table.py
+++ b/docs/scripts/model_feat_table.py
@@ -18,6 +18,7 @@ CHAT_MODEL_FEAT_TABLE = {
    "ChatAnthropic": {
        "tool_calling": True,
        "structured_output": True,
+        "multimodal": True,
        "package": "langchain-anthropic",
        "link": "/docs/integrations/chat/anthropic/",
    },
@@ -39,6 +40,7 @@ CHAT_MODEL_FEAT_TABLE = {
        "tool_calling": True,
        "structured_output": True,
        "json_mode": True,
+        "multimodal": True,
        "package": "langchain-openai",
        "link": "/docs/integrations/chat/azure_chat_openai/",
    },
@@ -46,6 +48,7 @@ CHAT_MODEL_FEAT_TABLE = {
        "tool_calling": True,
        "structured_output": True,
        "json_mode": True,
+        "multimodal": True,
        "package": "langchain-openai",
        "link": "/docs/integrations/chat/openai/",
    },
@@ -59,11 +62,13 @@ CHAT_MODEL_FEAT_TABLE = {
    "ChatVertexAI": {
        "tool_calling": True,
        "structured_output": True,
+        "multimodal": True,
        "package": "langchain-google-vertexai",
        "link": "/docs/integrations/chat/google_vertex_ai_palm/",
    },
    "ChatGoogleGenerativeAI": {
        "tool_calling": True,
+        "multimodal": True,
        "package": "langchain-google-genai",
        "link": "/docs/integrations/chat/google_generative_ai/",
    },
@@ -138,6 +143,7 @@ sidebar_position: 0
 sidebar_class_name: hidden
 keywords: [compatibility, bind_tools, tool calling, function calling, structured output, with_structured_output, json mode, local model]
 custom_edit_url:
+hide_table_of_contents: true
 ---

 # Chat models
@@ -213,6 +219,7 @@ def get_chat_model_table() -> str:
        "structured_output",
        "json_mode",
        "local",
+        "multimodal",
        "package",
    ]
    title = [
@@ -221,6 +228,7 @@ def get_chat_model_table() -> str:
        "[Structured output](/docs/how_to/structured_output/)",
        "JSON mode",
        "Local",
+        "[Multimodal](/docs/how_to/multimodal_inputs/)",
        "Package",
    ]
    rows = [title, [":-"] + [":-:"] * (len(title) - 1)]