From ee94ef55ee6ab064da08340817955f821dfa6261 Mon Sep 17 00:00:00 2001
From: Harutaka Kawamura
Date: Mon, 4 Dec 2023 09:07:09 +0900
Subject: [PATCH] docs[patch]: Update MLflow and Databricks docs (#14011)

Depends on #13699. Updates the existing mlflow and databricks examples.

---------

Co-authored-by: Ben Wilson <39283302+BenWilson2@users.noreply.github.com>
---
 docs/docs/integrations/llms/databricks.ipynb  | 130 +++++++++++++++---
 .../docs/integrations/providers/databricks.md |  66 +++++++--
 docs/docs/integrations/providers/mlflow.mdx   | 119 ++++++++++++++++
 .../providers/mlflow_ai_gateway.mdx           |   6 +
 4 files changed, 290 insertions(+), 31 deletions(-)
 create mode 100644 docs/docs/integrations/providers/mlflow.mdx

diff --git a/docs/docs/integrations/llms/databricks.ipynb b/docs/docs/integrations/llms/databricks.ipynb
index cc3e4f9a24..1033a1e71e 100644
--- a/docs/docs/integrations/llms/databricks.ipynb
+++ b/docs/docs/integrations/llms/databricks.ipynb
@@ -20,27 +20,121 @@
     "This example notebook shows how to wrap Databricks endpoints as LLMs in LangChain.\n",
     "It supports two endpoint types:\n",
     "* Serving endpoint, recommended for production and development,\n",
-    "* Cluster driver proxy app, recommended for iteractive development."
+    "* Cluster driver proxy app, recommended for interactive development."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Installation\n",
+    "\n",
+    "`mlflow >= 2.9` is required to run the code in this notebook. If it's not installed, please install it using this command:\n",
+    "\n",
+    "```\n",
+    "pip install \"mlflow>=2.9\"\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Wrapping a serving endpoint: External model\n",
+    "\n",
+    "Prerequisite:\n",
+    "\n",
+    "- Register an OpenAI API key as a secret:\n",
+    "\n",
+    "  ```bash\n",
+    "  databricks secrets create-scope <scope>\n",
+    "  databricks secrets put-secret <scope> openai-api-key --string-value $OPENAI_API_KEY\n",
+    "  ```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The following code creates a new serving endpoint with OpenAI's GPT-4 model for chat and generates a response using the endpoint."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "application/vnd.databricks.v1+cell": {
-     "cellMetadata": {
-      "byteLimit": 2048000,
-      "rowLimit": 10000
-     },
-     "inputWidgets": {},
-     "nuid": "bf07455f-aac9-4873-a8e7-7952af0f8c82",
-     "showTitle": false,
-     "title": ""
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "content='Hello! How can I assist you today?'\n"
+     ]
     }
-   },
-   "outputs": [],
+   ],
    "source": [
-    "from langchain.llms import Databricks"
+    "from langchain.chat_models import ChatDatabricks\n",
+    "from langchain.schema.messages import HumanMessage\n",
+    "from mlflow.deployments import get_deploy_client\n",
+    "\n",
+    "client = get_deploy_client(\"databricks\")\n",
+    "\n",
+    "secret = \"secrets/<scope>/openai-api-key\"  # replace `<scope>` with your scope\n",
+    "name = \"my-chat\"  # rename this if my-chat already exists\n",
+    "client.create_endpoint(\n",
+    "    name=name,\n",
+    "    config={\n",
+    "        \"served_entities\": [\n",
+    "            {\n",
+    "                \"name\": \"my-chat\",\n",
+    "                \"external_model\": {\n",
+    "                    \"name\": \"gpt-4\",\n",
+    "                    \"provider\": \"openai\",\n",
+    "                    \"task\": \"llm/v1/chat\",\n",
+    "                    \"openai_config\": {\n",
+    "                        \"openai_api_key\": \"{{\" + secret + \"}}\",\n",
+    "                    },\n",
+    "                },\n",
+    "            }\n",
+    "        ],\n",
+    "    },\n",
+    ")\n",
+    "\n",
+    "chat = ChatDatabricks(\n",
+    "    target_uri=\"databricks\",\n",
+    "    endpoint=name,\n",
+    "    temperature=0.1,\n",
+    ")\n",
+    "chat([HumanMessage(content=\"hello\")])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Wrapping a serving endpoint: Foundation model\n",
+    "\n",
+    "The following code uses the `databricks-bge-large-en` serving endpoint (no endpoint creation is required) to generate embeddings from input text."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.051055908203125, 0.007221221923828125, 0.003879547119140625]\n"
+     ]
+    }
+   ],
+   "source": [
+    "from langchain.embeddings import DatabricksEmbeddings\n",
+    "\n",
+    "embeddings = DatabricksEmbeddings(endpoint=\"databricks-bge-large-en\")\n",
+    "embeddings.embed_query(\"hello\")[:3]"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {
@@ -56,7 +150,7 @@
     }
    },
    "source": [
-    "## Wrapping a serving endpoint\n",
+    "## Wrapping a serving endpoint: Custom model\n",
     "\n",
     "Prerequisites:\n",
     "* An LLM was registered and deployed to [a Databricks serving endpoint](https://docs.databricks.com/machine-learning/model-serving/index.html).\n",
@@ -97,6 +191,8 @@
     }
    ],
    "source": [
+    "from langchain.llms import Databricks\n",
+    "\n",
     "# If running a Databricks notebook attached to an interactive cluster in \"single user\"\n",
     "# or \"no isolation shared\" mode, you only need to specify the endpoint name to create\n",
     "# a `Databricks` instance to query a serving endpoint in the same workspace.\n",
@@ -524,7 +620,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.10"
+   "version": "3.9.18"
   },
   "orig_nbformat": 4
  },
diff --git a/docs/docs/integrations/providers/databricks.md b/docs/docs/integrations/providers/databricks.md
index 3c5c19de5c..7c8aa4c8fa 100644
--- a/docs/docs/integrations/providers/databricks.md
+++ b/docs/docs/integrations/providers/databricks.md
@@ -7,9 +7,8 @@ Databricks embraces the LangChain ecosystem in various ways:
 
 1. Databricks connector for the SQLDatabase Chain: SQLDatabase.from_databricks() provides an easy way to query your data on Databricks through LangChain
 2. Databricks MLflow integrates with LangChain: Tracking and serving LangChain applications with fewer steps
-3. Databricks MLflow AI Gateway
-4. Databricks as an LLM provider: Deploy your fine-tuned LLMs on Databricks via serving endpoints or cluster driver proxy apps, and query it as langchain.llms.Databricks
-5. Databricks Dolly: Databricks open-sourced Dolly which allows for commercial use, and can be accessed through the Hugging Face Hub
+3. Databricks as an LLM provider: Deploy your fine-tuned LLMs on Databricks via serving endpoints or cluster driver proxy apps, and query it as langchain.llms.Databricks
+4. Databricks Dolly: Databricks open-sourced Dolly which allows for commercial use, and can be accessed through the Hugging Face Hub
 
 Databricks connector for the SQLDatabase Chain
 ----------------------------------------------
@@ -25,19 +24,58 @@ Databricks provides a fully managed and hosted version of MLflow integrated with
 Databricks MLflow makes it more convenient to develop LangChain applications on Databricks. For MLflow tracking, you don't need to set the tracking uri. For MLflow Model Serving, you can save LangChain Chains in the MLflow langchain flavor, and then register and serve the Chain with a few clicks on Databricks, with credentials securely managed by MLflow Model Serving.
 
-Databricks MLflow AI Gateway
-----------------------------
+Databricks External Models
+--------------------------
 
-See [MLflow AI Gateway](/docs/integrations/providers/mlflow_ai_gateway).
+[Databricks External Models](https://docs.databricks.com/generative-ai/external-models/index.html) is a service that is designed to streamline the usage and management of various large language model (LLM) providers, such as OpenAI and Anthropic, within an organization. It offers a high-level interface that simplifies the interaction with these services by providing a unified endpoint to handle specific LLM related requests. The following example creates an endpoint that serves OpenAI's GPT-4 model and generates a chat response from it:
+
+```python
+from langchain.chat_models import ChatDatabricks
+from langchain.schema.messages import HumanMessage
+from mlflow.deployments import get_deploy_client
+
+
+client = get_deploy_client("databricks")
+name = "chat"
+client.create_endpoint(
+    name=name,
+    config={
+        "served_entities": [
+            {
+                "name": "test",
+                "external_model": {
+                    "name": "gpt-4",
+                    "provider": "openai",
+                    "task": "llm/v1/chat",
+                    "openai_config": {
+                        "openai_api_key": "{{secrets/<scope>/<key>}}",
+                    },
+                },
+            }
+        ],
+    },
+)
+chat = ChatDatabricks(endpoint=name, temperature=0.1)
+print(chat([HumanMessage(content="hello")]))
+# -> content='Hello! How can I assist you today?'
+```
+
+Databricks Foundation Model APIs
+--------------------------------
+
+[Databricks Foundation Model APIs](https://docs.databricks.com/machine-learning/foundation-models/index.html) allow you to access and query state-of-the-art open source models from dedicated serving endpoints. With Foundation Model APIs, developers can quickly and easily build applications that leverage a high-quality generative AI model without maintaining their own model deployment. The following example uses the `databricks-bge-large-en` endpoint to generate embeddings from text; a chat sketch follows the code block:
+
+```python
+from langchain.embeddings import DatabricksEmbeddings
+
+
+embeddings = DatabricksEmbeddings(endpoint="databricks-bge-large-en")
+print(embeddings.embed_query("hello")[:3])
+# -> [0.051055908203125, 0.007221221923828125, 0.003879547119140625, ...]
+```
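+
+Foundation Model APIs endpoints can back chat models the same way. A minimal sketch, assuming the pay-per-token `databricks-llama-2-70b-chat` endpoint is available in your workspace (check your workspace's serving endpoint list):
+
+```python
+from langchain.chat_models import ChatDatabricks
+from langchain.schema.messages import HumanMessage
+
+# Foundation Model APIs chat endpoint; no endpoint creation is required.
+chat = ChatDatabricks(endpoint="databricks-llama-2-70b-chat", temperature=0.1)
+print(chat([HumanMessage(content="hello")]))
+```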
 
 Databricks as an LLM provider
 -----------------------------
 
-The notebook [Wrap Databricks endpoints as LLMs](/docs/integrations/llms/databricks) illustrates the method to wrap Databricks endpoints as LLMs in LangChain. It supports two types of endpoints: the serving endpoint, which is recommended for both production and development, and the cluster driver proxy app, which is recommended for interactive development.
-
-Databricks endpoints support Dolly, but are also great for hosting models like MPT-7B or any other models from the Hugging Face ecosystem. Databricks endpoints can also be used with proprietary models like OpenAI to provide a governance layer for enterprises.
-
-Databricks Dolly
-----------------
-
-Databricks’ Dolly is an instruction-following large language model trained on the Databricks machine learning platform that is licensed for commercial use. The model is available on Hugging Face Hub as databricks/dolly-v2-12b. See the notebook [Hugging Face Hub](/docs/integrations/llms/huggingface_hub) for instructions to access it through the Hugging Face Hub integration with LangChain.
+The notebook [Wrap Databricks endpoints as LLMs](/docs/integrations/llms/databricks#wrapping-a-serving-endpoint-custom-model) demonstrates how to serve a custom model that has been registered by MLflow as a Databricks endpoint.
+It supports two types of endpoints: the serving endpoint, which is recommended for both production and development, and the cluster driver proxy app, which is recommended for interactive development.
diff --git a/docs/docs/integrations/providers/mlflow.mdx b/docs/docs/integrations/providers/mlflow.mdx
new file mode 100644
index 0000000000..159a693cc0
--- /dev/null
+++ b/docs/docs/integrations/providers/mlflow.mdx
@@ -0,0 +1,119 @@
+# MLflow Deployments for LLMs
+
+>[The MLflow Deployments for LLMs](https://www.mlflow.org/docs/latest/llms/deployments/index.html) is a powerful tool designed to streamline the usage and management of various large
+> language model (LLM) providers, such as OpenAI and Anthropic, within an organization. It offers a high-level interface
+> that simplifies the interaction with these services by providing a unified endpoint to handle specific LLM related requests.
+
+## Installation and Setup
+
+Install `mlflow` with MLflow Deployments dependencies:
+
+```sh
+pip install 'mlflow[genai]'
+```
+
+Set the OpenAI API key as an environment variable:
+
+```sh
+export OPENAI_API_KEY=...
+```
+
+Create a configuration file:
+
+```yaml
+endpoints:
+  - name: completions
+    endpoint_type: llm/v1/completions
+    model:
+      provider: openai
+      name: text-davinci-003
+      config:
+        openai_api_key: $OPENAI_API_KEY
+
+  - name: embeddings
+    endpoint_type: llm/v1/embeddings
+    model:
+      provider: openai
+      name: text-embedding-ada-002
+      config:
+        openai_api_key: $OPENAI_API_KEY
+
+  # Endpoint used by the Chat Example below.
+  - name: chat
+    endpoint_type: llm/v1/chat
+    model:
+      provider: openai
+      name: gpt-3.5-turbo
+      config:
+        openai_api_key: $OPENAI_API_KEY
+```
+
+Start the deployments server:
+
+```sh
+mlflow deployments start-server --config-path /path/to/config.yaml
+```
+
+## Example provided by `MLflow`
+
+>The `mlflow.langchain` module provides an API for logging and loading `LangChain` models.
+> This module exports multivariate LangChain models in the langchain flavor and univariate LangChain
+> models in the pyfunc flavor.
+
+See the [API documentation and examples](https://www.mlflow.org/docs/latest/python_api/mlflow.langchain) for more information.
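+
+Before running the examples below, you can sanity-check the server directly with the MLflow deployments client. A minimal sketch, assuming the configuration above and a server listening on `http://127.0.0.1:5000`:
+
+```python
+from mlflow.deployments import get_deploy_client
+
+client = get_deploy_client("http://127.0.0.1:5000")
+
+# List the endpoints defined in the configuration file.
+print(client.list_endpoints())
+
+# Query the completions endpoint without going through LangChain.
+print(client.predict(endpoint="completions", inputs={"prompt": "hello", "max_tokens": 24}))
+```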
+
+## Completions Example
+
+```python
+import mlflow
+from langchain.chains import LLMChain
+from langchain.llms import Mlflow
+from langchain.prompts import PromptTemplate
+
+llm = Mlflow(
+    target_uri="http://127.0.0.1:5000",
+    endpoint="completions",
+)
+
+llm_chain = LLMChain(
+    llm=llm,
+    prompt=PromptTemplate(
+        input_variables=["adjective"],
+        template="Tell me a {adjective} joke",
+    ),
+)
+result = llm_chain.run(adjective="funny")
+print(result)
+
+with mlflow.start_run():
+    model_info = mlflow.langchain.log_model(llm_chain, "model")
+
+model = mlflow.pyfunc.load_model(model_info.model_uri)
+print(model.predict([{"adjective": "funny"}]))
+```
+
+## Embeddings Example
+
+```python
+from langchain.embeddings import MlflowEmbeddings
+
+embeddings = MlflowEmbeddings(
+    target_uri="http://127.0.0.1:5000",
+    endpoint="embeddings",
+)
+
+print(embeddings.embed_query("hello"))
+print(embeddings.embed_documents(["hello"]))
+```
+
+## Chat Example
+
+```python
+from langchain.chat_models import ChatMlflow
+from langchain.schema import HumanMessage, SystemMessage
+
+chat = ChatMlflow(
+    target_uri="http://127.0.0.1:5000",
+    endpoint="chat",
+)
+
+messages = [
+    SystemMessage(
+        content="You are a helpful assistant that translates English to French."
+    ),
+    HumanMessage(
+        content="Translate this sentence from English to French: I love programming."
+    ),
+]
+print(chat(messages))
+```
diff --git a/docs/docs/integrations/providers/mlflow_ai_gateway.mdx b/docs/docs/integrations/providers/mlflow_ai_gateway.mdx
index 3b944092f1..3c716724c3 100644
--- a/docs/docs/integrations/providers/mlflow_ai_gateway.mdx
+++ b/docs/docs/integrations/providers/mlflow_ai_gateway.mdx
@@ -1,5 +1,11 @@
 # MLflow AI Gateway
 
+:::warning
+
+MLflow AI Gateway has been deprecated. Please use [MLflow Deployments for LLMs](./mlflow) instead.
+
+:::
+
 >[The MLflow AI Gateway](https://www.mlflow.org/docs/latest/gateway/index) service is a powerful tool designed to streamline the usage and management of various large
 > language model (LLM) providers, such as OpenAI and Anthropic, within an organization. It offers a high-level interface
 > that simplifies the interaction with these services by providing a unified endpoint to handle specific LLM related requests.