From bee136efa4393219302208a1a458d32129f5d539 Mon Sep 17 00:00:00 2001 From: Zander Chase <130414180+vowelparrot@users.noreply.github.com> Date: Tue, 16 May 2023 13:26:43 +0000 Subject: [PATCH] Update Tracing Walkthrough (#4760) Add client methods to read / list runs and sessions. Update walkthrough to: - Let the user create a dataset from the runs without going to the UI - Use the new CLI command to start the server Improve the error message when `docker` isn't found --- langchain/callbacks/tracers/base.py | 10 +- langchain/callbacks/tracers/schemas.py | 2 +- langchain/cli/main.py | 37 +- langchain/client/langchain.py | 74 +- langchain/client/models.py | 49 +- .../client/tracing_datasets.ipynb | 2152 ++++++++--------- langchain/server.py | 9 +- 7 files changed, 1214 insertions(+), 1119 deletions(-) diff --git a/langchain/callbacks/tracers/base.py b/langchain/callbacks/tracers/base.py index f66863f9..4c7ddbac 100644 --- a/langchain/callbacks/tracers/base.py +++ b/langchain/callbacks/tracers/base.py @@ -56,7 +56,11 @@ class BaseTracer(BaseCallbackHandler, ABC): raise TracerException( f"Parent run with UUID {run.parent_run_id} not found." ) - if run.child_execution_order > parent_run.child_execution_order: + if ( + run.child_execution_order is not None + and parent_run.child_execution_order is not None + and run.child_execution_order > parent_run.child_execution_order + ): parent_run.child_execution_order = run.child_execution_order self.run_map.pop(str(run.id)) @@ -68,6 +72,10 @@ class BaseTracer(BaseCallbackHandler, ABC): parent_run = self.run_map.get(parent_run_id) if parent_run is None: raise TracerException(f"Parent run with UUID {parent_run_id} not found.") + if parent_run.child_execution_order is None: + raise TracerException( + f"Parent run with UUID {parent_run_id} has no child execution order." + ) return parent_run.child_execution_order + 1 diff --git a/langchain/callbacks/tracers/schemas.py b/langchain/callbacks/tracers/schemas.py index d34bb182..a1252ac5 100644 --- a/langchain/callbacks/tracers/schemas.py +++ b/langchain/callbacks/tracers/schemas.py @@ -108,7 +108,7 @@ class RunBase(BaseModel): extra: dict error: Optional[str] execution_order: int - child_execution_order: int + child_execution_order: Optional[int] serialized: dict inputs: dict outputs: Optional[dict] diff --git a/langchain/cli/main.py b/langchain/cli/main.py index 74b8830e..0de633a1 100644 --- a/langchain/cli/main.py +++ b/langchain/cli/main.py @@ -5,6 +5,7 @@ import shutil import subprocess from contextlib import contextmanager from pathlib import Path +from subprocess import CalledProcessError from typing import Generator, List, Optional import requests @@ -19,10 +20,29 @@ _DIR = Path(__file__).parent def get_docker_compose_command() -> List[str]: - if shutil.which("docker-compose") is None: + """Get the correct docker compose command for this system.""" + try: + subprocess.check_call( + ["docker", "compose", "--version"], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) return ["docker", "compose"] - else: - return ["docker-compose"] + except (CalledProcessError, FileNotFoundError): + try: + subprocess.check_call( + ["docker-compose", "--version"], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + return ["docker-compose"] + except (CalledProcessError, FileNotFoundError): + raise ValueError( + "Neither 'docker compose' nor 'docker-compose'" + " commands are available. 
Please install the Docker" + " server following the instructions for your operating" + " system at https://docs.docker.com/engine/install/" + ) def get_ngrok_url(auth_token: Optional[str]) -> str: @@ -85,6 +105,12 @@ class ServerCommand: ) self.ngrok_path = Path(__file__).absolute().parent / "docker-compose.ngrok.yaml" + def _open_browser(self, url: str) -> None: + try: + subprocess.run(["open", url]) + except FileNotFoundError: + pass + def _start_local(self) -> None: command = [ *self.docker_compose_command, @@ -107,7 +133,7 @@ class ServerCommand: ) logger.info("\tLANGCHAIN_TRACING_V2=true") - subprocess.run(["open", "http://localhost"]) + self._open_browser("http://localhost") def _start_and_expose(self, auth_token: Optional[str]) -> None: with create_ngrok_config(auth_token=auth_token): @@ -138,7 +164,8 @@ class ServerCommand: ) logger.info("\tLANGCHAIN_TRACING_V2=true") logger.info(f"\tLANGCHAIN_ENDPOINT={ngrok_url}") - subprocess.run(["open", "http://localhost"]) + self._open_browser("http://0.0.0.0:4040") + self._open_browser("http://localhost") def start(self, *, expose: bool = False, auth_token: Optional[str] = None) -> None: """Run the LangChainPlus server locally. diff --git a/langchain/client/langchain.py b/langchain/client/langchain.py index 4c3848b1..b98dc109 100644 --- a/langchain/client/langchain.py +++ b/langchain/client/langchain.py @@ -27,9 +27,16 @@ from requests import Response from langchain.base_language import BaseLanguageModel from langchain.callbacks.tracers.langchain import LangChainTracer +from langchain.callbacks.tracers.schemas import Run, TracerSession from langchain.chains.base import Chain from langchain.chat_models.base import BaseChatModel -from langchain.client.models import Dataset, DatasetCreate, Example, ExampleCreate +from langchain.client.models import ( + Dataset, + DatasetCreate, + Example, + ExampleCreate, + ListRunsQueryParams, +) from langchain.llms.base import BaseLLM from langchain.schema import ChatResult, LLMResult, messages_from_dict from langchain.utils import raise_for_status_with_text, xor_args @@ -192,6 +199,71 @@ class LangChainPlusClient(BaseSettings): raise ValueError(f"Dataset {file_name} already exists") return Dataset(**result) + def read_run(self, run_id: str) -> Run: + """Read a run from the LangChain+ API.""" + response = self._get(f"/runs/{run_id}") + raise_for_status_with_text(response) + return Run(**response.json()) + + def list_runs( + self, + *, + session_id: Optional[str] = None, + session_name: Optional[str] = None, + run_type: Optional[str] = None, + **kwargs: Any, + ) -> List[Run]: + """List runs from the LangChain+ API.""" + if session_name is not None: + if session_id is not None: + raise ValueError("Only one of session_id or session_name may be given") + session_id = self.read_session(session_name=session_name).id + query_params = ListRunsQueryParams( + session_id=session_id, run_type=run_type, **kwargs + ) + filtered_params = { + k: v for k, v in query_params.dict().items() if v is not None + } + response = self._get("/runs", params=filtered_params) + raise_for_status_with_text(response) + return [Run(**run) for run in response.json()] + + @xor_args(("session_id", "session_name")) + def read_session( + self, *, session_id: Optional[str] = None, session_name: Optional[str] = None + ) -> TracerSession: + """Read a session from the LangChain+ API.""" + path = "/sessions" + params: Dict[str, Any] = {"limit": 1, "tenant_id": self.tenant_id} + if session_id is not None: + path += f"/{session_id}" + elif session_name 
is not None:
+            params["name"] = session_name
+        else:
+            raise ValueError("Must provide session_name or session_id")
+        response = self._get(
+            path,
+            params=params,
+        )
+        raise_for_status_with_text(response)
+        result = response.json()
+        if isinstance(result, list):
+            if len(result) == 0:
+                raise ValueError(f"Session {session_name} not found")
+            return TracerSession(**result[0])
+        return TracerSession(**response.json())
+
+    def list_sessions(self) -> List[TracerSession]:
+        """List sessions from the LangChain+ API."""
+        response = self._get("/sessions")
+        raise_for_status_with_text(response)
+        return [TracerSession(**session) for session in response.json()]
+
     def create_dataset(self, dataset_name: str, description: str) -> Dataset:
         """Create a dataset in the LangChain+ API."""
         dataset = DatasetCreate(
diff --git a/langchain/client/models.py b/langchain/client/models.py
index a7a19b10..037172f6 100644
--- a/langchain/client/models.py
+++ b/langchain/client/models.py
@@ -2,9 +2,9 @@
 from datetime import datetime
 from typing import Any, Dict, List, Optional
 from uuid import UUID
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, root_validator
 
-from langchain.callbacks.tracers.schemas import Run
+from langchain.callbacks.tracers.schemas import Run, RunTypeEnum
 
 
 class ExampleBase(BaseModel):
@@ -52,3 +52,48 @@ class Dataset(DatasetBase):
     id: UUID
     created_at: datetime
     modified_at: Optional[datetime] = Field(default=None)
+
+
+class ListRunsQueryParams(BaseModel):
+    """Query params for GET /runs endpoint."""
+
+    class Config:
+        extra = "forbid"
+
+    id: Optional[List[UUID]]
+    """Filter runs by id."""
+    parent_run: Optional[UUID]
+    """Filter runs by parent run."""
+    run_type: Optional[RunTypeEnum]
+    """Filter runs by type."""
+    session: Optional[UUID] = Field(default=None, alias="session_id")
+    """Only return runs within a session."""
+    reference_example: Optional[UUID]
+    """Only return runs that reference the specified dataset example."""
+    execution_order: Optional[int]
+    """Filter runs by execution order."""
+    error: Optional[bool]
+    """Whether to return only runs that errored."""
+    offset: Optional[int]
+    """The offset of the first run to return."""
+    limit: Optional[int]
+    """The maximum number of runs to return."""
+    start_time: Optional[datetime] = Field(
+        default=None,
+        alias="start_before",
+        description="Query Runs that started <= this time",
+    )
+    end_time: Optional[datetime] = Field(
+        default=None,
+        alias="end_after",
+        description="Query Runs that ended >= this time",
+    )
+
+    @root_validator
+    def validate_time_range(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+        """Validate that start_time <= end_time."""
+        start_time = values.get("start_time")
+        end_time = values.get("end_time")
+        if start_time and end_time and start_time > end_time:
+            raise ValueError("start_time must be <= end_time")
+        return values
diff --git a/langchain/experimental/client/tracing_datasets.ipynb b/langchain/experimental/client/tracing_datasets.ipynb
index dfb1b819..7b96a9b3 100644
--- a/langchain/experimental/client/tracing_datasets.ipynb
+++ b/langchain/experimental/client/tracing_datasets.ipynb
@@ -1,1109 +1,1055 @@
 {
-  "cells": [{
-    "cell_type": "markdown",
-    "id": "1a4596ea-a631-416d-a2a4-3577c140493d",
-    "metadata": {},
-    "source": [
-      "# Running Chains on Traced Datasets\n",
-      "\n",
-      "Developing applications with language models can be uniquely challenging. 
To manage this complexity and ensure reliable performance, LangChain provides tracing and evaluation functionality. This notebook demonstrates how to run Chains, which are language model functions, as well as Chat models, and LLMs on previously captured datasets or traces. Some common use cases for this approach include:\n", - "\n", - "- Running an evaluation chain to grade previous runs.\n", - "- Comparing different chains, LLMs, and agents on traced datasets.\n", - "- Executing a stochastic chain multiple times over a dataset to generate metrics before deployment.\n", - "\n", - "Please note that this notebook assumes you have LangChain+ tracing running in the background. It is also configured to work only with the V2 endpoints. To set it up, follow the [tracing directions here](..\\/..\\/tracing\\/local_installation.md).\n", - " \n", - "We'll start by creating a client to connect to LangChain+." - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "904db9a5-f387-4a57-914c-c8af8d39e249", - "metadata": { - "tags": [] - }, - "outputs": [{ - "name": "stdout", - "output_type": "stream", - "text": [ - "You can click the link below to view the UI\n" - ] - }, - { - "data": { - "text/html": [ - "LangChain+ Client" - ], - "text/plain": [ - "LangChainPlusClient (API URL: http://localhost:8000)" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from langchain.client import LangChainPlusClient\n", - "\n", - "client = LangChainPlusClient(\n", - " api_url=\"http://localhost:8000\",\n", - " api_key=None,\n", - " # tenant_id=\"your_tenant_uuid\", # This is required when connecting to a hosted LangChain instance\n", - ")\n", - "print(\"You can click the link below to view the UI\")\n", - "client" - ] - }, - { - "cell_type": "markdown", - "id": "2d77d064-41b4-41fb-82e6-2d16461269ec", - "metadata": { - "tags": [] - }, - "source": [ - "## Capture traces\n", - "\n", - "If you have been using LangChainPlus already, you may have datasets available. To view all saved datasets, run:\n", - "\n", - "```\n", - "datasets = client.list_datasets()\n", - "print(datasets)\n", - "```\n", - "\n", - "Datasets can be created in a number of ways, most often by collecting `Run`'s captured through the LangChain tracing API and converting a set of runs to a dataset.\n", - "\n", - "The V2 tracing API is currently accessible using the `tracing_v2_enabled` context manager. Assuming the server was succesfully started above, running LangChain Agents, Chains, LLMs, and other primitives will then automatically capture traces. We'll start with a simple math example.\n", - "\n", - "**Note** You can also use the `LANGCHAIN_TRACING_V2` environment variable to enable tracing for all runs by default, regardless of whether or not those runs happen within the `tracing_v2_enabled` context manager (i.e. 
`os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"`)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "4417e0b8-a26f-4a11-b7eb-ba7a18e73885", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from langchain.callbacks.manager import tracing_v2_enabled" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "7c801853-8e96-404d-984c-51ace59cbbef", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from langchain.chat_models import ChatOpenAI\n", - "from langchain.agents import initialize_agent, load_tools\n", - "from langchain.agents import AgentType\n", - "\n", - "llm = ChatOpenAI(temperature=0)\n", - "tools = load_tools(['serpapi', 'llm-math'], llm=llm)\n", - "agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "19537902-b95c-4390-80a4-f6c9a937081e", - "metadata": { - "tags": [] - }, - "outputs": [{ - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/wfh/code/lc/lckg/langchain/callbacks/manager.py:78: UserWarning: The experimental tracing v2 is in development. This is not yet stable and may change in the future.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The current population of Canada as of 2023 is 39,566,248.\n", - "Anwar Hadid is Dua Lipa's boyfriend and his age raised to the 0.43 power is approximately 3.87.\n", - "LLMMathChain._evaluate(\"\n", - "(age)**0.43\n", - "\") raised error: 'age'. Please try again with a valid numerical expression\n", - "The distance between Paris and Boston is approximately 3448 miles.\n", - "unknown format from LLM: Sorry, I cannot answer this question as it requires information from the future.\n", - "LLMMathChain._evaluate(\"\n", - "(total number of points scored in the 2023 super bowl)**0.23\n", - "\") raised error: invalid syntax. Perhaps you forgot a comma? (, line 1). Please try again with a valid numerical expression\n", - "Could not parse LLM output: `The final answer is that there were no more points scored in the 2023 Super Bowl than in the 2022 Super Bowl.`\n", - "1.9347796717823205\n", - "77\n", - "0.2791714614499425\n" - ] - } - ], - "source": [ - "inputs = [\n", - "'How many people live in canada as of 2023?',\n", - " \"who is dua lipa's boyfriend? what is his age raised to the .43 power?\",\n", - " \"what is dua lipa's boyfriend age raised to the .43 power?\",\n", - " 'how far is it from paris to boston in miles',\n", - " 'what was the total number of points scored in the 2023 super bowl? what is that number raised to the .23 power?',\n", - " 'what was the total number of points scored in the 2023 super bowl raised to the .23 power?',\n", - " 'how many more points were scored in the 2023 super bowl than in the 2022 super bowl?',\n", - " 'what is 153 raised to .1312 power?',\n", - " \"who is kendall jenner's boyfriend? what is his height (in inches) raised to .13 power?\",\n", - " 'what is 1213 divided by 4345?'\n", - "]\n", - "with tracing_v2_enabled(session_name=\"search_and_math_chain\"):\n", - " for input_example in inputs:\n", - " try:\n", - " print(agent.run(input_example))\n", - " except Exception as e:\n", - " # The agent sometimes makes mistakes! 
These will be captured by the tracing.\n", - " print(e)\n", - " " - ] - }, - { - "cell_type": "markdown", - "id": "6c43c311-4e09-4d57-9ef3-13afb96ff430", - "metadata": {}, - "source": [ - "## Creating the Dataset\n", - "\n", - "Now that you've captured a session entitled 'search_and_math_chain', it's time to create a dataset:\n", - "\n", - " 1. Navigate to the UI by clicking on the link below.\n", - " 2. Select the 'search_and_math_chain' session from the list.\n", - " 3. Next to the fist example, click \"+ to Dataset\".\n", - " 4. Click \"Create Dataset\" and create a title **\"calculator-example-dataset\"**.\n", - " 5. Add the other examples to the dataset as well" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "d14a9881-2a01-404c-8c56-0b78565c3ff4", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "dataset_name = \"calculator-example-dataset\"" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "7bfb4699-62c3-400a-b3e7-7fb8ad3a68ad", - "metadata": { - "tags": [] - }, - "outputs": [{ - "data": { - "text/html": [ - "LangChain+ Client" - ], - "text/plain": [ - "LangChainPlusClient (API URL: http://localhost:8000)" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - }], - "source": [ - "client" - ] - }, - { - "cell_type": "markdown", - "id": "db79dea2-fbaa-4c12-9083-f6154b51e2d3", - "metadata": { - "jp-MarkdownHeadingCollapsed": true, - "tags": [] - }, - "source": [ - "**Optional:** If you didn't run the trace above, you can also create datasets by uploading dataframes or CSV files." - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "1baa677c-5642-4378-8e01-3aa1647f19d6", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# !pip install datasets > /dev/null\n", - "# !pip install pandas > /dev/null" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "60d14593-c61f-449f-a38f-772ca43707c2", - "metadata": { - "tags": [] - }, - "outputs": [{ - "name": "stderr", - "output_type": "stream", - "text": [ - "Found cached dataset json (/Users/wfh/.cache/huggingface/datasets/LangChainDatasets___json/LangChainDatasets--agent-search-calculator-8a025c0ce5fb99d2/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4)\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "c34edde8de5340888b3278d1ac427417", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/1 [00:00\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
inputoutput
0How many people live in canada as of 2023?approximately 38,625,801
1who is dua lipa's boyfriend? what is his age r...her boyfriend is Romain Gravas. his age raised...
2what is dua lipa's boyfriend age raised to the...her boyfriend is Romain Gravas. his age raised...
3how far is it from paris to boston in milesapproximately 3,435 mi
4what was the total number of points scored in ...approximately 2.682651500990882
\n", - "" - ], - "text/plain": [ - " input \\\n", - "0 How many people live in canada as of 2023? \n", - "1 who is dua lipa's boyfriend? what is his age r... \n", - "2 what is dua lipa's boyfriend age raised to the... \n", - "3 how far is it from paris to boston in miles \n", - "4 what was the total number of points scored in ... \n", - "\n", - " output \n", - "0 approximately 38,625,801 \n", - "1 her boyfriend is Romain Gravas. his age raised... \n", - "2 her boyfriend is Romain Gravas. his age raised... \n", - "3 approximately 3,435 mi \n", - "4 approximately 2.682651500990882 " - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# import pandas as pd\n", - "# from langchain.evaluation.loading import load_dataset\n", - "\n", - "# dataset = load_dataset(\"agent-search-calculator\")\n", - "# df = pd.DataFrame(dataset, columns=[\"question\", \"answer\"])\n", - "# df.columns = [\"input\", \"output\"] # The chain we want to evaluate below expects inputs with the \"input\" key \n", - "# df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "52a7ea76-79ca-4765-abf7-231e884040d6", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# dataset_name = \"calculator-example-dataset\"\n", - "\n", - "# if dataset_name not in set([dataset.name for dataset in client.list_datasets()]):\n", - "# dataset = client.upload_dataframe(df, \n", - "# name=dataset_name,\n", - "# description=\"A calculator example dataset\",\n", - "# input_keys=[\"input\"],\n", - "# output_keys=[\"output\"],\n", - "# )" - ] - }, - { - "cell_type": "markdown", - "id": "07885b10", - "metadata": { - "tags": [] - }, - "source": [ - "## Running a Chain on a Traced Dataset\n", - "\n", - "Once you have a dataset, you can run a compatible chain or other object over it to see its results. The run traces will automatically be associated with the dataset for easy attribution and analysis.\n", - "\n", - "**First, we'll define the chain we wish to run over the dataset.**\n", - "\n", - "In this case, we're using an agent, but it can be any simple chain." - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "c2b59104-b90e-466a-b7ea-c5bd0194263b", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from langchain.chat_models import ChatOpenAI\n", - "from langchain.agents import initialize_agent, load_tools\n", - "from langchain.agents import AgentType\n", - "\n", - "llm = ChatOpenAI(temperature=0)\n", - "tools = load_tools(['serpapi', 'llm-math'], llm=llm)\n", - "agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=False)" - ] - }, - { - "cell_type": "markdown", - "id": "84094a4a-1d76-461c-bc37-8c537939b466", - "metadata": {}, - "source": [ - "**Now we're ready to run the chain!**\n", - "\n", - "The docstring below hints ways you can configure the method to run." 
- ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "112d7bdf-7e50-4c1a-9285-5bac8473f2ee", - "metadata": { - "tags": [] - }, - "outputs": [{ - "data": { - "text/plain": [ - "\u001b[0;31mSignature:\u001b[0m\n", - "\u001b[0mclient\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marun_on_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n", - "\u001b[0;34m\u001b[0m \u001b[0mdataset_name\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m'str'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", - "\u001b[0;34m\u001b[0m \u001b[0mllm_or_chain_factory\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m'MODEL_OR_CHAIN_FACTORY'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", - "\u001b[0;34m\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", - "\u001b[0;34m\u001b[0m \u001b[0mconcurrency_level\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m'int'\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", - "\u001b[0;34m\u001b[0m \u001b[0mnum_repetitions\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m'int'\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", - "\u001b[0;34m\u001b[0m \u001b[0msession_name\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m'Optional[str]'\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", - "\u001b[0;34m\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m'bool'\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", - "\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;34m'Dict[str, Any]'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mDocstring:\u001b[0m\n", - "Run the chain on a dataset and store traces to the specified session name.\n", - "\n", - "Args:\n", - " dataset_name: Name of the dataset to run the chain on.\n", - " llm_or_chain_factory: Language model or Chain constructor to run\n", - " over the dataset. The Chain constructor is used to permit\n", - " independent calls on each example without carrying over state.\n", - " concurrency_level: The number of async tasks to run concurrently.\n", - " num_repetitions: Number of times to run the model on each example.\n", - " This is useful when testing success rates or generating confidence\n", - " intervals.\n", - " session_name: Name of the session to store the traces in.\n", - " Defaults to {dataset_name}-{chain class name}-{datetime}.\n", - " verbose: Whether to print progress.\n", - "\n", - "Returns:\n", - " A dictionary mapping example ids to the model outputs.\n", - "\u001b[0;31mFile:\u001b[0m ~/code/lc/lckg/langchain/client/langchain.py\n", - "\u001b[0;31mType:\u001b[0m method" - ] - }, - "metadata": {}, - "output_type": "display_data" - }], - "source": [ - "?client.arun_on_dataset" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "6e10f823", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Since chains can be stateful (e.g. they can have memory), we need provide\n", - "# a way to initialize a new chain for each row in the dataset. This is done\n", - "# by passing in a factory function that returns a new chain for each row.\n", - "chain_factory = lambda: initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=False)\n", - "\n", - "# If your chain is NOT stateful, your lambda can return the object directly\n", - "# to improve runtime performance. 
For example:\n", - "# chain_factory = lambda: agent" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "a8088b7d-3ab6-4279-94c8-5116fe7cee33", - "metadata": { - "tags": [] - }, - "outputs": [{ - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/wfh/code/lc/lckg/langchain/callbacks/manager.py:78: UserWarning: The experimental tracing v2 is in development. This is not yet stable and may change in the future.\n", - " warnings.warn(\n", - "Chain failed for example 5523e460-6bb4-4a64-be37-bec0a98699a4. Error: LLMMathChain._evaluate(\"\n", - "(total number of points scored in the 2023 super bowl)**0.23\n", - "\") raised error: invalid syntax. Perhaps you forgot a comma? (, line 1). Please try again with a valid numerical expression\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processed examples: 2\r" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Chain failed for example f193a3f6-1147-4ce6-a83e-fab1157dc88d. Error: unknown format from LLM: Assuming we don't have any information about the actual number of points scored in the 2023 super bowl, we cannot provide a mathematical expression to solve this problem.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processed examples: 6\r" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Chain failed for example 6d7bbb45-1dc0-4adc-be21-4f76a208a8d2. Error: LLMMathChain._evaluate(\"\n", - "(age ** 0.43)\n", - "\") raised error: 'age'. Please try again with a valid numerical expression\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processed examples: 10\r" - ] - } - ], - "source": [ - "chain_results = await client.arun_on_dataset(\n", - " dataset_name=dataset_name,\n", - " llm_or_chain_factory=chain_factory,\n", - " verbose=True\n", - ")\n", - "\n", - "# Sometimes, the agent will error due to parsing issues, incompatible tool inputs, etc.\n", - "# These are logged as warnings here and captured as errors in the tracing UI." - ] - }, - { - "cell_type": "markdown", - "id": "d2737458-b20c-4288-8790-1f4a8d237b2a", - "metadata": {}, - "source": [ - "## Reviewing the Chain Results\n", - "\n", - "You can review the results of the run in the tracing UI below and navigating to the session \n", - "with the title 'calculator-example-dataset-AgentExecutor-YYYY-MM-DD-HH-MM-SS'" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "136db492-d6ca-4215-96f9-439c23538241", - "metadata": { - "tags": [] - }, - "outputs": [{ - "data": { - "text/html": [ - "LangChain+ Client" - ], - "text/plain": [ - "LangChainPlusClient (API URL: http://localhost:8000)" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - }], - "source": [ - "# You can navigate to the UI by clicking on the link below\n", - "client" - ] - }, - { - "cell_type": "markdown", - "id": "c70cceb5-aa53-4851-bb12-386f092191f9", - "metadata": {}, - "source": [ - "### Running a Chat Model over a Traced Dataset\n", - "\n", - "We've shown how to run a _chain_ over a dataset, but you can also run an LLM or Chat model over a datasets formed from runs. \n", - "\n", - "First, we'll show an example using a ChatModel. 
This is useful for things like:\n", - "- Comparing results under different decoding parameters\n", - "- Comparing model providers\n", - "- Testing for regressions in model behavior\n", - "- Running multiple times with a temperature to gauge stability \n", - "\n", - "To speed things up, we'll upload a dataset we've previously captured directly to the tracing service." - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "64490d7c-9a18-49ed-a3ac-36049c522cb4", - "metadata": { - "tags": [] - }, - "outputs": [{ - "name": "stderr", - "output_type": "stream", - "text": [ - "Found cached dataset parquet (/Users/wfh/.cache/huggingface/datasets/LangChainDatasets___parquet/LangChainDatasets--two-player-dnd-cc62c3037e2d9250/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "0adb751cec11417b88072963325b481d", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/1 [00:00\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
generationsmessages
0[[{'generation_info': None, 'message': {'conte...[{'data': {'content': 'Here is the topic for a...
1[[{'generation_info': None, 'message': {'conte...[{'data': {'content': 'Here is the topic for a...
2[[{'generation_info': None, 'message': {'conte...[{'data': {'content': 'Here is the topic for a...
3[[{'generation_info': None, 'message': {'conte...[{'data': {'content': 'Here is the topic for a...
4[[{'generation_info': None, 'message': {'conte...[{'data': {'content': 'Here is the topic for a...
\n", - "" - ], - "text/plain": [ - " generations \\\n", - "0 [[{'generation_info': None, 'message': {'conte... \n", - "1 [[{'generation_info': None, 'message': {'conte... \n", - "2 [[{'generation_info': None, 'message': {'conte... \n", - "3 [[{'generation_info': None, 'message': {'conte... \n", - "4 [[{'generation_info': None, 'message': {'conte... \n", - "\n", - " messages \n", - "0 [{'data': {'content': 'Here is the topic for a... \n", - "1 [{'data': {'content': 'Here is the topic for a... \n", - "2 [{'data': {'content': 'Here is the topic for a... \n", - "3 [{'data': {'content': 'Here is the topic for a... \n", - "4 [{'data': {'content': 'Here is the topic for a... " - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import pandas as pd\n", - "from langchain.evaluation.loading import load_dataset\n", - "\n", - "chat_dataset = load_dataset(\"two-player-dnd\")\n", - "chat_df = pd.DataFrame(chat_dataset)\n", - "chat_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "348acd86-a927-4d60-8d52-02e64585e4fc", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "chat_dataset_name = \"two-player-dnd\"\n", - "\n", - "if chat_dataset_name not in set([dataset.name for dataset in client.list_datasets()]):\n", - " client.upload_dataframe(chat_df, \n", - " name=chat_dataset_name,\n", - " description=\"An example dataset traced from chat models in a multiagent bidding dialogue\",\n", - " input_keys=[\"messages\"],\n", - " output_keys=[\"generations\"],\n", - " )" - ] - }, - { - "cell_type": "markdown", - "id": "927a43b8-e4f9-4220-b75d-33e310bc318b", - "metadata": {}, - "source": [ - "#### Reviewing behavior with temperature\n", - "\n", - "Here, we will set `num_repetitions > 1` and set the temperature to 0.3 to see the variety of response types for a each example.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "a69dd183-ad5e-473d-b631-db90706e837f", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from langchain.chat_models import ChatAnthropic\n", - "\n", - "chat_model = ChatAnthropic(temperature=.3)" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "063da2a9-3692-4b7b-8edb-e474824fe416", - "metadata": { - "tags": [] - }, - "outputs": [{ - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/wfh/code/lc/lckg/langchain/callbacks/manager.py:78: UserWarning: The experimental tracing v2 is in development. This is not yet stable and may change in the future.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Processed examples: 36\r" - ] - } - ], - "source": [ - "chat_model_results = await client.arun_on_dataset(\n", - " dataset_name=chat_dataset_name,\n", - " llm_or_chain_factory=chat_model,\n", - " concurrency_level=5, # Optional, sets the number of examples to run at a time\n", - " num_repetitions=3,\n", - " verbose=True\n", - ")\n", - "\n", - "# The 'experimental tracing v2' warning is expected, as we are still actively developing the v2 tracing API \n", - "# Since we are running examples concurrently, you may run into some RateLimit warnings from your model\n", - "# provider. In most cases, the tests will still run to completion (the wrappers have backoff)." 
- ] - }, - { - "cell_type": "markdown", - "id": "de7bfe08-215c-4328-b9b0-631d9a41f0e8", - "metadata": { - "tags": [] - }, - "source": [ - "## Reviewing the Chat Model Results\n", - "\n", - "You can review the latest runs by clicking on the link below and navigating to the \"two-player-dnd\" session." - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "5b7a81f2-d19d-438b-a4bb-5678f746b965", - "metadata": { - "tags": [] - }, - "outputs": [{ - "data": { - "text/html": [ - "LangChain+ Client" - ], - "text/plain": [ - "LangChainPlusClient (API URL: http://localhost:8000)" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - }], - "source": [ - "client" - ] - }, - { - "cell_type": "markdown", - "id": "7896cbeb-345f-430b-ab5e-e108973174f8", - "metadata": {}, - "source": [ - "## Running an LLM over a Traced Dataset\n", - "\n", - "You can run an LLM over a dataset in much the same way as the chain and chat models, provided the dataset you've captured is in the appropriate format. We've cached one for you here, but using application-specific traces will be much more useful for your use cases." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "d6805d0b-4612-4671-bffb-e6978992bd40", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from langchain.llms import OpenAI\n", - "\n", - "llm = OpenAI(model_name='text-curie-001', temperature=0)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "5d7cb243-40c3-44dd-8158-a7b910441e9f", - "metadata": { - "tags": [] - }, - "outputs": [{ - "name": "stderr", - "output_type": "stream", - "text": [ - "Found cached dataset parquet (/Users/wfh/.cache/huggingface/datasets/LangChainDatasets___parquet/LangChainDatasets--state-of-the-union-completions-a7eb4af13453cd35/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "189832bd50114f129fb58e590d6e8267", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/1 [00:00\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
generationsground_truthprompt
0[[{'generation_info': {'finish_reason': 'stop'...The pandemic has been punishing. \\n\\nAnd so ma...Putin may circle Kyiv with tanks, but he will ...
1[[]]With a duty to one another to the American peo...Madam Speaker, Madam Vice President, our First...
2[[{'generation_info': {'finish_reason': 'stop'...He thought he could roll into Ukraine and the ...With a duty to one another to the American peo...
3[[{'generation_info': {'finish_reason': 'lengt...With a duty to one another to the American peo...Madam Speaker, Madam Vice President, our First...
4[[]]And the costs and the threats to America and t...Please rise if you are able and show that, Yes...
\n", - "" - ], - "text/plain": [ - " generations \\\n", - "0 [[{'generation_info': {'finish_reason': 'stop'... \n", - "1 [[]] \n", - "2 [[{'generation_info': {'finish_reason': 'stop'... \n", - "3 [[{'generation_info': {'finish_reason': 'lengt... \n", - "4 [[]] \n", - "\n", - " ground_truth \\\n", - "0 The pandemic has been punishing. \\n\\nAnd so ma... \n", - "1 With a duty to one another to the American peo... \n", - "2 He thought he could roll into Ukraine and the ... \n", - "3 With a duty to one another to the American peo... \n", - "4 And the costs and the threats to America and t... \n", - "\n", - " prompt \n", - "0 Putin may circle Kyiv with tanks, but he will ... \n", - "1 Madam Speaker, Madam Vice President, our First... \n", - "2 With a duty to one another to the American peo... \n", - "3 Madam Speaker, Madam Vice President, our First... \n", - "4 Please rise if you are able and show that, Yes... " - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "completions_dataset = load_dataset(\"state-of-the-union-completions\")\n", - "completions_df = pd.DataFrame(completions_dataset)\n", - "completions_df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "c7dcc1b2-7aef-44c0-ba0f-c812279099a5", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "completions_dataset_name = \"state-of-the-union-completions\"\n", - "\n", - "if completions_dataset_name not in set([dataset.name for dataset in client.list_datasets()]):\n", - " client.upload_dataframe(completions_df, \n", - " name=completions_dataset_name,\n", - " description=\"An example dataset traced from completion endpoints over the state of the union address\",\n", - " input_keys=[\"prompt\"],\n", - " output_keys=[\"generations\"],\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "e946138e-bf7c-43d7-861d-9c5740c933fa", - "metadata": { - "tags": [] - }, - "outputs": [{ - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/wfh/code/lc/lckg/langchain/callbacks/manager.py:78: UserWarning: The experimental tracing v2 is in development. This is not yet stable and may change in the future.\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "55 processed\r" - ] - } - ], - "source": [ - "# We also offer a synchronous method for running examples if a chain or llm's async methods aren't yet implemented\n", - "completions_model_results = client.run_on_dataset(\n", - " dataset_name=completions_dataset_name,\n", - " llm_or_chain_factory=llm,\n", - " num_repetitions=1,\n", - " verbose=True\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "cc86e8e6-cee2-429e-942b-289284d14816", - "metadata": {}, - "source": [ - "## Reviewing the LLM Results\n", - "\n", - "You can once again inspect the latest runs by clicking on the link below and navigating to the \"two-player-dnd\" session." 
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "id": "2bf96f17-74c1-4f7d-8458-ae5ab5c6bd36",
-   "metadata": {"tags": []},
-   "outputs": [{
-     "data": {
-      "text/html": ["LangChain+ Client"],
-      "text/plain": ["LangChainPlusClient (API URL: http://localhost:8000)"]
-     },
-     "execution_count": 24,
-     "metadata": {},
-     "output_type": "execute_result"
-    }],
-   "source": ["client"]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "df80cd88-cd6f-4fdc-965f-f74600e1f286",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "1a4596ea-a631-416d-a2a4-3577c140493d",
+   "metadata": {"tags": []},
+   "source": [
+    "# Tracing and Datasets\n",
+    "\n",
+    "LangChain makes it easy to get started with Agents and other LLM applications. However, it can be tricky to get right, especially when you need to deliver a full product. To speed up your application development process, and to help monitor your applications in production, LangChain offers additional tracing tooling.\n",
+    "\n",
+    "When might you want to use tracing? Some situations we've found it useful include:\n",
+    "- Quickly debugging a new chain, agent, or set of tools\n",
+    "- Evaluating a given chain across different LLMs or Chat Models to compare results or improve prompts\n",
+    "- Running a given chain multiple times on a dataset to ensure it consistently meets a quality bar.\n",
+    "\n",
+    "\n",
+    "In this notebook, we'll show how to enable tracing in your LangChain applications and walk you through a couple of common ways to evaluate your agents.\n",
+    "We'll focus on using Datasets to benchmark Chain behavior.\n",
+    "\n",
+    "Bear in mind that this notebook assumes you're running the LangChain+ server locally in the background, and that it's set up to work specifically with the V2 endpoints. You can start the server with the following commands in your terminal:\n",
+    "\n",
+    "\n",
+    "```\n",
+    "pip install --upgrade langchain\n",
+    "langchain server start\n",
+    "```\n",
+    "\n",
+    "Now, let's get started by creating a client to connect to LangChain+."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "904db9a5-f387-4a57-914c-c8af8d39e249",
+   "metadata": {"tags": []},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": ["You can click the link below to view the UI\n"]
+    },
+    {
+     "data": {
+      "text/html": ["LangChain+ Client"],
+      "text/plain": ["LangChainPlusClient (API URL: http://localhost:8000)"]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from langchain.client import LangChainPlusClient\n",
+    "\n",
+    "client = LangChainPlusClient(\n",
+    "    api_url=\"http://localhost:8000\",\n",
+    "    api_key=None,\n",
+    "    # tenant_id=\"your_tenant_uuid\",  # This is required when connecting to a hosted LangChain instance\n",
+    ")\n",
+    "print(\"You can click the link below to view the UI\")\n",
+    "client"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2d77d064-41b4-41fb-82e6-2d16461269ec",
+   "metadata": {"tags": []},
+   "source": [
+    "## Tracing Runs\n",
+    "\n",
+    "The V2 tracing API can be activated by setting the `LANGCHAIN_TRACING_V2` environment variable to true. Assuming you've successfully initiated the server as described earlier, running LangChain Agents, Chains, LLMs, and other primitives will automatically start capturing traces. 
Let's begin our exploration with a straightforward math example.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "4417e0b8-a26f-4a11-b7eb-ba7a18e73885", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import os\n", + "os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", + "os.environ[\"LANGCHAIN_SESSION\"] = \"Tracing Walkthrough\"\n", + "# os.environ[\"LANGCHAIN_ENDPOINT\"] = \"http://localhost:8000\" # The default. Update this if you wish to connect to a hosted LangChain instance\n", + "# os.environ[\"LANGCHAIN_API_KEY\"] = None # Update if you wish to authenticate with a hosted LangChain instance" + ] + }, + { + "cell_type": "markdown", + "id": "7935e832-9ae1-4557-8d08-890c425f18e2", + "metadata": {}, + "source": [ + "**Note** You can also use the `tracing_v2_enabled` context manager to capture sessions within a given context:\n", + "```\n", + "from langchain.callbacks.manager import tracing_v2_enabled\n", + "with tracing_v2_enabled(\"My Session Name\"):\n", + " ...\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "7c801853-8e96-404d-984c-51ace59cbbef", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from langchain.chat_models import ChatOpenAI\n", + "from langchain.agents import initialize_agent, load_tools\n", + "from langchain.agents import AgentType\n", + "\n", + "llm = ChatOpenAI(temperature=0)\n", + "tools = load_tools(['serpapi', 'llm-math'], llm=llm)\n", + "agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "19537902-b95c-4390-80a4-f6c9a937081e", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "39,566,248\n", + "Anwar Hadid is Dua Lipa's boyfriend and his age raised to the 0.43 power is approximately 3.87.\n", + "LLMMathChain._evaluate(\"\n", + "(age)**0.43\n", + "\") raised error: 'age'. Please try again with a valid numerical expression\n", + "The distance between Paris and Boston is 3448 miles.\n", + "unknown format from LLM: Assuming we don't have any information about the actual number of points scored in the 2023 super bowl, we cannot provide a mathematical expression to solve this problem.\n", + "LLMMathChain._evaluate(\"\n", + "(total number of points scored in the 2023 super bowl)**0.23\n", + "\") raised error: invalid syntax. Perhaps you forgot a comma? (, line 1). Please try again with a valid numerical expression\n", + "15 points were scored more in the 2023 Super Bowl than in the 2022 Super Bowl.\n", + "1.9347796717823205\n", + "77\n", + "LLMMathChain._evaluate(\"\n", + "round(0.2791714614499425, 2)\n", + "\") raised error: 'VariableNode' object is not callable. Please try again with a valid numerical expression\n" + ] + } + ], + "source": [ + "inputs = [\n", + "'How many people live in canada as of 2023?',\n", + " \"who is dua lipa's boyfriend? what is his age raised to the .43 power?\",\n", + " \"what is dua lipa's boyfriend age raised to the .43 power?\",\n", + " 'how far is it from paris to boston in miles',\n", + " 'what was the total number of points scored in the 2023 super bowl? 
what is that number raised to the .23 power?',\n",
+    "'what was the total number of points scored in the 2023 super bowl raised to the .23 power?',\n",
+    "'how many more points were scored in the 2023 super bowl than in the 2022 super bowl?',\n",
+    "'what is 153 raised to .1312 power?',\n",
+    "\"who is kendall jenner's boyfriend? what is his height (in inches) raised to .13 power?\",\n",
+    "'what is 1213 divided by 4345?'\n",
+    "]\n",
+    "\n",
+    "for input_example in inputs:\n",
+    "    try:\n",
+    "        print(agent.run(input_example))\n",
+    "    except Exception as e:\n",
+    "        # The agent sometimes makes mistakes! These will be captured by the tracing.\n",
+    "        print(e)\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6c43c311-4e09-4d57-9ef3-13afb96ff430",
+   "metadata": {},
+   "source": [
+    "## Creating the Dataset\n",
+    "\n",
+    "Now that you've captured a session entitled 'Tracing Walkthrough', it's time to create a dataset. We will do so using the `create_dataset` method below."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "d14a9881-2a01-404c-8c56-0b78565c3ff4",
+   "metadata": {"tags": []},
+   "outputs": [],
+   "source": [
+    "dataset_name = \"calculator-example-dataset\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "c0e12629-bca5-4438-8665-890d0cb9cc4a",
+   "metadata": {"tags": []},
+   "outputs": [],
+   "source": [
+    "runs = client.list_runs(\n",
+    "    session_name=os.environ[\"LANGCHAIN_SESSION\"],\n",
+    "    run_type=\"chain\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "17580c4b-bd04-4dde-9d21-9d4edd25b00d",
+   "metadata": {"tags": []},
+   "outputs": [],
+   "source": [
+    "if dataset_name not in set([dataset.name for dataset in client.list_datasets()]):\n",
+    "    dataset = client.create_dataset(dataset_name, description=\"A calculator example dataset\")\n",
+    "    # List all \"Chain\" runs in the current session \n",
+    "    runs = client.list_runs(\n",
+    "        session_name=os.environ[\"LANGCHAIN_SESSION\"],\n",
+    "        run_type=\"chain\")\n",
+    "    for run in runs:\n",
+    "        if run.name == \"AgentExecutor\":\n",
+    "            # We will only use examples from the top level AgentExecutor run here.\n",
+    "            client.create_example(inputs=run.inputs, outputs=run.outputs, dataset_id=dataset.id)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "db79dea2-fbaa-4c12-9083-f6154b51e2d3",
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "source": [
+    "**Creating a Dataset in the UI** \n",
+    "\n",
+    "Alternatively, you could create or edit the dataset in the UI using the following steps:\n",
+    "\n",
+    " 1. Navigate to the UI by clicking on the link below.\n",
+    " 2. Select the 'Tracing Walkthrough' session from the list.\n",
+    " 3. Next to the first example, click \"+ to Dataset\".\n",
+    " 4. Click \"Create Dataset\" and give it the title **\"calculator-example-dataset\"**.\n",
+    " 5. Add the other examples to the dataset as well.\n",
+    "\n",
+    "Once you've used LangChain+ for a while, you will have a number of datasets to work with. To view all saved datasets, execute the following code:\n",
+    "\n",
+    "```\n",
+    "datasets = client.list_datasets()\n",
+    "print(datasets)\n",
+    "```\n",
+    "\n",
+    "\n",
+    "**Optional:** If you didn't run the trace above, you can also create datasets by uploading dataframes or CSV files.\n",
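+    "\n",
+    "For instance, here is a minimal sketch (the `calculator_examples.csv` file name and its \"input\"/\"output\" columns are hypothetical) that loads a CSV with pandas and hands it to the same `upload_dataframe` method used in the commented-out cells below:\n",
+    "\n",
+    "```\n",
+    "import pandas as pd\n",
+    "\n",
+    "df = pd.read_csv(\"calculator_examples.csv\")  # hypothetical example file\n",
+    "dataset = client.upload_dataframe(\n",
+    "    df,\n",
+    "    name=\"calculator-csv-dataset\",\n",
+    "    description=\"Examples imported from a CSV file\",\n",
+    "    input_keys=[\"input\"],\n",
+    "    output_keys=[\"output\"],\n",
+    ")\n",
+    "```"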
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "1baa677c-5642-4378-8e01-3aa1647f19d6",
+   "metadata": {"tags": []},
+   "outputs": [],
+   "source": [
+    "# !pip install datasets > /dev/null\n",
+    "# !pip install pandas > /dev/null"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "60d14593-c61f-449f-a38f-772ca43707c2",
+   "metadata": {"tags": []},
+   "outputs": [],
+   "source": [
+    "# import pandas as pd\n",
+    "# from langchain.evaluation.loading import load_dataset\n",
+    "\n",
+    "# dataset = load_dataset(\"agent-search-calculator\")\n",
+    "# df = pd.DataFrame(dataset, columns=[\"question\", \"answer\"])\n",
+    "# df.columns = [\"input\", \"output\"] # The chain we want to evaluate below expects inputs with the \"input\" key \n",
+    "# df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "52a7ea76-79ca-4765-abf7-231e884040d6",
+   "metadata": {"tags": []},
+   "outputs": [],
+   "source": [
+    "# dataset_name = \"calculator-example-dataset\"\n",
+    "\n",
+    "# if dataset_name not in set([dataset.name for dataset in client.list_datasets()]):\n",
+    "#     dataset = client.upload_dataframe(df, \n",
+    "#                                       name=dataset_name,\n",
+    "#                                       description=\"A calculator example dataset\",\n",
+    "#                                       input_keys=[\"input\"],\n",
+    "#                                       output_keys=[\"output\"],\n",
+    "#     )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "07885b10",
+   "metadata": {"tags": []},
+   "source": [
+    "## Running a Chain on a Traced Dataset\n",
+    "\n",
+    "Once you have a dataset, you can run a compatible chain or other object over it to see its results. The run traces will automatically be associated with the dataset for easy attribution and analysis.\n",
+    "\n",
+    "**First, we'll define the chain we wish to run over the dataset.**\n",
+    "\n",
+    "In this case, we're using an agent, but it can be any simple chain."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "c2b59104-b90e-466a-b7ea-c5bd0194263b",
+   "metadata": {"tags": []},
+   "outputs": [],
+   "source": [
+    "from langchain.chat_models import ChatOpenAI\n",
+    "from langchain.agents import initialize_agent, load_tools\n",
+    "from langchain.agents import AgentType\n",
+    "\n",
+    "llm = ChatOpenAI(temperature=0)\n",
+    "tools = load_tools(['serpapi', 'llm-math'], llm=llm)\n",
+    "agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "84094a4a-1d76-461c-bc37-8c537939b466",
+   "metadata": {},
+   "source": [
+    "**Now we're ready to run the chain!**\n",
+    "\n",
+    "The docstring below hints at ways you can configure the method to run.\n",
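+    "\n",
+    "For orientation, a typical call looks something like the sketch below (the argument values are illustrative defaults taken from the docstring, not requirements):\n",
+    "\n",
+    "```\n",
+    "chain_results = await client.arun_on_dataset(\n",
+    "    dataset_name=\"calculator-example-dataset\",\n",
+    "    llm_or_chain_factory=chain_factory,  # factory defined two cells below\n",
+    "    concurrency_level=5,   # number of async tasks to run at a time\n",
+    "    num_repetitions=1,     # rerun each example, e.g. to estimate variance\n",
+    "    session_name=None,     # defaults to {dataset_name}-{chain class name}-{datetime}\n",
+    "    verbose=True,\n",
+    ")\n",
+    "```"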
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "112d7bdf-7e50-4c1a-9285-5bac8473f2ee", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[0;31mSignature:\u001b[0m\n", + "\u001b[0mclient\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marun_on_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mdataset_name\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m'str'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mllm_or_chain_factory\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m'MODEL_OR_CHAIN_FACTORY'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mconcurrency_level\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m'int'\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mnum_repetitions\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m'int'\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0msession_name\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m'Optional[str]'\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m'bool'\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n", + "\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;34m'Dict[str, Any]'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mDocstring:\u001b[0m\n", + "Run the chain on a dataset and store traces to the specified session name.\n", + "\n", + "Args:\n", + " dataset_name: Name of the dataset to run the chain on.\n", + " llm_or_chain_factory: Language model or Chain constructor to run\n", + " over the dataset. The Chain constructor is used to permit\n", + " independent calls on each example without carrying over state.\n", + " concurrency_level: The number of async tasks to run concurrently.\n", + " num_repetitions: Number of times to run the model on each example.\n", + " This is useful when testing success rates or generating confidence\n", + " intervals.\n", + " session_name: Name of the session to store the traces in.\n", + " Defaults to {dataset_name}-{chain class name}-{datetime}.\n", + " verbose: Whether to print progress.\n", + "\n", + "Returns:\n", + " A dictionary mapping example ids to the model outputs.\n", + "\u001b[0;31mFile:\u001b[0m ~/code/lc/lckg/langchain/client/langchain.py\n", + "\u001b[0;31mType:\u001b[0m method" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "?client.arun_on_dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "6e10f823", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Since chains can be stateful (e.g. they can have memory), we need provide\n", + "# a way to initialize a new chain for each row in the dataset. This is done\n", + "# by passing in a factory function that returns a new chain for each row.\n", + "chain_factory = lambda: initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=False)\n", + "\n", + "# If your chain is NOT stateful, your lambda can return the object directly\n", + "# to improve runtime performance. 
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "a8088b7d-3ab6-4279-94c8-5116fe7cee33",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Processed examples: 1\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Chain failed for example 8d4ff5b4-41fb-4986-80f1-025e6fec96b0. Error: unknown format from LLM: It is impossible to accurately predict the total number of points scored in a future event. Therefore, a mathematical expression cannot be provided.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Processed examples: 2\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Chain failed for example 178081fb-a44a-46d5-a23b-74a830da65f3. Error: LLMMathChain._evaluate(\"\n",
+      "(age)**0.43\n",
+      "\") raised error: 'age'. Please try again with a valid numerical expression\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Processed examples: 5\r"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Chain failed for example 7de97d34-50e2-4ec5-bc49-c8e6287ae73e. Error: LLMMathChain._evaluate(\"\n",
+      "(total number of points scored in the 2023 super bowl)**0.23\n",
+      "\") raised error: invalid syntax. Perhaps you forgot a comma? (<unknown>, line 1). Please try again with a valid numerical expression\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Processed examples: 10\r"
+     ]
+    }
+   ],
+   "source": [
+    "chain_results = await client.arun_on_dataset(\n",
+    "    dataset_name=dataset_name,\n",
+    "    llm_or_chain_factory=chain_factory,\n",
+    "    concurrency_level=5, # Optional, sets the number of examples to run at a time\n",
+    "    verbose=True\n",
+    ")\n",
+    "\n",
+    "# Sometimes, the agent will error due to parsing issues, incompatible tool inputs, etc.\n",
+    "# These are logged as warnings here and captured as errors in the tracing UI."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d2737458-b20c-4288-8790-1f4a8d237b2a",
+   "metadata": {},
+   "source": [
+    "## Reviewing the Chain Results\n",
+    "\n",
+    "You can review the results of the run in the tracing UI below by navigating to the session \n",
+    "with the title 'calculator-example-dataset-AgentExecutor-YYYY-MM-DD-HH-MM-SS'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "136db492-d6ca-4215-96f9-439c23538241",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "LangChain+ Client"
+      ],
+      "text/plain": [
+       "LangChainPlusClient (API URL: http://localhost:8000)"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# You can navigate to the UI by clicking on the link below\n",
+    "client"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c70cceb5-aa53-4851-bb12-386f092191f9",
+   "metadata": {},
+   "source": [
+    "### Running a Chat Model over a Traced Dataset\n",
+    "\n",
+    "We've shown how to run a _chain_ over a dataset, but you can also run an LLM or Chat model over datasets formed from runs. \n",
+    "\n",
+    "First, we'll show an example using a ChatModel. This is useful for things like:\n",
+    "- Comparing results under different decoding parameters\n",
+    "- Comparing model providers\n",
+    "- Testing for regressions in model behavior\n",
+    "- Running each example multiple times at a nonzero temperature to gauge stability\n",
+    "\n",
+    "To speed things up, we'll upload a previously captured dataset directly to the tracing service."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "64490d7c-9a18-49ed-a3ac-36049c522cb4",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Found cached dataset parquet (/Users/wfh/.cache/huggingface/datasets/LangChainDatasets___parquet/LangChainDatasets--two-player-dnd-cc62c3037e2d9250/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "44f3c72015944e2ea4c39516350ea15c",
+       "version_major": 2,
+       "version_minor": 0
+      },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "df80cd88-cd6f-4fdc-965f-f74600e1f286",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.11.2"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
\ No newline at end of file
+      "text/plain": [
+       "  0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>generations</th>\n",
+       "      <th>messages</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>[[{'generation_info': None, 'message': {'conte...</td>\n",
+       "      <td>[{'data': {'content': 'Here is the topic for a...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>[[{'generation_info': None, 'message': {'conte...</td>\n",
+       "      <td>[{'data': {'content': 'Here is the topic for a...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>[[{'generation_info': None, 'message': {'conte...</td>\n",
+       "      <td>[{'data': {'content': 'Here is the topic for a...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>[[{'generation_info': None, 'message': {'conte...</td>\n",
+       "      <td>[{'data': {'content': 'Here is the topic for a...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>[[{'generation_info': None, 'message': {'conte...</td>\n",
+       "      <td>[{'data': {'content': 'Here is the topic for a...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                         generations  \\\n",
+       "0  [[{'generation_info': None, 'message': {'conte...   \n",
+       "1  [[{'generation_info': None, 'message': {'conte...   \n",
+       "2  [[{'generation_info': None, 'message': {'conte...   \n",
+       "3  [[{'generation_info': None, 'message': {'conte...   \n",
+       "4  [[{'generation_info': None, 'message': {'conte...   \n",
+       "\n",
+       "                                            messages  \n",
+       "0  [{'data': {'content': 'Here is the topic for a...  \n",
+       "1  [{'data': {'content': 'Here is the topic for a...  \n",
+       "2  [{'data': {'content': 'Here is the topic for a...  \n",
+       "3  [{'data': {'content': 'Here is the topic for a...  \n",
+       "4  [{'data': {'content': 'Here is the topic for a...  "
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "from langchain.evaluation.loading import load_dataset\n",
+    "\n",
+    "chat_dataset = load_dataset(\"two-player-dnd\")\n",
+    "chat_df = pd.DataFrame(chat_dataset)\n",
+    "chat_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "348acd86-a927-4d60-8d52-02e64585e4fc",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "chat_dataset_name = \"two-player-dnd\"\n",
+    "\n",
+    "if chat_dataset_name not in set([dataset.name for dataset in client.list_datasets()]):\n",
+    "    client.upload_dataframe(chat_df, \n",
+    "                            name=chat_dataset_name,\n",
+    "                            description=\"An example dataset traced from chat models in a multiagent bidding dialogue\",\n",
+    "                            input_keys=[\"messages\"],\n",
+    "                            output_keys=[\"generations\"],\n",
+    "                            )"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "927a43b8-e4f9-4220-b75d-33e310bc318b",
+   "metadata": {},
+   "source": [
+    "#### Reviewing behavior with temperature\n",
+    "\n",
+    "Here, we will set `num_repetitions > 1` and set the temperature to 0.3 to see the variety of response types for each example.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "a69dd183-ad5e-473d-b631-db90706e837f",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from langchain.chat_models import ChatAnthropic\n",
+    "\n",
+    "chat_model = ChatAnthropic(temperature=.3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "063da2a9-3692-4b7b-8edb-e474824fe416",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Processed examples: 36\r"
+     ]
+    }
+   ],
+   "source": [
+    "chat_model_results = await client.arun_on_dataset(\n",
+    "    dataset_name=chat_dataset_name,\n",
+    "    llm_or_chain_factory=chat_model,\n",
+    "    concurrency_level=5, # Optional, sets the number of examples to run at a time\n",
+    "    num_repetitions=3,\n",
+    "    verbose=True\n",
+    ")\n",
+    "\n",
+    "# The 'experimental tracing v2' warning is expected, as we are still actively developing the v2 tracing API \n",
+    "# Since we are running examples concurrently, you may run into some RateLimit warnings from your model\n",
+    "# provider. In most cases, the tests will still run to completion (the wrappers have backoff)."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "de7bfe08-215c-4328-b9b0-631d9a41f0e8",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "## Reviewing the Chat Model Results\n",
+    "\n",
+    "You can review the latest runs by clicking on the link below and navigating to the \"two-player-dnd\" session."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "5b7a81f2-d19d-438b-a4bb-5678f746b965",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "LangChain+ Client"
+      ],
+      "text/plain": [
+       "LangChainPlusClient (API URL: http://localhost:8000)"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "client"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7896cbeb-345f-430b-ab5e-e108973174f8",
+   "metadata": {},
+   "source": [
+    "## Running an LLM over a Traced Dataset\n",
+    "\n",
+    "You can run an LLM over a dataset in much the same way as the chain and chat models, provided the dataset you've captured is in the appropriate format. We've cached one for you here, but using application-specific traces will be much more useful for your use cases."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "d6805d0b-4612-4671-bffb-e6978992bd40",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from langchain.llms import OpenAI\n",
+    "\n",
+    "llm = OpenAI(model_name='text-curie-001', temperature=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "5d7cb243-40c3-44dd-8158-a7b910441e9f",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Found cached dataset parquet (/Users/wfh/.cache/huggingface/datasets/LangChainDatasets___parquet/LangChainDatasets--state-of-the-union-completions-5347290a406c64c8/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "5ce2168f975241fbae82a76b4d70e4c4",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>generations</th>\n",
+       "      <th>ground_truth</th>\n",
+       "      <th>prompt</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>[[{'generation_info': {'finish_reason': 'stop'...</td>\n",
+       "      <td>The pandemic has been punishing. \\n\\nAnd so ma...</td>\n",
+       "      <td>Putin may circle Kyiv with tanks, but he will ...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>[[]]</td>\n",
+       "      <td>With a duty to one another to the American peo...</td>\n",
+       "      <td>Madam Speaker, Madam Vice President, our First...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>[[{'generation_info': {'finish_reason': 'stop'...</td>\n",
+       "      <td>He thought he could roll into Ukraine and the ...</td>\n",
+       "      <td>With a duty to one another to the American peo...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>[[]]</td>\n",
+       "      <td>And the costs and the threats to America and t...</td>\n",
+       "      <td>Please rise if you are able and show that, Yes...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>[[{'generation_info': {'finish_reason': 'stop'...</td>\n",
+       "      <td>Please rise if you are able and show that, Yes...</td>\n",
+       "      <td>Groups of citizens blocking tanks with their b...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                         generations  \\\n",
+       "0  [[{'generation_info': {'finish_reason': 'stop'...   \n",
+       "1                                               [[]]   \n",
+       "2  [[{'generation_info': {'finish_reason': 'stop'...   \n",
+       "3                                               [[]]   \n",
+       "4  [[{'generation_info': {'finish_reason': 'stop'...   \n",
+       "\n",
+       "                                        ground_truth  \\\n",
+       "0  The pandemic has been punishing. \\n\\nAnd so ma...   \n",
+       "1  With a duty to one another to the American peo...   \n",
+       "2  He thought he could roll into Ukraine and the ...   \n",
+       "3  And the costs and the threats to America and t...   \n",
+       "4  Please rise if you are able and show that, Yes...   \n",
+       "\n",
+       "                                              prompt  \n",
+       "0  Putin may circle Kyiv with tanks, but he will ...  \n",
+       "1  Madam Speaker, Madam Vice President, our First...  \n",
+       "2  With a duty to one another to the American peo...  \n",
+       "3  Please rise if you are able and show that, Yes...  \n",
+       "4  Groups of citizens blocking tanks with their b...  "
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "completions_dataset = load_dataset(\"state-of-the-union-completions\")\n",
+    "completions_df = pd.DataFrame(completions_dataset)\n",
+    "completions_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "c7dcc1b2-7aef-44c0-ba0f-c812279099a5",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "completions_dataset_name = \"state-of-the-union-completions\"\n",
+    "\n",
+    "if completions_dataset_name not in set([dataset.name for dataset in client.list_datasets()]):\n",
+    "    client.upload_dataframe(completions_df, \n",
+    "                            name=completions_dataset_name,\n",
+    "                            description=\"An example dataset traced from completion endpoints over the state of the union address\",\n",
+    "                            input_keys=[\"prompt\"],\n",
+    "                            output_keys=[\"generations\"],\n",
+    "                            )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "e946138e-bf7c-43d7-861d-9c5740c933fa",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "50 processed\r"
+     ]
+    }
+   ],
+   "source": [
+    "# We also offer a synchronous method for running examples if a chain or LLM's async methods aren't yet implemented\n",
+    "completions_model_results = client.run_on_dataset(\n",
+    "    dataset_name=completions_dataset_name,\n",
+    "    llm_or_chain_factory=llm,\n",
+    "    num_repetitions=1,\n",
+    "    verbose=True\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cc86e8e6-cee2-429e-942b-289284d14816",
+   "metadata": {},
+   "source": [
+    "## Reviewing the LLM Results\n",
+    "\n",
+    "You can once again inspect the latest runs by clicking on the link below and navigating to the \"state-of-the-union-completions\" session.\n",
+ ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "2bf96f17-74c1-4f7d-8458-ae5ab5c6bd36", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "LangChain+ Client" + ], + "text/plain": [ + "LangChainPlusClient (API URL: http://localhost:8000)" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "client" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "df80cd88-cd6f-4fdc-965f-f74600e1f286", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 }, - "nbformat": 4, - "nbformat_minor": 5 -} \ No newline at end of file + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/langchain/server.py b/langchain/server.py index 16eb3f31..750567ed 100644 --- a/langchain/server.py +++ b/langchain/server.py @@ -1,18 +1,15 @@ """Script to run langchain-server locally using docker-compose.""" -import shutil import subprocess from pathlib import Path +from langchain.cli.main import get_docker_compose_command + def main() -> None: """Run the langchain server locally.""" p = Path(__file__).absolute().parent / "docker-compose.yaml" - if shutil.which("docker-compose") is None: - docker_compose_command = ["docker", "compose"] - else: - docker_compose_command = ["docker-compose"] - + docker_compose_command = get_docker_compose_command() subprocess.run([*docker_compose_command, "-f", str(p), "pull"]) subprocess.run([*docker_compose_command, "-f", str(p), "up"])