mirror of https://github.com/hwchase17/langchain
Arthur Callback (#6972)
Co-authored-by: Max Cembalest <115359769+arthuractivemodeling@users.noreply.github.com>
parent 8c73037dff  commit 13c62cf6b1
@@ -0,0 +1,464 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "944e4194",
   "metadata": {},
   "source": [
    "# Arthur LangChain integration"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b1ccdfe8",
   "metadata": {},
   "source": [
    "[Arthur](https://www.arthur.ai/) is a model monitoring and observability platform.\n",
    "\n",
    "This notebook shows how to register LLMs (chat and non-chat) as models with the Arthur platform. It then shows how to set up LangChain LLMs with an Arthur callback that automatically logs model inferences to Arthur.\n",
    "\n",
    "For more information about how to use the Arthur SDK, visit our [docs](http://docs.arthur.ai), in particular our [model onboarding guide](https://docs.arthur.ai/user-guide/walkthroughs/model-onboarding/index.html)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "961c6691",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.callbacks import ArthurCallbackHandler\n",
    "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n",
    "from langchain.chat_models import ChatOpenAI, ChatAnthropic\n",
    "from langchain.schema import HumanMessage\n",
    "from langchain.llms import OpenAI, Cohere, HuggingFacePipeline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "a23d1963",
   "metadata": {},
   "outputs": [],
   "source": [
    "from arthurai import ArthurAI\n",
    "from arthurai.common.constants import InputType, OutputType, Stage, ValueType\n",
    "from arthurai.core.attributes import ArthurAttribute, AttributeCategory"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4d1b90c0",
   "metadata": {},
   "source": [
    "# ArthurModel for a chatbot with only input text and output text attributes"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1a4a4a8a",
   "metadata": {},
   "source": [
    "Connect to the Arthur client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "f49e9b79",
   "metadata": {},
   "outputs": [],
   "source": [
    "arthur_url = \"https://app.arthur.ai\"\n",
    "arthur_login = \"your-username-here\"\n",
    "arthur = ArthurAI(url=arthur_url, login=arthur_login)"
   ]
  },
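  {
   "cell_type": "markdown",
   "id": "3fa0c8e1",
   "metadata": {},
   "source": [
    "Optionally, you can authenticate with an API key instead of an interactive login. This is a minimal sketch that assumes you have set an `ARTHUR_API_KEY` environment variable; it uses the same `access_key` argument that `ArthurCallbackHandler.from_credentials` falls back to when no login is given. The line is left commented out so it does not replace the login-based client above."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5b2d9f47",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Alternative: authenticate with an API key (assumes ARTHUR_API_KEY is set)\n",
    "# arthur = ArthurAI(url=arthur_url, access_key=os.environ[\"ARTHUR_API_KEY\"])"
   ]
  },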
  {
   "cell_type": "markdown",
   "id": "c6e063bf",
   "metadata": {},
   "source": [
    "Before the callback can log model inferences to Arthur, you must have a registered model with an ID in the Arthur platform. We will provide this ID to the ArthurCallbackHandler.\n",
    "\n",
    "You can register a model with Arthur here in the notebook using this `register_chat_llm()` function. The function returns the ID of the model saved to the platform. To use it, uncomment `arthur_model_chatbot_id = register_chat_llm()` in the cell below."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "31b17b5e",
   "metadata": {},
   "outputs": [],
   "source": [
    "def register_chat_llm():\n",
    "\n",
    "    arthur_model = arthur.model(\n",
    "        display_name=\"LangChainChat\",\n",
    "        input_type=InputType.NLP,\n",
    "        output_type=OutputType.TokenSequence\n",
    "    )\n",
    "\n",
    "    arthur_model._add_attribute_to_model(ArthurAttribute(\n",
    "        name=\"my_input_text\",\n",
    "        stage=Stage.ModelPipelineInput,\n",
    "        value_type=ValueType.Unstructured_Text,\n",
    "        categorical=True,\n",
    "        is_unique=True\n",
    "    ))\n",
    "    arthur_model._add_attribute_to_model(ArthurAttribute(\n",
    "        name=\"my_output_text\",\n",
    "        stage=Stage.PredictedValue,\n",
    "        value_type=ValueType.Unstructured_Text,\n",
    "        categorical=True,\n",
    "        is_unique=False,\n",
    "    ))\n",
    "\n",
    "    return arthur_model.save()\n",
    "\n",
    "# arthur_model_chatbot_id = register_chat_llm()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0d1d1e60",
   "metadata": {},
   "source": [
    "Alternatively, you can set the `arthur_model_chatbot_id` variable to the ID of your model from your [model dashboard](https://app.arthur.ai/)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "cdfa02c8",
   "metadata": {},
   "outputs": [],
   "source": [
    "arthur_model_chatbot_id = \"your-model-id-here\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "58be5234",
   "metadata": {},
   "source": [
    "This function creates a LangChain chat LLM with the ArthurCallbackHandler attached to log inferences to Arthur. We provide our `arthur_model_chatbot_id`, as well as the Arthur URL and login we are using."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "448a8fee",
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_langchain_chat_llm(chat_model=ChatOpenAI):\n",
    "    if chat_model not in [ChatOpenAI, ChatAnthropic]:\n",
    "        raise ValueError(\"For this notebook, use one of the chat models imported from langchain.chat_models\")\n",
    "    return chat_model(\n",
    "        streaming=True,\n",
    "        temperature=0.1,\n",
    "        callbacks=[\n",
    "            StreamingStdOutCallbackHandler(),\n",
    "            ArthurCallbackHandler.from_credentials(arthur_model_chatbot_id, arthur_url=arthur_url, arthur_login=arthur_login)\n",
    "        ])\n"
   ]
  },
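  {
   "cell_type": "markdown",
   "id": "7e91d2ab",
   "metadata": {},
   "source": [
    "The same factory also accepts `ChatAnthropic`, which is already imported above. The sketch below assumes you have Anthropic credentials configured in your environment, so it is left commented out."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9c3e5d80",
   "metadata": {},
   "outputs": [],
   "source": [
    "# anthropic_chat_llm = make_langchain_chat_llm(chat_model=ChatAnthropic)"
   ]
  },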
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "17c182da",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "2dfc00ed",
   "metadata": {},
   "outputs": [],
   "source": [
    "chat_llm = make_langchain_chat_llm()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "139291f2",
   "metadata": {},
   "source": [
    "Run the chatbot (it saves the chat history in the `history` list so that the conversation can reference earlier messages).\n",
    "\n",
    "Type `q` to quit."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "7480a443",
   "metadata": {},
   "outputs": [],
   "source": [
    "def run_langchain_chat_llm(llm):\n",
    "    history = []\n",
    "    while True:\n",
    "        user_input = input(\"\\n>>> input >>>\\n>>>: \")\n",
    "        if user_input == 'q': break\n",
    "        history.append(HumanMessage(content=user_input))\n",
    "        history.append(llm(history))"
   ]
  },
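  {
   "cell_type": "markdown",
   "id": "8d4f6a2c",
   "metadata": {},
   "source": [
    "If you just want to log a single exchange without the interactive loop, you can call the chat model directly; the ArthurCallbackHandler logs the inference in the same way. A minimal sketch, left commented out so the notebook can run end to end without an extra API call:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1f7b3c9d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# A single one-off message; the callback logs this inference just like the loop above\n",
    "# response = chat_llm([HumanMessage(content=\"Hello! What can you help me with?\")])\n",
    "# print(response.content)"
   ]
  },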
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "6868ce71",
   "metadata": {},
   "outputs": [],
   "source": [
    "run_langchain_chat_llm(chat_llm)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a0be7d01",
   "metadata": {},
   "source": [
    "# ArthurModel with input text, output text, token likelihoods, finish reason, and token usage attributes"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1ee4b741",
   "metadata": {},
   "source": [
    "This function registers an LLM with additional metadata attributes to log to Arthur with each inference.\n",
    "\n",
    "As above, you can register a model for your LLM using this function here in the notebook, or paste the ID of an already-registered model from your [model dashboard](https://app.arthur.ai/)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "e671836c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def register_llm():\n",
    "\n",
    "    arthur_model = arthur.model(\n",
    "        display_name=\"LangChainLLM\",\n",
    "        input_type=InputType.NLP,\n",
    "        output_type=OutputType.TokenSequence\n",
    "    )\n",
    "    arthur_model._add_attribute_to_model(ArthurAttribute(\n",
    "        name=\"my_input_text\",\n",
    "        stage=Stage.ModelPipelineInput,\n",
    "        value_type=ValueType.Unstructured_Text,\n",
    "        categorical=True,\n",
    "        is_unique=True\n",
    "    ))\n",
    "    arthur_model._add_attribute_to_model(ArthurAttribute(\n",
    "        name=\"my_output_text\",\n",
    "        stage=Stage.PredictedValue,\n",
    "        value_type=ValueType.Unstructured_Text,\n",
    "        categorical=True,\n",
    "        is_unique=False,\n",
    "        token_attribute_link=\"my_output_likelihoods\"\n",
    "    ))\n",
    "    arthur_model._add_attribute_to_model(ArthurAttribute(\n",
    "        name=\"my_output_likelihoods\",\n",
    "        stage=Stage.PredictedValue,\n",
    "        value_type=ValueType.TokenLikelihoods,\n",
    "        token_attribute_link=\"my_output_text\"\n",
    "    ))\n",
    "    arthur_model._add_attribute_to_model(ArthurAttribute(\n",
    "        name=\"finish_reason\",\n",
    "        stage=Stage.NonInputData,\n",
    "        value_type=ValueType.String,\n",
    "        categorical=True,\n",
    "        categories=[\n",
    "            AttributeCategory(value='stop'),\n",
    "            AttributeCategory(value='length'),\n",
    "            AttributeCategory(value='content_filter'),\n",
    "            AttributeCategory(value='null')\n",
    "        ]\n",
    "    ))\n",
    "    arthur_model._add_attribute_to_model(ArthurAttribute(\n",
    "        name=\"prompt_tokens\",\n",
    "        stage=Stage.NonInputData,\n",
    "        value_type=ValueType.Integer\n",
    "    ))\n",
    "    arthur_model._add_attribute_to_model(ArthurAttribute(\n",
    "        name=\"completion_tokens\",\n",
    "        stage=Stage.NonInputData,\n",
    "        value_type=ValueType.Integer\n",
    "    ))\n",
    "    arthur_model._add_attribute_to_model(ArthurAttribute(\n",
    "        name=\"duration\",\n",
    "        stage=Stage.NonInputData,\n",
    "        value_type=ValueType.Float\n",
    "    ))\n",
    "\n",
    "    return arthur_model.save()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "2a6686f7",
   "metadata": {},
   "outputs": [],
   "source": [
    "arthur_model_llm_id = \"your-model-id-here\""
   ]
  },
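  {
   "cell_type": "markdown",
   "id": "6a8e0b5f",
   "metadata": {},
   "source": [
    "As a quick sanity check, you can fetch the registered model and list its attribute names; these are the fields the ArthurCallbackHandler can populate on each inference. This sketch uses the same `get_model` and `get_attributes` calls the callback handler itself relies on, and assumes `arthur_model_llm_id` points at a model registered with your Arthur account."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2b6c4d1e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fetch the registered model and list the attributes the callback can populate\n",
    "llm_model = arthur.get_model(arthur_model_llm_id)\n",
    "print([attr.name for attr in llm_model.get_attributes()])"
   ]
  },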
  {
   "cell_type": "markdown",
   "id": "2dcacb96",
   "metadata": {},
   "source": [
    "These functions create LangChain LLMs with the ArthurCallbackHandler attached to log inferences to Arthur.\n",
    "\n",
    "There are small differences in the underlying LangChain integrations for these providers and in the metadata available for model inputs & outputs. For example, only the OpenAI completion model requests `logprobs`, so only it logs to the model registered with token likelihood and token usage attributes; the Cohere and Hugging Face LLMs log to the simpler input/output text model."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "34cf0072",
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_langchain_openai_llm():\n",
    "    return OpenAI(\n",
    "        temperature=0.1,\n",
    "        model_kwargs={'logprobs': 3},\n",
    "        callbacks=[\n",
    "            ArthurCallbackHandler.from_credentials(arthur_model_llm_id, arthur_url=arthur_url, arthur_login=arthur_login)\n",
    "        ])\n",
    "\n",
    "def make_langchain_cohere_llm():\n",
    "    return Cohere(\n",
    "        temperature=0.1,\n",
    "        callbacks=[\n",
    "            ArthurCallbackHandler.from_credentials(arthur_model_chatbot_id, arthur_url=arthur_url, arthur_login=arthur_login)\n",
    "        ])\n",
    "\n",
    "def make_langchain_huggingface_llm():\n",
    "    llm = HuggingFacePipeline.from_model_id(\n",
    "        model_id=\"bert-base-uncased\",\n",
    "        task=\"text-generation\",\n",
    "        model_kwargs={\"temperature\": 2.5, \"max_length\": 64})\n",
    "    llm.callbacks = [\n",
    "        ArthurCallbackHandler.from_credentials(arthur_model_chatbot_id, arthur_url=arthur_url, arthur_login=arthur_login)\n",
    "    ]\n",
    "    return llm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "f40c3ce0",
   "metadata": {},
   "outputs": [],
   "source": [
    "openai_llm = make_langchain_openai_llm()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "8476d531",
   "metadata": {},
   "outputs": [],
   "source": [
    "cohere_llm = make_langchain_cohere_llm()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "7483b9d3",
   "metadata": {},
   "outputs": [],
   "source": [
    "huggingface_llm = make_langchain_huggingface_llm()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c17d8e86",
   "metadata": {},
   "source": [
    "Run the LLM (each completion is independent; no chat history is saved, unlike the chat LLMs above).\n",
    "\n",
    "Type `q` to quit."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "72ee0790",
   "metadata": {},
   "outputs": [],
   "source": [
    "def run_langchain_llm(llm):\n",
    "    while True:\n",
    "        print(\"Type your text for completion:\\n\")\n",
    "        user_input = input(\"\\n>>> input >>>\\n>>>: \")\n",
    "        if user_input == 'q': break\n",
    "        print(llm(user_input), \"\\n================\\n\")"
   ]
  },
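  {
   "cell_type": "markdown",
   "id": "4e8a7c31",
   "metadata": {},
   "source": [
    "You can also log a single completion without the interactive loop; any direct call to the LLM goes through the same callback. A minimal sketch, left commented out to avoid an extra API call when running the notebook end to end:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0d5c2e96",
   "metadata": {},
   "outputs": [],
   "source": [
    "# A single one-off completion, logged to Arthur by the callback\n",
    "# print(openai_llm(\"Briefly explain what model monitoring is.\"))"
   ]
  },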
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "fb864057",
   "metadata": {},
   "outputs": [],
   "source": [
    "run_langchain_llm(openai_llm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "e6673769",
   "metadata": {},
   "outputs": [],
   "source": [
    "run_langchain_llm(cohere_llm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "85541f1c",
   "metadata": {},
   "outputs": [],
   "source": [
    "run_langchain_llm(huggingface_llm)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
@@ -0,0 +1,297 @@
"""ArthurAI's Callback Handler."""
from __future__ import annotations

import os
import uuid
from collections import defaultdict
from datetime import datetime
from time import time
from typing import TYPE_CHECKING, Any, DefaultDict, Dict, List, Optional, Union

import numpy as np
import pytz

from langchain.callbacks.base import BaseCallbackHandler
from langchain.schema import AgentAction, AgentFinish, LLMResult

if TYPE_CHECKING:
    import arthurai
    from arthurai.core.models import ArthurModel

PROMPT_TOKENS = "prompt_tokens"
COMPLETION_TOKENS = "completion_tokens"
TOKEN_USAGE = "token_usage"
FINISH_REASON = "finish_reason"
DURATION = "duration"


def _lazy_load_arthur() -> arthurai:
    """Lazy load Arthur."""
    try:
        import arthurai
    except ImportError as e:
        raise ImportError(
            "To use the ArthurCallbackHandler you need the"
            " `arthurai` package. Please install it with"
            " `pip install arthurai`.",
            e,
        )

    return arthurai


class ArthurCallbackHandler(BaseCallbackHandler):
    """Callback Handler that logs to Arthur platform.

    Arthur helps enterprise teams optimize model operations
    and performance at scale. The Arthur API tracks model
    performance, explainability, and fairness across tabular,
    NLP, and CV models. Our API is model- and platform-agnostic,
    and continuously scales with complex and dynamic enterprise needs.
    To learn more about Arthur, visit our website at
    https://www.arthur.ai/ or read the Arthur docs at
    https://docs.arthur.ai/
    """

    def __init__(
        self,
        arthur_model: ArthurModel,
    ) -> None:
        """Initialize callback handler."""
        super().__init__()
        arthurai = _lazy_load_arthur()
        Stage = arthurai.common.constants.Stage
        ValueType = arthurai.common.constants.ValueType
        self.arthur_model = arthur_model
        # save the attributes of this model to be used when preparing
        # inferences to log to Arthur in on_llm_end()
        self.attr_names = set([a.name for a in self.arthur_model.get_attributes()])
        self.input_attr = [
            x
            for x in self.arthur_model.get_attributes()
            if x.stage == Stage.ModelPipelineInput
            and x.value_type == ValueType.Unstructured_Text
        ][0].name
        self.output_attr = [
            x
            for x in self.arthur_model.get_attributes()
            if x.stage == Stage.PredictedValue
            and x.value_type == ValueType.Unstructured_Text
        ][0].name
        self.token_likelihood_attr = None
        if (
            len(
                [
                    x
                    for x in self.arthur_model.get_attributes()
                    if x.value_type == ValueType.TokenLikelihoods
                ]
            )
            > 0
        ):
            self.token_likelihood_attr = [
                x
                for x in self.arthur_model.get_attributes()
                if x.value_type == ValueType.TokenLikelihoods
            ][0].name

        self.run_map: DefaultDict[str, Any] = defaultdict(dict)

    @classmethod
    def from_credentials(
        cls,
        model_id: str,
        arthur_url: Optional[str] = "https://app.arthur.ai",
        arthur_login: Optional[str] = None,
        arthur_password: Optional[str] = None,
    ) -> ArthurCallbackHandler:
        """Initialize callback handler from Arthur credentials.

        Args:
            model_id (str): The ID of the arthur model to log to.
            arthur_url (str, optional): The URL of the Arthur instance to log to.
                Defaults to "https://app.arthur.ai".
            arthur_login (str, optional): The login to use to connect to Arthur.
                Defaults to None.
            arthur_password (str, optional): The password to use to connect to
                Arthur. Defaults to None.

        Returns:
            ArthurCallbackHandler: The initialized callback handler.
        """
        arthurai = _lazy_load_arthur()
        ArthurAI = arthurai.ArthurAI
        ResponseClientError = arthurai.common.exceptions.ResponseClientError

        # connect to Arthur
        if arthur_login is None:
            try:
                arthur_api_key = os.environ["ARTHUR_API_KEY"]
            except KeyError:
                raise ValueError(
                    "No Arthur authentication provided. Either give"
                    " a login to the ArthurCallbackHandler"
                    " or set an ARTHUR_API_KEY as an environment variable."
                )
            arthur = ArthurAI(url=arthur_url, access_key=arthur_api_key)
        else:
            if arthur_password is None:
                arthur = ArthurAI(url=arthur_url, login=arthur_login)
            else:
                arthur = ArthurAI(
                    url=arthur_url, login=arthur_login, password=arthur_password
                )
        # get model from Arthur by the provided model ID
        try:
            arthur_model = arthur.get_model(model_id)
        except ResponseClientError:
            raise ValueError(
                f"Was unable to retrieve model with id {model_id} from Arthur."
                " Make sure the ID corresponds to a model that is currently"
                " registered with your Arthur account."
            )
        return cls(arthur_model)

    def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> None:
        """On LLM start, save the input prompts."""
        run_id = kwargs["run_id"]
        self.run_map[run_id]["input_texts"] = prompts
        self.run_map[run_id]["start_time"] = time()

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        """On LLM end, send data to Arthur."""

        run_id = kwargs["run_id"]

        # get the run params from this run ID, or raise an error if this
        # run ID has no corresponding metadata in self.run_map
        try:
            run_map_data = self.run_map[run_id]
        except KeyError as e:
            raise KeyError(
                "This function has been called with a run_id"
                " that was never registered in on_llm_start()."
                " Restart and try running the LLM again"
            ) from e

        # mark the duration time between on_llm_start() and on_llm_end()
        time_from_start_to_end = time() - run_map_data["start_time"]

        # create inferences to log to Arthur
        inferences = []
        for i, generations in enumerate(response.generations):
            for generation in generations:
                inference = {
                    "partner_inference_id": str(uuid.uuid4()),
                    "inference_timestamp": datetime.now(tz=pytz.UTC),
                    self.input_attr: run_map_data["input_texts"][i],
                    self.output_attr: generation.text,
                }

                if generation.generation_info is not None:
                    # add finish reason to the inference
                    # if generation info contains a finish reason and
                    # if the ArthurModel was registered to monitor finish_reason
                    if (
                        FINISH_REASON in generation.generation_info
                        and FINISH_REASON in self.attr_names
                    ):
                        inference[FINISH_REASON] = generation.generation_info[
                            FINISH_REASON
                        ]

                    # add token likelihoods data to the inference if the ArthurModel
                    # was registered to monitor token likelihoods
                    logprobs_data = generation.generation_info.get("logprobs")
                    if (
                        logprobs_data is not None
                        and self.token_likelihood_attr is not None
                    ):
                        logprobs = logprobs_data["top_logprobs"]
                        likelihoods = [
                            {k: np.exp(v) for k, v in logprobs[i].items()}
                            for i in range(len(logprobs))
                        ]
                        inference[self.token_likelihood_attr] = likelihoods

                # add token usage counts to the inference if the
                # ArthurModel was registered to monitor token usage
                if (
                    isinstance(response.llm_output, dict)
                    and TOKEN_USAGE in response.llm_output
                ):
                    token_usage = response.llm_output[TOKEN_USAGE]
                    if (
                        PROMPT_TOKENS in token_usage
                        and PROMPT_TOKENS in self.attr_names
                    ):
                        inference[PROMPT_TOKENS] = token_usage[PROMPT_TOKENS]
                    if (
                        COMPLETION_TOKENS in token_usage
                        and COMPLETION_TOKENS in self.attr_names
                    ):
                        inference[COMPLETION_TOKENS] = token_usage[COMPLETION_TOKENS]

                # add inference duration to the inference if the ArthurModel
                # was registered to monitor inference duration
                if DURATION in self.attr_names:
                    inference[DURATION] = time_from_start_to_end

                inferences.append(inference)

        # send inferences to arthur
        self.arthur_model.send_inferences(inferences)

    def on_chain_start(
        self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
    ) -> None:
        """On chain start, do nothing."""

    def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
        """On chain end, do nothing."""

    def on_llm_error(
        self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> None:
        """Do nothing when LLM outputs an error."""

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        """On new token, pass."""

    def on_chain_error(
        self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> None:
        """Do nothing when LLM chain outputs an error."""

    def on_tool_start(
        self,
        serialized: Dict[str, Any],
        input_str: str,
        **kwargs: Any,
    ) -> None:
        """Do nothing when tool starts."""

    def on_agent_action(self, action: AgentAction, **kwargs: Any) -> Any:
        """Do nothing when agent takes a specific action."""

    def on_tool_end(
        self,
        output: str,
        observation_prefix: Optional[str] = None,
        llm_prefix: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Do nothing when tool ends."""

    def on_tool_error(
        self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
    ) -> None:
        """Do nothing when tool outputs an error."""

    def on_text(self, text: str, **kwargs: Any) -> None:
        """Do nothing"""

    def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> None:
        """Do nothing"""