Mirror of https://github.com/hwchase17/langchain — commit 33da8bd711 (parent e355606b11): "Add Exact match and Regex Match Evaluators (#11132)".
@ -1,318 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "bce7335e-f3b2-44f3-90cc-8c0a23a89a21",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import os\n",
|
|
||||||
"from langchain.agents import load_tools\n",
|
|
||||||
"from langchain.agents import initialize_agent\n",
|
|
||||||
"from langchain.chat_models import ChatOpenAI\n",
|
|
||||||
"from langchain.utilities import GoogleSearchAPIWrapper\n",
|
|
||||||
"from langchain.schema import (\n",
|
|
||||||
" SystemMessage,\n",
|
|
||||||
" HumanMessage,\n",
|
|
||||||
" AIMessage\n",
|
|
||||||
")\n",
|
|
||||||
"\n",
|
|
||||||
"# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n",
|
|
||||||
"# os.environ[\"LANGCHAIN_ENDPOINT\"] = \"https://api.smith.langchain.com\"\n",
|
|
||||||
"# os.environ[\"LANGCHAIN_API_KEY\"] = \"******\"\n",
|
|
||||||
"# os.environ[\"LANGCHAIN_PROJECT\"] = \"Jarvis\"\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"prefix_messages = [{\"role\": \"system\", \"content\": \"You are a helpful discord Chatbot.\"}]\n",
|
|
||||||
"\n",
|
|
||||||
"llm = ChatOpenAI(model_name='gpt-3.5-turbo', \n",
|
|
||||||
" temperature=0.5, \n",
|
|
||||||
" max_tokens = 2000)\n",
|
|
||||||
"tools = load_tools([\"serpapi\", \"llm-math\"], llm=llm)\n",
|
|
||||||
"agent = initialize_agent(tools,\n",
|
|
||||||
" llm,\n",
|
|
||||||
" agent=\"zero-shot-react-description\",\n",
|
|
||||||
" verbose=True,\n",
|
|
||||||
" handle_parsing_errors=True\n",
|
|
||||||
" )\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"async def on_ready():\n",
|
|
||||||
" print(f'{bot.user} has connected to Discord!')\n",
|
|
||||||
"\n",
|
|
||||||
"async def on_message(message):\n",
|
|
||||||
"\n",
|
|
||||||
" print(\"Detected bot name in message:\", message.content)\n",
|
|
||||||
"\n",
|
|
||||||
" # Capture the output of agent.run() in the response variable\n",
|
|
||||||
" response = agent.run(message.content)\n",
|
|
||||||
"\n",
|
|
||||||
" while response:\n",
|
|
||||||
" print(response)\n",
|
|
||||||
" chunk, response = response[:2000], response[2000:]\n",
|
|
||||||
" print(f\"Chunk: {chunk}\")\n",
|
|
||||||
" print(\"Response sent.\")\n",
|
|
||||||
"\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 22,
|
|
||||||
"id": "1551ce9f-b6de-4035-b6d6-825722823b48",
|
|
||||||
"metadata": {
|
|
||||||
"tags": []
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from dataclasses import dataclass\n",
|
|
||||||
"@dataclass\n",
|
|
||||||
"class Message:\n",
|
|
||||||
" content: str"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 23,
|
|
||||||
"id": "6e6859ec-8544-4407-9663-6b53c0092903",
|
|
||||||
"metadata": {
|
|
||||||
"tags": []
|
|
||||||
},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Detected bot name in message: Hi AI, how are you today?\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
|
||||||
"\u001b[32;1m\u001b[1;3mThis question is not something that can be answered using the available tools.\n",
|
|
||||||
"Action: N/A\u001b[0m\n",
|
|
||||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
|
||||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
|
||||||
"Action: N/A\u001b[0m\n",
|
|
||||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
|
||||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
|
||||||
"Action: N/A\u001b[0m\n",
|
|
||||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
|
||||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
|
||||||
"Action: N/A\u001b[0m\n",
|
|
||||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
|
||||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
|
||||||
"Action: N/A\u001b[0m\n",
|
|
||||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
|
||||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
|
||||||
"Action: N/A\u001b[0m\n",
|
|
||||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
|
||||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
|
||||||
"Action: N/A\u001b[0m\n",
|
|
||||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
|
||||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
|
||||||
"Action: N/A\u001b[0m\n",
|
|
||||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
|
||||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
|
||||||
"Action: N/A\u001b[0m\n",
|
|
||||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
|
||||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
|
||||||
"Action: N/A\u001b[0m\n",
|
|
||||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
|
||||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
|
||||||
"Action: N/A\u001b[0m\n",
|
|
||||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
|
||||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
|
||||||
"Action: N/A\u001b[0m\n",
|
|
||||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
|
||||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
|
||||||
"Action: N/A\u001b[0m\n",
|
|
||||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
|
||||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
|
||||||
"Action: N/A\u001b[0m\n",
|
|
||||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
|
||||||
"Thought:\u001b[32;1m\u001b[1;3mI need to follow the correct format for answering questions.\n",
|
|
||||||
"Action: N/A\u001b[0m\n",
|
|
||||||
"Observation: Invalid Format: Missing 'Action Input:' after 'Action:'\n",
|
|
||||||
"Thought:\u001b[32;1m\u001b[1;3m\u001b[0m\n",
|
|
||||||
"\n",
|
|
||||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
|
||||||
"Agent stopped due to iteration limit or time limit.\n",
|
|
||||||
"Chunk: Agent stopped due to iteration limit or time limit.\n",
|
|
||||||
"Response sent.\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"await on_message(Message(content=\"Hi AI, how are you today?\"))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 24,
|
|
||||||
"id": "b850294c-7f8f-4e79-adcf-47e4e3a898df",
|
|
||||||
"metadata": {
|
|
||||||
"tags": []
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from langsmith import Client\n",
|
|
||||||
"\n",
|
|
||||||
"client = Client()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 25,
|
|
||||||
"id": "6d089ddc-69bc-45a8-b8db-9962e4f1f5ee",
|
|
||||||
"metadata": {
|
|
||||||
"tags": []
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from itertools import islice\n",
|
|
||||||
"\n",
|
|
||||||
"runs = list(islice(client.list_runs(), 10))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 38,
|
|
||||||
"id": "f0349fac-5a98-400f-ba03-61ed4e1332be",
|
|
||||||
"metadata": {
|
|
||||||
"tags": []
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"runs = sorted(runs, key=lambda x: x.start_time, reverse=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 26,
|
|
||||||
"id": "02f133f0-39ee-4b46-b443-12c1f9b76fff",
|
|
||||||
"metadata": {
|
|
||||||
"tags": []
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"ids = [run.id for run in runs]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 39,
|
|
||||||
"id": "3366dce4-0c38-4a7d-8111-046a58b24917",
|
|
||||||
"metadata": {
|
|
||||||
"tags": []
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"runs2 = list(client.list_runs(id=ids))\n",
|
|
||||||
"runs2 = sorted(runs2, key=lambda x: x.start_time, reverse=True)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 42,
|
|
||||||
"id": "82915b90-39a0-47d6-9121-56a13f210f52",
|
|
||||||
"metadata": {
|
|
||||||
"tags": []
|
|
||||||
},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"['a36092d2-4ad5-4fb4-9b0d-0dba9a2ed836',\n",
|
|
||||||
" '9398e6be-964f-4aa4-8de9-ad78cd4b7074']"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 42,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"[str(x) for x in ids[:2]]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 48,
|
|
||||||
"id": "f610ec91-dc48-4a17-91c5-5c4675c77abc",
|
|
||||||
"metadata": {
|
|
||||||
"tags": []
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from langsmith.run_helpers import traceable\n",
|
|
||||||
"\n",
|
|
||||||
"@traceable(run_type=\"llm\", name=\"\"\"<iframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/dQw4w9WgXcQ?start=5\" title=\"YouTube video player\" frameborder=\"0\" allow=\"accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share\" allowfullscreen></iframe>\"\"\")\n",
|
|
||||||
"def foo():\n",
|
|
||||||
" return \"bar\""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 49,
|
|
||||||
"id": "bd317bd7-8b2a-433a-8ec3-098a84ba8e64",
|
|
||||||
"metadata": {
|
|
||||||
"tags": []
|
|
||||||
},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"'bar'"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 49,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"foo()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 52,
|
|
||||||
"id": "b142519b-6885-415c-83b9-4a346fb90589",
|
|
||||||
"metadata": {
|
|
||||||
"tags": []
|
|
||||||
},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from langchain.llms import AzureOpenAI"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "5c50bb2b-72b8-4322-9b16-d857ecd9f347",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3 (ipykernel)",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.11.2"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 5
|
|
||||||
}
|
|
@ -0,0 +1,175 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "2da95378",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Exact Match\n",
|
||||||
|
"[![Open In Collab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/extras/guides/evaluation/string/exact_match.ipynb)\n",
|
||||||
|
"\n",
|
||||||
|
"Probably the simplest ways to evaluate an LLM or runnable's string output against a reference label is by a simple string equivalence.\n",
|
||||||
|
"\n",
|
||||||
|
"This can be accessed using the `exact_match` evaluator."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"id": "0de44d01-1fea-4701-b941-c4fb74e521e7",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from langchain.evaluation import ExactMatchStringEvaluator\n",
|
||||||
|
"\n",
|
||||||
|
"evaluator = ExactMatchStringEvaluator()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "fe3baf5f-bfee-4745-bcd6-1a9b422ed46f",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Alternatively via the loader:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"id": "f6790c46",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from langchain.evaluation import load_evaluator\n",
|
||||||
|
"\n",
|
||||||
|
"evaluator = load_evaluator(\"exact_match\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"id": "49ad9139",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"{'score': 0}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"evaluator.evaluate_strings(\n",
|
||||||
|
" prediction=\"1 LLM.\",\n",
|
||||||
|
" reference=\"2 llm\",\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"id": "1f5e82a3-247e-45a8-85fc-6af53bf7ff82",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"{'score': 0}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"evaluator.evaluate_strings(\n",
|
||||||
|
" prediction=\"LangChain\",\n",
|
||||||
|
" reference=\"langchain\",\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "b8ed1f12-09a6-4e90-a69d-c8df525ff293",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Configure the ExactMatchStringEvaluator\n",
|
||||||
|
"\n",
|
||||||
|
"You can relax the \"exactness\" when comparing strings."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"id": "0c079864-0175-4d06-9d3f-a0e51dd3977c",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"evaluator = ExactMatchStringEvaluator(\n",
|
||||||
|
" ignore_case=True,\n",
|
||||||
|
" ignore_numbers=True,\n",
|
||||||
|
" ignore_punctuation=True,\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"# Alternatively\n",
|
||||||
|
"# evaluator = load_evaluator(\"exact_match\", ignore_case=True, ignore_numbers=True, ignore_punctuation=True)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"id": "a8dfb900-14f3-4a1f-8736-dd1d86a1264c",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"{'score': 1}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"evaluator.evaluate_strings(\n",
|
||||||
|
" prediction=\"1 LLM.\",\n",
|
||||||
|
" reference=\"2 llm\",\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.2"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
@ -0,0 +1,243 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "2da95378",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Regex Match\n",
|
||||||
|
"[![Open In Collab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/langchain-ai/langchain/blob/master/docs/extras/guides/evaluation/string/regex_match.ipynb)\n",
|
||||||
|
"\n",
|
||||||
|
"To evaluate chain or runnable string predictions against a custom regex, you can use the `regex_match` evaluator."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"id": "0de44d01-1fea-4701-b941-c4fb74e521e7",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from langchain.evaluation import RegexMatchStringEvaluator\n",
|
||||||
|
"\n",
|
||||||
|
"evaluator = RegexMatchStringEvaluator()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "fe3baf5f-bfee-4745-bcd6-1a9b422ed46f",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Alternatively via the loader:"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"id": "f6790c46",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from langchain.evaluation import load_evaluator\n",
|
||||||
|
"\n",
|
||||||
|
"evaluator = load_evaluator(\"regex_match\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"id": "49ad9139",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"{'score': 1}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Check for the presence of a YYYY-MM-DD string.\n",
|
||||||
|
"evaluator.evaluate_strings(\n",
|
||||||
|
" prediction=\"The delivery will be made on 2024-01-05\",\n",
|
||||||
|
" reference=\".*\\\\b\\\\d{4}-\\\\d{2}-\\\\d{2}\\\\b.*\"\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"id": "1f5e82a3-247e-45a8-85fc-6af53bf7ff82",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"{'score': 0}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Check for the presence of a MM-DD-YYYY string.\n",
|
||||||
|
"evaluator.evaluate_strings(\n",
|
||||||
|
" prediction=\"The delivery will be made on 2024-01-05\",\n",
|
||||||
|
" reference=\".*\\\\b\\\\d{2}-\\\\d{2}-\\\\d{4}\\\\b.*\"\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 5,
|
||||||
|
"id": "168fcd92-dffb-4345-b097-02d0fedf52fd",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"{'score': 1}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 5,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Check for the presence of a MM-DD-YYYY string.\n",
|
||||||
|
"evaluator.evaluate_strings(\n",
|
||||||
|
" prediction=\"The delivery will be made on 01-05-2024\",\n",
|
||||||
|
" reference=\".*\\\\b\\\\d{2}-\\\\d{2}-\\\\d{4}\\\\b.*\"\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "1d82dab5-6a49-4fe7-b3fb-8bcfb27d26e0",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Match against multiple patterns\n",
|
||||||
|
"\n",
|
||||||
|
"To match against multiple patterns, use a regex union \"|\"."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"id": "b87b915e-b7c2-476b-a452-99688a22293a",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"{'score': 1}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Check for the presence of a MM-DD-YYYY string or YYYY-MM-DD\n",
|
||||||
|
"evaluator.evaluate_strings(\n",
|
||||||
|
" prediction=\"The delivery will be made on 01-05-2024\",\n",
|
||||||
|
" reference=\"|\".join([\".*\\\\b\\\\d{4}-\\\\d{2}-\\\\d{2}\\\\b.*\", \".*\\\\b\\\\d{2}-\\\\d{2}-\\\\d{4}\\\\b.*\"])\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "b8ed1f12-09a6-4e90-a69d-c8df525ff293",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Configure the RegexMatchStringEvaluator\n",
|
||||||
|
"\n",
|
||||||
|
"You can specify any regex flags to use when matching."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 7,
|
||||||
|
"id": "0c079864-0175-4d06-9d3f-a0e51dd3977c",
|
||||||
|
"metadata": {
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import re\n",
|
||||||
|
"\n",
|
||||||
|
"evaluator = RegexMatchStringEvaluator(\n",
|
||||||
|
" flags=re.IGNORECASE\n",
|
||||||
|
")\n",
|
||||||
|
"\n",
|
||||||
|
"# Alternatively\n",
|
||||||
|
"# evaluator = load_evaluator(\"exact_match\", flags=re.IGNORECASE)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 8,
|
||||||
|
"id": "a8dfb900-14f3-4a1f-8736-dd1d86a1264c",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"{'score': 1}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 8,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"evaluator.evaluate_strings(\n",
|
||||||
|
" prediction=\"I LOVE testing\",\n",
|
||||||
|
" reference=\"I love testing\",\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "82de8d3e-c829-440e-a582-3fb70cecad3b",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.2"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
@ -0,0 +1,97 @@
|
|||||||
|
import string
|
||||||
|
from typing import Any, List
|
||||||
|
|
||||||
|
from langchain.evaluation.schema import StringEvaluator
|
||||||
|
|
||||||
|
|
||||||
|
class ExactMatchStringEvaluator(StringEvaluator):
    """Compute an exact match between the prediction and the reference.

    The comparison can optionally be relaxed by lower-casing both strings
    and/or stripping punctuation and digits before comparing.

    Examples
    ----------
    >>> evaluator = ExactMatchStringEvaluator()
    >>> evaluator.evaluate_strings(
            prediction="Mindy is the CTO",
            reference="Mindy is the CTO",
        )  # This will return {'score': 1}

    >>> evaluator.evaluate_strings(
            prediction="Mindy is the CTO",
            reference="Mindy is the CEO",
        )  # This will return {'score': 0}
    """

    def __init__(
        self,
        *,
        ignore_case: bool = False,
        ignore_punctuation: bool = False,
        ignore_numbers: bool = False,
        **kwargs: Any,
    ):
        """Initialize the evaluator.

        Args:
            ignore_case: If True, compare case-insensitively.
            ignore_punctuation: If True, strip ``string.punctuation`` chars
                from both strings before comparing.
            ignore_numbers: If True, strip ``string.digits`` chars from both
                strings before comparing.
            **kwargs: Ignored; accepted for loader compatibility.
        """
        super().__init__()
        self.ignore_case = ignore_case
        self.ignore_punctuation = ignore_punctuation
        self.ignore_numbers = ignore_numbers

    @property
    def requires_input(self) -> bool:
        """
        This evaluator does not require input.
        """
        return False

    @property
    def requires_reference(self) -> bool:
        """
        This evaluator requires a reference.
        """
        return True

    @property
    def input_keys(self) -> List[str]:
        """
        Get the input keys.

        Returns:
            List[str]: The input keys.
        """
        return ["reference", "prediction"]

    @property
    def evaluation_name(self) -> str:
        """
        Get the evaluation name.

        Returns:
            str: The evaluation name.
        """
        return "exact_match"

    def _normalize(self, text: str) -> str:
        """Apply the configured relaxations (case/punctuation/digit removal)."""
        if self.ignore_case:
            text = text.lower()
        if self.ignore_punctuation:
            text = text.translate(str.maketrans("", "", string.punctuation))
        if self.ignore_numbers:
            text = text.translate(str.maketrans("", "", string.digits))
        return text

    def _evaluate_strings(  # type: ignore[arg-type,override]
        self,
        *,
        prediction: str,
        reference: str,
        **kwargs: Any,
    ) -> dict:
        """
        Evaluate the exact match between the prediction and the reference.

        Args:
            prediction (str): The prediction string.
            reference (str): The reference string.

        Returns:
            dict: The evaluation results containing the score (1 on match,
                0 otherwise, after any configured normalization).
        """
        # Normalize both sides identically so the comparison is symmetric.
        return {
            "score": int(self._normalize(prediction) == self._normalize(reference))
        }
|
@ -0,0 +1,86 @@
|
|||||||
|
import re
|
||||||
|
from typing import Any, List
|
||||||
|
|
||||||
|
from langchain.evaluation.schema import StringEvaluator
|
||||||
|
|
||||||
|
|
||||||
|
class RegexMatchStringEvaluator(StringEvaluator):
    """Compute a regex match between the prediction and the reference.

    The reference is interpreted as a regular expression and matched against
    the start of the prediction (``re.match`` semantics).

    Examples
    ----------
    >>> evaluator = RegexMatchStringEvaluator(flags=re.IGNORECASE)
    >>> evaluator.evaluate_strings(
            prediction="Mindy is the CTO",
            reference="^mindy.*cto$",
        )  # This will return {'score': 1.0} due to the IGNORECASE flag

    >>> evaluator = RegexMatchStringEvaluator()
    >>> evaluator.evaluate_strings(
            prediction="Mindy is the CTO",
            reference="^Mike.*CEO$",
        )  # This will return {'score': 0.0}

    >>> evaluator.evaluate_strings(
            prediction="Mindy is the CTO",
            reference="^Mike.*CEO$|^Mindy.*CTO$",
        )  # This will return {'score': 1.0} as the prediction matches the second pattern in the union
    """  # noqa: E501

    def __init__(self, *, flags: int = 0, **kwargs: Any):  # Default is no flags
        super().__init__()
        # Regex flags (e.g. re.IGNORECASE) applied to every match.
        self.flags = flags

    @property
    def requires_input(self) -> bool:
        """
        This evaluator does not require input.
        """
        return False

    @property
    def requires_reference(self) -> bool:
        """
        This evaluator requires a reference.
        """
        return True

    @property
    def input_keys(self) -> List[str]:
        """
        Get the input keys.

        Returns:
            List[str]: The input keys.
        """
        return ["reference", "prediction"]

    @property
    def evaluation_name(self) -> str:
        """
        Get the evaluation name.

        Returns:
            str: The evaluation name.
        """
        return "regex_match"

    def _evaluate_strings(  # type: ignore[arg-type,override]
        self,
        *,
        prediction: str,
        reference: str,
        **kwargs: Any,
    ) -> dict:
        """
        Evaluate the regex match between the prediction and the reference.

        Args:
            prediction (str): The prediction string.
            reference (Optional[str], optional): The reference regex pattern.

        Returns:
            dict: The evaluation results containing the score (1 when the
                pattern matches at the start of the prediction, else 0).
        """
        pattern = re.compile(reference, self.flags)
        # re.match anchors at the beginning of the prediction string.
        found = pattern.match(prediction) is not None
        return {"score": int(found)}
|
@ -0,0 +1,49 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
|
from langchain.evaluation import ExactMatchStringEvaluator
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def exact_match_string_evaluator() -> ExactMatchStringEvaluator:
    """Provide an ExactMatchStringEvaluator built with its defaults."""
    evaluator = ExactMatchStringEvaluator()
    return evaluator
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def exact_match_string_evaluator_ignore_case() -> ExactMatchStringEvaluator:
    """Provide an ExactMatchStringEvaluator that compares case-insensitively."""
    evaluator = ExactMatchStringEvaluator(ignore_case=True)
    return evaluator
|
||||||
|
|
||||||
|
|
||||||
|
def test_default_exact_matching(
    exact_match_string_evaluator: ExactMatchStringEvaluator,
) -> None:
    """Identical strings score 1.0; any difference scores 0.0."""
    prediction = "Mindy is the CTO"
    cases = [
        ("Mindy is the CTO", 1.0),
        ("Mindy is the CEO", 0.0),
    ]
    for reference, expected in cases:
        result = exact_match_string_evaluator.evaluate_strings(
            prediction=prediction, reference=reference
        )
        assert result["score"] == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_exact_matching_with_ignore_case(
    exact_match_string_evaluator_ignore_case: ExactMatchStringEvaluator,
) -> None:
    """Case differences are ignored, but other differences still fail."""
    prediction = "Mindy is the CTO"
    cases = [
        ("mindy is the cto", 1.0),
        ("mindy is the CEO", 0.0),
    ]
    for reference, expected in cases:
        result = exact_match_string_evaluator_ignore_case.evaluate_strings(
            prediction=prediction, reference=reference
        )
        assert result["score"] == expected
|
@ -0,0 +1,45 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from langchain.evaluation import RegexMatchStringEvaluator
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def regex_match_string_evaluator() -> RegexMatchStringEvaluator:
    """Provide a RegexMatchStringEvaluator built with its defaults (no flags)."""
    evaluator = RegexMatchStringEvaluator()
    return evaluator
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def regex_match_string_evaluator_ignore_case() -> RegexMatchStringEvaluator:
    """Provide a RegexMatchStringEvaluator that matches case-insensitively."""
    evaluator = RegexMatchStringEvaluator(flags=re.IGNORECASE)
    return evaluator
|
||||||
|
|
||||||
|
|
||||||
|
def test_default_regex_matching(
    regex_match_string_evaluator: RegexMatchStringEvaluator,
) -> None:
    """A matching pattern scores 1.0; a non-matching one scores 0.0."""
    prediction = "Mindy is the CTO"
    cases = [
        ("^Mindy.*CTO$", 1.0),
        ("^Mike.*CEO$", 0.0),
    ]
    for reference, expected in cases:
        result = regex_match_string_evaluator.evaluate_strings(
            prediction=prediction, reference=reference
        )
        assert result["score"] == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_regex_matching_with_ignore_case(
    regex_match_string_evaluator_ignore_case: RegexMatchStringEvaluator,
) -> None:
    """With re.IGNORECASE, a lowercase pattern matches mixed-case text."""
    result = regex_match_string_evaluator_ignore_case.evaluate_strings(
        prediction="Mindy is the CTO",
        reference="^mindy.*cto$",
    )
    assert result["score"] == 1.0
|
Loading…
Reference in New Issue