forked from Archives/langchain
d5b4393bb2
Co-authored-by: Vadym Barda <vadim.barda@gmail.com>
307 lines
7.3 KiB
Plaintext
307 lines
7.3 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "a4734146",
|
|
"metadata": {},
|
|
"source": [
|
|
"# LLM Math\n",
|
|
"\n",
|
|
"Evaluating chains that know how to do math."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "fdd7afae",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Comment this out if you are NOT using tracing\n",
|
|
"import os\n",
|
|
"os.environ[\"LANGCHAIN_HANDLER\"] = \"langchain\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "ce05ffea",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"application/vnd.jupyter.widget-view+json": {
|
|
"model_id": "d028a511cede4de2b845b9a9954d6bea",
|
|
"version_major": 2,
|
|
"version_minor": 0
|
|
},
|
|
"text/plain": [
|
|
"Downloading readme: 0%| | 0.00/21.0 [00:00<?, ?B/s]"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Downloading and preparing dataset json/LangChainDatasets--llm-math to /Users/harrisonchase/.cache/huggingface/datasets/LangChainDatasets___json/LangChainDatasets--llm-math-509b11d101165afa/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"application/vnd.jupyter.widget-view+json": {
|
|
"model_id": "a71c8e5a21dd4da5a20a354b544f7a58",
|
|
"version_major": 2,
|
|
"version_minor": 0
|
|
},
|
|
"text/plain": [
|
|
"Downloading data files: 0%| | 0/1 [00:00<?, ?it/s]"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"data": {
|
|
"application/vnd.jupyter.widget-view+json": {
|
|
"model_id": "ae530ca624154a1a934075c47d1093a6",
|
|
"version_major": 2,
|
|
"version_minor": 0
|
|
},
|
|
"text/plain": [
|
|
"Downloading data: 0%| | 0.00/631 [00:00<?, ?B/s]"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"data": {
|
|
"application/vnd.jupyter.widget-view+json": {
|
|
"model_id": "7a4968df05d84bc483aa2c5039aecafe",
|
|
"version_major": 2,
|
|
"version_minor": 0
|
|
},
|
|
"text/plain": [
|
|
"Extracting data files: 0%| | 0/1 [00:00<?, ?it/s]"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"data": {
|
|
"application/vnd.jupyter.widget-view+json": {
|
|
"model_id": "",
|
|
"version_major": 2,
|
|
"version_minor": 0
|
|
},
|
|
"text/plain": [
|
|
"Generating train split: 0 examples [00:00, ? examples/s]"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Dataset json downloaded and prepared to /Users/harrisonchase/.cache/huggingface/datasets/LangChainDatasets___json/LangChainDatasets--llm-math-509b11d101165afa/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"application/vnd.jupyter.widget-view+json": {
|
|
"model_id": "9a2caed96225410fb1cc0f8f155eb766",
|
|
"version_major": 2,
|
|
"version_minor": 0
|
|
},
|
|
"text/plain": [
|
|
" 0%| | 0/1 [00:00<?, ?it/s]"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"from langchain.evaluation.loading import load_dataset\n",
|
|
"dataset = load_dataset(\"llm-math\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "8a998d6f",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Setting up a chain\n",
|
|
"Now we need to create a chain that can do math."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"id": "7078f7f8",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from langchain.llms import OpenAI\n",
|
|
"from langchain.chains import LLMMathChain"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"id": "2bd70c46",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# temperature=0 keeps completions as deterministic as possible so the evaluation is repeatable\n",
"llm = OpenAI(temperature=0)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"id": "954c3270",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"chain = LLMMathChain(llm=llm)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"id": "f252027e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"predictions = chain.apply(dataset)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 22,
|
|
"id": "c8af7041",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Remove the literal \"Answer:\" label. str.strip(\"Answer: \") treats its argument as a set of\n",
"# characters, not a prefix, so it could also eat leading/trailing characters of the value itself.\n",
"numeric_output = [float(p['answer'].split(\"Answer:\", 1)[-1]) for p in predictions]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 23,
|
|
"id": "cc09ffe4",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import math\n",
"\n",
"# Compare with a tolerance instead of ==: floats that differ only by rounding error still count as correct.\n",
"correct = [math.isclose(float(example['answer']), numeric_output[i]) for i, example in enumerate(dataset)]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 24,
|
|
"id": "585244e4",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"1.0"
|
|
]
|
|
},
|
|
"execution_count": 24,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"sum(correct) / len(correct)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 25,
|
|
"id": "0d14ac78",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"input: 5\n",
|
|
"expected output : 5.0\n",
|
|
"prediction: 5.0\n",
|
|
"input: 5 + 3\n",
|
|
"expected output : 8.0\n",
|
|
"prediction: 8.0\n",
|
|
"input: 2^3.171\n",
|
|
"expected output : 9.006708689094099\n",
|
|
"prediction: 9.006708689094099\n",
|
|
"input: 2 ^3.171 \n",
|
|
"expected output : 9.006708689094099\n",
|
|
"prediction: 9.006708689094099\n",
|
|
"input: two to the power of three point one hundred seventy one\n",
|
|
"expected output : 9.006708689094099\n",
|
|
"prediction: 9.006708689094099\n",
|
|
"input: five + three squared minus 1\n",
|
|
"expected output : 13.0\n",
|
|
"prediction: 13.0\n",
|
|
"input: 2097 times 27.31\n",
|
|
"expected output : 57269.07\n",
|
|
"prediction: 57269.07\n",
|
|
"input: two thousand ninety seven times twenty seven point thirty one\n",
|
|
"expected output : 57269.07\n",
|
|
"prediction: 57269.07\n",
|
|
"input: 209758 / 2714\n",
|
|
"expected output : 77.28739867354459\n",
|
|
"prediction: 77.28739867354459\n",
|
|
"input: 209758.857 divided by 2714.31\n",
|
|
"expected output : 77.27888745205964\n",
|
|
"prediction: 77.27888745205964\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"for i, example in enumerate(dataset):\n",
|
|
" print(\"input: \", example[\"question\"])\n",
|
|
" print(\"expected output :\", example[\"answer\"])\n",
|
|
" print(\"prediction: \", numeric_output[i])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "b9021ffd",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.1"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|