Harrison/llm math (#1808)

Co-authored-by: Vadym Barda <vadim.barda@gmail.com>
2024-11-06 03:20:49 +00:00 · 2023-03-20 07:53:26 -07:00 · 2023-03-20 07:53:26 -07:00 · d5b4393bb2
commit d5b4393bb2
parent 7b6ff7fe00
2 changed files with 310 additions and 13 deletions
--- a/docs/use_cases/evaluation/llm_math.ipynb
+++ b/docs/use_cases/evaluation/llm_math.ipynb
@ -0,0 +1,306 @@
 {
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a4734146",
   "metadata": {},
   "source": [
    "# LLM Math\n",
    "\n",
    "Evaluating chains that know how to do math."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "fdd7afae",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Comment this out if you are NOT using tracing\n",
    "import os\n",
    "os.environ[\"LANGCHAIN_HANDLER\"] = \"langchain\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "ce05ffea",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "d028a511cede4de2b845b9a9954d6bea",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading readme:   0%|          | 0.00/21.0 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Downloading and preparing dataset json/LangChainDatasets--llm-math to /Users/harrisonchase/.cache/huggingface/datasets/LangChainDatasets___json/LangChainDatasets--llm-math-509b11d101165afa/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51...\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a71c8e5a21dd4da5a20a354b544f7a58",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "ae530ca624154a1a934075c47d1093a6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading data:   0%|          | 0.00/631 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7a4968df05d84bc483aa2c5039aecafe",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Generating train split: 0 examples [00:00, ? examples/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Dataset json downloaded and prepared to /Users/harrisonchase/.cache/huggingface/datasets/LangChainDatasets___json/LangChainDatasets--llm-math-509b11d101165afa/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51. Subsequent calls will reuse this data.\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "9a2caed96225410fb1cc0f8f155eb766",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "  0%|          | 0/1 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from langchain.evaluation.loading import load_dataset\n",
    "dataset = load_dataset(\"llm-math\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8a998d6f",
   "metadata": {},
   "source": [
    "## Setting up a chain\n",
    "Now we need to create some pipelines for doing math."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "7078f7f8",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.llms import OpenAI\n",
    "from langchain.chains import LLMMathChain"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "2bd70c46",
   "metadata": {},
   "outputs": [],
   "source": [
    "llm = OpenAI()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "954c3270",
   "metadata": {},
   "outputs": [],
   "source": [
    "chain = LLMMathChain(llm=llm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "f252027e",
   "metadata": {},
   "outputs": [],
   "source": [
    "predictions = chain.apply(dataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "c8af7041",
   "metadata": {},
   "outputs": [],
   "source": [
    "numeric_output = [float(p['answer'].strip().strip(\"Answer: \")) for p in predictions]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "cc09ffe4",
   "metadata": {},
   "outputs": [],
   "source": [
    "correct = [example['answer'] == numeric_output[i] for i, example in enumerate(dataset)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "585244e4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1.0"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sum(correct) / len(correct)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "0d14ac78",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "input:  5\n",
      "expected output : 5.0\n",
      "prediction:  5.0\n",
      "input:  5 + 3\n",
      "expected output : 8.0\n",
      "prediction:  8.0\n",
      "input:  2^3.171\n",
      "expected output : 9.006708689094099\n",
      "prediction:  9.006708689094099\n",
      "input:    2 ^3.171 \n",
      "expected output : 9.006708689094099\n",
      "prediction:  9.006708689094099\n",
      "input:  two to the power of three point one hundred seventy one\n",
      "expected output : 9.006708689094099\n",
      "prediction:  9.006708689094099\n",
      "input:  five + three squared minus 1\n",
      "expected output : 13.0\n",
      "prediction:  13.0\n",
      "input:  2097 times 27.31\n",
      "expected output : 57269.07\n",
      "prediction:  57269.07\n",
      "input:  two thousand ninety seven times twenty seven point thirty one\n",
      "expected output : 57269.07\n",
      "prediction:  57269.07\n",
      "input:  209758 / 2714\n",
      "expected output : 77.28739867354459\n",
      "prediction:  77.28739867354459\n",
      "input:  209758.857 divided by 2714.31\n",
      "expected output : 77.27888745205964\n",
      "prediction:  77.27888745205964\n"
     ]
    }
   ],
   "source": [
    "for i, example in enumerate(dataset):\n",
    "    print(\"input: \", example[\"question\"])\n",
    "    print(\"expected output :\", example[\"answer\"])\n",
    "    print(\"prediction: \", numeric_output[i])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b9021ffd",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
--- a/langchain/chains/llm_math/prompt.py
+++ b/langchain/chains/llm_math/prompt.py
@ -1,26 +1,17 @@
 # flake8: noqa
 from langchain.prompts.prompt import PromptTemplate
-_PROMPT_TEMPLATE = """You are GPT-3, and you can't do math.
+_PROMPT_TEMPLATE = """Translate a math problem into Python code that can be executed in Python 3 REPL. Use the output of running this code to answer the question.
-You can do basic math, and your memorization abilities are impressive, but you can't do any complex calculations that a human could not do in their head. You also have an annoying tendency to just make up highly specific, but wrong, answers.
+Question: ${{Question with math problem.}}
 So we hooked you up to a Python 3 kernel, and now you can execute code. If anyone gives you a hard math problem, just use this format and we’ll take care of the rest:
 Question: ${{Question with hard calculation.}}
 ```python
-${{Code that prints what you need to know}}
+${{Code that solves the problem and prints the solution}}
 ```
 ```output
-${{Output of your code}}
+${{Output of running the code}}
 ```
 Answer: ${{Answer}}
 Otherwise, use this simpler format:
 Question: ${{Question without hard calculation}}
 Answer: ${{Answer}}
 Begin.
 Question: What is 37593 * 67?