From 209c1a12e8d1e314263bceb6b86c5c741dc3b22e Mon Sep 17 00:00:00 2001 From: Colin Jarvis Date: Wed, 26 Oct 2022 16:30:38 +0100 Subject: [PATCH] Resolved PR comments from Boris --- .gitignore | 3 +- ...ering_for_transaction_classification.ipynb | 5 +-- ...lass_classification_for_transactions.ipynb | 41 ++++++++----------- examples/helpers.py | 1 + 4 files changed, 20 insertions(+), 30 deletions(-) create mode 100644 examples/helpers.py diff --git a/.gitignore b/.gitignore index d4fa226..32d0573 100644 --- a/.gitignore +++ b/.gitignore @@ -128,8 +128,7 @@ dmypy.json # Pyre type checker .pyre/ -# helpers -*helpers.py +# Data *transactions*.jsonl /examples/data/transactions* *.DS_Store diff --git a/examples/Clustering_for_transaction_classification.ipynb b/examples/Clustering_for_transaction_classification.ipynb index 7071452..e4c3db5 100644 --- a/examples/Clustering_for_transaction_classification.ipynb +++ b/examples/Clustering_for_transaction_classification.ipynb @@ -41,10 +41,9 @@ "from sklearn.manifold import TSNE\n", "import matplotlib\n", "import matplotlib.pyplot as plt\n", + "import os\n", "\n", - "from helpers import OPENAI_API_KEY\n", - "\n", - "openai.api_key = OPENAI_API_KEY\n", + "openai.api_key = os.getenv(\"OPENAI_API_KEY\")\n", "COMPLETIONS_MODEL = \"text-davinci-002\"\n", "\n", "# This path leads to a file with embeddings created in the notebook linked above\n", diff --git a/examples/Multiclass_classification_for_transactions.ipynb b/examples/Multiclass_classification_for_transactions.ipynb index 7029ff2..2fd531d 100644 --- a/examples/Multiclass_classification_for_transactions.ipynb +++ b/examples/Multiclass_classification_for_transactions.ipynb @@ -40,10 +40,10 @@ "import openai\n", "import pandas as pd\n", "import numpy as np\n", + "import json\n", + "import os\n", "\n", - "from helpers import OPENAI_API_KEY\n", - "\n", - "openai.api_key = OPENAI_API_KEY\n", + "openai.api_key = os.getenv(\"OPENAI_API_KEY\")\n", "COMPLETIONS_MODEL = \"text-davinci-002\"" ] }, @@ -183,19 +183,9 @@ }, { "cell_type": "code", - "execution_count": 316, + "execution_count": 335, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "31\n", - "8\n", - "All good\n" - ] - } - ], + "outputs": [], "source": [ "def request_completion(prompt):\n", " \n", @@ -221,7 +211,9 @@ " \n", " return classification\n", "\n", - "import json\n", + "# This function takes your training and validation outputs from the prepare_data function of the Finetuning API, and\n", + "# confirms that each have the same number of classes.\n", + "# If they do not have the same number of classes the fine-tune will fail and return an error\n", "\n", "def check_finetune_classes(train_file,valid_file):\n", "\n", @@ -270,18 +262,17 @@ "outputs": [], "source": [ "zero_shot_prompt = '''You are a data expert working for the National Library of Scotland. \n", - " You are analysing all transactions over £25,000 in value and classifying them into one of five categories.\n", - " The five categories are Building Improvement, Literature & Archive, Utility Bills, Professional Services and Software/IT.\n", - " If you can't tell what it is, say Could not classify\n", - " \n", - " Transaction:\n", + "You are analysing all transactions over £25,000 in value and classifying them into one of five categories.\n", + "The five categories are Building Improvement, Literature & Archive, Utility Bills, Professional Services and Software/IT.\n", + "If you can't tell what it is, say Could not classify\n", " \n", - " Supplier: SUPPLIER_NAME\n", - " Description: DESCRIPTION_TEXT\n", - " Value: TRANSACTION_VALUE\n", + "Transaction:\n", " \n", + "Supplier: SUPPLIER_NAME\n", + "Description: DESCRIPTION_TEXT\n", + "Value: TRANSACTION_VALUE\n", " \n", - " The classification is:'''" + "The classification is:'''" ] }, { diff --git a/examples/helpers.py b/examples/helpers.py new file mode 100644 index 0000000..fcc4a68 --- /dev/null +++ b/examples/helpers.py @@ -0,0 +1 @@ +OPENAI_API_KEY="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" \ No newline at end of file