Add cost estimate feature

Calculates the number of tokens and the estimated user cost, and requires user permission to proceed.

A built-in user-permission bypass flag allows non-human (bot) callers to skip the prompt.
This commit is contained in:
monkish54 2023-02-06 18:32:06 -08:00
parent a37ca621ed
commit c94866e9e9
2 changed files with 92 additions and 16 deletions

View File

@ -5,14 +5,48 @@ from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings from langchain.embeddings import OpenAIEmbeddings
import pickle import pickle
import dotenv import dotenv
import tiktoken
import sys
def num_tokens_from_string(string: str, encoding_name: str, price_per_1k: float = 0.0004) -> "tuple[int, float]":
    """Count the tokens in *string* and estimate the OpenAI API cost.

    Args:
        string: The text to tokenize.
        encoding_name: Name of the tiktoken encoding (e.g. "cl100k_base").
        price_per_1k: USD price per 1,000 tokens; defaults to 0.0004, the
            original hard-coded embedding rate.

    Returns:
        A ``(num_tokens, total_price)`` tuple.
    """
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
    # Pricing is quoted per 1,000 tokens, hence the /1000 scaling.
    total_price = (num_tokens / 1000) * price_per_1k
    return num_tokens, total_price
def call_openai_api():
    """Build a FAISS vector store from the module-level docs and save it to disk."""
    # Embed the documents via OpenAI and index them with FAISS.
    vector_store = FAISS.from_texts(docs, OpenAIEmbeddings(), metadatas=metadatas)
    # Write the raw FAISS index separately, then pickle the store without it
    # (the index object itself is not picklable in-place).
    faiss.write_index(vector_store.index, "docs.index")
    vector_store.index = None
    with open("faiss_store.pkl", "wb") as outfile:
        pickle.dump(vector_store, outfile)
def get_user_permission():
    """Ask the user for permission to call the OpenAI API and spend their funds.

    Prints the token count and approximate cost for the module-level ``docs``,
    then prompts. The API is called on "y"/"Y" or an empty response (plain
    Enter counts as consent); any other answer aborts without spending money.
    """
    # Join the docs into one string to count the OpenAI tokens it represents.
    docs_content = " ".join(docs)
    tokens, total_price = num_tokens_from_string(string=docs_content, encoding_name="cl100k_base")
    # Show the token count and the approximate cost with readable formatting.
    print(f"Number of Tokens = {format(tokens, ',d')}")
    print(f"Approx Cost = ${format(total_price, ',.2f')}")
    # Both "y" and "" consent; merged from two duplicated branches.
    user_input = input("Price Okay? (Y/N) \n").lower()
    if user_input in ("y", ""):
        call_openai_api()
    else:
        print("The API was not called. No money was spent.")
#Load .env file
dotenv.load_dotenv() dotenv.load_dotenv()
# Here we load in the data in the format that Notion exports it in. # Here we load in the data in the format that Notion exports it in.
ps = list(Path("scikit-learn").glob("**/*.rst")) ps = list(Path("scikit-learn").glob("**/*.rst"))
# parse all child directories
# parse all child directories
data = [] data = []
sources = [] sources = []
for p in ps: for p in ps:
@ -30,10 +64,14 @@ for i, d in enumerate(data):
docs.extend(splits) docs.extend(splits)
metadatas.extend([{"source": sources[i]}] * len(splits)) metadatas.extend([{"source": sources[i]}] * len(splits))
# Here we check for command line arguments for bot calls.
# Here we create a vector store from the documents and save it to disk. # If no argument exists or the permission_bypass_flag argument is not '-y',
store = FAISS.from_texts(docs, OpenAIEmbeddings(), metadatas=metadatas) # user permission is requested to call the API.
faiss.write_index(store.index, "docs.index") if len(sys.argv) > 1:
store.index = None permission_bypass_flag = sys.argv[1]
with open("faiss_store.pkl", "wb") as f: if permission_bypass_flag == '-y':
pickle.dump(store, f) call_openai_api()
else:
get_user_permission()
else:
get_user_permission()

View File

@ -1,6 +1,8 @@
import os import os
import pickle import pickle
import dotenv import dotenv
import tiktoken
import sys
import faiss import faiss
import shutil import shutil
from pathlib import Path from pathlib import Path
@ -28,6 +30,38 @@ def convert_rst_to_txt(src_dir, dst_dir):
f"-C {dst_dir} " f"-C {dst_dir} "
sphinx_main(args.split()) sphinx_main(args.split())
def num_tokens_from_string(string: str, encoding_name: str, price_per_1k: float = 0.0004) -> "tuple[int, float]":
    """Count the tokens in *string* and estimate the OpenAI API cost.

    Args:
        string: The text to tokenize.
        encoding_name: Name of the tiktoken encoding (e.g. "cl100k_base").
        price_per_1k: USD price per 1,000 tokens; defaults to 0.0004, the
            original hard-coded embedding rate.

    Returns:
        A ``(num_tokens, total_price)`` tuple.
    """
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
    # Pricing is quoted per 1,000 tokens, hence the /1000 scaling.
    total_price = (num_tokens / 1000) * price_per_1k
    return num_tokens, total_price
def call_openai_api():
    """Build a FAISS vector store from the module-level docs and save it to disk."""
    # Embed the documents via OpenAI and index them with FAISS.
    vector_store = FAISS.from_texts(docs, OpenAIEmbeddings(), metadatas=metadatas)
    # Write the raw FAISS index separately, then pickle the store without it
    # (the index object itself is not picklable in-place).
    faiss.write_index(vector_store.index, "docs.index")
    vector_store.index = None
    with open("faiss_store.pkl", "wb") as outfile:
        pickle.dump(vector_store, outfile)
def get_user_permission():
    """Ask the user for permission to call the OpenAI API and spend their funds.

    Prints the token count and approximate cost for the module-level ``docs``,
    then prompts. The API is called on "y"/"Y" or an empty response (plain
    Enter counts as consent); any other answer aborts without spending money.
    """
    # Join the docs into one string to count the OpenAI tokens it represents.
    docs_content = " ".join(docs)
    tokens, total_price = num_tokens_from_string(string=docs_content, encoding_name="cl100k_base")
    # Show the token count and the approximate cost with readable formatting.
    print(f"Number of Tokens = {format(tokens, ',d')}")
    print(f"Approx Cost = ${format(total_price, ',.2f')}")
    # Both "y" and "" consent; merged from two duplicated branches.
    user_input = input("Price Okay? (Y/N) \n").lower()
    if user_input in ("y", ""):
        call_openai_api()
    else:
        print("The API was not called. No money was spent.")
#Load .env file #Load .env file
dotenv.load_dotenv() dotenv.load_dotenv()
@ -58,13 +92,17 @@ for i, d in enumerate(data):
docs.extend(splits) docs.extend(splits)
metadatas.extend([{"source": sources[i]}] * len(splits)) metadatas.extend([{"source": sources[i]}] * len(splits))
# Here we check for command line arguments for bot calls.
# Here we create a vector store from the documents and save it to disk. # If no argument exists or the permission_bypass_flag argument is not '-y',
store = FAISS.from_texts(docs, OpenAIEmbeddings(), metadatas=metadatas) # user permission is requested to call the API.
faiss.write_index(store.index, "docs.index") if len(sys.argv) > 1:
store.index = None permission_bypass_flag = sys.argv[1]
with open("faiss_store.pkl", "wb") as f: if permission_bypass_flag == '-y':
pickle.dump(store, f) call_openai_api()
else:
get_user_permission()
else:
get_user_permission()
# Delete tmp folder # Delete tmp folder
# Commented out for now # Commented out for now