mirror of
https://github.com/arc53/DocsGPT
synced 2024-11-17 21:26:26 +00:00
commit
b6629ce7aa
@ -1,44 +1,68 @@
|
||||
import os
|
||||
import pickle
|
||||
|
||||
import dotenv
|
||||
import datetime
|
||||
from flask import Flask, request, render_template
|
||||
# os.environ["LANGCHAIN_HANDLER"] = "langchain"
|
||||
import faiss
|
||||
from langchain import OpenAI, VectorDBQA
|
||||
from langchain.chains.question_answering import load_qa_chain
|
||||
from langchain.prompts import PromptTemplate
|
||||
import requests
|
||||
from flask import Flask, request, render_template
|
||||
from langchain import FAISS
|
||||
from langchain import OpenAI, VectorDBQA, HuggingFaceHub, Cohere
|
||||
from langchain.chains.question_answering import load_qa_chain
|
||||
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceHubEmbeddings, CohereEmbeddings, HuggingFaceInstructEmbeddings
|
||||
from langchain.prompts import PromptTemplate
|
||||
|
||||
# os.environ["LANGCHAIN_HANDLER"] = "langchain"
|
||||
|
||||
# Select the LLM backend and the embeddings model from the environment.
# An unset *or empty* variable falls back to the default: an empty string
# would match none of the backend branches in the answer endpoint and leave
# the model / vector store unbound there.
# NOTE(review): these are read before dotenv.load_dotenv() is called further
# down in this module, so values that exist only in .env are not seen here -
# confirm whether that ordering is intended.
llm_choice = os.getenv("LLM_NAME") or "openai"
embeddings_choice = os.getenv("EMBEDDINGS_NAME") or "openai_text-embedding-ada-002"
|
||||
|
||||
|
||||
|
||||
# Manifest backend only: the two imports below run solely when that backend is
# selected, and the client built here is what ManifestWrapper receives later.
if llm_choice == "manifest":
    from manifest import Manifest
    from langchain.llms.manifest import ManifestWrapper

    # "huggingface" Manifest client reached at a fixed local address.
    manifest = Manifest(client_name="huggingface", client_connection="http://127.0.0.1:5000")
|
||||
|
||||
# On Windows, make pathlib.PosixPath refer to pathlib.WindowsPath.
# NOTE(review): presumably so that objects pickled on a POSIX machine and
# containing PosixPath instances can be loaded on Windows - confirm.
import platform

if platform.system() == "Windows":
    import pathlib

    # `temp` keeps a handle on the original PosixPath class before the alias
    # replaces it (right-hand side is evaluated before either assignment).
    temp, pathlib.PosixPath = pathlib.PosixPath, pathlib.WindowsPath
|
||||
|
||||
# Load variables from the .env file into the process environment.
dotenv.load_dotenv()

# Read the prompt text once at import time; it is later used as the template
# of the combine prompt. The encoding is pinned so the result does not depend
# on the platform's locale default (this module also targets Windows).
# NOTE(review): assumes combine_prompt.txt is UTF-8 - confirm.
with open("combine_prompt.txt", "r", encoding="utf-8") as f:
    template = f.read()
|
||||
|
||||
# Record which credentials the server already holds. The answer endpoint uses
# these flags to decide whether a key comes from the environment or from the
# request payload.
# Only API_KEY is consulted for the LLM key: that is the variable the endpoint
# ends up reading, so the flag must not depend on OPENAI_API_KEY as well.
api_key_set = os.getenv("API_KEY") is not None
embeddings_key_set = os.getenv("EMBEDDINGS_KEY") is not None
|
||||
|
||||
app = Flask(__name__)


@app.route("/")
def home():
    """Render the landing page.

    Passes three module-level settings to the "index.html" template:
    ``api_key_set``, ``llm_choice`` and ``embeddings_choice``.

    A single return is kept. An earlier return that passed only
    ``api_key_set`` made this one unreachable, so the template never received
    the backend and embeddings selection.
    """
    return render_template(
        "index.html",
        api_key_set=api_key_set,
        llm_choice=llm_choice,
        embeddings_choice=embeddings_choice,
    )
|
||||
|
||||
|
||||
@app.route("/api/answer", methods=["POST"])
|
||||
@ -48,7 +72,14 @@ def api_answer():
|
||||
if not api_key_set:
|
||||
api_key = data["api_key"]
|
||||
else:
|
||||
api_key = os.getenv("OPENAI_API_KEY")
|
||||
api_key = os.getenv("API_KEY")
|
||||
if not embeddings_key_set:
|
||||
embeddings_key = data["embeddings_key"]
|
||||
else:
|
||||
embeddings_key = os.getenv("EMBEDDINGS_KEY")
|
||||
|
||||
print(embeddings_key)
|
||||
print(api_key)
|
||||
|
||||
# check if the vectorstore is set
|
||||
if "active_docs" in data:
|
||||
@ -59,25 +90,32 @@ def api_answer():
|
||||
vectorstore = ""
|
||||
|
||||
# loading the index and the store and the prompt template
|
||||
index = faiss.read_index(f"{vectorstore}docs.index")
|
||||
# Note if you have used other embeddings than OpenAI, you need to change the embeddings
|
||||
if embeddings_choice == "openai_text-embedding-ada-002":
|
||||
docsearch = FAISS.load_local(vectorstore, OpenAIEmbeddings(openai_api_key=embeddings_key))
|
||||
elif embeddings_choice == "huggingface_sentence-transformers/all-mpnet-base-v2":
|
||||
docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings())
|
||||
elif embeddings_choice == "huggingface_hkunlp/instructor-large":
|
||||
docsearch = FAISS.load_local(vectorstore, HuggingFaceInstructEmbeddings())
|
||||
elif embeddings_choice == "cohere_medium":
|
||||
docsearch = FAISS.load_local(vectorstore, CohereEmbeddings(cohere_api_key=embeddings_key))
|
||||
|
||||
with open(f"{vectorstore}faiss_store.pkl", "rb") as f:
|
||||
store = pickle.load(f)
|
||||
|
||||
store.index = index
|
||||
# create a prompt template
|
||||
c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template)
|
||||
# create a chain with the prompt template and the store
|
||||
|
||||
#chain = VectorDBQA.from_llm(llm=OpenAI(openai_api_key=api_key, temperature=0), vectorstore=store, combine_prompt=c_prompt)
|
||||
# chain = VectorDBQA.from_chain_type(llm=OpenAI(openai_api_key=api_key, temperature=0), chain_type='map_reduce',
|
||||
# vectorstore=store)
|
||||
if llm_choice == "openai":
|
||||
llm = OpenAI(openai_api_key=api_key, temperature=0)
|
||||
elif llm_choice == "manifest":
|
||||
llm = ManifestWrapper(client=manifest, llm_kwargs={"temperature": 0.001, "max_tokens": 2048})
|
||||
elif llm_choice == "huggingface":
|
||||
llm = HuggingFaceHub(repo_id="bigscience/bloom", huggingfacehub_api_token=api_key)
|
||||
elif llm_choice == "cohere":
|
||||
llm = Cohere(model="command-xlarge-nightly", cohere_api_key=api_key)
|
||||
|
||||
qa_chain = load_qa_chain(OpenAI(openai_api_key=api_key, temperature=0), chain_type="map_reduce",
|
||||
qa_chain = load_qa_chain(llm=llm, chain_type="map_reduce",
|
||||
combine_prompt=c_prompt)
|
||||
chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=store)
|
||||
|
||||
|
||||
chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=2)
|
||||
|
||||
# fetch the answer
|
||||
result = chain({"query": question})
|
||||
@ -94,6 +132,7 @@ def api_answer():
|
||||
# }
|
||||
return result
|
||||
|
||||
|
||||
@app.route("/api/docs_check", methods=["POST"])
|
||||
def check_docs():
|
||||
# check if docs exist in a vectorstore folder
|
||||
@ -119,6 +158,7 @@ def check_docs():
|
||||
|
||||
return {"status": 'loaded'}
|
||||
|
||||
|
||||
# handling CORS
|
||||
@app.after_request
|
||||
def after_request(response):
|
||||
|
BIN
application/index.faiss
Normal file
BIN
application/index.faiss
Normal file
Binary file not shown.
BIN
application/index.pkl
Normal file
BIN
application/index.pkl
Normal file
Binary file not shown.
@ -25,6 +25,7 @@ if (el) {
|
||||
|
||||
body: JSON.stringify({question: message,
|
||||
api_key: localStorage.getItem('apiKey'),
|
||||
embeddings_key: localStorage.getItem('apiKey'),
|
||||
active_docs: localStorage.getItem('activeDocs')}),
|
||||
})
|
||||
.then(response => response.json())
|
||||
|
@ -131,15 +131,19 @@ This will return a new DataFrame with all the columns from both tables, and only
|
||||
var option = document.createElement("option");
|
||||
if (docsIndex[key].name == docsIndex[key].language) {
|
||||
option.text = docsIndex[key].name + " " + docsIndex[key].version;
|
||||
option.value = docsIndex[key].name + "/" + ".project" + "/" + docsIndex[key].version + "/";
|
||||
option.value = docsIndex[key].name + "/" + ".project" + "/" + docsIndex[key].version + "/{{ embeddings_choice }}/";
|
||||
if (docsIndex[key].model == "{{ embeddings_choice }}") {
|
||||
select.add(option);
|
||||
}
|
||||
}
|
||||
else {
|
||||
option.text = docsIndex[key].name + " " + docsIndex[key].version;
|
||||
option.value = docsIndex[key].language + "/" + docsIndex[key].name + "/" + docsIndex[key].version + "/";
|
||||
option.value = docsIndex[key].language + "/" + docsIndex[key].name + "/" + docsIndex[key].version + "/{{ embeddings_choice }}/";
|
||||
if (docsIndex[key].model == "{{ embeddings_choice }}") {
|
||||
select.add(option);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
{% if not api_key_set %}
|
||||
|
@ -4,9 +4,15 @@ import pickle
|
||||
import tiktoken
|
||||
from langchain.vectorstores import FAISS
|
||||
from langchain.embeddings import OpenAIEmbeddings
|
||||
|
||||
#from langchain.embeddings import HuggingFaceEmbeddings
|
||||
#from langchain.embeddings import HuggingFaceInstructEmbeddings
|
||||
#from langchain.embeddings import CohereEmbeddings
|
||||
|
||||
from retry import retry
|
||||
|
||||
|
||||
|
||||
def num_tokens_from_string(string: str, encoding_name: str) -> int:
|
||||
# Function to convert string to tokens and estimate user cost.
|
||||
encoding = tiktoken.get_encoding(encoding_name)
|
||||
@ -33,30 +39,23 @@ def call_openai_api(docs, folder_name):
|
||||
#docs = docs[:n]
|
||||
c1 = 0
|
||||
store = FAISS.from_documents(docs_test, OpenAIEmbeddings())
|
||||
|
||||
# Uncomment for MPNet embeddings
|
||||
# model_name = "sentence-transformers/all-mpnet-base-v2"
|
||||
# hf = HuggingFaceEmbeddings(model_name=model_name)
|
||||
# store = FAISS.from_documents(docs_test, hf)
|
||||
for i in tqdm(docs, desc="Embedding 🦖", unit="docs", total=len(docs), bar_format='{l_bar}{bar}| Time Left: {remaining}'):
|
||||
try:
|
||||
import time
|
||||
store_add_texts_with_retry(store, i)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
print("Error on ", i)
|
||||
print("Saving progress")
|
||||
print(f"stopped at {c1} out of {len(docs)}")
|
||||
faiss.write_index(store.index, f"outputs/{folder_name}/docs.index")
|
||||
store_index_bak = store.index
|
||||
store.index = None
|
||||
with open(f"outputs/{folder_name}/faiss_store.pkl", "wb") as f:
|
||||
pickle.dump(store, f)
|
||||
print("Sleeping for 60 seconds and trying again")
|
||||
time.sleep(60)
|
||||
store.index = store_index_bak
|
||||
store.add_texts([i.page_content], metadatas=[i.metadata])
|
||||
store.save_local(f"outputs/{folder_name}")
|
||||
break
|
||||
c1 += 1
|
||||
|
||||
faiss.write_index(store.index, f"outputs/{folder_name}/docs.index")
|
||||
store.index = None
|
||||
with open(f"outputs/{folder_name}/faiss_store.pkl", "wb") as f:
|
||||
pickle.dump(store, f)
|
||||
store.save_local(f"outputs/{folder_name}")
|
||||
|
||||
def get_user_permission(docs, folder_name):
|
||||
# Function to ask user permission to call the OpenAI api and spend their OpenAI funds.
|
||||
|
Loading…
Reference in New Issue
Block a user