From 17047b6201d3393d72484d19709e5e2dd9820f5a Mon Sep 17 00:00:00 2001
From: Alex
Date: Fri, 3 Mar 2023 17:48:37 +0000
Subject: [PATCH 1/2] better prompts
---
application/app.py | 44 +++++++++++++++++++----------
application/combine_prompt.txt | 12 ++++----
application/combine_prompt_hist.txt | 12 ++++++--
application/question_prompt.txt | 4 +++
4 files changed, 47 insertions(+), 25 deletions(-)
create mode 100644 application/question_prompt.txt
diff --git a/application/app.py b/application/app.py
index 15ece58..933db17 100644
--- a/application/app.py
+++ b/application/app.py
@@ -1,18 +1,21 @@
import os
import json
import traceback
+import pprint
import dotenv
import requests
from flask import Flask, request, render_template
from langchain import FAISS
from langchain.llms import OpenAIChat
-from langchain import VectorDBQA, HuggingFaceHub, Cohere
+from langchain import VectorDBQA, HuggingFaceHub, Cohere, OpenAI
from langchain.chains.question_answering import load_qa_chain
-from langchain.embeddings import OpenAIEmbeddings, HuggingFaceHubEmbeddings, CohereEmbeddings, HuggingFaceInstructEmbeddings
+from langchain.embeddings import OpenAIEmbeddings, HuggingFaceHubEmbeddings, CohereEmbeddings, \
+ HuggingFaceInstructEmbeddings
from langchain.prompts import PromptTemplate
from error import bad_request
-# os.environ["LANGCHAIN_HANDLER"] = "langchain"
+
+os.environ["LANGCHAIN_HANDLER"] = "langchain"
if os.getenv("LLM_NAME") is not None:
llm_choice = os.getenv("LLM_NAME")
@@ -24,8 +27,6 @@ if os.getenv("EMBEDDINGS_NAME") is not None:
else:
embeddings_choice = "openai_text-embedding-ada-002"
-
-
if llm_choice == "manifest":
from manifest import Manifest
from langchain.llms.manifest import ManifestWrapper
@@ -53,6 +54,9 @@ with open("combine_prompt.txt", "r") as f:
with open("combine_prompt_hist.txt", "r") as f:
template_hist = f.read()
+with open("question_prompt.txt", "r") as f:
+ template_quest = f.read()
+
if os.getenv("API_KEY") is not None:
api_key_set = True
else:
@@ -76,7 +80,7 @@ def api_answer():
data = request.get_json()
question = data["question"]
history = data["history"]
- print('-'*5)
+ print('-' * 5)
if not api_key_set:
api_key = data["api_key"]
else:
@@ -95,7 +99,7 @@ def api_answer():
vectorstore = ""
else:
vectorstore = ""
-
+ #vectorstore = "outputs/inputs/"
# loading the index and the store and the prompt template
# Note if you have used other embeddings than OpenAI, you need to change the embeddings
if embeddings_choice == "openai_text-embedding-ada-002":
@@ -110,13 +114,19 @@ def api_answer():
# create a prompt template
if history:
history = json.loads(history)
- template_temp = template_hist.replace("{historyquestion}", history[0]).replace("{historyanswer}", history[1])
- c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template_temp, template_format="jinja2")
+ template_temp = template_hist.replace("{historyquestion}", history[0]).replace("{historyanswer}",
+ history[1])
+ c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template_temp,
+ template_format="jinja2")
else:
- c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template, template_format="jinja2")
+ c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template,
+ template_format="jinja2")
+ q_prompt = PromptTemplate(input_variables=["context", "question"], template=template_quest,
+ template_format="jinja2")
if llm_choice == "openai":
llm = OpenAIChat(openai_api_key=api_key, temperature=0)
+ #llm = OpenAI(openai_api_key=api_key, temperature=0)
elif llm_choice == "manifest":
llm = ManifestWrapper(client=manifest, llm_kwargs={"temperature": 0.001, "max_tokens": 2048})
elif llm_choice == "huggingface":
@@ -125,14 +135,17 @@ def api_answer():
llm = Cohere(model="command-xlarge-nightly", cohere_api_key=api_key)
qa_chain = load_qa_chain(llm=llm, chain_type="map_reduce",
- combine_prompt=c_prompt)
+ combine_prompt=c_prompt, question_prompt=q_prompt)
- chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=4)
+ chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=25, return_source_documents=True)
-
# fetch the answer
result = chain({"query": question})
- print(result)
+ # pprint.pprint(result)
+ # docs = docsearch.similarity_search(question, k=8)
+
+ for i in result['source_documents']:
+ print(i.page_content)
# some formatting for the frontend
result['answer'] = result['result']
@@ -141,6 +154,7 @@ def api_answer():
result['answer'] = result['answer'].split("SOURCES:")[0]
except:
pass
+ del result['source_documents']
# mock result
# result = {
@@ -152,7 +166,7 @@ def api_answer():
# print whole traceback
traceback.print_exc()
print(str(e))
- return bad_request(500,str(e))
+ return bad_request(500, str(e))
@app.route("/api/docs_check", methods=["POST"])
diff --git a/application/combine_prompt.txt b/application/combine_prompt.txt
index 5eaccb7..a008da3 100644
--- a/application/combine_prompt.txt
+++ b/application/combine_prompt.txt
@@ -1,6 +1,4 @@
-You are a DocsGPT bot assistant by Arc53 that provides help with programming libraries. You give thorough answers with code examples.
-Given the following extracted parts of a long document and a question, create a final answer with references ("SOURCES").
-ALWAYS return a "SOURCES" part in your answer.
+You are DocsGPT, a friendly and helpful AI assistant by Arc53 that provides help with documents. You give thorough answers with code examples if possible.
QUESTION: How to merge tables in pandas?
=========
@@ -12,12 +10,12 @@ Source: 30-pl
FINAL ANSWER: To merge two tables in pandas, you can use the pd.merge() function. The basic syntax is: \n\npd.merge(left, right, on, how) \n\nwhere left and right are the two tables to merge, on is the column to merge on, and how is the type of merge to perform. \n\nFor example, to merge the two tables df1 and df2 on the column 'id', you can use: \n\npd.merge(df1, df2, on='id', how='inner')
SOURCES: 28-pl 30-pl
-QUESTION: How to eat vegetables using pandas?
+QUESTION: How are you?
=========
-Content: ExtensionArray.repeat(repeats, axis=None) Returns a new ExtensionArray where each element of the current ExtensionArray is repeated consecutively a given number of times. \n\nParameters: repeats int or array of ints. The number of repetitions for each element. This should be a positive integer. Repeating 0 times will return an empty array. axis (0 or ‘index’, 1 or ‘columns’), default 0 The axis along which to repeat values. Currently only axis=0 is supported.
-Source: 0-pl
+CONTENT:
+SOURCE:
=========
-FINAL ANSWER: You can't eat vegetables using pandas. You can only eat them using your mouth.
+FINAL ANSWER: I am fine, thank you. How are you?
SOURCES:
QUESTION: {{ question }}
diff --git a/application/combine_prompt_hist.txt b/application/combine_prompt_hist.txt
index 4ab881c..509a4a0 100644
--- a/application/combine_prompt_hist.txt
+++ b/application/combine_prompt_hist.txt
@@ -1,6 +1,4 @@
-You are a DocsGPT bot assistant by Arc53 that provides help with programming libraries. You give thorough answers with code examples.
-Given the following extracted parts of a long document and a question, create a final answer with references ("SOURCES").
-ALWAYS return a "SOURCES" part in your answer. You can also remember things from previous questions and use them in your answer.
+You are DocsGPT, a friendly and helpful AI assistant by Arc53 that provides help with documents. You give thorough answers with code examples if possible.
QUESTION: How to merge tables in pandas?
=========
@@ -12,6 +10,14 @@ Source: 30-pl
FINAL ANSWER: To merge two tables in pandas, you can use the pd.merge() function. The basic syntax is: \n\npd.merge(left, right, on, how) \n\nwhere left and right are the two tables to merge, on is the column to merge on, and how is the type of merge to perform. \n\nFor example, to merge the two tables df1 and df2 on the column 'id', you can use: \n\npd.merge(df1, df2, on='id', how='inner')
SOURCES: 28-pl 30-pl
+QUESTION: How are you?
+=========
+CONTENT:
+SOURCE:
+=========
+FINAL ANSWER: I am fine, thank you. How are you?
+SOURCES:
+
QUESTION: {{ historyquestion }}
=========
CONTENT:
diff --git a/application/question_prompt.txt b/application/question_prompt.txt
new file mode 100644
index 0000000..0571b22
--- /dev/null
+++ b/application/question_prompt.txt
@@ -0,0 +1,4 @@
+Use the following portion of a long document to see if any of the text is relevant to answer the question.
+{{ context }}
+Question: {{ question }}
+Provide all text relevant to the question verbatim. Summarize if needed. If nothing is relevant, return "-".
\ No newline at end of file
From 02aca04efed6a970d845b91956ab7a5d8a7bbd04 Mon Sep 17 00:00:00 2001
From: Alex
Date: Fri, 3 Mar 2023 18:05:00 +0000
Subject: [PATCH 2/2] Update app.py
---
application/app.py | 9 +--------
1 file changed, 1 insertion(+), 8 deletions(-)
diff --git a/application/app.py b/application/app.py
index 933db17..f9ea185 100644
--- a/application/app.py
+++ b/application/app.py
@@ -1,7 +1,6 @@
import os
import json
import traceback
-import pprint
import dotenv
import requests
@@ -137,15 +136,10 @@ def api_answer():
qa_chain = load_qa_chain(llm=llm, chain_type="map_reduce",
combine_prompt=c_prompt, question_prompt=q_prompt)
- chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=25, return_source_documents=True)
+ chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=10)
# fetch the answer
result = chain({"query": question})
- # pprint.pprint(result)
- # docs = docsearch.similarity_search(question, k=8)
-
- for i in result['source_documents']:
- print(i.page_content)
# some formatting for the frontend
result['answer'] = result['result']
@@ -154,7 +148,6 @@ def api_answer():
result['answer'] = result['answer'].split("SOURCES:")[0]
except:
pass
- del result['source_documents']
# mock result
# result = {