mirror of
https://github.com/openai/openai-cookbook
synced 2024-11-08 01:10:29 +00:00
Merge pull request #315 from openai/isa/file-q-and-a-updates
Use ChatGPT API in File Q and A demo + minor fixes
This commit is contained in:
commit
a89c8a8742
@ -1,6 +1,11 @@
|
||||
// A function that takes a file name and a string and returns true if the file name is contained in the string
|
||||
// after removing punctuation and whitespace from both
|
||||
export const isFileNameInString = (fileName: string, str: string) => {
|
||||
// Return early if the input string is null, undefined, or empty
|
||||
if (!str) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Convert both to lowercase and remove punctuation and whitespace
|
||||
const normalizedFileName = fileName
|
||||
.toLowerCase()
|
||||
|
@ -42,7 +42,11 @@ def get_answer_from_files(question, session_id, pinecone_index):
|
||||
break
|
||||
files_string += file_string
|
||||
|
||||
prompt = f"Given a question, try to answer it using the content of the file extracts below, and if you cannot answer, or find " \
|
||||
# Note: this is not the proper way to use the ChatGPT conversational format, but it works for now
|
||||
messages = [
|
||||
{
|
||||
"role": "system",
|
||||
"content": f"Given a question, try to answer it using the content of the file extracts below, and if you cannot answer, or find " \
|
||||
f"a relevant file, just output \"I couldn't find the answer to that question in your files.\".\n\n" \
|
||||
f"If the answer is not contained in the files or if there are no file extracts, respond with \"I couldn't find the answer " \
|
||||
f"to that question in your files.\" If the question is not actually a question, respond with \"That's not a valid question.\"\n\n" \
|
||||
@ -54,20 +58,19 @@ def get_answer_from_files(question, session_id, pinecone_index):
|
||||
f"Question: {question}\n\n" \
|
||||
f"Files:\n{files_string}\n" \
|
||||
f"Answer:"
|
||||
},
|
||||
]
|
||||
|
||||
logging.info(f"[get_answer_from_files] prompt: {prompt}")
|
||||
|
||||
response = openai.Completion.create(
|
||||
prompt=prompt,
|
||||
temperature=0,
|
||||
response = openai.ChatCompletion.create(
|
||||
messages=messages,
|
||||
model=GENERATIVE_MODEL,
|
||||
max_tokens=1000,
|
||||
top_p=1,
|
||||
frequency_penalty=0,
|
||||
presence_penalty=0,
|
||||
engine=GENERATIVE_MODEL,
|
||||
temperature=0,
|
||||
)
|
||||
|
||||
answer = response.choices[0].text.strip()
|
||||
choices = response["choices"] # type: ignore
|
||||
answer = choices[0].message.content.strip()
|
||||
|
||||
logging.info(f"[get_answer_from_files] answer: {answer}")
|
||||
|
||||
return jsonify({"answer": answer})
|
||||
|
@ -8,7 +8,7 @@ SERVER_PORT: "8080"
|
||||
|
||||
# ---- OPENAI CONFIG -----
|
||||
EMBEDDINGS_MODEL: "text-embedding-ada-002"
|
||||
GENERATIVE_MODEL: "text-davinci-003"
|
||||
GENERATIVE_MODEL: "gpt-3.5-turbo" # use gpt-4 for better results
|
||||
EMBEDDING_DIMENSIONS: 1536
|
||||
TEXT_EMBEDDING_CHUNK_SIZE: 200
|
||||
# This is the minimum cosine similarity score that a file must have with the search query to be considered relevant
|
||||
|
@ -1,11 +1,11 @@
|
||||
Flask-Cors==3.0.10
|
||||
openai==0.13.0
|
||||
pinecone-client==2.0.13
|
||||
PyPDF2==2.10.4
|
||||
numpy==1.23.2
|
||||
scikit-learn==1.1.2
|
||||
docx2txt==0.8
|
||||
Flask-Cors>=3.0.10
|
||||
openai>=0.27.2
|
||||
pinecone-client>=2.0.13
|
||||
PyPDF2>=2.10.4
|
||||
numpy>=1.23.2
|
||||
scikit-learn>=1.1.2
|
||||
docx2txt>=0.8
|
||||
flask>=1.1.4
|
||||
jinja2==3.0.1
|
||||
PyYAML==6.0
|
||||
tiktoken==0.1.2
|
||||
jinja2>=3.0.1
|
||||
PyYAML>=6.0
|
||||
tiktoken>=0.1.2
|
14
apps/file-q-and-a/nextjs/package-lock.json
generated
14
apps/file-q-and-a/nextjs/package-lock.json
generated
@ -28,7 +28,7 @@
|
||||
"mammoth": "^1.5.1",
|
||||
"next": "13.1.2",
|
||||
"node-html-markdown": "^1.3.0",
|
||||
"openai": "^3.1.0",
|
||||
"openai": "^3.2.1",
|
||||
"pdf-parse": "^1.1.1",
|
||||
"react": "18.2.0",
|
||||
"react-dom": "18.2.0",
|
||||
@ -3978,9 +3978,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/openai": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/openai/-/openai-3.1.0.tgz",
|
||||
"integrity": "sha512-v5kKFH5o+8ld+t0arudj833Mgm3GcgBnbyN9946bj6u7bvel4Yg6YFz2A4HLIYDzmMjIo0s6vSG9x73kOwvdCg==",
|
||||
"version": "3.2.1",
|
||||
"resolved": "https://registry.npmjs.org/openai/-/openai-3.2.1.tgz",
|
||||
"integrity": "sha512-762C9BNlJPbjjlWZi4WYK9iM2tAVAv0uUp1UmI34vb0CN5T2mjB/qM6RYBmNKMh/dN9fC+bxqPwWJZUTWW052A==",
|
||||
"dependencies": {
|
||||
"axios": "^0.26.0",
|
||||
"form-data": "^4.0.0"
|
||||
@ -8003,9 +8003,9 @@
|
||||
}
|
||||
},
|
||||
"openai": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/openai/-/openai-3.1.0.tgz",
|
||||
"integrity": "sha512-v5kKFH5o+8ld+t0arudj833Mgm3GcgBnbyN9946bj6u7bvel4Yg6YFz2A4HLIYDzmMjIo0s6vSG9x73kOwvdCg==",
|
||||
"version": "3.2.1",
|
||||
"resolved": "https://registry.npmjs.org/openai/-/openai-3.2.1.tgz",
|
||||
"integrity": "sha512-762C9BNlJPbjjlWZi4WYK9iM2tAVAv0uUp1UmI34vb0CN5T2mjB/qM6RYBmNKMh/dN9fC+bxqPwWJZUTWW052A==",
|
||||
"requires": {
|
||||
"axios": "^0.26.0",
|
||||
"form-data": "^4.0.0"
|
||||
|
@ -29,7 +29,7 @@
|
||||
"mammoth": "^1.5.1",
|
||||
"next": "13.1.2",
|
||||
"node-html-markdown": "^1.3.0",
|
||||
"openai": "^3.1.0",
|
||||
"openai": "^3.2.1",
|
||||
"pdf-parse": "^1.1.1",
|
||||
"react": "18.2.0",
|
||||
"react-dom": "18.2.0",
|
||||
|
@ -74,6 +74,13 @@ function FileQandAArea(props: FileQandAAreaProps) {
|
||||
fileChunks: results,
|
||||
}),
|
||||
});
|
||||
|
||||
if (res.status === 500) {
|
||||
setAnswerError("Internal server error. Please try again later.");
|
||||
setAnswerLoading(false);
|
||||
return;
|
||||
}
|
||||
|
||||
const reader = res.body!.getReader();
|
||||
|
||||
while (true) {
|
||||
|
@ -40,8 +40,6 @@ export default async function handler(
|
||||
.join("\n")
|
||||
.slice(0, MAX_FILES_LENGTH);
|
||||
|
||||
console.log(filesString);
|
||||
|
||||
const prompt =
|
||||
`Given a question, try to answer it using the content of the file extracts below, and if you cannot answer, or find a relevant file, just output \"I couldn't find the answer to that question in your files.\".\n\n` +
|
||||
`If the answer is not contained in the files or if there are no file extracts, respond with \"I couldn't find the answer to that question in your files.\" If the question is not actually a question, respond with \"That's not a valid question.\"\n\n` +
|
||||
@ -53,7 +51,6 @@ export default async function handler(
|
||||
|
||||
const stream = completionStream({
|
||||
prompt,
|
||||
model: "text-davinci-003",
|
||||
});
|
||||
|
||||
// Set the response headers for streaming
|
||||
|
@ -27,6 +27,8 @@ export default async function handler(
|
||||
|
||||
// Create a formidable instance to parse the request as a multipart form
|
||||
const form = new formidable.IncomingForm();
|
||||
form.maxFileSize = 30 * 1024 * 1024; // Set the max file size to 30MB
|
||||
|
||||
try {
|
||||
const { fields, files } = await new Promise<{
|
||||
fields: Fields;
|
||||
|
@ -1,8 +1,9 @@
|
||||
import { IncomingMessage } from "http";
|
||||
import {
|
||||
ChatCompletionRequestMessageRoleEnum,
|
||||
Configuration,
|
||||
CreateChatCompletionResponse,
|
||||
CreateCompletionRequest,
|
||||
CreateCompletionResponse,
|
||||
OpenAIApi,
|
||||
} from "openai";
|
||||
|
||||
@ -30,24 +31,30 @@ type EmbeddingOptions = {
|
||||
export async function completion({
|
||||
prompt,
|
||||
fallback,
|
||||
max_tokens = 800,
|
||||
max_tokens,
|
||||
temperature = 0,
|
||||
model = "text-davinci-003",
|
||||
...otherOptions
|
||||
model = "gpt-3.5-turbo", // use gpt-4 for better results
|
||||
}: CompletionOptions) {
|
||||
try {
|
||||
const result = await openai.createCompletion({
|
||||
prompt,
|
||||
max_tokens,
|
||||
temperature,
|
||||
// Note: this is not the proper way to use the ChatGPT conversational format, but it works for now
|
||||
const messages = [
|
||||
{
|
||||
role: ChatCompletionRequestMessageRoleEnum.System,
|
||||
content: prompt ?? "",
|
||||
},
|
||||
];
|
||||
|
||||
const result = await openai.createChatCompletion({
|
||||
model,
|
||||
...otherOptions,
|
||||
messages,
|
||||
temperature,
|
||||
max_tokens: max_tokens ?? 800,
|
||||
});
|
||||
|
||||
if (!result.data.choices[0].text) {
|
||||
throw new Error("No text returned from the completions endpoint.");
|
||||
if (!result.data.choices[0].message) {
|
||||
throw new Error("No text returned from completions endpoint");
|
||||
}
|
||||
return result.data.choices[0].text;
|
||||
return result.data.choices[0].message.content;
|
||||
} catch (error) {
|
||||
if (fallback) return fallback;
|
||||
else throw error;
|
||||
@ -59,33 +66,65 @@ export async function* completionStream({
|
||||
fallback,
|
||||
max_tokens = 800,
|
||||
temperature = 0,
|
||||
model = "text-davinci-003",
|
||||
model = "gpt-3.5-turbo", // use gpt-4 for better results
|
||||
}: CompletionOptions) {
|
||||
try {
|
||||
const result = await openai.createCompletion(
|
||||
// Note: this is not the proper way to use the ChatGPT conversational format, but it works for now
|
||||
const messages = [
|
||||
{
|
||||
role: ChatCompletionRequestMessageRoleEnum.System,
|
||||
content: prompt ?? "",
|
||||
},
|
||||
];
|
||||
|
||||
const result = await openai.createChatCompletion(
|
||||
{
|
||||
prompt,
|
||||
max_tokens,
|
||||
temperature,
|
||||
model,
|
||||
messages,
|
||||
temperature,
|
||||
max_tokens: max_tokens ?? 800,
|
||||
stream: true,
|
||||
},
|
||||
{ responseType: "stream" }
|
||||
{
|
||||
responseType: "stream",
|
||||
}
|
||||
);
|
||||
|
||||
const stream = result.data as any as IncomingMessage;
|
||||
|
||||
for await (const chunk of stream) {
|
||||
const line = chunk.toString().trim();
|
||||
const message = line.split("data: ")[1];
|
||||
let buffer = "";
|
||||
const textDecoder = new TextDecoder();
|
||||
|
||||
for await (const chunk of stream) {
|
||||
buffer += textDecoder.decode(chunk, { stream: true });
|
||||
const lines = buffer.split("\n");
|
||||
|
||||
// Check if the last line is complete
|
||||
if (buffer.endsWith("\n")) {
|
||||
buffer = "";
|
||||
} else {
|
||||
buffer = lines.pop() || "";
|
||||
}
|
||||
|
||||
for (const line of lines) {
|
||||
const message = line.trim().split("data: ")[1];
|
||||
if (message === "[DONE]") {
|
||||
break;
|
||||
}
|
||||
|
||||
const data = JSON.parse(message) as CreateCompletionResponse;
|
||||
|
||||
yield data.choices[0].text;
|
||||
// Only parse lines that carry a data payload; malformed JSON is caught and logged below
|
||||
if (message) {
|
||||
try {
|
||||
const data = JSON.parse(message) as CreateChatCompletionResponse;
|
||||
// @ts-ignore
|
||||
if (data.choices[0].delta?.content) {
|
||||
// @ts-ignore
|
||||
yield data.choices[0].delta?.content;
|
||||
}
|
||||
} catch (error) {
|
||||
console.error("Error parsing JSON message:", error);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
if (fallback) yield fallback;
|
||||
|
@ -1,6 +1,11 @@
|
||||
// A function that takes a file name and a string and returns true if the file name is contained in the string
|
||||
// after removing punctuation and whitespace from both
|
||||
export const isFileNameInString = (fileName: string, str: string) => {
|
||||
// Return early if the input string is null, undefined, or empty
|
||||
if (!str) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Convert both to lowercase and remove punctuation and whitespace
|
||||
const normalizedFileName = fileName
|
||||
.toLowerCase()
|
||||
|
Loading…
Reference in New Issue
Block a user