from transformers import GPT2TokenizerFast

import openai

tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")

MAX_TOKENS_LIMIT = 2048
ANSWERS_INSTRUCTION = "Please answer the question according to the above context.\n"
CONTEXT_TEMPLATE = "===\nContext: {context}\n===\n"


def extract_instruction(instruction):
    """
    Extract the `instruction` parameter and format it properly.
    If it does not exist, return an empty string.
    """
    if instruction is None:
        return ""

    return f"{instruction.strip()}\n\n"


def semantic_search(
    search_model, query_for_search, file_id=None, max_documents=None, examples=None
):
    """
    :param examples: A list of {"text": ...} or {"text": ..., "label": ...}.
    :return: A list of semantic search result dicts for documents, sorted by "score":
        [
            {
                "document": ...,
                "object": "search_result",
                "score": ...,
                "text": ...,
            },
            ...
        ]
    """
    assert (examples is None) ^ (file_id is None)  # xor

    if file_id is not None:
        # This is where you'd do an elastic search call. Since there isn't an
        # example of this we can query, we'll raise an error.
        # The return value from this would be a list of examples.
        raise NotImplementedError()

    # This isn't quite accurate since Search is also being deprecated. See our
    # search guide for more information.
    search_result = openai.Search.create(
        model=search_model,
        documents=[x["text"] for x in examples],
        query=query_for_search,
    )

    info_dict = {d["document"]: d for d in search_result["data"]}
    sorted_doc_ids = sorted(
        info_dict.keys(), key=lambda x: info_dict[x]["score"], reverse=True
    )
    if max_documents:
        sorted_doc_ids = sorted_doc_ids[:max_documents]
    return [info_dict[i] for i in sorted_doc_ids]
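# The Search endpoint used by `semantic_search` above is itself deprecated.
# Below is a minimal sketch of an embeddings-based replacement that mirrors
# the return shape of `semantic_search`. The model name
# "text-embedding-ada-002" and cosine similarity as the score are assumptions,
# not part of the original example.
import numpy as np


def embeddings_search_sketch(query_for_search, examples, max_documents=None):
    texts = [x["text"] for x in examples]
    # Embed the query and all documents in a single call, then restore input
    # order via the "index" field on each result.
    resp = openai.Embedding.create(
        model="text-embedding-ada-002", input=[query_for_search] + texts
    )
    data = sorted(resp["data"], key=lambda d: d["index"])
    vectors = [np.array(d["embedding"]) for d in data]
    query_vec, doc_vecs = vectors[0], vectors[1:]
    # Score each document by cosine similarity with the query.
    scores = [
        float(np.dot(query_vec, v) / (np.linalg.norm(query_vec) * np.linalg.norm(v)))
        for v in doc_vecs
    ]
    results = [
        {"document": i, "object": "search_result", "score": s, "text": t}
        for i, (s, t) in enumerate(zip(scores, texts))
    ]
    results.sort(key=lambda d: d["score"], reverse=True)
    return results[:max_documents] if max_documents else results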
def select_by_length(
    sorted_doc_infos,
    max_token_len,
    lambda_fn=None,
):
    """
    Given a list of (document ID, document content) pairs, select as many
    documents as possible as long as the total length does not go above
    `max_token_len`.

    :param sorted_doc_infos: A list of semantic search result dicts for
        documents, sorted by "score".
    :param max_token_len: The maximum token length for selected documents.
    :param lambda_fn: A function that takes a search result dict and outputs a
        formatted example for context stuffing.
    :return: A tuple of (
        a concatenation of the selected documents used as context,
        a list of the selected document infos
    )
    """
    if not sorted_doc_infos:
        return "", []

    selected_indices = []
    total_doc_tokens = 0
    doc_dict = {}
    for i, doc_info in enumerate(sorted_doc_infos):
        doc = lambda_fn(doc_info) if lambda_fn else doc_info["text"]
        n_doc_tokens = len(tokenizer.encode(doc))
        if total_doc_tokens + n_doc_tokens < max_token_len:
            total_doc_tokens += n_doc_tokens
            selected_indices.append(i)
            doc_dict[i] = doc

    # The top-ranked documents should go at the end.
    selected_indices = selected_indices[::-1]

    context = "".join([doc_dict[i] for i in selected_indices])
    selected_doc_infos = [sorted_doc_infos[i] for i in selected_indices]
    return context, selected_doc_infos
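# A minimal usage sketch of `select_by_length` (not part of the original
# example; the documents and token budget below are hypothetical). It shows
# the reversal: when several documents fit, the highest-scoring one is placed
# last, so it sits closest to the question in the final prompt.
def _select_by_length_demo():
    docs = [
        {"document": 0, "score": 0.9, "text": "Paris is the capital of France."},
        {"document": 1, "score": 0.5, "text": "Berlin is the capital of Germany."},
    ]
    context, selected = select_by_length(
        docs, max_token_len=64, lambda_fn=lambda x: x["text"].strip() + " "
    )
    # Both documents fit in a 64-token budget, and the higher-scoring doc 0
    # ends up last in the concatenated context.
    assert context == (
        "Berlin is the capital of Germany. Paris is the capital of France. "
    )
    return context, selected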
def answers(
    examples,
    question,
    model,
    examples_context,
    file_id=None,
    documents=None,
    logit_bias=None,
    max_rerank=200,
    max_tokens=16,
    alternative_question=None,
    search_model="ada",
    temperature=0.0,
    logprobs=0,
    stop=None,
    n=1,
):
    """
    Given a prompt, a question, a list of (question, answer) pairs as examples,
    and a list of documents for context, this tries to include all the QA
    examples and the top relevant context documents.

    The constructed prompt for the final completion call:
    ```
    Please answer the question according to the above context.

    ===
    Context: {{ the context for example QA pairs. }}
    ===
    Q: example 1 question
    A: example 1 answer
    ---
    Q: example 2 question
    A: example 2 answer
    ===
    Context: {{ a list of relevant documents sorted via search(question, documents) }}
    ===
    Q: question
    A:
    ```

    The returned object has a structure like:
    {
        "answers": [
            "Beijing",
            "Beijing, China"
        ],
        "completion": "xxx-xxx",
        "object": "answer",
        "selected_documents": [
            {
                "document": ...,  # document index, same as in search results.
                "object": "search_result",
                "text": ...,
            },
            ...
        ],
    }
    """
    examples = examples if examples else []

    example_prompts = [f"Q: {x}\nA: {y}" for x, y in examples]
    prompt = f"Q: {question}\nA:"

    # Append all the QA examples to the prompt.
    if examples_context:
        examples_context = CONTEXT_TEMPLATE.format(context=examples_context)
    instruction = (
        ANSWERS_INSTRUCTION + examples_context + "\n---\n".join(example_prompts) + "\n"
    )

    logit_bias = logit_bias if logit_bias is not None else {}

    if file_id is None and documents is None:
        raise Exception("Please submit at least one of `documents` or `file`.")
    if file_id is not None and documents is not None:
        raise Exception("Please submit only one of `documents` or `file`.")

    instruction = extract_instruction(instruction)

    n_instruction_tokens = len(tokenizer.encode(instruction))
    n_prompt_tokens = len(tokenizer.encode(prompt))
    n_query_tokens = len(tokenizer.encode(question))
    n_context_tokens = len(tokenizer.encode(CONTEXT_TEMPLATE.format(context="")))

    if documents is not None:
        documents = [doc.strip() + " " for doc in documents]
        n_docs_tokens = [len(tokenizer.encode(doc)) for doc in documents]

    # Apart from all the required content, how many tokens are left for
    # context stuffing.
    leftover_token_len = MAX_TOKENS_LIMIT - (
        n_instruction_tokens + n_context_tokens + n_prompt_tokens + max_tokens
    )

    sorted_doc_infos = []

    question_for_search = (
        alternative_question if alternative_question is not None else question
    )
    if file_id is not None:
        sorted_doc_infos = semantic_search(
            search_model,
            question_for_search,
            file_id=file_id,
            max_documents=max_rerank,
        )

    elif len(documents) == 0:
        # If no context document is provided, do nothing.
        pass

    elif min(n_docs_tokens) >= leftover_token_len:
        # If there is no room for adding any context doc.
        pass

    elif (max_rerank is None or max_rerank >= len(documents)) and sum(
        n_docs_tokens
    ) < leftover_token_len:
        # If the total length of the docs is short enough, add them all.
        selected_indices = list(range(len(documents)))

        sorted_doc_infos = [
            {"document": i, "text": documents[i]} for i in selected_indices
        ]

    elif n_query_tokens + max(n_docs_tokens) >= MAX_TOKENS_LIMIT:
        # If the prompt and the longest document together go above the limit.
        total_tokens = n_query_tokens + max(n_docs_tokens)
        raise Exception(
            f"The longest document and prompt pair together contains {total_tokens} "
            f"tokens, above the limit {MAX_TOKENS_LIMIT} for semantic search. Please consider "
            f"shortening the prompt or the longest document."
        )

    else:
        # If we can add some context documents but not all of them, query the
        # search endpoint to rank the docs by score.
        sorted_doc_infos = semantic_search(
            search_model,
            question_for_search,
            examples=[{"text": doc} for doc in documents],
            max_documents=max_rerank,
        )

    # Select documents w.r.t. the context length limitation.
    context, sorted_doc_infos = select_by_length(
        sorted_doc_infos,
        leftover_token_len,
        lambda_fn=lambda x: x["text"].strip() + " ",
    )

    # Add the instruction before the context and the prompt after the context.
    if context:
        context = CONTEXT_TEMPLATE.format(context=context.strip())
    full_prompt = instruction + context + prompt

    completion_result = openai.Completion.create(
        engine=model,
        prompt=full_prompt,
        logit_bias=logit_bias,
        temperature=temperature,
        n=n,
        max_tokens=max_tokens,
        stop=stop,
        logprobs=logprobs,
    )

    completion_result["selected_documents"] = sorted_doc_infos

    result = dict(
        object="answer",
        selected_documents=completion_result.pop("selected_documents"),
        completion=completion_result["id"],
    )

    result["answers"] = [
        item["text"].replace("A:", "").split("Q:")[0].strip()
        for item in completion_result["choices"]
    ]

    return result


print(
    answers(
        examples=[
            ["What is the capital of Washington", "Olympia"],
            ["What is the capital of Oregon", "Salem"],
        ],
        question="What is the capital of China?",
        examples_context="I am a bot that names country capitals",
        documents=["I am a bot that names country capitals"],
        model="davinci",
        search_model="ada",
        alternative_question="different test",
        max_tokens=16,
        stop=["\n\n"],
    )
)
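# For the demo call above, the constructed `full_prompt` sent to the
# Completion endpoint looks roughly like this (reconstructed from the
# templates in this file, not captured from a live run):
#
#     Please answer the question according to the above context.
#     ===
#     Context: I am a bot that names country capitals
#     ===
#     Q: What is the capital of Washington
#     A: Olympia
#     ---
#     Q: What is the capital of Oregon
#     A: Salem
#
#     ===
#     Context: I am a bot that names country capitals
#     ===
#     Q: What is the capital of China?
#     A: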