From b6dfb2c856aadbdf676f311a6abddf21e6a0f6c0 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 30 Mar 2023 12:44:25 +0100 Subject: [PATCH] map_reduce --- application/app.py | 40 +++++++++++---------- application/prompts/chat_combine_prompt.txt | 2 +- application/prompts/chat_reduce_prompt.txt | 3 +- application/requirements.txt | 23 +++++++++--- 4 files changed, 42 insertions(+), 26 deletions(-) diff --git a/application/app.py b/application/app.py index 0d0174b..d9e6dee 100644 --- a/application/app.py +++ b/application/app.py @@ -10,7 +10,8 @@ from celery.result import AsyncResult from flask import Flask, request, render_template, send_from_directory, jsonify from langchain import FAISS from langchain import VectorDBQA, HuggingFaceHub, Cohere, OpenAI -from langchain.chains import ChatVectorDBChain +from langchain.chains import LLMChain, ConversationalRetrievalChain +from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT from langchain.chains.question_answering import load_qa_chain from langchain.chat_models import ChatOpenAI from langchain.embeddings import OpenAIEmbeddings, HuggingFaceHubEmbeddings, CohereEmbeddings, \ @@ -188,14 +189,15 @@ def api_answer(): llm = Cohere(model="command-xlarge-nightly", cohere_api_key=api_key) if llm_choice == "openai_chat": - chain = ChatVectorDBChain.from_llm( - llm=llm, - vectorstore=docsearch, - #prompt=p_chat_combine, - qa_prompt=p_chat_combine, - top_k_docs_for_context=3, - return_source_documents=False) - result = chain({"question": question, "chat_history": []}) + question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT) + doc_chain = load_qa_chain(llm, chain_type="map_reduce", combine_prompt=p_chat_combine) + chain = ConversationalRetrievalChain( + retriever=docsearch.as_retriever(k=2), + question_generator=question_generator, + combine_docs_chain=doc_chain, + ) + chat_history = [] + result = chain({"question": question, "chat_history": chat_history}) else: qa_chain = 
load_qa_chain(llm=llm, chain_type="map_reduce", combine_prompt=c_prompt, question_prompt=q_prompt) @@ -289,16 +291,16 @@ def combined_json(): # get json from https://d3dg1063dc54p9.cloudfront.net/combined.json data = [{ - "name": 'default', - "language": 'default', - "version": '', - "description": 'default', - "fullName": 'default', - "date": 'default', - "docLink": 'default', - "model": embeddings_choice, - "location": "local" - }] + "name": 'default', + "language": 'default', + "version": '', + "description": 'default', + "fullName": 'default', + "date": 'default', + "docLink": 'default', + "model": embeddings_choice, + "location": "local" + }] # structure: name, language, version, description, fullName, date, docLink # append data from vectors_collection for index in vectors_collection.find({'user': user}): diff --git a/application/prompts/chat_combine_prompt.txt b/application/prompts/chat_combine_prompt.txt index 82502ee..981b6e6 100644 --- a/application/prompts/chat_combine_prompt.txt +++ b/application/prompts/chat_combine_prompt.txt @@ -1,4 +1,4 @@ You are a DocsGPT, friendly and helpful AI assistant by Arc53 that provides help with documents. You give thorough answers with code examples if possible. Use the following pieces of context to help answer the users question. If its not relevant to the question, provide friendly responses. ---------------- -{context} \ No newline at end of file +{summaries} \ No newline at end of file diff --git a/application/prompts/chat_reduce_prompt.txt b/application/prompts/chat_reduce_prompt.txt index 82502ee..a5842d8 100644 --- a/application/prompts/chat_reduce_prompt.txt +++ b/application/prompts/chat_reduce_prompt.txt @@ -1,4 +1,3 @@ -You are a DocsGPT, friendly and helpful AI assistant by Arc53 that provides help with documents. You give thorough answers with code examples if possible. -Use the following pieces of context to help answer the users question. If its not relevant to the question, provide friendly responses. 
+Use the following pieces of context to help answer the user's question. If it's not relevant to the question, respond with "-" ---------------- {context} \ No newline at end of file diff --git a/application/requirements.txt b/application/requirements.txt index 5203b4d..1d904d7 100644 --- a/application/requirements.txt +++ b/application/requirements.txt @@ -7,9 +7,10 @@ amqp==5.1.1 async-timeout==4.0.2 attrs==22.2.0 billiard==3.6.4.0 +blinker==1.5 blobfile==2.0.1 -boto3==1.26.84 -botocore==1.29.84 +boto3==1.26.102 +botocore==1.29.102 cffi==1.15.1 charset-normalizer==3.1.0 click==8.1.3 @@ -28,6 +29,7 @@ faiss-cpu==1.7.3 filelock==3.9.0 Flask==2.2.3 frozenlist==1.3.3 +geojson==2.5.0 greenlet==2.0.2 hub==3.0.1 huggingface-hub==0.12.1 @@ -38,14 +40,16 @@ Jinja2==3.1.2 jmespath==1.0.1 joblib==1.2.0 kombu==5.2.4 -langchain==0.0.118 +langchain==0.0.126 lxml==4.9.2 MarkupSafe==2.1.2 marshmallow==3.19.0 marshmallow-enum==1.5.1 +mpmath==1.3.0 multidict==6.0.4 multiprocess==0.70.14 mypy-extensions==1.0.0 +networkx==3.0 nltk==3.8.1 numcodecs==0.11.0 numpy==1.24.2 @@ -64,7 +68,9 @@ pycryptodomex==3.17 pydantic==1.10.5 PyJWT==2.6.0 pymongo==4.3.3 +pyowm==3.3.0 PyPDF2==3.0.1 +PySocks==1.7.1 python-dateutil==2.8.2 python-dotenv==1.0.0 python-jose==3.3.0 @@ -76,13 +82,22 @@ requests==2.28.2 retry==0.9.2 rsa==4.9 s3transfer==0.6.0 +scikit-learn==1.2.2 +scipy==1.10.1 +sentence-transformers==2.2.2 +sentencepiece==0.1.97 six==1.16.0 SQLAlchemy==1.4.46 +sympy==1.11.1 tenacity==8.2.2 +threadpoolctl==3.1.0 tiktoken==0.3.0 tokenizers==0.13.2 +torch==2.0.0 +torchvision==0.15.1 tqdm==4.65.0 -transformers==4.26.1 +transformers==4.27.2 +typer==0.7.0 typing-inspect==0.8.0 typing_extensions==4.5.0 urllib3==1.26.14