diff --git a/.gitignore b/.gitignore
index 0003c21..5af7552 100644
--- a/.gitignore
+++ b/.gitignore
@@ -108,7 +108,7 @@ venv/
 ENV/
 env.bak/
 venv.bak/
-
+.flaskenv
 # Spyder project settings
 .spyderproject
 .spyproject
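
Ignoring `.flaskenv` keeps each developer's local Flask settings out of version control. For illustration only — the file itself is not part of this diff — a `.flaskenv` would typically hold the same variables the Dockerfile below sets:

```
# .flaskenv — hypothetical local config; values mirror the ENV lines in the Dockerfile
FLASK_APP=app.py
FLASK_ENV=development
```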

diff --git a/README.md b/README.md
index ddc15e2..2fd458c 100644
--- a/README.md
+++ b/README.md
@@ -13,14 +13,18 @@ Say goodbye to time-consuming manual searches, and let DocsGPT
 
-
+
 ![example1](https://img.shields.io/github/stars/arc53/docsgpt?style=social) ![example2](https://img.shields.io/github/forks/arc53/docsgpt?style=social) ![example3](https://img.shields.io/github/license/arc53/docsgpt) ![example3](https://img.shields.io/discord/1070046503302877216)
-
+
+![Group 9](https://user-images.githubusercontent.com/17906039/220427472-2644cff4-7666-46a5-819f-fc4a521f63c7.png)
+
+
+
 ## Roadmap
 You can find our [Roadmap](https://github.com/orgs/arc53/projects/2) here, please don't hesitate contributing or creating issues, it helps us make DocsGPT better!
diff --git a/application/Dockerfile b/application/Dockerfile
index a8e778b..a37a3be 100644
--- a/application/Dockerfile
+++ b/application/Dockerfile
@@ -1,10 +1,19 @@
-FROM python:3.9
+FROM python:3.11-slim-bullseye as builder
+# Tiktoken requires Rust toolchain, so build it in a separate stage
+RUN apt-get update && apt-get install -y gcc curl
+RUN curl https://sh.rustup.rs -sSf | sh -s -- -y && apt-get install --reinstall libc6-dev -y
+ENV PATH="/root/.cargo/bin:${PATH}"
+RUN pip install --upgrade pip && pip install tiktoken==0.1.2
+
+FROM python:3.11-slim-bullseye
+
+# Copy pre-built packages from builder stage
+COPY --from=builder /usr/local/lib/python3.11/site-packages/ /usr/local/lib/python3.11/site-packages/
 
 WORKDIR /app
 COPY . /app
-RUN pip install --no-cache-dir -r requirements.txt
 ENV FLASK_APP=app.py
 ENV FLASK_ENV=development
+RUN pip install -r requirements.txt
 
 EXPOSE 5000
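
The Dockerfile now compiles `tiktoken` (which needs a Rust toolchain) in a throwaway builder stage and copies the finished `site-packages` into a clean `python:3.11-slim-bullseye` image, so the shipped image carries no compilers. A hedged sanity check, run inside the final image — the version pin comes from the Dockerfile above, while the encoding name and sample text are assumptions:

```python
# Confirms the wheel built in the builder stage was copied into the final image.
from importlib.metadata import version
import tiktoken

print(version("tiktoken"))               # expect 0.1.2, per the pin above
enc = tiktoken.get_encoding("gpt2")      # assumes the gpt2 encoding is available
print(len(enc.encode("hello DocsGPT")))  # token count for a sample string
```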
diff --git a/application/app.py b/application/app.py
index b6360ae..a509ed6 100644
--- a/application/app.py
+++ b/application/app.py
@@ -9,7 +9,7 @@ from langchain import OpenAI, VectorDBQA, HuggingFaceHub, Cohere
 from langchain.chains.question_answering import load_qa_chain
 from langchain.embeddings import OpenAIEmbeddings, HuggingFaceHubEmbeddings, CohereEmbeddings, HuggingFaceInstructEmbeddings
 from langchain.prompts import PromptTemplate
-
+from error import bad_request
 # os.environ["LANGCHAIN_HANDLER"] = "langchain"
 
 if os.getenv("LLM_NAME") is not None:
@@ -74,6 +74,7 @@ def api_answer():
     data = request.get_json()
     question = data["question"]
     history = data["history"]
+    print('-'*5)
     if not api_key_set:
         api_key = data["api_key"]
     else:
@@ -83,62 +84,68 @@ def api_answer():
     else:
         embeddings_key = os.getenv("EMBEDDINGS_KEY")
 
+    # use try and except to check for exception
+    try:
-    # check if the vectorstore is set
-    if "active_docs" in data:
-        vectorstore = "vectors/" + data["active_docs"]
-        if data['active_docs'] == "default":
+        # check if the vectorstore is set
+        if "active_docs" in data:
+            vectorstore = "vectors/" + data["active_docs"]
+            if data['active_docs'] == "default":
+                vectorstore = ""
+        else:
+            vectorstore = ""
-    else:
-        vectorstore = ""
-
-    # loading the index and the store and the prompt template
-    # Note if you have used other embeddings than OpenAI, you need to change the embeddings
-    if embeddings_choice == "openai_text-embedding-ada-002":
-        docsearch = FAISS.load_local(vectorstore, OpenAIEmbeddings(openai_api_key=embeddings_key))
-    elif embeddings_choice == "huggingface_sentence-transformers/all-mpnet-base-v2":
-        docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings())
-    elif embeddings_choice == "huggingface_hkunlp/instructor-large":
-        docsearch = FAISS.load_local(vectorstore, HuggingFaceInstructEmbeddings())
-    elif embeddings_choice == "cohere_medium":
-        docsearch = FAISS.load_local(vectorstore, CohereEmbeddings(cohere_api_key=embeddings_key))
-
-    # create a prompt template
-    if history:
-        history = json.loads(history)
-        template_temp = template_hist.replace("{historyquestion}", history[0]).replace("{historyanswer}", history[1])
-        c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template_temp, template_format="jinja2")
-    else:
-        c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template, template_format="jinja2")
-
-    if llm_choice == "openai":
-        llm = OpenAI(openai_api_key=api_key, temperature=0)
-    elif llm_choice == "manifest":
-        llm = ManifestWrapper(client=manifest, llm_kwargs={"temperature": 0.001, "max_tokens": 2048})
-    elif llm_choice == "huggingface":
-        llm = HuggingFaceHub(repo_id="bigscience/bloom", huggingfacehub_api_token=api_key)
-    elif llm_choice == "cohere":
-        llm = Cohere(model="command-xlarge-nightly", cohere_api_key=api_key)
-
-    qa_chain = load_qa_chain(llm=llm, chain_type="map_reduce",
-                             combine_prompt=c_prompt)
-
-    chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=4)
-
-    # fetch the answer
-    result = chain({"query": question})
-    print(result)
-
-    # some formatting for the frontend
-    result['answer'] = result['result']
-    result['answer'] = result['answer'].replace("\\n", "<br>")
-    result['answer'] = result['answer'].replace("SOURCES:", "")
-    # mock result
-    # result = {
-    #     "answer": "The answer is 42",
-    #     "sources": ["https://en.wikipedia.org/wiki/42_(number)", "https://en.wikipedia.org/wiki/42_(number)"]
-    # }
-    return result
+
+        # loading the index and the store and the prompt template
+        # Note if you have used other embeddings than OpenAI, you need to change the embeddings
+        if embeddings_choice == "openai_text-embedding-ada-002":
+            docsearch = FAISS.load_local(vectorstore, OpenAIEmbeddings(openai_api_key=embeddings_key))
+        elif embeddings_choice == "huggingface_sentence-transformers/all-mpnet-base-v2":
+            docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings())
+        elif embeddings_choice == "huggingface_hkunlp/instructor-large":
+            docsearch = FAISS.load_local(vectorstore, HuggingFaceInstructEmbeddings())
+        elif embeddings_choice == "cohere_medium":
+            docsearch = FAISS.load_local(vectorstore, CohereEmbeddings(cohere_api_key=embeddings_key))
+
+        # create a prompt template
+        if history:
+            history = json.loads(history)
+            template_temp = template_hist.replace("{historyquestion}", history[0]).replace("{historyanswer}", history[1])
+            c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template_temp, template_format="jinja2")
+        else:
+            c_prompt = PromptTemplate(input_variables=["summaries", "question"], template=template, template_format="jinja2")
+
+        if llm_choice == "openai":
+            llm = OpenAI(openai_api_key=api_key, temperature=0)
+        elif llm_choice == "manifest":
+            llm = ManifestWrapper(client=manifest, llm_kwargs={"temperature": 0.001, "max_tokens": 2048})
+        elif llm_choice == "huggingface":
+            llm = HuggingFaceHub(repo_id="bigscience/bloom", huggingfacehub_api_token=api_key)
+        elif llm_choice == "cohere":
+            llm = Cohere(model="command-xlarge-nightly", cohere_api_key=api_key)
+
+        qa_chain = load_qa_chain(llm=llm, chain_type="map_reduce",
+                                 combine_prompt=c_prompt)
+
+        chain = VectorDBQA(combine_documents_chain=qa_chain, vectorstore=docsearch, k=4)
+
+
+        # fetch the answer
+        result = chain({"query": question})
+        print(result)
+
+        # some formatting for the frontend
+        result['answer'] = result['result']
+        result['answer'] = result['answer'].replace("\\n", "<br>")
+        result['answer'] = result['answer'].replace("SOURCES:", "")
+        # mock result
+        # result = {
+        #     "answer": "The answer is 42",
+        #     "sources": ["https://en.wikipedia.org/wiki/42_(number)", "https://en.wikipedia.org/wiki/42_(number)"]
+        # }
+        return result
+    except Exception as e:
+        print(str(e))
+        return bad_request(500,str(e))
 
 
 @app.route("/api/docs_check", methods=["POST"])
diff --git a/application/error.py b/application/error.py
new file mode 100644
index 0000000..cab5ea3
--- /dev/null
+++ b/application/error.py
@@ -0,0 +1,13 @@
+from flask import jsonify
+from werkzeug.http import HTTP_STATUS_CODES
+
+def response_error(code_status,message=None):
+    payload = {'error':HTTP_STATUS_CODES.get(code_status,"something went wrong")}
+    if message:
+        payload['message'] = message
+    response = jsonify(payload)
+    response.status_code = code_status
+    return response
+
+def bad_request(status_code=400,message=''):
+    return response_error(code_status=status_code,message=message)
\ No newline at end of file
diff --git a/application/requirements.txt b/application/requirements.txt
index 12c9e65..08c931a 100644
--- a/application/requirements.txt
+++ b/application/requirements.txt
@@ -64,6 +64,7 @@ transformers==4.26.0
 typer==0.7.0
 typing-inspect==0.8.0
 typing_extensions==4.4.0
+unstructured==0.4.8
 urllib3==1.26.14
 Werkzeug==2.2.3
 XlsxWriter==3.0.8
diff --git a/application/static/src/chat.js b/application/static/src/chat.js
index 553d7f3..3c997ad 100644
--- a/application/static/src/chat.js
+++ b/application/static/src/chat.js
@@ -1,55 +1,73 @@
-var el = document.getElementById('message-form');
-if (el) {
-    el.addEventListener("submit", function (event) {
-        console.log("submitting")
-        event.preventDefault()
-        var message = document.getElementById("message-input").value;
-        msg_html = '<div class="...">'
-        msg_html += message
-        msg_html += '</div>'
-        document.getElementById("messages").innerHTML += msg_html;
-        let chatWindow = document.getElementById("messages-container");
-        chatWindow.scrollTop = chatWindow.scrollHeight;
-        document.getElementById("message-input").value = "";
-        document.getElementById("button-submit").innerHTML = ' Thinking...';
-        document.getElementById("button-submit").disabled = true;
-        if (localStorage.getItem('activeDocs') == null) {
-            localStorage.setItem('activeDocs', 'default')
-        }
-
-        fetch('/api/answer', {
-            method: 'POST',
-            headers: {
-                'Content-Type': 'application/json',
-            },
-
-            body: JSON.stringify({question: message,
-                api_key: localStorage.getItem('apiKey'),
-                embeddings_key: localStorage.getItem('apiKey'),
-                history: localStorage.getItem('chatHistory'),
-                active_docs: localStorage.getItem('activeDocs')}),
+var form = document.getElementById('message-form');
+var errorModal = document.getElementById('error-alert')
+document.getElementById('close').addEventListener('click',()=>{
+    errorModal.classList.toggle('hidden')
+})
+
+
+function submitForm(event){
+    event.preventDefault()
+    var message = document.getElementById("message-input").value;
+    console.log(message.length)
+    if(message.length === 0){
+        return
+    }
+    msg_html = '<div class="...">'
+    msg_html += message
+    msg_html += '</div>'
+    document.getElementById("messages").innerHTML += msg_html;
+    let chatWindow = document.getElementById("messages-container");
+    chatWindow.scrollTop = chatWindow.scrollHeight;
+    document.getElementById("message-input").value = "";
+    document.getElementById("button-submit").innerHTML = ' Thinking...';
+    document.getElementById("button-submit").disabled = true;
+    if (localStorage.getItem('activeDocs') == null) {
+        localStorage.setItem('activeDocs', 'default')
+    }
+
+
+    fetch('/api/answer', {
+        method: 'POST',
+        headers: {
+            'Content-Type': 'application/json',
+        },
+
+        body: JSON.stringify({question: message,
+            api_key: localStorage.getItem('apiKey'),
+            embeddings_key: localStorage.getItem('apiKey'),
+            history: localStorage.getItem('chatHistory'),
+            active_docs: localStorage.getItem('activeDocs')}),
+    }).then((response)=> response.json())
+        .then(data => {
+            console.log('Success:', data);
+            if(data.error){
+                document.getElementById('text-error').textContent = `Error : ${JSON.stringify(data.message)}`
+                errorModal.classList.toggle('hidden')
+            }
+            if(data.answer){
+                msg_html = '<div class="...">'
+                msg_html += data.answer
+                msg_html += '</div>'
+                document.getElementById("messages").innerHTML += msg_html;
+                let chatWindow = document.getElementById("messages-container");
+                chatWindow.scrollTop = chatWindow.scrollHeight;
+            }
+            document.getElementById("button-submit").innerHTML = 'Send';
+            document.getElementById("button-submit").disabled = false;
+            let chatHistory = [message, data.answer || ''];
+            localStorage.setItem('chatHistory', JSON.stringify(chatHistory));
+
+
+
+        })
-        .then(response => response.json())
-        .then(data => {
-            console.log('Success:', data);
-            msg_html = '<div class="...">'
-            msg_html += data.answer
-            msg_html += '</div>'
-            document.getElementById("messages").innerHTML += msg_html;
-            let chatWindow = document.getElementById("messages-container");
-            chatWindow.scrollTop = chatWindow.scrollHeight;
-            document.getElementById("button-submit").innerHTML = 'Send';
-            document.getElementById("button-submit").disabled = false;
-            let chatHistory = [message, data.answer];
-            localStorage.setItem('chatHistory', JSON.stringify(chatHistory));
-        })
-        .catch((error) => {
-            console.error('Error:', error);
-            console.log(error);
-            document.getElementById("button-submit").innerHTML = 'Send';
-            document.getElementById("button-submit").disabled = false;
-        });
-
-
-    });
-}
\ No newline at end of file
+        .catch((error) => {
+            console.error('Error:', error);
+            // console.log(error);
+            // document.getElementById("button-submit").innerHTML = 'Send';
+            // document.getElementById("button-submit").disabled = false;
+
+        });
+}
+
+window.addEventListener('submit',submitForm)
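
The rewritten `chat.js` branches on `data.error` versus `data.answer`, which matches the two JSON shapes `/api/answer` can now return. A hedged client-side sketch of that contract — the host, keys, and values are placeholders, and a running server is assumed:

```python
import requests  # assumes the Flask app from app.py is serving on port 5000

payload = {
    "question": "What does DocsGPT do?",
    "api_key": "sk-...",             # placeholder credential
    "embeddings_key": "sk-...",      # placeholder credential
    "history": None,                 # chat.js sends localStorage's chatHistory here
    "active_docs": "default",
}
data = requests.post("http://localhost:5000/api/answer", json=payload).json()

if "error" in data:                  # bad_request() path: chat.js opens the error modal
    print("Error:", data.get("message"))
else:                                # success path: chat.js renders the answer bubble
    print("Answer:", data["answer"])
```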
diff --git a/application/templates/index.html b/application/templates/index.html
index e6dad78..4f8e471 100644
--- a/application/templates/index.html
+++ b/application/templates/index.html
@@ -16,7 +16,7 @@
@@ -28,6 +28,17 @@ {% endif %}
@@ -59,6 +70,8 @@ This will return a new DataFrame with all the columns from both tables, and only
@@ -77,11 +90,16 @@ This will return a new DataFrame with all the columns from both tables, and only
[markup elided: the HTML added and removed in these four hunks did not survive extraction. Judging from the ids chat.js queries above, the additions include a hidden error modal (id="error-alert") with its text node (id="text-error") and close button (id="close"); the {% if not api_key_set %} block is also touched.]