From 968849e52b189cb71fbe2be15e0ab05c25e418ad Mon Sep 17 00:00:00 2001 From: Anton Larin Date: Sat, 17 Jun 2023 12:40:28 +0200 Subject: [PATCH 1/7] code readability, formatting, minor version bump --- application/app.py | 13 +++++++++++-- application/worker.py | 2 ++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/application/app.py b/application/app.py index d66d420..9305534 100644 --- a/application/app.py +++ b/application/app.py @@ -4,6 +4,7 @@ import http.client import json import os import traceback +import logging import dotenv import openai @@ -40,6 +41,8 @@ from worker import ingest_worker # os.environ["LANGCHAIN_HANDLER"] = "langchain" +logger = logging.getLogger(__name__) + if settings.LLM_NAME == "manifest": from manifest import Manifest from langchain.llms.manifest import ManifestWrapper @@ -176,7 +179,7 @@ def complete_stream(question, docsearch, chat_history, api_key): messages_combine.append({"role": "user", "content": question}) completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=messages_combine, stream=True, max_tokens=500, temperature=0) - + for line in completion: if "content" in line["choices"][0]["delta"]: # check if the delta contains content @@ -217,6 +220,10 @@ def stream(): ) +def is_azure_configured(): + return settings.OPENAI_API_BASE and settings.OPENAI_API_VERSION and settings.AZURE_DEPLOYMENT_NAME + + @app.route("/api/answer", methods=["POST"]) def api_answer(): data = request.get_json() @@ -244,7 +251,8 @@ def api_answer(): input_variables=["context", "question"], template=template_quest, template_format="jinja2" ) if settings.LLM_NAME == "openai_chat": - if settings.OPENAI_API_BASE and settings.OPENAI_API_VERSION and settings.AZURE_DEPLOYMENT_NAME: # azure + if is_azure_configured(): + logger.debug("in Azure") llm = AzureChatOpenAI( openai_api_key=api_key, openai_api_base=settings.OPENAI_API_BASE, @@ -252,6 +260,7 @@ def api_answer(): deployment_name=settings.AZURE_DEPLOYMENT_NAME, ) else: + 
logger.debug("plain OpenAI") llm = ChatOpenAI(openai_api_key=api_key) # optional parameter: model_name="gpt-4" messages_combine = [SystemMessagePromptTemplate.from_template(chat_combine_template)] if history: diff --git a/application/worker.py b/application/worker.py index d7674ad..2a3ff24 100644 --- a/application/worker.py +++ b/application/worker.py @@ -19,9 +19,11 @@ try: except FileExistsError: pass + def metadata_from_filename(title): return {'title': title} + def generate_random_string(length): return ''.join([string.ascii_letters[i % 52] for i in range(length)]) From 006897f1c0571454351ddffb3da542d3f7ddfe7c Mon Sep 17 00:00:00 2001 From: Anton Larin Date: Sat, 17 Jun 2023 13:20:29 +0200 Subject: [PATCH 2/7] Azure support for streaming output. --- application/.env_sample | 3 ++- application/app.py | 19 +++++++++++++++++-- application/core/settings.py | 3 ++- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/application/.env_sample b/application/.env_sample index 24c6495..a9d5862 100644 --- a/application/.env_sample +++ b/application/.env_sample @@ -8,4 +8,5 @@ API_URL=http://localhost:5001 #For OPENAI on Azure OPENAI_API_BASE= OPENAI_API_VERSION= -AZURE_DEPLOYMENT_NAME= \ No newline at end of file +AZURE_DEPLOYMENT_NAME= +AZURE_EMBEDDINGS_DEPLOYMENT_NAME= \ No newline at end of file diff --git a/application/app.py b/application/app.py index 9305534..b987d4a 100644 --- a/application/app.py +++ b/application/app.py @@ -127,7 +127,12 @@ def get_vectorstore(data): def get_docsearch(vectorstore, embeddings_key): if settings.EMBEDDINGS_NAME == "openai_text-embedding-ada-002": - docsearch = FAISS.load_local(vectorstore, OpenAIEmbeddings(openai_api_key=embeddings_key)) + if is_azure_configured(): + os.environ["OPENAI_API_TYPE"] = "azure" + openai_embeddings = OpenAIEmbeddings(model=settings.AZURE_EMBEDDINGS_DEPLOYMENT_NAME) + else: + openai_embeddings = OpenAIEmbeddings(openai_api_key=embeddings_key) + docsearch = FAISS.load_local(vectorstore, 
openai_embeddings) elif settings.EMBEDDINGS_NAME == "huggingface_sentence-transformers/all-mpnet-base-v2": docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings()) elif settings.EMBEDDINGS_NAME == "huggingface_hkunlp/instructor-large": @@ -152,7 +157,17 @@ def home(): def complete_stream(question, docsearch, chat_history, api_key): openai.api_key = api_key - llm = ChatOpenAI(openai_api_key=api_key) + if is_azure_configured(): + logger.debug("in Azure") + llm = AzureChatOpenAI( + openai_api_key=api_key, + openai_api_base=settings.OPENAI_API_BASE, + openai_api_version=settings.OPENAI_API_VERSION, + deployment_name=settings.AZURE_DEPLOYMENT_NAME, + ) + else: + logger.debug("plain OpenAI") + llm = ChatOpenAI(openai_api_key=api_key) docs = docsearch.similarity_search(question, k=2) # join all page_content together with a newline docs_together = "\n".join([doc.page_content for doc in docs]) diff --git a/application/core/settings.py b/application/core/settings.py index ed621bb..853f152 100644 --- a/application/core/settings.py +++ b/application/core/settings.py @@ -18,7 +18,8 @@ class Settings(BaseSettings): EMBEDDINGS_KEY: str = None # api key for embeddings (if using openai, just copy API_KEY OPENAI_API_BASE: str = None # azure openai api base url OPENAI_API_VERSION: str = None # azure openai api version - AZURE_DEPLOYMENT_NAME: str = None # azure deployment name + AZURE_DEPLOYMENT_NAME: str = None # azure deployment name for answering + AZURE_EMBEDDINGS_DEPLOYMENT_NAME: str = None # azure deployment name for embeddings path = Path(__file__).parent.parent.absolute() From fb10a546d6fe132705ea5afe2750cdaf97a694b0 Mon Sep 17 00:00:00 2001 From: Anton Larin Date: Sat, 17 Jun 2023 13:35:05 +0200 Subject: [PATCH 3/7] running with docker-compose with .sh script for more convenience with Azure. 
--- README.md | 2 +- docker-compose-azure.yaml | 69 ++++++++++++++++++++++++++++++++++++++ docker-compose.yaml | 6 ---- run-with-docker-compose.sh | 9 +++++ 4 files changed, 79 insertions(+), 7 deletions(-) create mode 100644 docker-compose-azure.yaml create mode 100755 run-with-docker-compose.sh diff --git a/README.md b/README.md index 0c0b148..bbc0e55 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ Note: Make sure you have docker installed OPENAI_API_KEY=Yourkey VITE_API_STREAMING=true ``` -3. Run `docker-compose build && docker-compose up` +3. Run `./run-with-docker-compose.sh` 4. Navigate to http://localhost:5173/ To stop just run Ctrl + C diff --git a/docker-compose-azure.yaml b/docker-compose-azure.yaml new file mode 100644 index 0000000..81c67c2 --- /dev/null +++ b/docker-compose-azure.yaml @@ -0,0 +1,69 @@ +version: "3.9" + +services: + frontend: + build: ./frontend + environment: + - VITE_API_HOST=http://localhost:5001 + - VITE_API_STREAMING=$VITE_API_STREAMING + ports: + - "5173:5173" + depends_on: + - backend + + backend: + build: ./application + environment: + - API_KEY=$OPENAI_API_KEY + - EMBEDDINGS_KEY=$OPENAI_API_KEY + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/1 + - MONGO_URI=mongodb://mongo:27017/docsgpt + - OPENAI_API_BASE=$OPENAI_API_BASE + - OPENAI_API_VERSION=$OPENAI_API_VERSION + - AZURE_DEPLOYMENT_NAME=$AZURE_DEPLOYMENT_NAME + - AZURE_EMBEDDINGS_DEPLOYMENT_NAME=$AZURE_EMBEDDINGS_DEPLOYMENT_NAME + ports: + - "5001:5001" + volumes: + - ./application/indexes:/app/indexes + - ./application/inputs:/app/inputs + - ./application/vectors:/app/vectors + depends_on: + - redis + - mongo + + worker: + build: ./application + command: celery -A app.celery worker -l INFO + environment: + - API_KEY=$OPENAI_API_KEY + - EMBEDDINGS_KEY=$OPENAI_API_KEY + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/1 + - MONGO_URI=mongodb://mongo:27017/docsgpt + - 
API_URL=http://backend:5001 + - OPENAI_API_BASE=$OPENAI_API_BASE + - OPENAI_API_VERSION=$OPENAI_API_VERSION + - AZURE_DEPLOYMENT_NAME=$AZURE_DEPLOYMENT_NAME + - AZURE_EMBEDDINGS_DEPLOYMENT_NAME=$AZURE_EMBEDDINGS_DEPLOYMENT_NAME + depends_on: + - redis + - mongo + + redis: + image: redis:6-alpine + ports: + - 6379:6379 + + mongo: + image: mongo:6 + ports: + - 27017:27017 + volumes: + - mongodb_data_container:/data/db + + + +volumes: + mongodb_data_container: \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml index c9557dc..c06b61b 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -19,9 +19,6 @@ services: - CELERY_BROKER_URL=redis://redis:6379/0 - CELERY_RESULT_BACKEND=redis://redis:6379/1 - MONGO_URI=mongodb://mongo:27017/docsgpt - #- OPENAI_API_BASE=$OPENAI_API_BASE - #- OPENAI_API_VERSION=$OPENAI_API_VERSION - #- AZURE_DEPLOYMENT_NAME=$AZURE_DEPLOYMENT_NAME ports: - "5001:5001" volumes: @@ -42,9 +39,6 @@ services: - CELERY_RESULT_BACKEND=redis://redis:6379/1 - MONGO_URI=mongodb://mongo:27017/docsgpt - API_URL=http://backend:5001 - #- OPENAI_API_BASE=$OPENAI_API_BASE - #- OPENAI_API_VERSION=$OPENAI_API_VERSION - #- AZURE_DEPLOYMENT_NAME=$AZURE_DEPLOYMENT_NAME depends_on: - redis - mongo diff --git a/run-with-docker-compose.sh b/run-with-docker-compose.sh new file mode 100755 index 0000000..f59cf93 --- /dev/null +++ b/run-with-docker-compose.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +source .env + +if [[ -n "$OPENAI_API_BASE" ]] && [[ -n "$OPENAI_API_VERSION" ]] && [[ -n "$AZURE_DEPLOYMENT_NAME" ]] && [[ -n "$AZURE_EMBEDDINGS_DEPLOYMENT_NAME" ]]; then + docker-compose -f docker-compose-azure.yaml build && docker-compose -f docker-compose-azure.yaml up +else + docker-compose build && docker-compose up +fi From f5e287ffa6f9e64a8bb06064c429a8300a72a667 Mon Sep 17 00:00:00 2001 From: Anton Larin Date: Sat, 17 Jun 2023 13:38:50 +0200 Subject: [PATCH 4/7] optimized imports --- application/app.py | 4 ++-- 1 file changed, 2 insertions(+), 
2 deletions(-) diff --git a/application/app.py b/application/app.py index b987d4a..44158ff 100644 --- a/application/app.py +++ b/application/app.py @@ -2,9 +2,10 @@ import asyncio import datetime import http.client import json +import logging import os +import platform import traceback -import logging import dotenv import openai @@ -50,7 +51,6 @@ if settings.LLM_NAME == "manifest": manifest = Manifest(client_name="huggingface", client_connection="http://127.0.0.1:5000") # Redirect PosixPath to WindowsPath on Windows -import platform if platform.system() == "Windows": import pathlib From 84168e22d0d995740d34992ea1cb0d7342322a80 Mon Sep 17 00:00:00 2001 From: Anton Larin Date: Sat, 17 Jun 2023 16:09:22 +0200 Subject: [PATCH 5/7] add missing variable after testing and minor fixes. --- docker-compose-azure.yaml | 2 ++ run-with-docker-compose.sh | 2 ++ scripts/parser/open_ai_func.py | 1 + 3 files changed, 5 insertions(+) diff --git a/docker-compose-azure.yaml b/docker-compose-azure.yaml index 81c67c2..773196d 100644 --- a/docker-compose-azure.yaml +++ b/docker-compose-azure.yaml @@ -19,6 +19,7 @@ services: - CELERY_BROKER_URL=redis://redis:6379/0 - CELERY_RESULT_BACKEND=redis://redis:6379/1 - MONGO_URI=mongodb://mongo:27017/docsgpt + - OPENAI_API_KEY=$OPENAI_API_KEY - OPENAI_API_BASE=$OPENAI_API_BASE - OPENAI_API_VERSION=$OPENAI_API_VERSION - AZURE_DEPLOYMENT_NAME=$AZURE_DEPLOYMENT_NAME @@ -43,6 +44,7 @@ services: - CELERY_RESULT_BACKEND=redis://redis:6379/1 - MONGO_URI=mongodb://mongo:27017/docsgpt - API_URL=http://backend:5001 + - OPENAI_API_KEY=$OPENAI_API_KEY - OPENAI_API_BASE=$OPENAI_API_BASE - OPENAI_API_VERSION=$OPENAI_API_VERSION - AZURE_DEPLOYMENT_NAME=$AZURE_DEPLOYMENT_NAME diff --git a/run-with-docker-compose.sh b/run-with-docker-compose.sh index f59cf93..4c89229 100755 --- a/run-with-docker-compose.sh +++ b/run-with-docker-compose.sh @@ -3,7 +3,9 @@ source .env if [[ -n "$OPENAI_API_BASE" ]] && [[ -n "$OPENAI_API_VERSION" ]] && [[ -n "$AZURE_DEPLOYMENT_NAME" 
]] && [[ -n "$AZURE_EMBEDDINGS_DEPLOYMENT_NAME" ]]; then + echo "Running Azure Configuration" docker-compose -f docker-compose-azure.yaml build && docker-compose -f docker-compose-azure.yaml up else + echo "Running Plain Configuration" docker-compose build && docker-compose up fi diff --git a/scripts/parser/open_ai_func.py b/scripts/parser/open_ai_func.py index d817402..63b0663 100644 --- a/scripts/parser/open_ai_func.py +++ b/scripts/parser/open_ai_func.py @@ -49,6 +49,7 @@ def call_openai_api(docs, folder_name): os.environ.get("OPENAI_API_BASE") and os.environ.get("OPENAI_API_VERSION") and os.environ.get("AZURE_DEPLOYMENT_NAME") + and os.environ.get("AZURE_EMBEDDINGS_DEPLOYMENT_NAME") ): os.environ["OPENAI_API_TYPE"] = "azure" openai_embeddings = OpenAIEmbeddings(model=os.environ.get("AZURE_EMBEDDINGS_DEPLOYMENT_NAME")) From 5eda42ff31e1dbaa05284dbf2e795fb3d33ab932 Mon Sep 17 00:00:00 2001 From: Anton Larin Date: Sat, 17 Jun 2023 17:31:21 +0200 Subject: [PATCH 6/7] fix configuration to support streaming answer with Azure --- application/app.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/application/app.py b/application/app.py index 44158ff..4c260d6 100644 --- a/application/app.py +++ b/application/app.py @@ -159,6 +159,10 @@ def complete_stream(question, docsearch, chat_history, api_key): openai.api_key = api_key if is_azure_configured(): logger.debug("in Azure") + openai.api_type = "azure" + openai.api_version = settings.OPENAI_API_VERSION + openai.api_base = settings.OPENAI_API_BASE + openai.engine = settings.AZURE_DEPLOYMENT_NAME llm = AzureChatOpenAI( openai_api_key=api_key, openai_api_base=settings.OPENAI_API_BASE, From 0ff5f408d6f458e4d780adfada3e8442cf4a09ef Mon Sep 17 00:00:00 2001 From: Anton Larin Date: Sat, 17 Jun 2023 17:31:21 +0200 Subject: [PATCH 7/7] fix configuration to support streaming answer with Azure --- application/app.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/application/app.py b/application/app.py 
index 4c260d6..52046cb 100644 --- a/application/app.py +++ b/application/app.py @@ -162,7 +162,6 @@ def complete_stream(question, docsearch, chat_history, api_key): openai.api_type = "azure" openai.api_version = settings.OPENAI_API_VERSION openai.api_base = settings.OPENAI_API_BASE - openai.engine = settings.AZURE_DEPLOYMENT_NAME llm = AzureChatOpenAI( openai_api_key=api_key, openai_api_base=settings.OPENAI_API_BASE, @@ -196,7 +195,7 @@ def complete_stream(question, docsearch, chat_history, api_key): messages_combine.append({"role": "user", "content": i["prompt"]}) messages_combine.append({"role": "system", "content": i["response"]}) messages_combine.append({"role": "user", "content": question}) - completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", + completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", engine=settings.AZURE_DEPLOYMENT_NAME, messages=messages_combine, stream=True, max_tokens=500, temperature=0) for line in completion: