diff --git a/README.md b/README.md index 82a10f4..7910e24 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ Note: Make sure you have docker installed OPENAI_API_KEY=Yourkey VITE_API_STREAMING=true ``` -3. Run `docker-compose build && docker-compose up` +3. Run `./run-with-docker-compose.sh` 4. Navigate to http://localhost:5173/ To stop just run Ctrl + C diff --git a/application/.env_sample b/application/.env_sample index 24c6495..a9d5862 100644 --- a/application/.env_sample +++ b/application/.env_sample @@ -8,4 +8,5 @@ API_URL=http://localhost:5001 #For OPENAI on Azure OPENAI_API_BASE= OPENAI_API_VERSION= -AZURE_DEPLOYMENT_NAME= \ No newline at end of file +AZURE_DEPLOYMENT_NAME= +AZURE_EMBEDDINGS_DEPLOYMENT_NAME= \ No newline at end of file diff --git a/application/app.py b/application/app.py index d66d420..52046cb 100644 --- a/application/app.py +++ b/application/app.py @@ -2,7 +2,9 @@ import asyncio import datetime import http.client import json +import logging import os +import platform import traceback import dotenv @@ -40,6 +42,8 @@ from worker import ingest_worker # os.environ["LANGCHAIN_HANDLER"] = "langchain" +logger = logging.getLogger(__name__) + if settings.LLM_NAME == "manifest": from manifest import Manifest from langchain.llms.manifest import ManifestWrapper @@ -47,7 +51,6 @@ if settings.LLM_NAME == "manifest": manifest = Manifest(client_name="huggingface", client_connection="http://127.0.0.1:5000") # Redirect PosixPath to WindowsPath on Windows -import platform if platform.system() == "Windows": import pathlib @@ -124,7 +127,12 @@ def get_vectorstore(data): def get_docsearch(vectorstore, embeddings_key): if settings.EMBEDDINGS_NAME == "openai_text-embedding-ada-002": - docsearch = FAISS.load_local(vectorstore, OpenAIEmbeddings(openai_api_key=embeddings_key)) + if is_azure_configured(): + os.environ["OPENAI_API_TYPE"] = "azure" + openai_embeddings = OpenAIEmbeddings(model=settings.AZURE_EMBEDDINGS_DEPLOYMENT_NAME) + else: + openai_embeddings = OpenAIEmbeddings(openai_api_key=embeddings_key) + docsearch = FAISS.load_local(vectorstore, openai_embeddings) elif settings.EMBEDDINGS_NAME == "huggingface_sentence-transformers/all-mpnet-base-v2": docsearch = FAISS.load_local(vectorstore, HuggingFaceHubEmbeddings()) elif settings.EMBEDDINGS_NAME == "huggingface_hkunlp/instructor-large": @@ -149,7 +157,20 @@ def home(): def complete_stream(question, docsearch, chat_history, api_key): openai.api_key = api_key - llm = ChatOpenAI(openai_api_key=api_key) + if is_azure_configured(): + logger.debug("in Azure") + openai.api_type = "azure" + openai.api_version = settings.OPENAI_API_VERSION + openai.api_base = settings.OPENAI_API_BASE + llm = AzureChatOpenAI( + openai_api_key=api_key, + openai_api_base=settings.OPENAI_API_BASE, + openai_api_version=settings.OPENAI_API_VERSION, + deployment_name=settings.AZURE_DEPLOYMENT_NAME, + ) + else: + logger.debug("plain OpenAI") + llm = ChatOpenAI(openai_api_key=api_key) docs = docsearch.similarity_search(question, k=2) # join all page_content together with a newline docs_together = "\n".join([doc.page_content for doc in docs]) @@ -174,9 +195,9 @@ def complete_stream(question, docsearch, chat_history, api_key): messages_combine.append({"role": "user", "content": i["prompt"]}) messages_combine.append({"role": "system", "content": i["response"]}) messages_combine.append({"role": "user", "content": question}) - completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", + completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", engine=settings.AZURE_DEPLOYMENT_NAME, messages=messages_combine, stream=True, max_tokens=500, temperature=0) - + for line in completion: if "content" in line["choices"][0]["delta"]: # check if the delta contains content @@ -217,6 +238,10 @@ def stream(): ) +def is_azure_configured(): + return settings.OPENAI_API_BASE and settings.OPENAI_API_VERSION and settings.AZURE_DEPLOYMENT_NAME + + @app.route("/api/answer", methods=["POST"]) def api_answer(): data = request.get_json() @@ -244,7 +269,8 @@ def api_answer(): input_variables=["context", "question"], template=template_quest, template_format="jinja2" ) if settings.LLM_NAME == "openai_chat": - if settings.OPENAI_API_BASE and settings.OPENAI_API_VERSION and settings.AZURE_DEPLOYMENT_NAME: # azure + if is_azure_configured(): + logger.debug("in Azure") llm = AzureChatOpenAI( openai_api_key=api_key, openai_api_base=settings.OPENAI_API_BASE, @@ -252,6 +278,7 @@ def api_answer(): deployment_name=settings.AZURE_DEPLOYMENT_NAME, ) else: + logger.debug("plain OpenAI") llm = ChatOpenAI(openai_api_key=api_key) # optional parameter: model_name="gpt-4" messages_combine = [SystemMessagePromptTemplate.from_template(chat_combine_template)] if history: diff --git a/application/core/settings.py b/application/core/settings.py index ed621bb..853f152 100644 --- a/application/core/settings.py +++ b/application/core/settings.py @@ -18,7 +18,8 @@ class Settings(BaseSettings): EMBEDDINGS_KEY: str = None # api key for embeddings (if using openai, just copy API_KEY OPENAI_API_BASE: str = None # azure openai api base url OPENAI_API_VERSION: str = None # azure openai api version - AZURE_DEPLOYMENT_NAME: str = None # azure deployment name + AZURE_DEPLOYMENT_NAME: str = None # azure deployment name for answering + AZURE_EMBEDDINGS_DEPLOYMENT_NAME: str = None # azure deployment name for embeddings path = Path(__file__).parent.parent.absolute() diff --git a/application/worker.py b/application/worker.py index d7674ad..2a3ff24 100644 --- a/application/worker.py +++ b/application/worker.py @@ -19,9 +19,11 @@ try: except FileExistsError: pass + def metadata_from_filename(title): return {'title': title} + def generate_random_string(length): return ''.join([string.ascii_letters[i % 52] for i in range(length)]) diff --git a/docker-compose-azure.yaml b/docker-compose-azure.yaml new file mode 100644 index 0000000..773196d --- /dev/null +++ b/docker-compose-azure.yaml @@ -0,0 +1,71 @@ +version: "3.9" + +services: + frontend: + build: ./frontend + environment: + - VITE_API_HOST=http://localhost:5001 + - VITE_API_STREAMING=$VITE_API_STREAMING + ports: + - "5173:5173" + depends_on: + - backend + + backend: + build: ./application + environment: + - API_KEY=$OPENAI_API_KEY + - EMBEDDINGS_KEY=$OPENAI_API_KEY + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/1 + - MONGO_URI=mongodb://mongo:27017/docsgpt + - OPENAI_API_KEY=$OPENAI_API_KEY + - OPENAI_API_BASE=$OPENAI_API_BASE + - OPENAI_API_VERSION=$OPENAI_API_VERSION + - AZURE_DEPLOYMENT_NAME=$AZURE_DEPLOYMENT_NAME + - AZURE_EMBEDDINGS_DEPLOYMENT_NAME=$AZURE_EMBEDDINGS_DEPLOYMENT_NAME + ports: + - "5001:5001" + volumes: + - ./application/indexes:/app/indexes + - ./application/inputs:/app/inputs + - ./application/vectors:/app/vectors + depends_on: + - redis + - mongo + + worker: + build: ./application + command: celery -A app.celery worker -l INFO + environment: + - API_KEY=$OPENAI_API_KEY + - EMBEDDINGS_KEY=$OPENAI_API_KEY + - CELERY_BROKER_URL=redis://redis:6379/0 + - CELERY_RESULT_BACKEND=redis://redis:6379/1 + - MONGO_URI=mongodb://mongo:27017/docsgpt + - API_URL=http://backend:5001 + - OPENAI_API_KEY=$OPENAI_API_KEY + - OPENAI_API_BASE=$OPENAI_API_BASE + - OPENAI_API_VERSION=$OPENAI_API_VERSION + - AZURE_DEPLOYMENT_NAME=$AZURE_DEPLOYMENT_NAME + - AZURE_EMBEDDINGS_DEPLOYMENT_NAME=$AZURE_EMBEDDINGS_DEPLOYMENT_NAME + depends_on: + - redis + - mongo + + redis: + image: redis:6-alpine + ports: + - 6379:6379 + + mongo: + image: mongo:6 + ports: + - 27017:27017 + volumes: + - mongodb_data_container:/data/db + + + +volumes: + mongodb_data_container: \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml index c9557dc..c06b61b 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -19,9 +19,6 @@ services: - CELERY_BROKER_URL=redis://redis:6379/0 - CELERY_RESULT_BACKEND=redis://redis:6379/1 - MONGO_URI=mongodb://mongo:27017/docsgpt - #- OPENAI_API_BASE=$OPENAI_API_BASE - #- OPENAI_API_VERSION=$OPENAI_API_VERSION - #- AZURE_DEPLOYMENT_NAME=$AZURE_DEPLOYMENT_NAME ports: - "5001:5001" volumes: @@ -42,9 +39,6 @@ services: - CELERY_RESULT_BACKEND=redis://redis:6379/1 - MONGO_URI=mongodb://mongo:27017/docsgpt - API_URL=http://backend:5001 - #- OPENAI_API_BASE=$OPENAI_API_BASE - #- OPENAI_API_VERSION=$OPENAI_API_VERSION - #- AZURE_DEPLOYMENT_NAME=$AZURE_DEPLOYMENT_NAME depends_on: - redis - mongo diff --git a/run-with-docker-compose.sh b/run-with-docker-compose.sh new file mode 100755 index 0000000..4c89229 --- /dev/null +++ b/run-with-docker-compose.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +source .env + +if [[ -n "$OPENAI_API_BASE" ]] && [[ -n "$OPENAI_API_VERSION" ]] && [[ -n "$AZURE_DEPLOYMENT_NAME" ]] && [[ -n "$AZURE_EMBEDDINGS_DEPLOYMENT_NAME" ]]; then + echo "Running Azure Configuration" + docker-compose -f docker-compose-azure.yaml build && docker-compose -f docker-compose-azure.yaml up +else + echo "Running Plain Configuration" + docker-compose build && docker-compose up +fi diff --git a/scripts/parser/open_ai_func.py b/scripts/parser/open_ai_func.py index d817402..63b0663 100644 --- a/scripts/parser/open_ai_func.py +++ b/scripts/parser/open_ai_func.py @@ -49,6 +49,7 @@ def call_openai_api(docs, folder_name): os.environ.get("OPENAI_API_BASE") and os.environ.get("OPENAI_API_VERSION") and os.environ.get("AZURE_DEPLOYMENT_NAME") + and os.environ.get("AZURE_EMBEDDINGS_DEPLOYMENT_NAME") ): os.environ["OPENAI_API_TYPE"] = "azure" openai_embeddings = OpenAIEmbeddings(model=os.environ.get("AZURE_EMBEDDINGS_DEPLOYMENT_NAME"))