From ae2ded119fbdab8e37554a06928824547133e9e7 Mon Sep 17 00:00:00 2001 From: xucai Date: Wed, 12 Jun 2024 19:48:28 +0800 Subject: [PATCH 1/5] rename celery_init.py --- application/api/user/routes.py | 2 +- application/api/user/tasks.py | 2 +- application/app.py | 2 +- application/{celery.py => celery_init.py} | 0 tests/test_celery.py | 2 +- 5 files changed, 4 insertions(+), 4 deletions(-) rename application/{celery.py => celery_init.py} (100%) diff --git a/application/api/user/routes.py b/application/api/user/routes.py index 51101492..443faddc 100644 --- a/application/api/user/routes.py +++ b/application/api/user/routes.py @@ -229,7 +229,7 @@ def upload_remote(): def task_status(): """Get celery job status.""" task_id = request.args.get("task_id") - from application.celery import celery + from application.celery_init import celery task = celery.AsyncResult(task_id) task_meta = task.info diff --git a/application/api/user/tasks.py b/application/api/user/tasks.py index 4602bf85..862b6dcd 100644 --- a/application/api/user/tasks.py +++ b/application/api/user/tasks.py @@ -1,5 +1,5 @@ from application.worker import ingest_worker, remote_worker -from application.celery import celery +from application.celery_init import celery @celery.task(bind=True) def ingest(self, directory, formats, name_job, filename, user): diff --git a/application/app.py b/application/app.py index e646ffbe..fe8efd12 100644 --- a/application/app.py +++ b/application/app.py @@ -1,6 +1,6 @@ import platform import dotenv -from application.celery import celery +from application.celery_init import celery from flask import Flask, request, redirect from application.core.settings import settings from application.api.user.routes import user diff --git a/application/celery.py b/application/celery_init.py similarity index 100% rename from application/celery.py rename to application/celery_init.py diff --git a/tests/test_celery.py b/tests/test_celery.py index f4b22448..8237d136 100644 --- a/tests/test_celery.py +++ b/tests/test_celery.py @@ -1,6 +1,6 @@ from unittest.mock import patch from application.core.settings import settings -from application.celery import make_celery +from application.celery_init import make_celery @patch('application.celery.Celery') From 3454309cbcd5d81675937382f2e375ab80ed6a6b Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 14 Jun 2024 12:58:35 +0100 Subject: [PATCH 2/5] chore: Refactor embeddings instantiation to use a singleton pattern --- application/vectorstore/base.py | 58 +++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/application/vectorstore/base.py b/application/vectorstore/base.py index d8f74705..ec10519f 100644 --- a/application/vectorstore/base.py +++ b/application/vectorstore/base.py @@ -8,6 +8,30 @@ from langchain_community.embeddings import ( from langchain_openai import OpenAIEmbeddings from application.core.settings import settings +class EmbeddingsSingleton: + _instances = {} + + @staticmethod + def get_instance(embeddings_name, *args, **kwargs): + if embeddings_name not in EmbeddingsSingleton._instances: + EmbeddingsSingleton._instances[embeddings_name] = EmbeddingsSingleton._create_instance(embeddings_name, *args, **kwargs) + return EmbeddingsSingleton._instances[embeddings_name] + + @staticmethod + def _create_instance(embeddings_name, *args, **kwargs): + embeddings_factory = { + "openai_text-embedding-ada-002": OpenAIEmbeddings, + "huggingface_sentence-transformers/all-mpnet-base-v2": HuggingFaceEmbeddings, + "huggingface_sentence-transformers-all-mpnet-base-v2": HuggingFaceEmbeddings, + "huggingface_hkunlp/instructor-large": HuggingFaceInstructEmbeddings, + "cohere_medium": CohereEmbeddings + } + + if embeddings_name not in embeddings_factory: + raise ValueError(f"Invalid embeddings_name: {embeddings_name}") + + return embeddings_factory[embeddings_name](*args, **kwargs) + class BaseVectorStore(ABC): def __init__(self): pass @@ -20,42 +44,36 @@ class BaseVectorStore(ABC): return settings.OPENAI_API_BASE and settings.OPENAI_API_VERSION and settings.AZURE_DEPLOYMENT_NAME def _get_embeddings(self, embeddings_name, embeddings_key=None): - embeddings_factory = { - "openai_text-embedding-ada-002": OpenAIEmbeddings, - "huggingface_sentence-transformers/all-mpnet-base-v2": HuggingFaceEmbeddings, - "huggingface_hkunlp/instructor-large": HuggingFaceInstructEmbeddings, - "cohere_medium": CohereEmbeddings - } - - if embeddings_name not in embeddings_factory: - raise ValueError(f"Invalid embeddings_name: {embeddings_name}") - if embeddings_name == "openai_text-embedding-ada-002": if self.is_azure_configured(): os.environ["OPENAI_API_TYPE"] = "azure" - embedding_instance = embeddings_factory[embeddings_name]( + embedding_instance = EmbeddingsSingleton.get_instance( + embeddings_name, model=settings.AZURE_EMBEDDINGS_DEPLOYMENT_NAME ) else: - embedding_instance = embeddings_factory[embeddings_name]( + embedding_instance = EmbeddingsSingleton.get_instance( + embeddings_name, openai_api_key=embeddings_key ) elif embeddings_name == "cohere_medium": - embedding_instance = embeddings_factory[embeddings_name]( + embedding_instance = EmbeddingsSingleton.get_instance( + embeddings_name, cohere_api_key=embeddings_key ) elif embeddings_name == "huggingface_sentence-transformers/all-mpnet-base-v2": if os.path.exists("./model/all-mpnet-base-v2"): - embedding_instance = embeddings_factory[embeddings_name]( + embedding_instance = EmbeddingsSingleton.get_instance( + embeddings_name, model_name="./model/all-mpnet-base-v2", - model_kwargs={"device": "cpu"}, + model_kwargs={"device": "cpu"} ) else: - embedding_instance = embeddings_factory[embeddings_name]( - model_kwargs={"device": "cpu"}, + embedding_instance = EmbeddingsSingleton.get_instance( + embeddings_name, + model_kwargs={"device": "cpu"} ) else: - embedding_instance = embeddings_factory[embeddings_name]() - - return embedding_instance + embedding_instance = EmbeddingsSingleton.get_instance(embeddings_name) + return embedding_instance \ No newline at end of file From 1754570057107b2b872bed0a213b7c3d9529c1d6 Mon Sep 17 00:00:00 2001 From: xucai Date: Wed, 19 Jun 2024 16:17:09 +0800 Subject: [PATCH 3/5] rename celery_init.py --- tests/test_celery.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_celery.py b/tests/test_celery.py index 8237d136..f4ec6a03 100644 --- a/tests/test_celery.py +++ b/tests/test_celery.py @@ -3,7 +3,7 @@ from application.core.settings import settings from application.celery_init import make_celery -@patch('application.celery.Celery') +@patch('application.celery_init.Celery') def test_make_celery(mock_celery): # Arrange app_name = 'test_app_name' From e5c30cf841c9ab0824aa2bbeb07ba7d856569a46 Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 19 Jun 2024 11:45:37 +0100 Subject: [PATCH 4/5] upgrade docker to 24.04 --- application/Dockerfile | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/application/Dockerfile b/application/Dockerfile index 26254310..efe2cb3b 100644 --- a/application/Dockerfile +++ b/application/Dockerfile @@ -1,11 +1,16 @@ # Builder Stage -FROM ubuntu:mantic as builder +FROM ubuntu:24.04 as builder ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && \ + apt-get install -y software-properties-common + +RUN add-apt-repository ppa:deadsnakes/ppa + # Install necessary packages and Python RUN apt-get update && \ - apt-get install -y --no-install-recommends gcc curl wget unzip libc6-dev python3.11 python3-pip python3.11-venv && \ + apt-get install -y --no-install-recommends gcc curl wget unzip libc6-dev python3.11 python3.11-distutils python3.11-venv && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* @@ -42,7 +47,12 @@ RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir -r requirements.txt # Final Stage -FROM ubuntu:mantic as final +FROM ubuntu:24.04 as final + +RUN apt-get update && \ + apt-get install -y software-properties-common + +RUN add-apt-repository ppa:deadsnakes/ppa # Install Python RUN apt-get update && apt-get install -y --no-install-recommends python3.11 && \ From e47e75114240beded477180974ae0a0e06761404 Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 19 Jun 2024 12:35:30 +0100 Subject: [PATCH 5/5] fix link --- docs/pages/API/api-key-guide.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/pages/API/api-key-guide.md b/docs/pages/API/api-key-guide.md index 653a08c8..53bb4b58 100644 --- a/docs/pages/API/api-key-guide.md +++ b/docs/pages/API/api-key-guide.md @@ -14,7 +14,7 @@ Before creating your first API key, you must upload the document that will be li After uploading your document, you can obtain an API key either through the graphical user interface or via an API call: - **Graphical User Interface:** Navigate to the Settings section of the DocsGPT web app, find the API Keys option, and press 'Create New' to generate your key. -- **API Call:** Alternatively, you can use the `/api/create_api_key` endpoint to create a new API key. For detailed instructions, visit [DocsGPT API Documentation](https://docs.docsgpt.cloud/Developing/API-docs#8-apicreate_api_key). +- **API Call:** Alternatively, you can use the `/api/create_api_key` endpoint to create a new API key. For detailed instructions, visit [DocsGPT API Documentation](https://docs.docsgpt.cloud/API/API-docs#8-apicreate_api_key). ### Understanding Key Variables @@ -27,4 +27,4 @@ Upon creating your API key, you will encounter several key variables. Each serve With your API key ready, you can now integrate DocsGPT into your application, such as the DocsGPT Widget or any other software, via `/api/answer` or `/stream` endpoints. The source document is preset with the API key, allowing you to bypass fields like `selectDocs` and `active_docs` during implementation. -Congratulations on taking the first step towards enhancing your applications with DocsGPT! With this guide, you're now equipped to navigate the process of obtaining and understanding DocsGPT API keys. \ No newline at end of file +Congratulations on taking the first step towards enhancing your applications with DocsGPT! With this guide, you're now equipped to navigate the process of obtaining and understanding DocsGPT API keys.