diff --git a/application/Dockerfile b/application/Dockerfile index 26254310..efe2cb3b 100644 --- a/application/Dockerfile +++ b/application/Dockerfile @@ -1,11 +1,16 @@ # Builder Stage -FROM ubuntu:mantic as builder +FROM ubuntu:24.04 as builder ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && \ + apt-get install -y software-properties-common + +RUN add-apt-repository ppa:deadsnakes/ppa + # Install necessary packages and Python RUN apt-get update && \ - apt-get install -y --no-install-recommends gcc curl wget unzip libc6-dev python3.11 python3-pip python3.11-venv && \ + apt-get install -y --no-install-recommends gcc curl wget unzip libc6-dev python3.11 python3.11-distutils python3.11-venv && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* @@ -42,7 +47,12 @@ RUN pip install --no-cache-dir --upgrade pip && \ pip install --no-cache-dir -r requirements.txt # Final Stage -FROM ubuntu:mantic as final +FROM ubuntu:24.04 as final + +RUN apt-get update && \ + apt-get install -y software-properties-common + +RUN add-apt-repository ppa:deadsnakes/ppa # Install Python RUN apt-get update && apt-get install -y --no-install-recommends python3.11 && \ diff --git a/application/api/user/routes.py b/application/api/user/routes.py index 51101492..443faddc 100644 --- a/application/api/user/routes.py +++ b/application/api/user/routes.py @@ -229,7 +229,7 @@ def upload_remote(): def task_status(): """Get celery job status.""" task_id = request.args.get("task_id") - from application.celery import celery + from application.celery_init import celery task = celery.AsyncResult(task_id) task_meta = task.info diff --git a/application/api/user/tasks.py b/application/api/user/tasks.py index 4602bf85..862b6dcd 100644 --- a/application/api/user/tasks.py +++ b/application/api/user/tasks.py @@ -1,5 +1,5 @@ from application.worker import ingest_worker, remote_worker -from application.celery import celery +from application.celery_init import celery @celery.task(bind=True) def ingest(self, directory, formats, name_job, filename, user): diff --git a/application/app.py b/application/app.py index e646ffbe..fe8efd12 100644 --- a/application/app.py +++ b/application/app.py @@ -1,6 +1,6 @@ import platform import dotenv -from application.celery import celery +from application.celery_init import celery from flask import Flask, request, redirect from application.core.settings import settings from application.api.user.routes import user diff --git a/application/celery.py b/application/celery_init.py similarity index 100% rename from application/celery.py rename to application/celery_init.py diff --git a/application/vectorstore/base.py b/application/vectorstore/base.py index d8f74705..ec10519f 100644 --- a/application/vectorstore/base.py +++ b/application/vectorstore/base.py @@ -8,6 +8,30 @@ from langchain_community.embeddings import ( from langchain_openai import OpenAIEmbeddings from application.core.settings import settings +class EmbeddingsSingleton: + _instances = {} + + @staticmethod + def get_instance(embeddings_name, *args, **kwargs): + if embeddings_name not in EmbeddingsSingleton._instances: + EmbeddingsSingleton._instances[embeddings_name] = EmbeddingsSingleton._create_instance(embeddings_name, *args, **kwargs) + return EmbeddingsSingleton._instances[embeddings_name] + + @staticmethod + def _create_instance(embeddings_name, *args, **kwargs): + embeddings_factory = { + "openai_text-embedding-ada-002": OpenAIEmbeddings, + "huggingface_sentence-transformers/all-mpnet-base-v2": HuggingFaceEmbeddings, + "huggingface_sentence-transformers-all-mpnet-base-v2": HuggingFaceEmbeddings, + "huggingface_hkunlp/instructor-large": HuggingFaceInstructEmbeddings, + "cohere_medium": CohereEmbeddings + } + + if embeddings_name not in embeddings_factory: + raise ValueError(f"Invalid embeddings_name: {embeddings_name}") + + return embeddings_factory[embeddings_name](*args, **kwargs) + class BaseVectorStore(ABC): def __init__(self): pass @@ -20,42 +44,36 @@ class BaseVectorStore(ABC): return settings.OPENAI_API_BASE and settings.OPENAI_API_VERSION and settings.AZURE_DEPLOYMENT_NAME def _get_embeddings(self, embeddings_name, embeddings_key=None): - embeddings_factory = { - "openai_text-embedding-ada-002": OpenAIEmbeddings, - "huggingface_sentence-transformers/all-mpnet-base-v2": HuggingFaceEmbeddings, - "huggingface_hkunlp/instructor-large": HuggingFaceInstructEmbeddings, - "cohere_medium": CohereEmbeddings - } - - if embeddings_name not in embeddings_factory: - raise ValueError(f"Invalid embeddings_name: {embeddings_name}") - if embeddings_name == "openai_text-embedding-ada-002": if self.is_azure_configured(): os.environ["OPENAI_API_TYPE"] = "azure" - embedding_instance = embeddings_factory[embeddings_name]( + embedding_instance = EmbeddingsSingleton.get_instance( + embeddings_name, model=settings.AZURE_EMBEDDINGS_DEPLOYMENT_NAME ) else: - embedding_instance = embeddings_factory[embeddings_name]( + embedding_instance = EmbeddingsSingleton.get_instance( + embeddings_name, openai_api_key=embeddings_key ) elif embeddings_name == "cohere_medium": - embedding_instance = embeddings_factory[embeddings_name]( + embedding_instance = EmbeddingsSingleton.get_instance( + embeddings_name, cohere_api_key=embeddings_key ) elif embeddings_name == "huggingface_sentence-transformers/all-mpnet-base-v2": if os.path.exists("./model/all-mpnet-base-v2"): - embedding_instance = embeddings_factory[embeddings_name]( + embedding_instance = EmbeddingsSingleton.get_instance( + embeddings_name, model_name="./model/all-mpnet-base-v2", - model_kwargs={"device": "cpu"}, + model_kwargs={"device": "cpu"} ) else: - embedding_instance = embeddings_factory[embeddings_name]( - model_kwargs={"device": "cpu"}, + embedding_instance = EmbeddingsSingleton.get_instance( + embeddings_name, + model_kwargs={"device": "cpu"} ) else: - embedding_instance = embeddings_factory[embeddings_name]() - - return embedding_instance + embedding_instance = EmbeddingsSingleton.get_instance(embeddings_name) + return embedding_instance \ No newline at end of file diff --git a/docs/pages/API/api-key-guide.md b/docs/pages/API/api-key-guide.md index 653a08c8..53bb4b58 100644 --- a/docs/pages/API/api-key-guide.md +++ b/docs/pages/API/api-key-guide.md @@ -14,7 +14,7 @@ Before creating your first API key, you must upload the document that will be li After uploading your document, you can obtain an API key either through the graphical user interface or via an API call: - **Graphical User Interface:** Navigate to the Settings section of the DocsGPT web app, find the API Keys option, and press 'Create New' to generate your key. -- **API Call:** Alternatively, you can use the `/api/create_api_key` endpoint to create a new API key. For detailed instructions, visit [DocsGPT API Documentation](https://docs.docsgpt.cloud/Developing/API-docs#8-apicreate_api_key). +- **API Call:** Alternatively, you can use the `/api/create_api_key` endpoint to create a new API key. For detailed instructions, visit [DocsGPT API Documentation](https://docs.docsgpt.cloud/API/API-docs#8-apicreate_api_key). ### Understanding Key Variables @@ -27,4 +27,4 @@ Upon creating your API key, you will encounter several key variables. Each serve With your API key ready, you can now integrate DocsGPT into your application, such as the DocsGPT Widget or any other software, via `/api/answer` or `/stream` endpoints. The source document is preset with the API key, allowing you to bypass fields like `selectDocs` and `active_docs` during implementation. -Congratulations on taking the first step towards enhancing your applications with DocsGPT! With this guide, you're now equipped to navigate the process of obtaining and understanding DocsGPT API keys. \ No newline at end of file +Congratulations on taking the first step towards enhancing your applications with DocsGPT! With this guide, you're now equipped to navigate the process of obtaining and understanding DocsGPT API keys. diff --git a/tests/test_celery.py b/tests/test_celery.py index f4b22448..f4ec6a03 100644 --- a/tests/test_celery.py +++ b/tests/test_celery.py @@ -1,9 +1,9 @@ from unittest.mock import patch from application.core.settings import settings -from application.celery import make_celery +from application.celery_init import make_celery -@patch('application.celery.Celery') +@patch('application.celery_init.Celery') def test_make_celery(mock_celery): # Arrange app_name = 'test_app_name'