From b3461b7134ace6d36ba0afede871d5c86748065f Mon Sep 17 00:00:00 2001
From: Alex
Date: Tue, 9 Jan 2024 11:39:32 +0000
Subject: [PATCH] Add MPNet model and update vector store for Hugging Face embeddings

---
Review notes (this section is ignored by `git am`):
- Line breaks and context-line indentation were reconstructed from the hunk
  headers; verify the context against the target tree before applying.
- `apt-get update` now runs in the same layer as `apt-get install`, and the
  download/unzip/cleanup steps share one RUN so the archive never persists
  in a layer.
- The `index` hashes below are from the original commit and no longer match
  the post-image after these edits.

 application/Dockerfile          | 10 ++++++++++
 application/vectorstore/base.py | 10 ++++++++++
 2 files changed, 20 insertions(+)

diff --git a/application/Dockerfile b/application/Dockerfile
index 7ea9966..6f70d2f 100644
--- a/application/Dockerfile
+++ b/application/Dockerfile
@@ -7,6 +7,13 @@ ENV PATH="/root/.cargo/bin:${PATH}"
 RUN pip install --upgrade pip && pip install tiktoken==0.5.2
 COPY requirements.txt .
 RUN pip install -r requirements.txt
+# Fetch the MPNet embedding model at build time so the final stage can copy
+# it in. `apt-get update` runs in the same layer as the install so the
+# package index is never stale or missing.
+RUN apt-get update && apt-get install -y wget unzip \
+    && wget https://docsgpt.s3.eu-west-1.amazonaws.com/models/embeddings/mpnet-base-v2.zip \
+    && unzip mpnet-base-v2.zip -d model \
+    && rm mpnet-base-v2.zip
 
 FROM python:3.11-slim-bullseye
 
@@ -14,6 +21,9 @@ FROM python:3.11-slim-bullseye
 COPY --from=builder /usr/local/ /usr/local/
 
 WORKDIR /app
+# Embedding model unpacked in the builder stage.
+COPY --from=builder /model /app/model
+
 COPY . /app/application
 ENV FLASK_APP=app.py
 ENV FLASK_DEBUG=true
diff --git a/application/vectorstore/base.py b/application/vectorstore/base.py
index e1664c6..ffff49b 100644
--- a/application/vectorstore/base.py
+++ b/application/vectorstore/base.py
@@ -44,5 +44,15 @@ class BaseVectorStore(ABC):
             embedding_instance = embeddings_factory[embeddings_name](
                 cohere_api_key=embeddings_key
             )
+        elif embeddings_name == "huggingface_sentence-transformers/all-mpnet-base-v2":
+            # Load the model from the local directory that the Dockerfile
+            # copies to /app/model; the path is relative to the working
+            # directory (WORKDIR /app in the image).
+            # NOTE(review): assumes mpnet-base-v2.zip unpacks to an
+            # all-mpnet-base-v2/ directory -- confirm.
+            embedding_instance = embeddings_factory[embeddings_name](
+                model_name="./model/all-mpnet-base-v2",
+                model_kwargs={"device": "cpu"},
+            )
         else:
             embedding_instance = embeddings_factory[embeddings_name]()