Merge branch 'arc53:main' into main

This commit is contained in:
Siddhant Rai 2024-03-05 14:22:51 +05:30 committed by GitHub
commit 19b09515a1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 81 additions and 15 deletions

View File

@ -3,6 +3,7 @@ from typing import Optional
import os
from pydantic_settings import BaseSettings
current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@ -15,7 +16,7 @@ class Settings(BaseSettings):
MODEL_PATH: str = os.path.join(current_dir, "models/docsgpt-7b-f16.gguf")
TOKENS_MAX_HISTORY: int = 150
UPLOAD_FOLDER: str = "inputs"
VECTOR_STORE: str = "faiss" # "faiss" or "elasticsearch"
VECTOR_STORE: str = "faiss" # "faiss" or "elasticsearch" or "qdrant"
API_URL: str = "http://localhost:7091" # backend url for celery worker
@ -27,21 +28,36 @@ class Settings(BaseSettings):
AZURE_EMBEDDINGS_DEPLOYMENT_NAME: Optional[str] = None # azure deployment name for embeddings
# elasticsearch
ELASTIC_CLOUD_ID: Optional[str] = None # cloud id for elasticsearch
ELASTIC_USERNAME: Optional[str] = None # username for elasticsearch
ELASTIC_PASSWORD: Optional[str] = None # password for elasticsearch
ELASTIC_URL: Optional[str] = None # url for elasticsearch
ELASTIC_INDEX: Optional[str] = "docsgpt" # index name for elasticsearch
ELASTIC_CLOUD_ID: Optional[str] = None # cloud id for elasticsearch
ELASTIC_USERNAME: Optional[str] = None # username for elasticsearch
ELASTIC_PASSWORD: Optional[str] = None # password for elasticsearch
ELASTIC_URL: Optional[str] = None # url for elasticsearch
ELASTIC_INDEX: Optional[str] = "docsgpt" # index name for elasticsearch
# SageMaker config
SAGEMAKER_ENDPOINT: Optional[str] = None # SageMaker endpoint name
SAGEMAKER_REGION: Optional[str] = None # SageMaker region name
SAGEMAKER_ACCESS_KEY: Optional[str] = None # SageMaker access key
SAGEMAKER_SECRET_KEY: Optional[str] = None # SageMaker secret key
SAGEMAKER_ENDPOINT: Optional[str] = None # SageMaker endpoint name
SAGEMAKER_REGION: Optional[str] = None # SageMaker region name
SAGEMAKER_ACCESS_KEY: Optional[str] = None # SageMaker access key
SAGEMAKER_SECRET_KEY: Optional[str] = None # SageMaker secret key
# prem ai project id
# prem ai project id
PREMAI_PROJECT_ID: Optional[str] = None
# Qdrant vectorstore config
QDRANT_COLLECTION_NAME: Optional[str] = "docsgpt"
QDRANT_LOCATION: Optional[str] = None
QDRANT_URL: Optional[str] = None
QDRANT_PORT: Optional[int] = 6333
QDRANT_GRPC_PORT: int = 6334
QDRANT_PREFER_GRPC: bool = False
QDRANT_HTTPS: Optional[bool] = None
QDRANT_API_KEY: Optional[str] = None
QDRANT_PREFIX: Optional[str] = None
QDRANT_TIMEOUT: Optional[float] = None
QDRANT_HOST: Optional[str] = None
QDRANT_PATH: Optional[str] = None
QDRANT_DISTANCE_FUNC: str = "Cosine"
path = Path(__file__).parent.parent.absolute()
settings = Settings(_env_file=path.joinpath(".env"), _env_file_encoding="utf-8")

View File

@ -21,6 +21,7 @@ pydantic_settings==2.1.0
pymongo==4.6.1
PyPDF2==3.0.1
python-dotenv==1.0.1
qdrant-client==1.7.3
redis==5.0.1
Requests==2.31.0
retry==0.9.2

View File

@ -0,0 +1,47 @@
from langchain_community.vectorstores.qdrant import Qdrant
from application.vectorstore.base import BaseVectorStore
from application.core.settings import settings
from qdrant_client import models
class QdrantStore(BaseVectorStore):
    """Vector store implementation backed by Qdrant.

    Every instance talks to the collection named by
    ``settings.QDRANT_COLLECTION_NAME``. Searches and deletions are narrowed
    to a single logical index by a payload filter on ``metadata.store``.
    """

    def __init__(self, path: str = "", embeddings_key: str = "embeddings"):
        """Build the store filter and connect to Qdrant.

        Args:
            path: Index path. The ``"application/indexes/"`` fragment is
                removed and trailing slashes are stripped; the result is the
                value matched against the ``metadata.store`` payload field.
            embeddings_key: Key forwarded to ``self._get_embeddings`` together
                with ``settings.EMBEDDINGS_NAME``.
        """
        store_name = path.replace("application/indexes/", "").rstrip("/")
        self._filter = models.Filter(
            must=[
                models.FieldCondition(
                    key="metadata.store",
                    match=models.MatchValue(value=store_name),
                )
            ]
        )

        self._docsearch = Qdrant.construct_instance(
            # Placeholder text only; presumably embedded once so the vector
            # dimension of the collection can be derived - TODO confirm.
            ["TEXT_TO_OBTAIN_EMBEDDINGS_DIMENSION"],
            embedding=self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key),
            collection_name=settings.QDRANT_COLLECTION_NAME,
            location=settings.QDRANT_LOCATION,
            url=settings.QDRANT_URL,
            port=settings.QDRANT_PORT,
            grpc_port=settings.QDRANT_GRPC_PORT,
            https=settings.QDRANT_HTTPS,
            prefer_grpc=settings.QDRANT_PREFER_GRPC,
            api_key=settings.QDRANT_API_KEY,
            prefix=settings.QDRANT_PREFIX,
            timeout=settings.QDRANT_TIMEOUT,
            # QDRANT_HOST is a declared setting; forward it so that configuring
            # it actually takes effect (defaults to None, i.e. unchanged).
            host=settings.QDRANT_HOST,
            path=settings.QDRANT_PATH,
            distance_func=settings.QDRANT_DISTANCE_FUNC,
        )

    def search(self, *args, **kwargs):
        """Run a similarity search restricted to this store's filter.

        Positional and keyword arguments are forwarded unchanged to the
        underlying ``similarity_search``; the store filter is always supplied
        as ``filter``, so callers must not pass their own ``filter`` keyword.
        """
        return self._docsearch.similarity_search(*args, filter=self._filter, **kwargs)

    def add_texts(self, *args, **kwargs):
        """Forward to the underlying ``add_texts`` and return its result.

        NOTE(review): the store filter is not applied here, so callers
        presumably need to supply matching ``store`` metadata themselves -
        confirm against callers.
        """
        return self._docsearch.add_texts(*args, **kwargs)

    def save_local(self, *args, **kwargs):
        """No-op: accepts and ignores all arguments."""
        pass

    def delete_index(self, *args, **kwargs):
        """Delete the points matching this store's filter from the collection.

        Arguments are accepted but unused. Returns whatever the Qdrant
        client's ``delete`` call returns.
        """
        return self._docsearch.client.delete(
            collection_name=settings.QDRANT_COLLECTION_NAME,
            points_selector=self._filter,
        )

View File

@ -1,13 +1,15 @@
from application.vectorstore.faiss import FaissStore
from application.vectorstore.elasticsearch import ElasticsearchStore
from application.vectorstore.mongodb import MongoDBVectorStore
from application.vectorstore.qdrant import QdrantStore
class VectorCreator:
vectorstores = {
'faiss': FaissStore,
'elasticsearch':ElasticsearchStore,
'mongodb': MongoDBVectorStore,
"faiss": FaissStore,
"elasticsearch": ElasticsearchStore,
"mongodb": MongoDBVectorStore,
"qdrant": QdrantStore,
}
@classmethod
@ -15,4 +17,4 @@ class VectorCreator:
vectorstore_class = cls.vectorstores.get(type.lower())
if not vectorstore_class:
raise ValueError(f"No vectorstore class found for type {type}")
return vectorstore_class(*args, **kwargs)
return vectorstore_class(*args, **kwargs)