mirror of
https://github.com/arc53/DocsGPT
synced 2024-11-08 01:10:24 +00:00
Merge branch 'arc53:main' into main
This commit is contained in:
commit
19b09515a1
@ -3,6 +3,7 @@ from typing import Optional
|
||||
import os
|
||||
|
||||
from pydantic_settings import BaseSettings
|
||||
|
||||
current_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
@ -15,7 +16,7 @@ class Settings(BaseSettings):
|
||||
MODEL_PATH: str = os.path.join(current_dir, "models/docsgpt-7b-f16.gguf")
|
||||
TOKENS_MAX_HISTORY: int = 150
|
||||
UPLOAD_FOLDER: str = "inputs"
|
||||
VECTOR_STORE: str = "faiss" # "faiss" or "elasticsearch"
|
||||
VECTOR_STORE: str = "faiss" # "faiss" or "elasticsearch" or "qdrant"
|
||||
|
||||
API_URL: str = "http://localhost:7091" # backend url for celery worker
|
||||
|
||||
@ -27,21 +28,36 @@ class Settings(BaseSettings):
|
||||
AZURE_EMBEDDINGS_DEPLOYMENT_NAME: Optional[str] = None # azure deployment name for embeddings
|
||||
|
||||
# elasticsearch
|
||||
ELASTIC_CLOUD_ID: Optional[str] = None # cloud id for elasticsearch
|
||||
ELASTIC_USERNAME: Optional[str] = None # username for elasticsearch
|
||||
ELASTIC_PASSWORD: Optional[str] = None # password for elasticsearch
|
||||
ELASTIC_URL: Optional[str] = None # url for elasticsearch
|
||||
ELASTIC_INDEX: Optional[str] = "docsgpt" # index name for elasticsearch
|
||||
ELASTIC_CLOUD_ID: Optional[str] = None # cloud id for elasticsearch
|
||||
ELASTIC_USERNAME: Optional[str] = None # username for elasticsearch
|
||||
ELASTIC_PASSWORD: Optional[str] = None # password for elasticsearch
|
||||
ELASTIC_URL: Optional[str] = None # url for elasticsearch
|
||||
ELASTIC_INDEX: Optional[str] = "docsgpt" # index name for elasticsearch
|
||||
|
||||
# SageMaker config
|
||||
SAGEMAKER_ENDPOINT: Optional[str] = None # SageMaker endpoint name
|
||||
SAGEMAKER_REGION: Optional[str] = None # SageMaker region name
|
||||
SAGEMAKER_ACCESS_KEY: Optional[str] = None # SageMaker access key
|
||||
SAGEMAKER_SECRET_KEY: Optional[str] = None # SageMaker secret key
|
||||
SAGEMAKER_ENDPOINT: Optional[str] = None # SageMaker endpoint name
|
||||
SAGEMAKER_REGION: Optional[str] = None # SageMaker region name
|
||||
SAGEMAKER_ACCESS_KEY: Optional[str] = None # SageMaker access key
|
||||
SAGEMAKER_SECRET_KEY: Optional[str] = None # SageMaker secret key
|
||||
|
||||
# prem ai project id
|
||||
# prem ai project id
|
||||
PREMAI_PROJECT_ID: Optional[str] = None
|
||||
|
||||
# Qdrant vectorstore config
|
||||
QDRANT_COLLECTION_NAME: Optional[str] = "docsgpt"
|
||||
QDRANT_LOCATION: Optional[str] = None
|
||||
QDRANT_URL: Optional[str] = None
|
||||
QDRANT_PORT: Optional[int] = 6333
|
||||
QDRANT_GRPC_PORT: int = 6334
|
||||
QDRANT_PREFER_GRPC: bool = False
|
||||
QDRANT_HTTPS: Optional[bool] = None
|
||||
QDRANT_API_KEY: Optional[str] = None
|
||||
QDRANT_PREFIX: Optional[str] = None
|
||||
QDRANT_TIMEOUT: Optional[float] = None
|
||||
QDRANT_HOST: Optional[str] = None
|
||||
QDRANT_PATH: Optional[str] = None
|
||||
QDRANT_DISTANCE_FUNC: str = "Cosine"
|
||||
|
||||
|
||||
path = Path(__file__).parent.parent.absolute()
|
||||
settings = Settings(_env_file=path.joinpath(".env"), _env_file_encoding="utf-8")
|
||||
|
@ -21,6 +21,7 @@ pydantic_settings==2.1.0
|
||||
pymongo==4.6.1
|
||||
PyPDF2==3.0.1
|
||||
python-dotenv==1.0.1
|
||||
qdrant-client==1.7.3
|
||||
redis==5.0.1
|
||||
Requests==2.31.0
|
||||
retry==0.9.2
|
||||
|
47
application/vectorstore/qdrant.py
Normal file
47
application/vectorstore/qdrant.py
Normal file
@ -0,0 +1,47 @@
|
||||
from langchain_community.vectorstores.qdrant import Qdrant
|
||||
from application.vectorstore.base import BaseVectorStore
|
||||
from application.core.settings import settings
|
||||
from qdrant_client import models
|
||||
|
||||
|
||||
class QdrantStore(BaseVectorStore):
|
||||
def __init__(self, path: str = "", embeddings_key: str = "embeddings"):
|
||||
self._filter = models.Filter(
|
||||
must=[
|
||||
models.FieldCondition(
|
||||
key="metadata.store",
|
||||
match=models.MatchValue(value=path.replace("application/indexes/", "").rstrip("/")),
|
||||
)
|
||||
]
|
||||
)
|
||||
|
||||
self._docsearch = Qdrant.construct_instance(
|
||||
["TEXT_TO_OBTAIN_EMBEDDINGS_DIMENSION"],
|
||||
embedding=self._get_embeddings(settings.EMBEDDINGS_NAME, embeddings_key),
|
||||
collection_name=settings.QDRANT_COLLECTION_NAME,
|
||||
location=settings.QDRANT_LOCATION,
|
||||
url=settings.QDRANT_URL,
|
||||
port=settings.QDRANT_PORT,
|
||||
grpc_port=settings.QDRANT_GRPC_PORT,
|
||||
https=settings.QDRANT_HTTPS,
|
||||
prefer_grpc=settings.QDRANT_PREFER_GRPC,
|
||||
api_key=settings.QDRANT_API_KEY,
|
||||
prefix=settings.QDRANT_PREFIX,
|
||||
timeout=settings.QDRANT_TIMEOUT,
|
||||
path=settings.QDRANT_PATH,
|
||||
distance_func=settings.QDRANT_DISTANCE_FUNC,
|
||||
)
|
||||
|
||||
def search(self, *args, **kwargs):
|
||||
return self._docsearch.similarity_search(filter=self._filter, *args, **kwargs)
|
||||
|
||||
def add_texts(self, *args, **kwargs):
|
||||
return self._docsearch.add_texts(*args, **kwargs)
|
||||
|
||||
def save_local(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
def delete_index(self, *args, **kwargs):
|
||||
return self._docsearch.client.delete(
|
||||
collection_name=settings.QDRANT_COLLECTION_NAME, points_selector=self._filter
|
||||
)
|
@ -1,13 +1,15 @@
|
||||
from application.vectorstore.faiss import FaissStore
|
||||
from application.vectorstore.elasticsearch import ElasticsearchStore
|
||||
from application.vectorstore.mongodb import MongoDBVectorStore
|
||||
from application.vectorstore.qdrant import QdrantStore
|
||||
|
||||
|
||||
class VectorCreator:
|
||||
vectorstores = {
|
||||
'faiss': FaissStore,
|
||||
'elasticsearch':ElasticsearchStore,
|
||||
'mongodb': MongoDBVectorStore,
|
||||
"faiss": FaissStore,
|
||||
"elasticsearch": ElasticsearchStore,
|
||||
"mongodb": MongoDBVectorStore,
|
||||
"qdrant": QdrantStore,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
@ -15,4 +17,4 @@ class VectorCreator:
|
||||
vectorstore_class = cls.vectorstores.get(type.lower())
|
||||
if not vectorstore_class:
|
||||
raise ValueError(f"No vectorstore class found for type {type}")
|
||||
return vectorstore_class(*args, **kwargs)
|
||||
return vectorstore_class(*args, **kwargs)
|
||||
|
Loading…
Reference in New Issue
Block a user