Qdrant metadata payload keys (#13001)

- **Description:** In Qdrant allows to input list of keys as the
content_payload_key to retrieve multiple fields (the generated document
will contain the dictionary {field: value} in a string),
- **Issue:** Previously we were able to retrieve only one field from the
vector database when making a search
  - **Dependencies:** 
  - **Tag maintainer:** 
  - **Twitter handle:** @jb_dlb

---------

Co-authored-by: Jean Baptiste De La Broise <jeanbaptiste.delabroise@mdpi.com>
pull/14361/head
Jean-Baptiste dlb 6 months ago committed by GitHub
parent ad6dfb6220
commit 38813d7090
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -82,8 +82,8 @@ class Qdrant(VectorStore):
qdrant = Qdrant(client, collection_name, embedding_function)
"""
CONTENT_KEY = "page_content"
METADATA_KEY = "metadata"
CONTENT_KEY = ["page_content"]
METADATA_KEY = ["metadata"]
VECTOR_NAME = None
def __init__(
@ -91,8 +91,8 @@ class Qdrant(VectorStore):
client: Any,
collection_name: str,
embeddings: Optional[Embeddings] = None,
content_payload_key: str = CONTENT_KEY,
metadata_payload_key: str = METADATA_KEY,
content_payload_key: Union[list, str] = CONTENT_KEY,
metadata_payload_key: Union[list, str] = METADATA_KEY,
distance_strategy: str = "COSINE",
vector_name: Optional[str] = VECTOR_NAME,
embedding_function: Optional[Callable] = None, # deprecated
@ -112,6 +112,12 @@ class Qdrant(VectorStore):
f"got {type(client)}"
)
if isinstance(content_payload_key, str): # Ensuring Backward compatibility
content_payload_key = [content_payload_key]
if isinstance(metadata_payload_key, str): # Ensuring Backward compatibility
metadata_payload_key = [metadata_payload_key]
if embeddings is None and embedding_function is None:
raise ValueError(
"`embeddings` value can't be None. Pass `Embeddings` instance."
@ -127,8 +133,14 @@ class Qdrant(VectorStore):
self._embeddings_function = embedding_function
self.client: qdrant_client.QdrantClient = client
self.collection_name = collection_name
self.content_payload_key = content_payload_key or self.CONTENT_KEY
self.metadata_payload_key = metadata_payload_key or self.METADATA_KEY
self.content_payload_key = (
content_payload_key if content_payload_key is not None else self.CONTENT_KEY
)
self.metadata_payload_key = (
metadata_payload_key
if metadata_payload_key is not None
else self.METADATA_KEY
)
self.vector_name = vector_name or self.VECTOR_NAME
if embedding_function is not None:
@ -1178,8 +1190,8 @@ class Qdrant(VectorStore):
path: Optional[str] = None,
collection_name: Optional[str] = None,
distance_func: str = "Cosine",
content_payload_key: str = CONTENT_KEY,
metadata_payload_key: str = METADATA_KEY,
content_payload_key: List[str] = CONTENT_KEY,
metadata_payload_key: List[str] = METADATA_KEY,
vector_name: Optional[str] = VECTOR_NAME,
batch_size: int = 64,
shard_number: Optional[int] = None,
@ -1354,8 +1366,8 @@ class Qdrant(VectorStore):
path: Optional[str] = None,
collection_name: Optional[str] = None,
distance_func: str = "Cosine",
content_payload_key: str = CONTENT_KEY,
metadata_payload_key: str = METADATA_KEY,
content_payload_key: List[str] = CONTENT_KEY,
metadata_payload_key: List[str] = METADATA_KEY,
vector_name: Optional[str] = VECTOR_NAME,
batch_size: int = 64,
shard_number: Optional[int] = None,
@ -1527,8 +1539,8 @@ class Qdrant(VectorStore):
path: Optional[str] = None,
collection_name: Optional[str] = None,
distance_func: str = "Cosine",
content_payload_key: str = CONTENT_KEY,
metadata_payload_key: str = METADATA_KEY,
content_payload_key: List[str] = CONTENT_KEY,
metadata_payload_key: List[str] = METADATA_KEY,
vector_name: Optional[str] = VECTOR_NAME,
shard_number: Optional[int] = None,
replication_factor: Optional[int] = None,
@ -1691,8 +1703,8 @@ class Qdrant(VectorStore):
path: Optional[str] = None,
collection_name: Optional[str] = None,
distance_func: str = "Cosine",
content_payload_key: str = CONTENT_KEY,
metadata_payload_key: str = METADATA_KEY,
content_payload_key: List[str] = CONTENT_KEY,
metadata_payload_key: List[str] = METADATA_KEY,
vector_name: Optional[str] = VECTOR_NAME,
shard_number: Optional[int] = None,
replication_factor: Optional[int] = None,
@ -1888,11 +1900,11 @@ class Qdrant(VectorStore):
@classmethod
def _build_payloads(
cls,
cls: Type[Qdrant],
texts: Iterable[str],
metadatas: Optional[List[dict]],
content_payload_key: str,
metadata_payload_key: str,
content_payload_key: list[str],
metadata_payload_key: list[str],
) -> List[dict]:
payloads = []
for i, text in enumerate(texts):
@ -1913,29 +1925,67 @@ class Qdrant(VectorStore):
@classmethod
def _document_from_scored_point(
cls,
cls: Type[Qdrant],
scored_point: Any,
content_payload_key: str,
metadata_payload_key: str,
content_payload_key: list[str],
metadata_payload_key: list[str],
) -> Document:
return Document(
page_content=scored_point.payload.get(content_payload_key),
metadata=scored_point.payload.get(metadata_payload_key) or {},
payload = scored_point.payload
return Qdrant._document_from_payload(
payload=payload,
content_payload_key=content_payload_key,
metadata_payload_key=metadata_payload_key,
)
@classmethod
def _document_from_scored_point_grpc(
cls,
cls: Type[Qdrant],
scored_point: Any,
content_payload_key: str,
metadata_payload_key: str,
content_payload_key: list[str],
metadata_payload_key: list[str],
) -> Document:
from qdrant_client.conversions.conversion import grpc_to_payload
payload = grpc_to_payload(scored_point.payload)
return Qdrant._document_from_payload(
payload=payload,
content_payload_key=content_payload_key,
metadata_payload_key=metadata_payload_key,
)
@classmethod
def _document_from_payload(
cls: Type[Qdrant],
payload: Any,
content_payload_key: list[str],
metadata_payload_key: list[str],
) -> Document:
if len(content_payload_key) == 1:
content = payload.get(
content_payload_key
) # Ensuring backward compatibility
elif len(content_payload_key) > 1:
content = {
content_key: payload.get(content_key)
for content_key in content_payload_key
}
content = str(content) # Ensuring str type output
else:
content = ""
if len(metadata_payload_key) == 1:
metadata = payload.get(
metadata_payload_key
) # Ensuring backward compatibility
elif len(metadata_payload_key) > 1:
metadata = {
metadata_key: payload.get(metadata_key)
for metadata_key in metadata_payload_key
}
else:
metadata = {}
return Document(
page_content=payload[content_payload_key],
metadata=payload.get(metadata_payload_key) or {},
page_content=content,
metadata=metadata,
)
def _build_condition(self, key: str, value: Any) -> List[rest.FieldCondition]:

Loading…
Cancel
Save