|
|
|
@ -233,66 +233,33 @@ class SKLearnVectorStore(VectorStore):
|
|
|
|
|
return list(zip(neigh_idxs[0], neigh_dists[0]))
|
|
|
|
|
|
|
|
|
|
def similarity_search_with_score(
|
|
|
|
|
self,
|
|
|
|
|
query: str,
|
|
|
|
|
*,
|
|
|
|
|
k: int = DEFAULT_K,
|
|
|
|
|
fetch_k: int = DEFAULT_FETCH_K,
|
|
|
|
|
filter: Optional[Dict[str, Any]] = None,
|
|
|
|
|
**kwargs: Any,
|
|
|
|
|
self, query: str, *, k: int = DEFAULT_K, **kwargs: Any
|
|
|
|
|
) -> List[Tuple[Document, float]]:
|
|
|
|
|
query_embedding = self._embedding_function.embed_query(query)
|
|
|
|
|
indices_dists = self._similarity_index_search_with_score(
|
|
|
|
|
query_embedding, k=fetch_k, **kwargs
|
|
|
|
|
query_embedding, k=k, **kwargs
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
docs: List[Tuple[Document, float]] = []
|
|
|
|
|
for idx, dist in indices_dists:
|
|
|
|
|
doc = (
|
|
|
|
|
return [
|
|
|
|
|
(
|
|
|
|
|
Document(
|
|
|
|
|
page_content=self._texts[idx],
|
|
|
|
|
metadata={"id": self._ids[idx], **self._metadatas[idx]},
|
|
|
|
|
),
|
|
|
|
|
dist,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
if filter is None:
|
|
|
|
|
docs.append(doc)
|
|
|
|
|
else:
|
|
|
|
|
filter = {
|
|
|
|
|
key: [value] if not isinstance(value, list) else value
|
|
|
|
|
for key, value in filter.items()
|
|
|
|
|
}
|
|
|
|
|
if all(
|
|
|
|
|
doc[0].metadata.get(key) in value for key, value in filter.items()
|
|
|
|
|
):
|
|
|
|
|
docs.append(doc)
|
|
|
|
|
return docs[:k]
|
|
|
|
|
for idx, dist in indices_dists
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
def similarity_search(
|
|
|
|
|
self,
|
|
|
|
|
query: str,
|
|
|
|
|
k: int = DEFAULT_K,
|
|
|
|
|
fetch_k: int = DEFAULT_FETCH_K,
|
|
|
|
|
filter: Optional[Dict[str, Any]] = None,
|
|
|
|
|
**kwargs: Any,
|
|
|
|
|
self, query: str, k: int = DEFAULT_K, **kwargs: Any
|
|
|
|
|
) -> List[Document]:
|
|
|
|
|
docs_scores = self.similarity_search_with_score(
|
|
|
|
|
query, k=k, fetch_k=fetch_k, filter=filter, **kwargs
|
|
|
|
|
)
|
|
|
|
|
docs_scores = self.similarity_search_with_score(query, k=k, **kwargs)
|
|
|
|
|
return [doc for doc, _ in docs_scores]
|
|
|
|
|
|
|
|
|
|
def _similarity_search_with_relevance_scores(
|
|
|
|
|
self,
|
|
|
|
|
query: str,
|
|
|
|
|
k: int = DEFAULT_K,
|
|
|
|
|
fetch_k: int = DEFAULT_FETCH_K,
|
|
|
|
|
filter: Optional[Dict[str, Any]] = None,
|
|
|
|
|
**kwargs: Any,
|
|
|
|
|
self, query: str, k: int = DEFAULT_K, **kwargs: Any
|
|
|
|
|
) -> List[Tuple[Document, float]]:
|
|
|
|
|
docs_dists = self.similarity_search_with_score(
|
|
|
|
|
query, k=k, fetch_k=fetch_k, filter=filter, **kwargs
|
|
|
|
|
)
|
|
|
|
|
docs_dists = self.similarity_search_with_score(query, k=k, **kwargs)
|
|
|
|
|
docs, dists = zip(*docs_dists)
|
|
|
|
|
scores = [1 / math.exp(dist) for dist in dists]
|
|
|
|
|
return list(zip(list(docs), scores))
|
|
|
|
@ -303,7 +270,6 @@ class SKLearnVectorStore(VectorStore):
|
|
|
|
|
k: int = DEFAULT_K,
|
|
|
|
|
fetch_k: int = DEFAULT_FETCH_K,
|
|
|
|
|
lambda_mult: float = 0.5,
|
|
|
|
|
filter: Optional[Dict[str, Any]] = None,
|
|
|
|
|
**kwargs: Any,
|
|
|
|
|
) -> List[Document]:
|
|
|
|
|
"""Return docs selected using the maximal marginal relevance.
|
|
|
|
@ -317,7 +283,6 @@ class SKLearnVectorStore(VectorStore):
|
|
|
|
|
of diversity among the results with 0 corresponding
|
|
|
|
|
to maximum diversity and 1 to minimum diversity.
|
|
|
|
|
Defaults to 0.5.
|
|
|
|
|
filter: (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
|
|
|
|
|
Returns:
|
|
|
|
|
List of Documents selected by maximal marginal relevance.
|
|
|
|
|
"""
|
|
|
|
@ -329,28 +294,17 @@ class SKLearnVectorStore(VectorStore):
|
|
|
|
|
mmr_selected = maximal_marginal_relevance(
|
|
|
|
|
self._np.array(embedding, dtype=self._np.float32),
|
|
|
|
|
result_embeddings,
|
|
|
|
|
k=fetch_k,
|
|
|
|
|
k=k,
|
|
|
|
|
lambda_mult=lambda_mult,
|
|
|
|
|
)
|
|
|
|
|
mmr_indices = [indices[i] for i in mmr_selected]
|
|
|
|
|
|
|
|
|
|
docs = []
|
|
|
|
|
for idx in mmr_indices:
|
|
|
|
|
doc = Document(
|
|
|
|
|
return [
|
|
|
|
|
Document(
|
|
|
|
|
page_content=self._texts[idx],
|
|
|
|
|
metadata={"id": self._ids[idx], **self._metadatas[idx]},
|
|
|
|
|
)
|
|
|
|
|
if filter is None:
|
|
|
|
|
docs.append(doc)
|
|
|
|
|
else:
|
|
|
|
|
filter = {
|
|
|
|
|
key: [value] if not isinstance(value, list) else value
|
|
|
|
|
for key, value in filter.items()
|
|
|
|
|
}
|
|
|
|
|
if all(doc.metadata.get(key) in value for key, value in filter.items()):
|
|
|
|
|
docs.append(doc)
|
|
|
|
|
|
|
|
|
|
return docs[:k]
|
|
|
|
|
for idx in mmr_indices
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
def max_marginal_relevance_search(
|
|
|
|
|
self,
|
|
|
|
@ -358,7 +312,6 @@ class SKLearnVectorStore(VectorStore):
|
|
|
|
|
k: int = DEFAULT_K,
|
|
|
|
|
fetch_k: int = DEFAULT_FETCH_K,
|
|
|
|
|
lambda_mult: float = 0.5,
|
|
|
|
|
filter: Optional[Dict[str, Any]] = None,
|
|
|
|
|
**kwargs: Any,
|
|
|
|
|
) -> List[Document]:
|
|
|
|
|
"""Return docs selected using the maximal marginal relevance.
|
|
|
|
@ -372,7 +325,6 @@ class SKLearnVectorStore(VectorStore):
|
|
|
|
|
of diversity among the results with 0 corresponding
|
|
|
|
|
to maximum diversity and 1 to minimum diversity.
|
|
|
|
|
Defaults to 0.5.
|
|
|
|
|
filter: (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
|
|
|
|
|
Returns:
|
|
|
|
|
List of Documents selected by maximal marginal relevance.
|
|
|
|
|
"""
|
|
|
|
@ -383,7 +335,7 @@ class SKLearnVectorStore(VectorStore):
|
|
|
|
|
|
|
|
|
|
embedding = self._embedding_function.embed_query(query)
|
|
|
|
|
docs = self.max_marginal_relevance_search_by_vector(
|
|
|
|
|
embedding, k, fetch_k, lambda_mul=lambda_mult, filter=filter, **kwargs
|
|
|
|
|
embedding, k, fetch_k, lambda_mul=lambda_mult
|
|
|
|
|
)
|
|
|
|
|
return docs
|
|
|
|
|
|
|
|
|
|