Harrison/faiss score (#6341)

Co-authored-by: Frank Stein <16441059+simonfromla@users.noreply.github.com>
Co-authored-by: Sims Juju <sims@Ju.lan>
This commit is contained in:
Harrison Chase 2023-06-17 11:00:47 -07:00 committed by GitHub
parent 42a28ac1ba
commit 61e4a1adf9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 26 additions and 6 deletions

View File

@ -34,8 +34,11 @@ class ContextualCompressionRetriever(BaseRetriever, BaseModel):
Sequence of relevant documents Sequence of relevant documents
""" """
docs = self.base_retriever.get_relevant_documents(query) docs = self.base_retriever.get_relevant_documents(query)
compressed_docs = self.base_compressor.compress_documents(docs, query) if docs:
return list(compressed_docs) compressed_docs = self.base_compressor.compress_documents(docs, query)
return list(compressed_docs)
else:
return []
async def aget_relevant_documents(self, query: str) -> List[Document]: async def aget_relevant_documents(self, query: str) -> List[Document]:
"""Get documents relevant for a query. """Get documents relevant for a query.
@ -47,5 +50,10 @@ class ContextualCompressionRetriever(BaseRetriever, BaseModel):
List of relevant documents List of relevant documents
""" """
docs = await self.base_retriever.aget_relevant_documents(query) docs = await self.base_retriever.aget_relevant_documents(query)
compressed_docs = await self.base_compressor.acompress_documents(docs, query) if docs:
return list(compressed_docs) compressed_docs = await self.base_compressor.acompress_documents(
docs, query
)
return list(compressed_docs)
else:
return []

View File

@ -159,8 +159,8 @@ class VectorStore(ABC):
] ]
if len(docs_and_similarities) == 0: if len(docs_and_similarities) == 0:
warnings.warn( warnings.warn(
f"No relevant docs were retrieved using the relevance score\ "No relevant docs were retrieved using the relevance score"
threshold {score_threshold}" f" threshold {score_threshold}"
) )
return docs_and_similarities return docs_and_similarities

View File

@ -185,6 +185,7 @@ class FAISS(VectorStore):
k: int = 4, k: int = 4,
filter: Optional[Dict[str, Any]] = None, filter: Optional[Dict[str, Any]] = None,
fetch_k: int = 20, fetch_k: int = 20,
**kwargs: Any,
) -> List[Tuple[Document, float]]: ) -> List[Tuple[Document, float]]:
"""Return docs most similar to query. """Return docs most similar to query.
@ -194,6 +195,9 @@ class FAISS(VectorStore):
filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None. filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None.
fetch_k: (Optional[int]) Number of Documents to fetch before filtering. fetch_k: (Optional[int]) Number of Documents to fetch before filtering.
Defaults to 20. Defaults to 20.
**kwargs: kwargs to be passed to similarity search. Can include:
score_threshold: Optional, a floating point value between 0 and 1 to
filter the resulting set of retrieved docs
Returns: Returns:
List of documents most similar to the query text and L2 distance List of documents most similar to the query text and L2 distance
@ -218,6 +222,14 @@ class FAISS(VectorStore):
docs.append((doc, scores[0][j])) docs.append((doc, scores[0][j]))
else: else:
docs.append((doc, scores[0][j])) docs.append((doc, scores[0][j]))
score_threshold = kwargs.get("score_threshold")
if score_threshold is not None:
docs = [
(doc, similarity)
for doc, similarity in docs
if similarity >= score_threshold
]
return docs[:k] return docs[:k]
def similarity_search_with_score( def similarity_search_with_score(