From ff19a62afc2c8d6d9e705bd0af5ffad426263f49 Mon Sep 17 00:00:00 2001
From: Rui Ramos
Date: Mon, 13 Nov 2023 19:47:38 +0000
Subject: [PATCH] Fix Pinecone cosine relevance score (#8920)

Fixes: #8207

Description: Pinecone returns scores (not distances) with cosine
similarity. According to the docs the values lie in [-1, 1], although I
could never reproduce negative values.

This PR ensures that the ordering of the score returned from Pinecone is
preserved (the score is rescaled from [-1, 1] to [0, 1]) rather than
inverted, so the most relevant documents can be filtered (e.g., when
using similarity thresholds).

I'll leave this as a draft PR as I couldn't run the tests (my pinecone
account might not be enough - some errors were being thrown around
namespaces), so hopefully someone who _can_ will pick this up.

Maintainers: @rlancemartin, @eyurtsev

---------

Co-authored-by: Erick Friis
---
 libs/langchain/langchain/vectorstores/pinecone.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libs/langchain/langchain/vectorstores/pinecone.py b/libs/langchain/langchain/vectorstores/pinecone.py
index 1489968afb..b39ab9c081 100644
--- a/libs/langchain/langchain/vectorstores/pinecone.py
+++ b/libs/langchain/langchain/vectorstores/pinecone.py
@@ -250,6 +250,11 @@ class Pinecone(VectorStore):
                 "(dot product), or euclidean"
             )
 
+    @staticmethod
+    def _cosine_relevance_score_fn(score: float) -> float:
+        """Pinecone returns cosine similarity scores between [-1,1]"""
+        return (score + 1) / 2
+
     def max_marginal_relevance_search_by_vector(
         self,
         embedding: List[float],