diff --git a/langchain/vectorstores/pinecone.py b/langchain/vectorstores/pinecone.py index cc2169ac..f9a6fe9b 100644 --- a/langchain/vectorstores/pinecone.py +++ b/langchain/vectorstores/pinecone.py @@ -1,6 +1,7 @@ """Wrapper around Pinecone vector database.""" from __future__ import annotations +import logging import uuid from typing import Any, Callable, Iterable, List, Optional, Tuple @@ -8,6 +9,8 @@ from langchain.docstore.document import Document from langchain.embeddings.base import Embeddings from langchain.vectorstores.base import VectorStore +logger = logging.getLogger(__name__) + class Pinecone(VectorStore): """Wrapper around Pinecone vector database. @@ -120,8 +123,14 @@ class Pinecone(VectorStore): ) for res in results["matches"]: metadata = res["metadata"] - text = metadata.pop(self._text_key) - docs.append((Document(page_content=text, metadata=metadata), res["score"])) + if self._text_key in metadata: + text = metadata.pop(self._text_key) + score = res["score"] + docs.append((Document(page_content=text, metadata=metadata), score)) + else: + logger.warning( + f"Found document with no `{self._text_key}` key. Skipping." + ) return docs def similarity_search( @@ -143,22 +152,10 @@ class Pinecone(VectorStore): Returns: List of Documents most similar to the query and score for each """ - if namespace is None: - namespace = self._namespace - query_obj = self._embedding_function(query) - docs = [] - results = self._index.query( - [query_obj], - top_k=k, - include_metadata=True, - namespace=namespace, - filter=filter, + docs_and_scores = self.similarity_search_with_score( + query, k=k, filter=filter, namespace=namespace, **kwargs ) - for res in results["matches"]: - metadata = res["metadata"] - text = metadata.pop(self._text_key) - docs.append(Document(page_content=text, metadata=metadata)) - return docs + return [doc for doc, _ in docs_and_scores] @classmethod def from_texts(