use namespace argument in Pinecone constructor (#1757)

Fix #1756

Use the `namespace` argument of `Pinecone.from_existing_index` to set
the default value of `namespace` for other methods. This leads to more
predictable behavior and easier integration in chains.

For the test, I've added a line to delete and rebuild the
`langchain-demo` index at the beginning of the test. I'm not 100% sure
if it's a good idea but it makes the test reproducible.
tool-patch
LeoGrin 1 year ago committed by GitHub
parent 280cb4160d
commit 3701b2901e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -32,6 +32,7 @@ class Pinecone(VectorStore):
index: Any, index: Any,
embedding_function: Callable, embedding_function: Callable,
text_key: str, text_key: str,
namespace: Optional[str] = None,
): ):
"""Initialize with Pinecone client.""" """Initialize with Pinecone client."""
try: try:
@ -49,6 +50,7 @@ class Pinecone(VectorStore):
self._index = index self._index = index
self._embedding_function = embedding_function self._embedding_function = embedding_function
self._text_key = text_key self._text_key = text_key
self._namespace = namespace
def add_texts( def add_texts(
self, self,
@ -71,6 +73,8 @@ class Pinecone(VectorStore):
List of ids from adding the texts into the vectorstore. List of ids from adding the texts into the vectorstore.
""" """
if namespace is None:
namespace = self._namespace
# Embed and create the documents # Embed and create the documents
docs = [] docs = []
ids = ids or [str(uuid.uuid4()) for _ in texts] ids = ids or [str(uuid.uuid4()) for _ in texts]
@ -101,6 +105,8 @@ class Pinecone(VectorStore):
Returns: Returns:
List of Documents most similar to the query and score for each List of Documents most similar to the query and score for each
""" """
if namespace is None:
namespace = self._namespace
query_obj = self._embedding_function(query) query_obj = self._embedding_function(query)
docs = [] docs = []
results = self._index.query( results = self._index.query(
@ -135,6 +141,8 @@ class Pinecone(VectorStore):
Returns: Returns:
List of Documents most similar to the query and score for each List of Documents most similar to the query and score for each
""" """
if namespace is None:
namespace = self._namespace
query_obj = self._embedding_function(query) query_obj = self._embedding_function(query)
docs = [] docs = []
results = self._index.query( results = self._index.query(
@ -222,7 +230,7 @@ class Pinecone(VectorStore):
index = pinecone.Index(_index_name) index = pinecone.Index(_index_name)
# upsert to Pinecone # upsert to Pinecone
index.upsert(vectors=list(to_upsert), namespace=namespace) index.upsert(vectors=list(to_upsert), namespace=namespace)
return cls(index, embedding.embed_query, text_key) return cls(index, embedding.embed_query, text_key, namespace)
@classmethod @classmethod
def from_existing_index( def from_existing_index(
@ -242,5 +250,5 @@ class Pinecone(VectorStore):
) )
return cls( return cls(
pinecone.Index(index_name, namespace), embedding.embed_query, text_key pinecone.Index(index_name), embedding.embed_query, text_key, namespace
) )

@ -7,6 +7,11 @@ from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
pinecone.init(api_key="YOUR_API_KEY", environment="YOUR_ENV") pinecone.init(api_key="YOUR_API_KEY", environment="YOUR_ENV")
# if the index already exists, delete it
try:
pinecone.delete_index("langchain-demo")
except Exception:
pass
index = pinecone.Index("langchain-demo") index = pinecone.Index("langchain-demo")
@ -57,3 +62,36 @@ def test_pinecone_with_scores() -> None:
Document(page_content="baz", metadata={"page": 2}), Document(page_content="baz", metadata={"page": 2}),
] ]
assert scores[0] > scores[1] > scores[2] assert scores[0] > scores[1] > scores[2]
def test_pinecone_with_namespaces() -> None:
    """Verify that searches stay inside the store's own namespace."""
    # Fill two namespaces of the same index, one after the other.
    corpora = (
        ("test-namespace", ["foo", "bar", "baz"]),
        ("test-namespace2", ["foo2", "bar2", "baz2"]),
    )
    for target_namespace, corpus in corpora:
        page_metadata = [{"page": page} for page, _ in enumerate(corpus)]
        Pinecone.from_texts(
            corpus,
            FakeEmbeddings(),
            index_name="langchain-demo",
            metadatas=page_metadata,
            namespace=target_namespace,
        )

    # Attach to the existing index, bound to the first namespace only.
    store = Pinecone.from_existing_index(
        "langchain-demo", embedding=FakeEmbeddings(), namespace="test-namespace"
    )
    # k=6 covers every document in both namespaces, so any leak from
    # "test-namespace2" would appear in the results.
    hits = store.similarity_search("foo", k=6)
    found = {hit.page_content for hit in hits}
    assert found == {"foo", "bar", "baz"}

Loading…
Cancel
Save