use namespace argument in Pinecone constructor (#1757)

Fix #1756

Use the `namespace` argument of `Pinecone.from_existing_index` to set
the default value of `namespace` for other methods. Leads to more
expected behavior and easier integration in chains.

For the test, I've added a line to delete and rebuild the
`langchain-demo` index at the beginning of the test. I'm not 100% sure
if it's a good idea but it makes the test reproducible.
tool-patch
LeoGrin 1 year ago committed by GitHub
parent 280cb4160d
commit 3701b2901e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -32,6 +32,7 @@ class Pinecone(VectorStore):
index: Any,
embedding_function: Callable,
text_key: str,
namespace: Optional[str] = None,
):
"""Initialize with Pinecone client."""
try:
@ -49,6 +50,7 @@ class Pinecone(VectorStore):
self._index = index
self._embedding_function = embedding_function
self._text_key = text_key
self._namespace = namespace
def add_texts(
self,
@ -71,6 +73,8 @@ class Pinecone(VectorStore):
List of ids from adding the texts into the vectorstore.
"""
if namespace is None:
namespace = self._namespace
# Embed and create the documents
docs = []
ids = ids or [str(uuid.uuid4()) for _ in texts]
@ -101,6 +105,8 @@ class Pinecone(VectorStore):
Returns:
List of Documents most similar to the query and score for each
"""
if namespace is None:
namespace = self._namespace
query_obj = self._embedding_function(query)
docs = []
results = self._index.query(
@ -135,6 +141,8 @@ class Pinecone(VectorStore):
Returns:
List of Documents most similar to the query and score for each
"""
if namespace is None:
namespace = self._namespace
query_obj = self._embedding_function(query)
docs = []
results = self._index.query(
@ -222,7 +230,7 @@ class Pinecone(VectorStore):
index = pinecone.Index(_index_name)
# upsert to Pinecone
index.upsert(vectors=list(to_upsert), namespace=namespace)
return cls(index, embedding.embed_query, text_key)
return cls(index, embedding.embed_query, text_key, namespace)
@classmethod
def from_existing_index(
@ -242,5 +250,5 @@ class Pinecone(VectorStore):
)
return cls(
pinecone.Index(index_name, namespace), embedding.embed_query, text_key
pinecone.Index(index_name), embedding.embed_query, text_key, namespace
)

@ -7,6 +7,11 @@ from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
pinecone.init(api_key="YOUR_API_KEY", environment="YOUR_ENV")
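# Delete the index if it already exists so the tests start from a known state;
# any error raised by the delete (e.g. the index being absent) is ignored.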
try:
pinecone.delete_index("langchain-demo")
except Exception:
pass
index = pinecone.Index("langchain-demo")
@ -57,3 +62,36 @@ def test_pinecone_with_scores() -> None:
Document(page_content="baz", metadata={"page": 2}),
]
assert scores[0] > scores[1] > scores[2]
def test_pinecone_with_namespaces() -> None:
    """Test that namespaces are properly handled."""
    # Write two disjoint sets of texts into the same index, each under its
    # own namespace.
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": i} for i in range(len(texts))]
    Pinecone.from_texts(
        texts,
        FakeEmbeddings(),
        index_name="langchain-demo",
        metadatas=metadatas,
        namespace="test-namespace",
    )

    texts = ["foo2", "bar2", "baz2"]
    metadatas = [{"page": i} for i in range(len(texts))]
    Pinecone.from_texts(
        texts,
        FakeEmbeddings(),
        index_name="langchain-demo",
        metadatas=metadatas,
        namespace="test-namespace2",
    )

    # Open the existing index with a namespace given at construction time and
    # search WITHOUT passing a namespace to the query itself.
    docsearch = Pinecone.from_existing_index(
        "langchain-demo", embedding=FakeEmbeddings(), namespace="test-namespace"
    )
    # k=6 asks for as many results as texts were inserted above, so any
    # result coming from the other namespace would show up in the output.
    output = docsearch.similarity_search("foo", k=6)

    # Only the texts stored under "test-namespace" may be returned.
    page_contents = {doc.page_content for doc in output}
    assert page_contents == {"foo", "bar", "baz"}

Loading…
Cancel
Save