From 3701b2901e76f2f97239c2152a6a7d01754fb666 Mon Sep 17 00:00:00 2001
From: LeoGrin <45738728+LeoGrin@users.noreply.github.com>
Date: Sun, 19 Mar 2023 03:55:38 +0100
Subject: [PATCH] use namespace argument in Pinecone constructor (#1757)

Fix #1756

Use the `namespace` argument of `Pinecone.from_existing_index` to set
the default value of `namespace` for other methods. This leads to more
predictable behavior and easier integration in chains.

For the test, I've added a line to delete and rebuild the
`langchain-demo` index at the beginning of the test. I'm not 100% sure
if it's a good idea but it makes the test reproducible.
---
 langchain/vectorstores/pinecone.py            | 12 +++++-
 .../vectorstores/test_pinecone.py             | 38 +++++++++++++++++++
 2 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/langchain/vectorstores/pinecone.py b/langchain/vectorstores/pinecone.py
index 6dfef7b7..7983ba45 100644
--- a/langchain/vectorstores/pinecone.py
+++ b/langchain/vectorstores/pinecone.py
@@ -32,6 +32,7 @@ class Pinecone(VectorStore):
         index: Any,
         embedding_function: Callable,
         text_key: str,
+        namespace: Optional[str] = None,
     ):
         """Initialize with Pinecone client."""
         try:
@@ -49,6 +50,7 @@ class Pinecone(VectorStore):
         self._index = index
         self._embedding_function = embedding_function
         self._text_key = text_key
+        self._namespace = namespace
 
     def add_texts(
         self,
@@ -71,6 +73,8 @@ class Pinecone(VectorStore):
             List of ids from adding the texts into the vectorstore.
 
         """
+        if namespace is None:
+            namespace = self._namespace
         # Embed and create the documents
         docs = []
         ids = ids or [str(uuid.uuid4()) for _ in texts]
@@ -101,6 +105,8 @@ class Pinecone(VectorStore):
         Returns:
             List of Documents most similar to the query and score for each
         """
+        if namespace is None:
+            namespace = self._namespace
         query_obj = self._embedding_function(query)
         docs = []
         results = self._index.query(
@@ -135,6 +141,8 @@ class Pinecone(VectorStore):
         Returns:
             List of Documents most similar to the query and score for each
         """
+        if namespace is None:
+            namespace = self._namespace
         query_obj = self._embedding_function(query)
         docs = []
         results = self._index.query(
@@ -222,7 +230,7 @@ class Pinecone(VectorStore):
                 index = pinecone.Index(_index_name)
             # upsert to Pinecone
             index.upsert(vectors=list(to_upsert), namespace=namespace)
-        return cls(index, embedding.embed_query, text_key)
+        return cls(index, embedding.embed_query, text_key, namespace)
 
     @classmethod
     def from_existing_index(
@@ -242,5 +250,5 @@ class Pinecone(VectorStore):
             )
 
         return cls(
-            pinecone.Index(index_name, namespace), embedding.embed_query, text_key
+            pinecone.Index(index_name), embedding.embed_query, text_key, namespace
         )
diff --git a/tests/integration_tests/vectorstores/test_pinecone.py b/tests/integration_tests/vectorstores/test_pinecone.py
index 2c92c501..bcfe4104 100644
--- a/tests/integration_tests/vectorstores/test_pinecone.py
+++ b/tests/integration_tests/vectorstores/test_pinecone.py
@@ -7,6 +7,11 @@ from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
 
 pinecone.init(api_key="YOUR_API_KEY", environment="YOUR_ENV")
 
+# if the index already exists, delete it
+try:
+    pinecone.delete_index("langchain-demo")
+except Exception:
+    pass
 index = pinecone.Index("langchain-demo")
 
 
@@ -57,3 +62,36 @@ def test_pinecone_with_scores() -> None:
         Document(page_content="baz", metadata={"page": 2}),
     ]
     assert scores[0] > scores[1] > scores[2]
+
+
+def test_pinecone_with_namespaces() -> None:
+    "Test that namespaces are properly handled." ""
+    # Populate two different namespaces of the same index
+    texts = ["foo", "bar", "baz"]
+    metadatas = [{"page": i} for i in range(len(texts))]
+    Pinecone.from_texts(
+        texts,
+        FakeEmbeddings(),
+        index_name="langchain-demo",
+        metadatas=metadatas,
+        namespace="test-namespace",
+    )
+
+    texts = ["foo2", "bar2", "baz2"]
+    metadatas = [{"page": i} for i in range(len(texts))]
+    Pinecone.from_texts(
+        texts,
+        FakeEmbeddings(),
+        index_name="langchain-demo",
+        metadatas=metadatas,
+        namespace="test-namespace2",
+    )
+
+    # Search with namespace
+    docsearch = Pinecone.from_existing_index(
+        "langchain-demo", embedding=FakeEmbeddings(), namespace="test-namespace"
+    )
+    output = docsearch.similarity_search("foo", k=6)
+    # check that we don't get results from the other namespace
+    page_contents = [o.page_content for o in output]
+    assert set(page_contents) == set(["foo", "bar", "baz"])