mirror of
https://github.com/hwchase17/langchain
synced 2024-10-29 17:07:25 +00:00
3701b2901e
Fix #1756: Use the `namespace` argument of `Pinecone.from_existing_index` to set the default value of `namespace` for other methods. Leads to more expected behavior and easier integration in chains. For the test, I've added a line to delete and rebuild the `langchain-demo` index at the beginning of the test. I'm not 100% sure if it's a good idea but it makes the test reproducible.
98 lines
3.2 KiB
Python
98 lines
3.2 KiB
Python
"""Test Pinecone functionality."""
|
|
import pinecone
|
|
|
|
from langchain.docstore.document import Document
|
|
from langchain.vectorstores.pinecone import Pinecone
|
|
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
|
|
|
|
# Configure the Pinecone client. The key and environment below are
# placeholders and must be replaced with real credentials before running.
pinecone.init(
    api_key="YOUR_API_KEY",
    environment="YOUR_ENV",
)

# Start each run from a clean slate: drop any "langchain-demo" index left over
# from an earlier run. Deletion is best-effort, so any failure (for example,
# the index not existing yet) is deliberately ignored.
try:
    pinecone.delete_index("langchain-demo")
except Exception:
    pass

# Module-level handle to the demo index.
index = pinecone.Index("langchain-demo")
|
|
|
|
|
|
def test_pinecone() -> None:
    """Build a store from raw texts and check the top hit for a query."""
    namespace = "test"
    store = Pinecone.from_texts(
        ["foo", "bar", "baz"],
        FakeEmbeddings(),
        index_name="langchain-demo",
        namespace=namespace,
    )

    # Ask for the single closest document within the same namespace.
    hits = store.similarity_search("foo", k=1, namespace=namespace)

    expected = [Document(page_content="foo")]
    assert hits == expected
|
|
|
|
|
|
def test_pinecone_with_metadatas() -> None:
    """Check that metadata stored with each text is returned by search."""
    namespace = "test-metadata"
    corpus = ["foo", "bar", "baz"]
    # One metadata dict per text, tagging it with its position in the corpus.
    page_tags = [{"page": position} for position, _ in enumerate(corpus)]

    store = Pinecone.from_texts(
        corpus,
        FakeEmbeddings(),
        index_name="langchain-demo",
        metadatas=page_tags,
        namespace=namespace,
    )
    hits = store.similarity_search("foo", k=1, namespace=namespace)

    # The expected metadata is spelled out as a fresh literal rather than
    # reusing ``page_tags``, so the check does not depend on what the store
    # did with the dicts it was handed.
    expected = [Document(page_content="foo", metadata={"page": 0})]
    assert hits == expected
|
|
|
|
|
|
def test_pinecone_with_scores() -> None:
    """Check the documents and score ordering of a scored similarity search."""
    namespace = "test-metadata-score"
    corpus = ["foo", "bar", "baz"]
    page_tags = [{"page": position} for position, _ in enumerate(corpus)]

    store = Pinecone.from_texts(
        corpus,
        FakeEmbeddings(),
        index_name="langchain-demo",
        metadatas=page_tags,
        namespace=namespace,
    )
    scored_hits = store.similarity_search_with_score(
        "foo", k=3, namespace=namespace
    )

    # Each result is indexable: position 0 is the document, position 1 its score.
    found_docs = [hit[0] for hit in scored_hits]
    found_scores = [hit[1] for hit in scored_hits]

    # Expected documents are built from fresh literals, in query-rank order.
    expected_docs = [
        Document(page_content=text, metadata={"page": page})
        for page, text in enumerate(("foo", "bar", "baz"))
    ]
    assert found_docs == expected_docs
    # Scores must be strictly decreasing across the three results.
    assert found_scores[0] > found_scores[1] > found_scores[2]
|
|
|
|
|
|
def test_pinecone_with_namespaces() -> None:
    """Test that namespaces are properly handled."""
    # Populate the same index under two different namespaces.
    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": i} for i in range(len(texts))]
    Pinecone.from_texts(
        texts,
        FakeEmbeddings(),
        index_name="langchain-demo",
        metadatas=metadatas,
        namespace="test-namespace",
    )

    texts = ["foo2", "bar2", "baz2"]
    metadatas = [{"page": i} for i in range(len(texts))]
    Pinecone.from_texts(
        texts,
        FakeEmbeddings(),
        index_name="langchain-demo",
        metadatas=metadatas,
        namespace="test-namespace2",
    )

    # Bind the store to the first namespace, then search WITHOUT passing a
    # namespace: the one given to ``from_existing_index`` is expected to act
    # as the default for the query.
    docsearch = Pinecone.from_existing_index(
        "langchain-demo", embedding=FakeEmbeddings(), namespace="test-namespace"
    )
    # k=6 is more than the three documents stored per namespace, so results
    # leaking in from the other namespace would show up in the output.
    output = docsearch.similarity_search("foo", k=6)

    # Check that we don't get results from the other namespace.
    page_contents = {o.page_content for o in output}
    assert page_contents == {"foo", "bar", "baz"}
|