langchain/tests/integration_tests/vectorstores/test_pinecone.py

"""Test Pinecone functionality."""
import os

import pinecone

from langchain.docstore.document import Document
from langchain.vectorstores.pinecone import Pinecone
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings

# Credentials are taken from the environment when present so the suite can run
# without editing this file; the original placeholder strings are kept as
# fallbacks, so behavior is unchanged when the variables are not set.
pinecone.init(
    api_key=os.environ.get("PINECONE_API_KEY", "YOUR_API_KEY"),
    environment=os.environ.get("PINECONE_ENVIRONMENT", "YOUR_ENV"),
)

# If the index already exists, delete it so every run starts from the same
# state. This is deliberately best effort: a failed delete (for example when
# there is nothing to delete) must not abort the import of this module.
try:
    pinecone.delete_index("langchain-demo")
except Exception:
    pass
index = pinecone.Index("langchain-demo")


def test_pinecone() -> None:
    """Build a store from plain texts and check the single best hit for "foo"."""
    store = Pinecone.from_texts(
        ["foo", "bar", "baz"],
        FakeEmbeddings(),
        index_name="langchain-demo",
        namespace="test",
    )
    # Query the same namespace the texts were written to.
    hits = store.similarity_search("foo", k=1, namespace="test")
    expected = [Document(page_content="foo")]
    assert hits == expected


def test_pinecone_with_metadatas() -> None:
    """Check that per-text metadata is returned with the matching document."""
    corpus = ["foo", "bar", "baz"]
    # One {"page": n} record per text, numbered by position in the corpus.
    page_info = [{"page": position} for position, _ in enumerate(corpus)]
    store = Pinecone.from_texts(
        corpus,
        FakeEmbeddings(),
        index_name="langchain-demo",
        metadatas=page_info,
        namespace="test-metadata",
    )
    hits = store.similarity_search("foo", k=1, namespace="test-metadata")
    expected = [Document(page_content="foo", metadata={"page": 0})]
    assert hits == expected


def test_pinecone_with_scores() -> None:
    """Test scored search returns all documents with decreasing scores."""
    corpus = ["foo", "bar", "baz"]
    page_info = [{"page": position} for position, _ in enumerate(corpus)]
    store = Pinecone.from_texts(
        corpus,
        FakeEmbeddings(),
        index_name="langchain-demo",
        metadatas=page_info,
        namespace="test-metadata-score",
    )
    scored_hits = store.similarity_search_with_score(
        "foo", k=3, namespace="test-metadata-score"
    )
    # Each hit is indexed as (document, score); split into parallel lists.
    docs = []
    scores = []
    for hit in scored_hits:
        docs.append(hit[0])
        scores.append(hit[1])
    expected_docs = [
        Document(page_content=text, metadata={"page": page})
        for page, text in enumerate(corpus)
    ]
    assert docs == expected_docs
    # Scores must be strictly decreasing from the first hit to the last.
    assert scores[0] > scores[1] > scores[2]


def test_pinecone_with_namespaces() -> None:
    """Test that namespaces are properly handled."""
    # Populate two namespaces of the same index with disjoint texts.
    texts_by_namespace = {
        "test-namespace": ["foo", "bar", "baz"],
        "test-namespace2": ["foo2", "bar2", "baz2"],
    }
    for namespace, texts in texts_by_namespace.items():
        metadatas = [{"page": i} for i in range(len(texts))]
        Pinecone.from_texts(
            texts,
            FakeEmbeddings(),
            index_name="langchain-demo",
            metadatas=metadatas,
            namespace=namespace,
        )

    # Attach to the existing index with a namespace given only at construction
    # time; the search below passes no namespace of its own.
    docsearch = Pinecone.from_existing_index(
        "langchain-demo", embedding=FakeEmbeddings(), namespace="test-namespace"
    )
    # k=6 matches the total number of texts written above, so results leaking
    # in from the other namespace would show up in the output.
    output = docsearch.similarity_search("foo", k=6)
    # check that we don't get results from the other namespace
    page_contents = [o.page_content for o in output]
    assert set(page_contents) == {"foo", "bar", "baz"}
Add pinecone integration test (#911) Basic integration test for pinecone 2023-02-07 02:13:35 +00:00			`"""Test Pinecone functionality."""`
			`import pinecone`

			`from langchain.docstore.document import Document`
			`from langchain.vectorstores.pinecone import Pinecone`
			`from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings`

			`pinecone.init(api_key="YOUR_API_KEY", environment="YOUR_ENV")`

use namespace argument in Pinecone constructor (#1757) Fix #1756 Use the `namespace` argument of `Pinecone.from_existing_index` to set the default value of `namespace` for other methods. Leads to more expected behavior and easier integration in chains. For the test, I've added a line to delete and rebuild the `langchain-demo` index at the beginning of the test. I'm not 100% sure if it's a good idea but it makes the test reproducible. 2023-03-19 02:55:38 +00:00			`# if the index already exists, delete it`
			`try:`
			`pinecone.delete_index("langchain-demo")`
			`except Exception:`
			`pass`
Add pinecone integration test (#911) Basic integration test for pinecone 2023-02-07 02:13:35 +00:00			`index = pinecone.Index("langchain-demo")`


			`def test_pinecone() -> None:`
			`"""Test end to end construction and search."""`
			`texts = ["foo", "bar", "baz"]`
			`docsearch = Pinecone.from_texts(`
			`texts, FakeEmbeddings(), index_name="langchain-demo", namespace="test"`
			`)`
			`output = docsearch.similarity_search("foo", k=1, namespace="test")`
			`assert output == [Document(page_content="foo")]`


			`def test_pinecone_with_metadatas() -> None:`
			`"""Test end to end construction and search."""`
			`texts = ["foo", "bar", "baz"]`
			`metadatas = [{"page": i} for i in range(len(texts))]`
			`docsearch = Pinecone.from_texts(`
			`texts,`
			`FakeEmbeddings(),`
			`index_name="langchain-demo",`
			`metadatas=metadatas,`
			`namespace="test-metadata",`
			`)`
			`output = docsearch.similarity_search("foo", k=1, namespace="test-metadata")`
			`assert output == [Document(page_content="foo", metadata={"page": 0})]`


			`def test_pinecone_with_scores() -> None:`
			`"""Test end to end construction and search with scores and IDs."""`
			`texts = ["foo", "bar", "baz"]`
			`metadatas = [{"page": i} for i in range(len(texts))]`
			`docsearch = Pinecone.from_texts(`
			`texts,`
			`FakeEmbeddings(),`
			`index_name="langchain-demo",`
			`metadatas=metadatas,`
			`namespace="test-metadata-score",`
			`)`
			`output = docsearch.similarity_search_with_score(`
			`"foo", k=3, namespace="test-metadata-score"`
			`)`
			`docs = [o[0] for o in output]`
			`scores = [o[1] for o in output]`
			`assert docs == [`
			`Document(page_content="foo", metadata={"page": 0}),`
			`Document(page_content="bar", metadata={"page": 1}),`
			`Document(page_content="baz", metadata={"page": 2}),`
			`]`
			`assert scores[0] > scores[1] > scores[2]`
use namespace argument in Pinecone constructor (#1757) Fix #1756 Use the `namespace` argument of `Pinecone.from_existing_index` to set the default value of `namespace` for other methods. Leads to more expected behavior and easier integration in chains. For the test, I've added a line to delete and rebuild the `langchain-demo` index at the beginning of the test. I'm not 100% sure if it's a good idea but it makes the test reproducible. 2023-03-19 02:55:38 +00:00

			`def test_pinecone_with_namespaces() -> None:`
			`"Test that namespaces are properly handled." ""`
			`# Create two indexes with the same name but different namespaces`
			`texts = ["foo", "bar", "baz"]`
			`metadatas = [{"page": i} for i in range(len(texts))]`
			`Pinecone.from_texts(`
			`texts,`
			`FakeEmbeddings(),`
			`index_name="langchain-demo",`
			`metadatas=metadatas,`
			`namespace="test-namespace",`
			`)`

			`texts = ["foo2", "bar2", "baz2"]`
			`metadatas = [{"page": i} for i in range(len(texts))]`
			`Pinecone.from_texts(`
			`texts,`
			`FakeEmbeddings(),`
			`index_name="langchain-demo",`
			`metadatas=metadatas,`
			`namespace="test-namespace2",`
			`)`

			`# Search with namespace`
			`docsearch = Pinecone.from_existing_index(`
			`"langchain-demo", embedding=FakeEmbeddings(), namespace="test-namespace"`
			`)`
			`output = docsearch.similarity_search("foo", k=6)`
			`# check that we don't get results from the other namespace`
			`page_contents = [o.page_content for o in output]`
			`assert set(page_contents) == set(["foo", "bar", "baz"])`