From 659c67e896f356636438c1c9f9f365162a7d4860 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Wed, 5 Apr 2023 06:42:27 +0300 Subject: [PATCH] Don't create a new Pinecone index if it doesn't exist (#2414) If no Pinecone index is specified, or the specified one does not exist, do not create a new one. Creating new indexes can cause unexpected costs to users, and some code paths could cause a new one to be created on each invocation. This PR solves #2413. --- langchain/vectorstores/pinecone.py | 32 +++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/langchain/vectorstores/pinecone.py b/langchain/vectorstores/pinecone.py index b5479acf30..f06c5b9f05 100644 --- a/langchain/vectorstores/pinecone.py +++ b/langchain/vectorstores/pinecone.py @@ -21,7 +21,9 @@ class Pinecone(VectorStore): from langchain.embeddings.openai import OpenAIEmbeddings import pinecone - pinecone.init(api_key="***", environment="us-west1-gcp") + # The environment should be the one specified next to the API key + # in your Pinecone console + pinecone.init(api_key="***", environment="...") index = pinecone.Index("langchain-demo") embeddings = OpenAIEmbeddings() vectorstore = Pinecone(index, embeddings.embed_query, "text") @@ -184,6 +186,11 @@ class Pinecone(VectorStore): from langchain import Pinecone from langchain.embeddings import OpenAIEmbeddings + import pinecone + + # The environment should be the one specified next to the API key + # in your Pinecone console + pinecone.init(api_key="***", environment="...") embeddings = OpenAIEmbeddings() pinecone = Pinecone.from_texts( texts, @@ -198,12 +205,22 @@ class Pinecone(VectorStore): "Could not import pinecone python package. " "Please install it with `pip install pinecone-client`." 
) - _index_name = index_name or str(uuid.uuid4()) + indexes = pinecone.list_indexes() # checks if provided index exists - if _index_name in indexes: - index = pinecone.Index(_index_name) + + if index_name in indexes: + index = pinecone.Index(index_name) + elif len(indexes) == 0: + raise ValueError( + "No active indexes found in your Pinecone project, " + "are you sure you're using the right API key and environment?" + ) else: - index = None + raise ValueError( + f"Index '{index_name}' not found in your Pinecone project. " + f"Did you mean one of the following indexes: {', '.join(indexes)}" + ) + for i in range(0, len(texts), batch_size): # set end position of batch i_end = min(i + batch_size, len(texts)) @@ -224,10 +241,7 @@ class Pinecone(VectorStore): for j, line in enumerate(lines_batch): metadata[j][text_key] = line to_upsert = zip(ids_batch, embeds, metadata) - # Create index if it does not exist - if index is None: - pinecone.create_index(_index_name, dimension=len(embeds[0])) - index = pinecone.Index(_index_name) + # upsert to Pinecone index.upsert(vectors=list(to_upsert), namespace=namespace) return cls(index, embedding.embed_query, text_key, namespace)