Don't create a new Pinecone index if one doesn't exist (#2414)

If no Pinecone index is specified, or the specified index does not
exist, do not create a new one. Creating new indexes can cause unexpected
costs for users, and some code paths could cause a new index to be created
on each invocation.
This PR solves #2413.
This commit is contained in:
Adam Gutglick 2023-04-05 06:42:27 +03:00 committed by GitHub
parent e519a81a05
commit 659c67e896
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -21,7 +21,9 @@ class Pinecone(VectorStore):
from langchain.embeddings.openai import OpenAIEmbeddings from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone import pinecone
pinecone.init(api_key="***", environment="us-west1-gcp") # The environment should be the one specified next to the API key
# in your Pinecone console
pinecone.init(api_key="***", environment="...")
index = pinecone.Index("langchain-demo") index = pinecone.Index("langchain-demo")
embeddings = OpenAIEmbeddings() embeddings = OpenAIEmbeddings()
vectorstore = Pinecone(index, embeddings.embed_query, "text") vectorstore = Pinecone(index, embeddings.embed_query, "text")
@ -184,6 +186,11 @@ class Pinecone(VectorStore):
from langchain import Pinecone from langchain import Pinecone
from langchain.embeddings import OpenAIEmbeddings from langchain.embeddings import OpenAIEmbeddings
import pinecone
# The environment should be the one specified next to the API key
# in your Pinecone console
pinecone.init(api_key="***", environment="...")
embeddings = OpenAIEmbeddings() embeddings = OpenAIEmbeddings()
pinecone = Pinecone.from_texts( pinecone = Pinecone.from_texts(
texts, texts,
@ -198,12 +205,22 @@ class Pinecone(VectorStore):
"Could not import pinecone python package. " "Could not import pinecone python package. "
"Please install it with `pip install pinecone-client`." "Please install it with `pip install pinecone-client`."
) )
_index_name = index_name or str(uuid.uuid4())
indexes = pinecone.list_indexes() # checks if provided index exists indexes = pinecone.list_indexes() # checks if provided index exists
if _index_name in indexes:
index = pinecone.Index(_index_name) if index_name in indexes:
index = pinecone.Index(index_name)
elif len(indexes) == 0:
raise ValueError(
"No active indexes found in your Pinecone project, "
"are you sure you're using the right API key and environment?"
)
else: else:
index = None raise ValueError(
f"Index '{index_name}' not found in your Pinecone project. "
"Did you mean one of the following indexes: {', '.join(indexes)}"
)
for i in range(0, len(texts), batch_size): for i in range(0, len(texts), batch_size):
# set end position of batch # set end position of batch
i_end = min(i + batch_size, len(texts)) i_end = min(i + batch_size, len(texts))
@ -224,10 +241,7 @@ class Pinecone(VectorStore):
for j, line in enumerate(lines_batch): for j, line in enumerate(lines_batch):
metadata[j][text_key] = line metadata[j][text_key] = line
to_upsert = zip(ids_batch, embeds, metadata) to_upsert = zip(ids_batch, embeds, metadata)
# Create index if it does not exist
if index is None:
pinecone.create_index(_index_name, dimension=len(embeds[0]))
index = pinecone.Index(_index_name)
# upsert to Pinecone # upsert to Pinecone
index.upsert(vectors=list(to_upsert), namespace=namespace) index.upsert(vectors=list(to_upsert), namespace=namespace)
return cls(index, embedding.embed_query, text_key, namespace) return cls(index, embedding.embed_query, text_key, namespace)