Don't create a new Pinecone index if doesn't exist (#2414)

In the case no pinecone index is specified, or a wrong one is, do not
create a new one. Creating new indexes can cause unexpected costs to
users, and some code paths could cause a new one to be created on each
invocation.
This PR solves #2413.
doc
Adam Gutglick 1 year ago committed by GitHub
parent e519a81a05
commit 659c67e896
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -21,7 +21,9 @@ class Pinecone(VectorStore):
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone
pinecone.init(api_key="***", environment="us-west1-gcp")
# The environment should be the one specified next to the API key
# in your Pinecone console
pinecone.init(api_key="***", environment="...")
index = pinecone.Index("langchain-demo")
embeddings = OpenAIEmbeddings()
vectorstore = Pinecone(index, embeddings.embed_query, "text")
@ -184,6 +186,11 @@ class Pinecone(VectorStore):
from langchain import Pinecone
from langchain.embeddings import OpenAIEmbeddings
import pinecone
# The environment should be the one specified next to the API key
# in your Pinecone console
pinecone.init(api_key="***", environment="...")
embeddings = OpenAIEmbeddings()
pinecone = Pinecone.from_texts(
texts,
@ -198,12 +205,22 @@ class Pinecone(VectorStore):
"Could not import pinecone python package. "
"Please install it with `pip install pinecone-client`."
)
_index_name = index_name or str(uuid.uuid4())
indexes = pinecone.list_indexes() # checks if provided index exists
if _index_name in indexes:
index = pinecone.Index(_index_name)
if index_name in indexes:
index = pinecone.Index(index_name)
elif len(indexes) == 0:
raise ValueError(
"No active indexes found in your Pinecone project, "
"are you sure you're using the right API key and environment?"
)
else:
index = None
raise ValueError(
f"Index '{index_name}' not found in your Pinecone project. "
"Did you mean one of the following indexes: {', '.join(indexes)}"
)
for i in range(0, len(texts), batch_size):
# set end position of batch
i_end = min(i + batch_size, len(texts))
@ -224,10 +241,7 @@ class Pinecone(VectorStore):
for j, line in enumerate(lines_batch):
metadata[j][text_key] = line
to_upsert = zip(ids_batch, embeds, metadata)
# Create index if it does not exist
if index is None:
pinecone.create_index(_index_name, dimension=len(embeds[0]))
index = pinecone.Index(_index_name)
# upsert to Pinecone
index.upsert(vectors=list(to_upsert), namespace=namespace)
return cls(index, embedding.embed_query, text_key, namespace)

Loading…
Cancel
Save