mirror of https://github.com/hwchase17/langchain
pinecone: init pkg (#16556)
<!-- Thank you for contributing to LangChain! Please title your PR "<package>: <description>", where <package> is whichever of langchain, community, core, experimental, etc. is being modified. Replace this entire comment with: - **Description:** a description of the change, - **Issue:** the issue # it fixes if applicable, - **Dependencies:** any dependencies required for this change, - **Twitter handle:** we announce bigger features on Twitter. If your PR gets announced, and you'd like a mention, we'll gladly shout you out! Please make sure your PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` from the root of the package you've modified to check this locally. See contribution guidelines for more information on how to write/run tests, lint, etc: https://python.langchain.com/docs/contributing/ If you're adding a new integration, please include: 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/docs/integrations` directory. If no one reviews your PR within a few days, please @-mention one of @baskaryan, @eyurtsev, @hwchase17. -->pull/17058/head
parent
1183769cf7
commit
6ffd5b15bc
@ -0,0 +1 @@
|
||||
__pycache__
|
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2023 LangChain, Inc.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
@ -0,0 +1,61 @@
|
||||
# Every target in this Makefile is a command rather than a file, so declare
# them all phony; otherwise a file named e.g. `lint_diff` would silently
# disable the target.
.PHONY: all format lint test tests integration_tests docker_tests help extended_tests
.PHONY: lint_diff lint_package lint_tests format_diff spell_check spell_fix check_imports

# Default target executed when no arguments are given to make.
all: help

# Path handed to pytest. Override on the command line, e.g.
#   make test TEST_FILE=tests/unit_tests/test_foo.py
TEST_FILE ?= tests/unit_tests/

# integration_tests shares the recipe below but points it at the integration suite.
integration_tests: TEST_FILE = tests/integration_tests/

# `tests` is an alias of `test`; all three targets run the same command.
test tests integration_tests:
	poetry run pytest $(TEST_FILE)
|
||||
|
||||
|
||||
######################
# LINTING AND FORMATTING
######################

# Paths handed to ruff/mypy, and the mypy cache directory. The target-specific
# assignments below narrow them for individual targets.
PYTHON_FILES=.
MYPY_CACHE=.mypy_cache
lint format: PYTHON_FILES=.
# Kept recursive (`=`) on purpose: git is only invoked when a *_diff target
# actually expands PYTHON_FILES, not on every make invocation.
lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/partners/pinecone --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$')
lint_package: PYTHON_FILES=langchain_pinecone
lint_tests: PYTHON_FILES=tests
lint_tests: MYPY_CACHE=.mypy_cache_test

# The `[ "$(PYTHON_FILES)" = "" ] ||` guards skip a tool when there is nothing
# to check (lint_diff with no changed files), instead of calling it with no
# path arguments. `mkdir -p` keeps the recipe quiet when the cache directory
# already exists.
lint lint_diff lint_package lint_tests:
	poetry run ruff .
	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES) --diff
	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff --select I $(PYTHON_FILES)
	[ "$(PYTHON_FILES)" = "" ] || { mkdir -p $(MYPY_CACHE) && poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE); }

# Rewrite files in place: formatting first, then import sorting (rule set I).
format format_diff:
	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff format $(PYTHON_FILES)
	[ "$(PYTHON_FILES)" = "" ] || poetry run ruff --select I --fix $(PYTHON_FILES)
|
||||
|
||||
# Shared codespell invocation; its settings are read from pyproject.toml.
codespell_cmd := poetry run codespell --toml pyproject.toml

# Every Python file under the package directory, collected when the Makefile is parsed.
package_py_files := $(shell find langchain_pinecone -name '*.py')

# Report spelling mistakes.
spell_check:
	$(codespell_cmd)

# Same invocation with codespell's -w flag added.
spell_fix:
	$(codespell_cmd) -w

# Hand every package file ($^ = all prerequisites) to the import checker script.
check_imports: $(package_py_files)
	poetry run python ./scripts/check_imports.py $^
|
||||
|
||||
######################
# HELP
######################

# Print a short summary of the main targets; this is what a bare `make` runs
# because `all` depends on `help`.
help:
	@echo '----'
	@echo 'check_imports                - check imports'
	@echo 'format                       - run code formatters'
	@echo 'lint                         - run linters'
	@echo 'spell_check                  - run codespell'
	@echo 'spell_fix                    - run codespell and write fixes'
	@echo 'test                         - run unit tests'
	@echo 'tests                        - run unit tests'
	@echo 'integration_tests            - run integration tests'
	@echo 'test TEST_FILE=<test_file>   - run all tests in file'
|
@ -0,0 +1,27 @@
|
||||
# langchain-pinecone
|
||||
|
||||
This package contains the LangChain integration with Pinecone.
|
||||
|
||||
## Installation
|
||||
|
||||
```bash
|
||||
pip install -U langchain-pinecone
|
||||
```
|
||||
|
||||
Then configure credentials by setting the following environment variables:
|
||||
|
||||
- `PINECONE_API_KEY`
|
||||
- `PINECONE_INDEX_NAME`
|
||||
- `PINECONE_ENVIRONMENT`
|
||||
|
||||
## Usage
|
||||
|
||||
The `Pinecone` class exposes the connection to the Pinecone vector store.
|
||||
|
||||
```python
|
||||
from langchain_pinecone import Pinecone
|
||||
|
||||
embeddings = ... # use a LangChain Embeddings class
|
||||
|
||||
vectorstore = Pinecone(embedding=embeddings)
|
||||
```
|
@ -0,0 +1,5 @@
|
||||
from langchain_pinecone.vectorstores import Pinecone
|
||||
|
||||
__all__ = [
|
||||
"Pinecone",
|
||||
]
|
@ -0,0 +1,71 @@
|
||||
from enum import Enum
|
||||
from typing import List, Union
|
||||
|
||||
import numpy as np
|
||||
import simsimd # type: ignore
|
||||
|
||||
Matrix = Union[List[List[float]], List[np.ndarray], np.ndarray]
|
||||
|
||||
|
||||
class DistanceStrategy(str, Enum):
    """Names of the distance metrics that can be used to compare vectors.

    The enum also derives from ``str``; every member's value is its own name.
    """

    EUCLIDEAN_DISTANCE = "EUCLIDEAN_DISTANCE"
    MAX_INNER_PRODUCT = "MAX_INNER_PRODUCT"
    COSINE = "COSINE"
|
||||
|
||||
|
||||
def maximal_marginal_relevance(
    query_embedding: np.ndarray,
    embedding_list: list,
    lambda_mult: float = 0.5,
    k: int = 4,
) -> List[int]:
    """Pick indices from ``embedding_list`` by maximal marginal relevance.

    The entry most similar to the query is taken first. Each following pick
    maximises ``lambda_mult * similarity_to_query - (1 - lambda_mult) *
    highest_similarity_to_an_already_picked_entry``.

    Args:
        query_embedding: Query vector; a 1-D array is expanded to a single row.
        embedding_list: Candidate embeddings.
        lambda_mult: Weight of query similarity against redundancy.
        k: Maximum number of indices to return.

    Returns:
        Indices into ``embedding_list`` in selection order; empty when ``k`` or
        the number of candidates is not positive.
    """
    wanted = min(k, len(embedding_list))
    if wanted <= 0:
        return []

    if query_embedding.ndim == 1:
        query_embedding = np.expand_dims(query_embedding, axis=0)

    relevance = cosine_similarity(query_embedding, embedding_list)[0]
    first_pick = int(np.argmax(relevance))
    picked = [first_pick]
    picked_vectors = np.array([embedding_list[first_pick]])

    while len(picked) < wanted:
        overlap = cosine_similarity(embedding_list, picked_vectors)
        top_score = -np.inf
        top_index = -1
        for candidate, query_score in enumerate(relevance):
            if candidate in picked:
                continue
            # Redundancy is the similarity to the closest already-picked vector.
            redundancy = max(overlap[candidate])
            mmr_score = lambda_mult * query_score - (1 - lambda_mult) * redundancy
            if mmr_score > top_score:
                top_score, top_index = mmr_score, candidate
        picked.append(top_index)
        picked_vectors = np.append(
            picked_vectors, [embedding_list[top_index]], axis=0
        )
    return picked
|
||||
|
||||
|
||||
def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray:
    """Row-wise cosine similarity between two equal-width matrices.

    Args:
        X: Matrix-like whose rows are vectors.
        Y: Matrix-like whose rows are vectors, with as many columns as ``X``.

    Returns:
        ``1 - simsimd.cdist(X, Y, metric="cosine")``, or an empty array when
        either input has length 0. A plain ``float`` result is wrapped in a
        one-element array.

    Raises:
        ValueError: If ``X`` and ``Y`` have a different number of columns.
    """
    if len(X) == 0 or len(Y) == 0:
        return np.array([])

    # Convert each input once, directly to the float32 arrays passed to
    # simsimd, instead of building a throw-away array for the shape check first.
    X = np.array(X, dtype=np.float32)
    Y = np.array(Y, dtype=np.float32)
    if X.shape[1] != Y.shape[1]:
        raise ValueError(
            f"Number of columns in X and Y must be the same. X has shape {X.shape} "
            f"and Y has shape {Y.shape}."
        )

    Z = 1 - simsimd.cdist(X, Y, metric="cosine")
    if isinstance(Z, float):
        return np.array([Z])
    return Z
|
@ -0,0 +1,487 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import uuid
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Callable,
|
||||
Iterable,
|
||||
List,
|
||||
Optional,
|
||||
Tuple,
|
||||
TypeVar,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
from langchain_core.documents import Document
|
||||
from langchain_core.embeddings import Embeddings
|
||||
from langchain_core.utils.iter import batch_iterate
|
||||
from langchain_core.vectorstores import VectorStore
|
||||
from pinecone import Pinecone as PineconeClient # type: ignore
|
||||
|
||||
from langchain_pinecone._utilities import DistanceStrategy, maximal_marginal_relevance
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pinecone import Index
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
VST = TypeVar("VST", bound=VectorStore)
|
||||
|
||||
|
||||
class Pinecone(VectorStore):
    """`Pinecone` vector store.

    Pairs a Pinecone index with an ``Embeddings`` object. The text of every
    document is stored in the vector's metadata under ``text_key``.

    Example:
        .. code-block:: python

            from langchain_pinecone import Pinecone
            from langchain_openai import OpenAIEmbeddings

            embeddings = OpenAIEmbeddings()
            index_name = "my-index"
            namespace = "my-namespace"
            vectorstore = Pinecone(
                index_name=index_name,
                embedding=embeddings,
                namespace=namespace,
            )
    """

    def __init__(
        self,
        # setting default params to bypass having to pass in
        # the index and embedding objects - manually throw
        # exceptions if they are not passed in or set in environment
        # (keeping param for backwards compatibility)
        index: Optional[Any] = None,
        embedding: Optional[Embeddings] = None,
        text_key: Optional[str] = "text",
        namespace: Optional[str] = None,
        distance_strategy: Optional[DistanceStrategy] = DistanceStrategy.COSINE,
        *,
        pinecone_api_key: Optional[str] = None,
        index_name: Optional[str] = None,
    ):
        """Create the store from a ready-made index or from credentials.

        Args:
            index: Existing index object. When truthy it is used as-is and
                ``pinecone_api_key`` / ``index_name`` are not consulted.
            embedding: Embeddings object used for documents and queries.
            text_key: Metadata key under which the document text is stored.
            namespace: Namespace used when a method is not given one.
            distance_strategy: Selects the relevance score function.
            pinecone_api_key: API key; falls back to ``PINECONE_API_KEY``.
            index_name: Index name; falls back to ``PINECONE_INDEX_NAME``.

        Raises:
            ValueError: If ``embedding`` or ``text_key`` is None, or if no
                index is given and the API key or index name is missing.
        """
        if embedding is None:
            raise ValueError("Embedding must be provided")
        self._embedding = embedding
        if text_key is None:
            raise ValueError("Text key must be provided")
        self._text_key = text_key

        self._namespace = namespace
        self.distance_strategy = distance_strategy

        if index:
            # supports old way of initializing externally
            self._index = index
        else:
            # all internal initialization
            _pinecone_api_key = (
                pinecone_api_key or os.environ.get("PINECONE_API_KEY") or ""
            )
            if not _pinecone_api_key:
                raise ValueError(
                    "Pinecone API key must be provided in either `pinecone_api_key` "
                    "or `PINECONE_API_KEY` environment variable"
                )

            _index_name = index_name or os.environ.get("PINECONE_INDEX_NAME") or ""
            if not _index_name:
                raise ValueError(
                    "Pinecone index name must be provided in either `index_name` "
                    "or `PINECONE_INDEX_NAME` environment variable"
                )

            client = PineconeClient(api_key=_pinecone_api_key)
            self._index = client.Index(_index_name)

    @property
    def embeddings(self) -> Optional[Embeddings]:
        """Access the query embedding object if available."""
        return self._embedding

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[dict]] = None,
        ids: Optional[List[str]] = None,
        namespace: Optional[str] = None,
        batch_size: int = 32,
        embedding_chunk_size: int = 1000,
        *,
        async_req: bool = True,
        **kwargs: Any,
    ) -> List[str]:
        """Run more texts through the embeddings and add to the vectorstore.

        Upsert optimization is done by chunking the embeddings and upserting them.
        This is done to avoid memory issues and optimize using HTTP based embeddings.
        For OpenAI embeddings, use pool_threads>4 when constructing the pinecone.Index,
        embedding_chunk_size>1000 and batch_size~64 for best performance.

        Args:
            texts: Iterable of strings to add to the vectorstore.
            metadatas: Optional list of metadatas associated with the texts.
                The dicts are copied; the caller's objects are not modified.
            ids: Optional list of ids to associate with the texts. Random
                UUID4 strings are generated when omitted.
            namespace: Optional pinecone namespace to add the texts to.
                Defaults to the namespace given at construction.
            batch_size: Batch size to use when adding the texts to the vectorstore.
            embedding_chunk_size: Chunk size to use when embedding the texts.
            async_req: Forwarded to ``upsert``. When True, every returned
                handle is waited on with ``.get()`` before the next chunk.
            **kwargs: Extra keyword arguments forwarded to ``upsert``.

        Returns:
            List of ids from adding the texts into the vectorstore.

        """
        if namespace is None:
            namespace = self._namespace

        texts = list(texts)
        ids = ids or [str(uuid.uuid4()) for _ in texts]
        # Work on shallow copies so that writing the text key below does not
        # leak into the dictionaries owned by the caller.
        if metadatas:
            metadatas = [dict(metadata) for metadata in metadatas]
        else:
            metadatas = [{} for _ in texts]
        for metadata, text in zip(metadatas, texts):
            metadata[self._text_key] = text

        # For loops to avoid memory issues and optimize when using HTTP based embeddings
        # The first loop runs the embeddings, it benefits when using OpenAI embeddings
        # The second loops runs the pinecone upsert asynchronously.
        for i in range(0, len(texts), embedding_chunk_size):
            chunk_texts = texts[i : i + embedding_chunk_size]
            chunk_ids = ids[i : i + embedding_chunk_size]
            chunk_metadatas = metadatas[i : i + embedding_chunk_size]
            embeddings = self._embedding.embed_documents(chunk_texts)
            responses = [
                self._index.upsert(
                    vectors=batch,
                    namespace=namespace,
                    async_req=async_req,
                    **kwargs,
                )
                for batch in batch_iterate(
                    batch_size, zip(chunk_ids, embeddings, chunk_metadatas)
                )
            ]
            # Only asynchronous requests hand back a handle that has to be
            # waited on; a synchronous upsert has already completed here.
            if async_req:
                for res in responses:
                    res.get()

        return ids

    def similarity_search_with_score(
        self,
        query: str,
        k: int = 4,
        filter: Optional[dict] = None,
        namespace: Optional[str] = None,
    ) -> List[Tuple[Document, float]]:
        """Return pinecone documents most similar to query, along with scores.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter: Dictionary of argument(s) to filter on metadata
            namespace: Namespace to search in. Defaults to the namespace given
                at construction.

        Returns:
            List of Documents most similar to the query and score for each
        """
        return self.similarity_search_by_vector_with_score(
            self._embedding.embed_query(query), k=k, filter=filter, namespace=namespace
        )

    def similarity_search_by_vector_with_score(
        self,
        embedding: List[float],
        *,
        k: int = 4,
        filter: Optional[dict] = None,
        namespace: Optional[str] = None,
    ) -> List[Tuple[Document, float]]:
        """Return pinecone documents most similar to embedding, along with scores.

        Matches whose metadata lacks the text key are skipped with a warning.
        """

        if namespace is None:
            namespace = self._namespace
        docs = []
        results = self._index.query(
            vector=embedding,
            top_k=k,
            include_metadata=True,
            namespace=namespace,
            filter=filter,
        )
        for res in results["matches"]:
            metadata = res["metadata"]
            if self._text_key in metadata:
                # The text becomes page_content; the rest stays as metadata.
                text = metadata.pop(self._text_key)
                score = res["score"]
                docs.append((Document(page_content=text, metadata=metadata), score))
            else:
                logger.warning(
                    f"Found document with no `{self._text_key}` key. Skipping."
                )
        return docs

    def similarity_search(
        self,
        query: str,
        k: int = 4,
        filter: Optional[dict] = None,
        namespace: Optional[str] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return pinecone documents most similar to query.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            filter: Dictionary of argument(s) to filter on metadata
            namespace: Namespace to search in. Defaults to the namespace given
                at construction.

        Returns:
            List of Documents most similar to the query (scores are dropped).
        """
        docs_and_scores = self.similarity_search_with_score(
            query, k=k, filter=filter, namespace=namespace, **kwargs
        )
        return [doc for doc, _ in docs_and_scores]

    def _select_relevance_score_fn(self) -> Callable[[float], float]:
        """
        The 'correct' relevance function
        may differ depending on a few things, including:
        - the distance / similarity metric used by the VectorStore
        - the scale of your embeddings (OpenAI's are unit normed. Many others are not!)
        - embedding dimensionality
        - etc.
        """

        if self.distance_strategy == DistanceStrategy.COSINE:
            return self._cosine_relevance_score_fn
        elif self.distance_strategy == DistanceStrategy.MAX_INNER_PRODUCT:
            return self._max_inner_product_relevance_score_fn
        elif self.distance_strategy == DistanceStrategy.EUCLIDEAN_DISTANCE:
            return self._euclidean_relevance_score_fn
        else:
            raise ValueError(
                "Unknown distance strategy, must be cosine, max_inner_product "
                "(dot product), or euclidean"
            )

    @staticmethod
    def _cosine_relevance_score_fn(score: float) -> float:
        """Pinecone returns cosine similarity scores between [-1,1]"""
        # Linearly rescale from [-1, 1] to [0, 1].
        return (score + 1) / 2

    def max_marginal_relevance_search_by_vector(
        self,
        embedding: List[float],
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        filter: Optional[dict] = None,
        namespace: Optional[str] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            embedding: Embedding to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
            filter: Dictionary of argument(s) to filter on metadata.
            namespace: Namespace to search in. Defaults to the namespace given
                at construction.
        Returns:
            List of Documents selected by maximal marginal relevance.
        """
        if namespace is None:
            namespace = self._namespace
        results = self._index.query(
            vector=[embedding],
            top_k=fetch_k,
            include_values=True,
            include_metadata=True,
            namespace=namespace,
            filter=filter,
        )
        # MMR runs locally over the vectors returned with the matches.
        mmr_selected = maximal_marginal_relevance(
            np.array([embedding], dtype=np.float32),
            [item["values"] for item in results["matches"]],
            k=k,
            lambda_mult=lambda_mult,
        )
        selected = [results["matches"][i]["metadata"] for i in mmr_selected]
        return [
            Document(page_content=metadata.pop((self._text_key)), metadata=metadata)
            for metadata in selected
        ]

    def max_marginal_relevance_search(
        self,
        query: str,
        k: int = 4,
        fetch_k: int = 20,
        lambda_mult: float = 0.5,
        filter: Optional[dict] = None,
        namespace: Optional[str] = None,
        **kwargs: Any,
    ) -> List[Document]:
        """Return docs selected using the maximal marginal relevance.

        Maximal marginal relevance optimizes for similarity to query AND diversity
        among selected documents.

        Args:
            query: Text to look up documents similar to.
            k: Number of Documents to return. Defaults to 4.
            fetch_k: Number of Documents to fetch to pass to MMR algorithm.
            lambda_mult: Number between 0 and 1 that determines the degree
                        of diversity among the results with 0 corresponding
                        to maximum diversity and 1 to minimum diversity.
                        Defaults to 0.5.
            filter: Dictionary of argument(s) to filter on metadata.
            namespace: Namespace to search in. Defaults to the namespace given
                at construction.
        Returns:
            List of Documents selected by maximal marginal relevance.
        """
        embedding = self._embedding.embed_query(query)
        return self.max_marginal_relevance_search_by_vector(
            embedding, k, fetch_k, lambda_mult, filter, namespace
        )

    @classmethod
    def get_pinecone_index(
        cls,
        index_name: Optional[str],
        pool_threads: int = 4,
        *,
        pinecone_api_key: Optional[str] = None,
    ) -> Index:
        """Return a Pinecone Index instance.

        Args:
            index_name: Name of the index to use.
            pool_threads: Number of threads to use for index upsert.
            pinecone_api_key: API key; falls back to ``PINECONE_API_KEY``.
        Returns:
            Pinecone Index instance.
        Raises:
            ValueError: If the project has no indexes, or none named
                ``index_name``.
        """
        _pinecone_api_key = pinecone_api_key or os.environ.get("PINECONE_API_KEY") or ""
        client = PineconeClient(api_key=_pinecone_api_key, pool_threads=pool_threads)
        indexes = client.list_indexes()
        index_names = [i.name for i in indexes.index_list["indexes"]]

        if index_name in index_names:
            index = client.Index(index_name)
        elif len(index_names) == 0:
            raise ValueError(
                "No active indexes found in your Pinecone project, "
                "are you sure you're using the right Pinecone API key and Environment? "
                "Please double check your Pinecone dashboard."
            )
        else:
            raise ValueError(
                f"Index '{index_name}' not found in your Pinecone project. "
                f"Did you mean one of the following indexes: {', '.join(index_names)}"
            )
        return index

    @classmethod
    def from_texts(
        cls,
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        ids: Optional[List[str]] = None,
        batch_size: int = 32,
        text_key: str = "text",
        namespace: Optional[str] = None,
        index_name: Optional[str] = None,
        upsert_kwargs: Optional[dict] = None,
        pool_threads: int = 4,
        embeddings_chunk_size: int = 1000,
        **kwargs: Any,
    ) -> Pinecone:
        """Construct Pinecone wrapper from raw documents.

        This is a user friendly interface that:
            1. Embeds documents.
            2. Adds the documents to a provided Pinecone index

        This is intended to be a quick way to get started.

        The `pool_threads` affects the speed of the upsert operations.
        ``upsert_kwargs`` are forwarded to ``add_texts``; other keyword
        arguments go to the constructor.

        Example:
            .. code-block:: python

                from langchain_pinecone import Pinecone
                from langchain_openai import OpenAIEmbeddings

                # The API key is read from the PINECONE_API_KEY
                # environment variable.
                embeddings = OpenAIEmbeddings()
                vectorstore = Pinecone.from_texts(
                    texts,
                    embeddings,
                    index_name="langchain-demo"
                )
        """
        pinecone_index = cls.get_pinecone_index(index_name, pool_threads)
        pinecone = cls(pinecone_index, embedding, text_key, namespace, **kwargs)

        pinecone.add_texts(
            texts,
            metadatas=metadatas,
            ids=ids,
            namespace=namespace,
            batch_size=batch_size,
            embedding_chunk_size=embeddings_chunk_size,
            **(upsert_kwargs or {}),
        )
        return pinecone

    @classmethod
    def from_existing_index(
        cls,
        index_name: str,
        embedding: Embeddings,
        text_key: str = "text",
        namespace: Optional[str] = None,
        pool_threads: int = 4,
    ) -> Pinecone:
        """Load pinecone vectorstore from index name."""
        pinecone_index = cls.get_pinecone_index(index_name, pool_threads)
        return cls(pinecone_index, embedding, text_key, namespace)

    def delete(
        self,
        ids: Optional[List[str]] = None,
        delete_all: Optional[bool] = None,
        namespace: Optional[str] = None,
        filter: Optional[dict] = None,
        **kwargs: Any,
    ) -> None:
        """Delete by vector IDs or filter.

        The first applicable option wins: ``delete_all``, then ``ids``, then
        ``filter``.

        Args:
            ids: List of ids to delete; sent in chunks of 1000.
            delete_all: When truthy, delete everything in the namespace.
            namespace: Namespace to delete from. Defaults to the namespace
                given at construction.
            filter: Dictionary of conditions to filter vectors to delete.

        Raises:
            ValueError: If none of ``ids``, ``delete_all`` or ``filter`` is given.
        """

        if namespace is None:
            namespace = self._namespace

        if delete_all:
            self._index.delete(delete_all=True, namespace=namespace, **kwargs)
        elif ids is not None:
            chunk_size = 1000
            for i in range(0, len(ids), chunk_size):
                chunk = ids[i : i + chunk_size]
                self._index.delete(ids=chunk, namespace=namespace, **kwargs)
        elif filter is not None:
            self._index.delete(filter=filter, namespace=namespace, **kwargs)
        else:
            raise ValueError("Either ids, delete_all, or filter must be provided.")
|
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,97 @@
|
||||
[tool.poetry]
|
||||
name = "langchain-pinecone"
|
||||
version = "0.0.1"
|
||||
description = "An integration package connecting Pinecone and LangChain"
|
||||
authors = []
|
||||
readme = "README.md"
|
||||
repository = "https://github.com/langchain-ai/langchain"
|
||||
license = "MIT"
|
||||
|
||||
[tool.poetry.urls]
|
||||
"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/partners/pinecone"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = ">=3.8.1,<4.0"
|
||||
langchain-core = ">=0.0.12"
|
||||
pinecone-client = {version = "^3", python = ">=3.8,<3.13"}
|
||||
simsimd = "^3.6.3"
|
||||
numpy = "^1"
|
||||
|
||||
[tool.poetry.group.test]
|
||||
optional = true
|
||||
|
||||
[tool.poetry.group.test.dependencies]
|
||||
pytest = "^7.3.0"
|
||||
freezegun = "^1.2.2"
|
||||
pytest-mock = "^3.10.0"
|
||||
syrupy = "^4.0.2"
|
||||
pytest-watcher = "^0.3.4"
|
||||
pytest-asyncio = "^0.21.1"
|
||||
langchain-core = {path = "../../core", develop = true}
|
||||
|
||||
[tool.poetry.group.codespell]
|
||||
optional = true
|
||||
|
||||
[tool.poetry.group.codespell.dependencies]
|
||||
codespell = "^2.2.0"
|
||||
|
||||
[tool.poetry.group.test_integration]
|
||||
optional = true
|
||||
|
||||
[tool.poetry.group.test_integration.dependencies]
|
||||
langchain-openai = ">=0.0.3,<0.1"
|
||||
|
||||
[tool.poetry.group.lint]
|
||||
optional = true
|
||||
|
||||
[tool.poetry.group.lint.dependencies]
|
||||
ruff = "^0.1.5"
|
||||
|
||||
[tool.poetry.group.typing.dependencies]
|
||||
mypy = "^0.991"
|
||||
langchain-core = {path = "../../core", develop = true}
|
||||
|
||||
[tool.poetry.group.dev]
|
||||
optional = true
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
langchain-core = {path = "../../core", develop = true}
|
||||
|
||||
[tool.ruff]
|
||||
select = [
|
||||
"E", # pycodestyle
|
||||
"F", # pyflakes
|
||||
"I", # isort
|
||||
]
|
||||
|
||||
[tool.mypy]
|
||||
disallow_untyped_defs = "True"
|
||||
|
||||
[tool.coverage.run]
|
||||
omit = [
|
||||
"tests/*",
|
||||
]
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core>=1.0.0"]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
# --strict-markers will raise errors on unknown marks.
|
||||
# https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks
|
||||
#
|
||||
# https://docs.pytest.org/en/7.1.x/reference/reference.html
|
||||
# --strict-config any warnings encountered while parsing the `pytest`
|
||||
# section of the configuration file raise errors.
|
||||
#
|
||||
# https://github.com/tophat/syrupy
|
||||
# --snapshot-warn-unused Prints a warning on unused snapshots rather than fail the test suite.
|
||||
addopts = "--snapshot-warn-unused --strict-markers --strict-config --durations=5"
|
||||
# Registering custom markers.
|
||||
# https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers
|
||||
markers = [
|
||||
"requires: mark tests as requiring a specific library",
|
||||
"asyncio: mark tests as requiring asyncio",
|
||||
"compile: mark placeholder test used to compile integration tests without running them",
|
||||
]
|
||||
asyncio_mode = "auto"
|
@ -0,0 +1,17 @@
|
||||
import sys
|
||||
import traceback
|
||||
from importlib.machinery import SourceFileLoader
|
||||
|
||||
def has_import_failures(files: list) -> bool:
    """Load every file as a module and report whether any of them failed.

    For each file that raises while loading, its path and the traceback are
    printed, followed by a blank line.

    Args:
        files: Paths of the Python source files to load.

    Returns:
        True if at least one file raised an ``Exception``, else False.
    """
    has_failure = False
    for file in files:
        try:
            SourceFileLoader("x", file).load_module()
        except Exception:
            # The original assigned to a misspelled name here, so the flag was
            # never set and the script always exited with status 0.
            has_failure = True
            print(file)
            traceback.print_exc()
            print()
    return has_failure


if __name__ == "__main__":
    # Exit status 1 signals that at least one file could not be imported.
    sys.exit(1 if has_import_failures(sys.argv[1:]) else 0)
|
@ -0,0 +1,27 @@
|
||||
#!/bin/bash
#
# Runs `git grep` in the given repository for lines that start with
# "import pydantic" or "from pydantic", and fails if any are found.
#
# Usage: ./scripts/check_pydantic.sh /path/to/repository

# Exactly one argument (the repository path) is required.
if [ "$#" -ne 1 ]; then
  echo "Usage: $0 /path/to/repository"
  exit 1
fi

repository_path="$1"

# Collect every matching line reported by git grep.
matches=$(git -C "$repository_path" grep -E '^import pydantic|^from pydantic')

# Nothing matched: finish successfully.
if [ -z "$matches" ]; then
  exit 0
fi

# Otherwise list the offending lines together with the suggested replacement.
printf '%s\n' \
  "ERROR: The following lines need to be updated:" \
  "$matches" \
  "Please replace the code with an import from langchain_core.pydantic_v1." \
  "For example, replace 'from pydantic import BaseModel'" \
  "with 'from langchain_core.pydantic_v1 import BaseModel'"
exit 1
|
@ -0,0 +1,17 @@
|
||||
#!/bin/bash

set -eu

# Number of forbidden import patterns that matched at least once.
errors=0

# make sure not importing from langchain or langchain_experimental
# (git grep prints the matching lines; used as an `if` condition, a
# no-match status does not trip `set -e`).
for pattern in '^from langchain\.' '^from langchain_experimental\.'; do
  if git --no-pager grep "$pattern" .; then
    errors=$((errors+1))
  fi
done

# Exit status reflects whether any forbidden import was found.
if [ "$errors" -gt 0 ]; then
  exit 1
fi
exit 0
|
@ -0,0 +1,7 @@
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.compile
def test_placeholder() -> None:
    """Used for compiling integration tests without running any real tests."""
|
@ -0,0 +1,288 @@
|
||||
import os
|
||||
import time
|
||||
import uuid
|
||||
from typing import List
|
||||
|
||||
import numpy as np
|
||||
import pinecone # type: ignore
|
||||
import pytest
|
||||
from langchain_core.documents import Document
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from pinecone import PodSpec
|
||||
|
||||
from langchain_pinecone import Pinecone
|
||||
|
||||
INDEX_NAME = "langchain-test-index" # name of the index
|
||||
NAMESPACE_NAME = "langchain-test-namespace" # name of the namespace
|
||||
DIMENSION = 1536 # dimension of the embeddings
|
||||
|
||||
DEFAULT_SLEEP = 20
|
||||
|
||||
|
||||
class TestPinecone:
|
||||
index: "pinecone.Index"
|
||||
|
||||
@classmethod
def setup_class(cls) -> None:
    """Recreate the test index and check that the test namespace is empty."""
    import pinecone

    client = pinecone.Pinecone(api_key=os.environ["PINECONE_API_KEY"])

    # Delete any existing index with the test name, then create it afresh.
    if any(entry["name"] == INDEX_NAME for entry in client.list_indexes()):
        client.delete_index(INDEX_NAME)
    client.create_index(
        name=INDEX_NAME,
        dimension=DIMENSION,
        metric="cosine",
        spec=PodSpec(environment=os.environ["PINECONE_ENVIRONMENT"]),
    )

    cls.index = client.Index(INDEX_NAME)

    # ensure the index is empty
    stats = cls.index.describe_index_stats()
    assert stats["dimension"] == DIMENSION
    if stats["namespaces"].get(NAMESPACE_NAME) is not None:
        assert stats["namespaces"][NAMESPACE_NAME]["vector_count"] == 0
|
||||
|
||||
@classmethod
|
||||
def teardown_class(cls) -> None:
|
||||
index_stats = cls.index.describe_index_stats()
|
||||
for _namespace_name in index_stats["namespaces"].keys():
|
||||
cls.index.delete(delete_all=True, namespace=_namespace_name)
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def setup(self) -> None:
|
||||
# delete all the vectors in the index
|
||||
print("called")
|
||||
self.index.delete(delete_all=True, namespace=NAMESPACE_NAME)
|
||||
# index_stats = self.index.describe_index_stats()
|
||||
# for _namespace_name in index_stats["namespaces"].keys():
|
||||
# self.index.delete(delete_all=True, namespace=_namespace_name)
|
||||
time.sleep(DEFAULT_SLEEP) # prevent race condition with previous step
|
||||
# index_stats = self.index.describe_index_stats
|
||||
|
||||
@pytest.fixture
|
||||
def embedding_openai(self) -> OpenAIEmbeddings:
|
||||
return OpenAIEmbeddings()
|
||||
|
||||
@pytest.fixture
|
||||
def texts(self) -> List[str]:
|
||||
return ["foo", "bar", "baz"]
|
||||
|
||||
def test_from_texts(
|
||||
self, texts: List[str], embedding_openai: OpenAIEmbeddings
|
||||
) -> None:
|
||||
"""Test end to end construction and search."""
|
||||
unique_id = uuid.uuid4().hex
|
||||
needs = f"foobuu {unique_id} booo"
|
||||
texts.insert(0, needs)
|
||||
|
||||
docsearch = Pinecone.from_texts(
|
||||
texts=texts,
|
||||
embedding=embedding_openai,
|
||||
index_name=INDEX_NAME,
|
||||
namespace=NAMESPACE_NAME,
|
||||
)
|
||||
time.sleep(DEFAULT_SLEEP) # prevent race condition
|
||||
output = docsearch.similarity_search(unique_id, k=1, namespace=NAMESPACE_NAME)
|
||||
assert output == [Document(page_content=needs)]
|
||||
|
||||
def test_from_texts_with_metadatas(
|
||||
self, texts: List[str], embedding_openai: OpenAIEmbeddings
|
||||
) -> None:
|
||||
"""Test end to end construction and search."""
|
||||
|
||||
unique_id = uuid.uuid4().hex
|
||||
needs = f"foobuu {unique_id} booo"
|
||||
texts = [needs] + texts
|
||||
|
||||
metadatas = [{"page": i} for i in range(len(texts))]
|
||||
docsearch = Pinecone.from_texts(
|
||||
texts,
|
||||
embedding_openai,
|
||||
index_name=INDEX_NAME,
|
||||
metadatas=metadatas,
|
||||
namespace=NAMESPACE_NAME,
|
||||
)
|
||||
time.sleep(DEFAULT_SLEEP) # prevent race condition
|
||||
output = docsearch.similarity_search(needs, k=1, namespace=NAMESPACE_NAME)
|
||||
|
||||
# TODO: why metadata={"page": 0.0}) instead of {"page": 0}?
|
||||
assert output == [Document(page_content=needs, metadata={"page": 0.0})]
|
||||
|
||||
def test_from_texts_with_scores(self, embedding_openai: OpenAIEmbeddings) -> None:
|
||||
"""Test end to end construction and search with scores and IDs."""
|
||||
texts = ["foo", "bar", "baz"]
|
||||
metadatas = [{"page": i} for i in range(len(texts))]
|
||||
print("metadatas", metadatas)
|
||||
docsearch = Pinecone.from_texts(
|
||||
texts,
|
||||
embedding_openai,
|
||||
index_name=INDEX_NAME,
|
||||
metadatas=metadatas,
|
||||
namespace=NAMESPACE_NAME,
|
||||
)
|
||||
print(texts)
|
||||
time.sleep(DEFAULT_SLEEP) # prevent race condition
|
||||
output = docsearch.similarity_search_with_score(
|
||||
"foo", k=3, namespace=NAMESPACE_NAME
|
||||
)
|
||||
docs = [o[0] for o in output]
|
||||
scores = [o[1] for o in output]
|
||||
sorted_documents = sorted(docs, key=lambda x: x.metadata["page"])
|
||||
print(sorted_documents)
|
||||
|
||||
# TODO: why metadata={"page": 0.0}) instead of {"page": 0}, etc???
|
||||
assert sorted_documents == [
|
||||
Document(page_content="foo", metadata={"page": 0.0}),
|
||||
Document(page_content="bar", metadata={"page": 1.0}),
|
||||
Document(page_content="baz", metadata={"page": 2.0}),
|
||||
]
|
||||
assert scores[0] > scores[1] > scores[2]
|
||||
|
||||
def test_from_existing_index_with_namespaces(
|
||||
self, embedding_openai: OpenAIEmbeddings
|
||||
) -> None:
|
||||
"""Test that namespaces are properly handled."""
|
||||
# Create two indexes with the same name but different namespaces
|
||||
texts_1 = ["foo", "bar", "baz"]
|
||||
metadatas = [{"page": i} for i in range(len(texts_1))]
|
||||
Pinecone.from_texts(
|
||||
texts_1,
|
||||
embedding_openai,
|
||||
index_name=INDEX_NAME,
|
||||
metadatas=metadatas,
|
||||
namespace=f"{INDEX_NAME}-1",
|
||||
)
|
||||
|
||||
texts_2 = ["foo2", "bar2", "baz2"]
|
||||
metadatas = [{"page": i} for i in range(len(texts_2))]
|
||||
|
||||
Pinecone.from_texts(
|
||||
texts_2,
|
||||
embedding_openai,
|
||||
index_name=INDEX_NAME,
|
||||
metadatas=metadatas,
|
||||
namespace=f"{INDEX_NAME}-2",
|
||||
)
|
||||
|
||||
time.sleep(DEFAULT_SLEEP) # prevent race condition
|
||||
|
||||
# Search with namespace
|
||||
docsearch = Pinecone.from_existing_index(
|
||||
index_name=INDEX_NAME,
|
||||
embedding=embedding_openai,
|
||||
namespace=f"{INDEX_NAME}-1",
|
||||
)
|
||||
output = docsearch.similarity_search("foo", k=20, namespace=f"{INDEX_NAME}-1")
|
||||
# check that we don't get results from the other namespace
|
||||
page_contents = sorted(set([o.page_content for o in output]))
|
||||
assert all(content in ["foo", "bar", "baz"] for content in page_contents)
|
||||
assert all(content not in ["foo2", "bar2", "baz2"] for content in page_contents)
|
||||
|
||||
def test_add_documents_with_ids(
|
||||
self, texts: List[str], embedding_openai: OpenAIEmbeddings
|
||||
) -> None:
|
||||
ids = [uuid.uuid4().hex for _ in range(len(texts))]
|
||||
Pinecone.from_texts(
|
||||
texts=texts,
|
||||
ids=ids,
|
||||
embedding=embedding_openai,
|
||||
index_name=INDEX_NAME,
|
||||
namespace=NAMESPACE_NAME,
|
||||
)
|
||||
time.sleep(DEFAULT_SLEEP) # prevent race condition
|
||||
index_stats = self.index.describe_index_stats()
|
||||
assert index_stats["namespaces"][NAMESPACE_NAME]["vector_count"] == len(texts)
|
||||
|
||||
ids_1 = [uuid.uuid4().hex for _ in range(len(texts))]
|
||||
Pinecone.from_texts(
|
||||
texts=[t + "-1" for t in texts],
|
||||
ids=ids_1,
|
||||
embedding=embedding_openai,
|
||||
index_name=INDEX_NAME,
|
||||
namespace=NAMESPACE_NAME,
|
||||
)
|
||||
time.sleep(DEFAULT_SLEEP) # prevent race condition
|
||||
index_stats = self.index.describe_index_stats()
|
||||
assert (
|
||||
index_stats["namespaces"][NAMESPACE_NAME]["vector_count"] == len(texts) * 2
|
||||
)
|
||||
# only focused on this namespace now
|
||||
# assert index_stats["total_vector_count"] == len(texts) * 2
|
||||
|
||||
@pytest.mark.xfail(reason="relevance score just over 1")
|
||||
def test_relevance_score_bound(self, embedding_openai: OpenAIEmbeddings) -> None:
|
||||
"""Ensures all relevance scores are between 0 and 1."""
|
||||
texts = ["foo", "bar", "baz"]
|
||||
metadatas = [{"page": i} for i in range(len(texts))]
|
||||
docsearch = Pinecone.from_texts(
|
||||
texts,
|
||||
embedding_openai,
|
||||
index_name=INDEX_NAME,
|
||||
metadatas=metadatas,
|
||||
)
|
||||
# wait for the index to be ready
|
||||
time.sleep(DEFAULT_SLEEP)
|
||||
output = docsearch.similarity_search_with_relevance_scores("foo", k=3)
|
||||
print(output)
|
||||
assert all(
|
||||
(1 >= score or np.isclose(score, 1)) and score >= 0 for _, score in output
|
||||
)
|
||||
|
||||
@pytest.mark.skipif(reason="slow to run for benchmark")
|
||||
@pytest.mark.parametrize(
|
||||
"pool_threads,batch_size,embeddings_chunk_size,data_multiplier",
|
||||
[
|
||||
(
|
||||
1,
|
||||
32,
|
||||
32,
|
||||
1000,
|
||||
), # simulate single threaded with embeddings_chunk_size = batch_size = 32
|
||||
(
|
||||
1,
|
||||
32,
|
||||
1000,
|
||||
1000,
|
||||
), # simulate single threaded with embeddings_chunk_size = 1000
|
||||
(
|
||||
4,
|
||||
32,
|
||||
1000,
|
||||
1000,
|
||||
), # simulate 4 threaded with embeddings_chunk_size = 1000
|
||||
(20, 64, 5000, 1000),
|
||||
], # simulate 20 threaded with embeddings_chunk_size = 5000
|
||||
)
|
||||
def test_from_texts_with_metadatas_benchmark(
|
||||
self,
|
||||
pool_threads: int,
|
||||
batch_size: int,
|
||||
embeddings_chunk_size: int,
|
||||
data_multiplier: int,
|
||||
documents: List[Document],
|
||||
embedding_openai: OpenAIEmbeddings,
|
||||
) -> None:
|
||||
"""Test end to end construction and search."""
|
||||
|
||||
texts = [document.page_content for document in documents] * data_multiplier
|
||||
uuids = [uuid.uuid4().hex for _ in range(len(texts))]
|
||||
metadatas = [{"page": i} for i in range(len(texts))]
|
||||
docsearch = Pinecone.from_texts(
|
||||
texts,
|
||||
embedding_openai,
|
||||
ids=uuids,
|
||||
metadatas=metadatas,
|
||||
index_name=INDEX_NAME,
|
||||
namespace=NAMESPACE_NAME,
|
||||
pool_threads=pool_threads,
|
||||
batch_size=batch_size,
|
||||
embeddings_chunk_size=embeddings_chunk_size,
|
||||
)
|
||||
|
||||
query = "What did the president say about Ketanji Brown Jackson"
|
||||
_ = docsearch.similarity_search(query, k=1, namespace=NAMESPACE_NAME)
|
@ -0,0 +1,9 @@
|
||||
from langchain_pinecone import __all__
|
||||
|
||||
# Names the package is expected to export via ``__all__``.
EXPECTED_ALL = ["Pinecone"]


def test_all_imports() -> None:
    """Check that the package's ``__all__`` matches ``EXPECTED_ALL``, ignoring order."""
    exported = sorted(__all__)
    expected = sorted(EXPECTED_ALL)
    assert expected == exported
|
@ -0,0 +1,12 @@
|
||||
from unittest.mock import Mock
|
||||
|
||||
from langchain_pinecone.vectorstores import Pinecone
|
||||
|
||||
|
||||
def test_initialization() -> None:
    """Construct the vector store from mocks to check that it initializes."""
    # Positional arguments: mock index, mock embedding, text key.
    Pinecone(Mock(), Mock(), "xyz")
|
Loading…
Reference in New Issue