community[patch]: `vectorstores` import update (#21169)

Issue: we have several helper functions to import third-party libraries
like lancedb.import_lancedb in
[community.vectorstores](https://api.python.langchain.com/en/latest/vectorstores/langchain_community.vectorstores.lancedb.import_lancedb.html#langchain_community.vectorstores.lancedb.import_lancedb).
We also have core.utils.utils.guard_import, which serves exactly this
purpose.
The import_<package> functions behave inconsistently and should rather be
private functions.
Change: replaced these functions with the guard_import function.

Related to #21133
pull/21413/merge
Leonid Ganeline 2 weeks ago committed by GitHub
parent 3003363605
commit 500569da48
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -10,6 +10,7 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
import numpy as np
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.utils import guard_import
from langchain_core.vectorstores import VectorStore
from langchain_community.docstore.base import Docstore
@ -22,14 +23,7 @@ DEFAULT_METRIC = "angular"
def dependable_annoy_import() -> Any:
"""Import annoy if available, otherwise raise error."""
try:
import annoy
except ImportError:
raise ImportError(
"Could not import annoy python package. "
"Please install it with `pip install --user annoy` "
)
return annoy
return guard_import("annoy")
class Annoy(VectorStore):
@ -300,7 +294,7 @@ class Annoy(VectorStore):
f"Expected one of {list(INDEX_METRICS)}"
)
)
annoy = dependable_annoy_import()
annoy = guard_import("annoy")
if not embeddings:
raise ValueError("embeddings must be provided to build AnnoyIndex")
f = len(embeddings[0])
@ -459,7 +453,7 @@ class Annoy(VectorStore):
)
path = Path(folder_path)
# load index separately since it is not picklable
annoy = dependable_annoy_import()
annoy = guard_import("annoy")
# load docstore and index_to_docstore_id
with open(path / "index.pkl", "rb") as file:
docstore, index_to_docstore_id, config_object = pickle.load(file)

@ -7,20 +7,13 @@ from typing import Any, Iterable, List, Optional
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.utils import guard_import
from langchain_core.vectorstores import VectorStore
def import_lancedb() -> Any:
"""Import lancedb package."""
try:
import lancedb
except ImportError as e:
raise ImportError(
"Could not import pinecone lancedb package. "
"Please install it with `pip install lancedb`."
) from e
return lancedb
return guard_import("lancedb")
class LanceDB(VectorStore):
@ -64,7 +57,7 @@ class LanceDB(VectorStore):
mode: Optional[str] = "overwrite",
):
"""Initialize with Lance DB vectorstore"""
lancedb = import_lancedb()
lancedb = guard_import("lancedb")
self._embedding = embedding
self._vector_key = vector_key
self._id_key = id_key

@ -9,6 +9,7 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
import numpy as np
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.utils import guard_import
from langchain_core.vectorstores import VectorStore
from langchain_community.docstore.base import AddableMixin, Docstore
@ -26,14 +27,7 @@ def dependable_scann_import() -> Any:
"""
Import `scann` if available, otherwise raise error.
"""
try:
import scann
except ImportError:
raise ImportError(
"Could not import scann python package. "
"Please install it with `pip install scann` "
)
return scann
return guard_import("scann")
class ScaNN(VectorStore):
@ -312,7 +306,7 @@ class ScaNN(VectorStore):
normalize_L2: bool = False,
**kwargs: Any,
) -> ScaNN:
scann = dependable_scann_import()
scann = guard_import("scann")
distance_strategy = kwargs.get(
"distance_strategy", DistanceStrategy.EUCLIDEAN_DISTANCE
)
@ -494,7 +488,7 @@ class ScaNN(VectorStore):
scann_path = path / "{index_name}.scann".format(index_name=index_name)
scann_path.mkdir(exist_ok=True, parents=True)
# load index separately since it is not picklable
scann = dependable_scann_import()
scann = guard_import("scann")
index = scann.scann_ops_pybind.load_searcher(str(scann_path))
# load docstore and index_to_docstore_id

@ -1,4 +1,5 @@
"""Wrapper around TileDB vector database."""
from __future__ import annotations
import pickle
@ -9,6 +10,7 @@ from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple
import numpy as np
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.utils import guard_import
from langchain_core.vectorstores import VectorStore
from langchain_community.vectorstores.utils import maximal_marginal_relevance
@ -24,16 +26,10 @@ MAX_FLOAT = sys.float_info.max
def dependable_tiledb_import() -> Any:
"""Import tiledb-vector-search if available, otherwise raise error."""
try:
import tiledb as tiledb
import tiledb.vector_search as tiledb_vs
except ImportError:
raise ImportError(
"Could not import tiledb-vector-search python package. "
"Please install it with `conda install -c tiledb tiledb-vector-search` "
"or `pip install tiledb-vector-search`"
)
return tiledb_vs, tiledb
return (
guard_import("tiledb.vector_search"),
guard_import("tiledb"),
)
def get_vector_index_uri_from_group(group: Any) -> str:
@ -115,7 +111,10 @@ class TileDB(VectorStore):
self.metric = metric
self.config = config
tiledb_vs, tiledb = dependable_tiledb_import()
tiledb_vs, tiledb = (
guard_import("tiledb.vector_search"),
guard_import("tiledb"),
)
with tiledb.scope_ctx(ctx_or_config=config):
index_group = tiledb.Group(self.index_uri, "r")
self.vector_index_uri = (
@ -173,7 +172,7 @@ class TileDB(VectorStore):
Returns:
List of Documents and scores.
"""
tiledb_vs, tiledb = dependable_tiledb_import()
tiledb = guard_import("tiledb")
docs = []
docs_array = tiledb.open(
self.docs_array_uri, "r", timestamp=self.timestamp, config=self.config
@ -477,7 +476,10 @@ class TileDB(VectorStore):
metadatas: bool = True,
config: Optional[Mapping[str, Any]] = None,
) -> None:
tiledb_vs, tiledb = dependable_tiledb_import()
tiledb_vs, tiledb = (
guard_import("tiledb.vector_search"),
guard_import("tiledb"),
)
with tiledb.scope_ctx(ctx_or_config=config):
try:
tiledb.group_create(index_uri)
@ -550,7 +552,10 @@ class TileDB(VectorStore):
f"Expected one of {list(INDEX_METRICS)}"
)
)
tiledb_vs, tiledb = dependable_tiledb_import()
tiledb_vs, tiledb = (
guard_import("tiledb.vector_search"),
guard_import("tiledb"),
)
input_vectors = np.array(embeddings).astype(np.float32)
cls.create(
index_uri=index_uri,
@ -646,7 +651,7 @@ class TileDB(VectorStore):
Returns:
List of ids from adding the texts into the vectorstore.
"""
tiledb_vs, tiledb = dependable_tiledb_import()
tiledb = guard_import("tiledb")
embeddings = self.embedding.embed_documents(list(texts))
if ids is None:
ids = [str(random.randint(0, MAX_UINT64 - 1)) for _ in texts]

@ -5,6 +5,7 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple
import numpy as np
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.utils import guard_import
from langchain_core.vectorstores import VectorStore
from langchain_community.docstore.base import AddableMixin, Docstore
@ -15,14 +16,7 @@ def dependable_usearch_import() -> Any:
"""
Import usearch if available, otherwise raise error.
"""
try:
import usearch.index
except ImportError:
raise ImportError(
"Could not import usearch python package. "
"Please install it with `pip install usearch` "
)
return usearch.index
return guard_import("usearch.index")
class USearch(VectorStore):
@ -170,7 +164,7 @@ class USearch(VectorStore):
documents.append(Document(page_content=text, metadata=metadata))
docstore = InMemoryDocstore(dict(zip(ids, documents)))
usearch = dependable_usearch_import()
usearch = guard_import("usearch.index")
index = usearch.Index(ndim=len(embeddings[0]), metric=metric)
index.add(np.array(ids), np.array(embeddings))
return cls(embedding, index, docstore, ids.tolist())

Loading…
Cancel
Save