community[patch]: Add initial tests for AzureSearch vector store (#17663)

**Description:** AzureSearch vector store has no tests. This PR adds
initial tests to validate the code can be imported and used.
**Issue:** N/A
**Dependencies:** azure-search-documents and azure-identity are added as
optional dependencies for testing

---------

Co-authored-by: Matt Gotteiner <[email protected]>
Co-authored-by: Bagatur <baskaryan@gmail.com>
pull/20612/merge
Matt 3 weeks ago committed by GitHub
parent 5f1d1666e3
commit 28df4750ef
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
[[package]]
name = "aenum"
@ -552,6 +552,17 @@ azure-core = ">=1.30.0,<2.0.0"
isodate = ">=0.6.1,<1.0.0"
typing-extensions = ">=4.6.0"
[[package]]
name = "azure-common"
version = "1.1.28"
description = "Microsoft Azure Client Library for Python (Common)"
optional = true
python-versions = "*"
files = [
{file = "azure-common-1.1.28.zip", hash = "sha256:4ac0cd3214e36b6a1b6a442686722a5d8cc449603aa833f3f0f40bda836704a3"},
{file = "azure_common-1.1.28-py2.py3-none-any.whl", hash = "sha256:5c12d3dcf4ec20599ca6b0d3e09e86e146353d443e7fcc050c9a19c1f9df20ad"},
]
[[package]]
name = "azure-core"
version = "1.30.1"
@ -571,6 +582,39 @@ typing-extensions = ">=4.6.0"
[package.extras]
aio = ["aiohttp (>=3.0)"]
[[package]]
name = "azure-identity"
version = "1.16.0"
description = "Microsoft Azure Identity Library for Python"
optional = true
python-versions = ">=3.8"
files = [
{file = "azure-identity-1.16.0.tar.gz", hash = "sha256:6ff1d667cdcd81da1ceab42f80a0be63ca846629f518a922f7317a7e3c844e1b"},
{file = "azure_identity-1.16.0-py3-none-any.whl", hash = "sha256:722fdb60b8fdd55fa44dc378b8072f4b419b56a5e54c0de391f644949f3a826f"},
]
[package.dependencies]
azure-core = ">=1.23.0"
cryptography = ">=2.5"
msal = ">=1.24.0"
msal-extensions = ">=0.3.0"
[[package]]
name = "azure-search-documents"
version = "11.4.0"
description = "Microsoft Azure Cognitive Search Client Library for Python"
optional = true
python-versions = ">=3.7"
files = [
{file = "azure-search-documents-11.4.0.tar.gz", hash = "sha256:599f269f106fb51e646ff426a218c21811575598e6a769b23fa4a0127c0f57e0"},
{file = "azure_search_documents-11.4.0-py3-none-any.whl", hash = "sha256:e435266dc992a3450dc475309c9475f89a4bb0e9dac838140e609d9f1c7608ac"},
]
[package.dependencies]
azure-common = ">=1.1,<2.0"
azure-core = ">=1.28.0,<2.0.0"
isodate = ">=0.6.0"
[[package]]
name = "babel"
version = "2.14.0"
@ -3204,7 +3248,6 @@ files = [
{file = "jq-1.6.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:227b178b22a7f91ae88525810441791b1ca1fc71c86f03190911793be15cec3d"},
{file = "jq-1.6.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:780eb6383fbae12afa819ef676fc93e1548ae4b076c004a393af26a04b460742"},
{file = "jq-1.6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:08ded6467f4ef89fec35b2bf310f210f8cd13fbd9d80e521500889edf8d22441"},
{file = "jq-1.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:49e44ed677713f4115bd5bf2dbae23baa4cd503be350e12a1c1f506b0687848f"},
{file = "jq-1.6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:984f33862af285ad3e41e23179ac4795f1701822473e1a26bf87ff023e5a89ea"},
{file = "jq-1.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f42264fafc6166efb5611b5d4cb01058887d050a6c19334f6a3f8a13bb369df5"},
{file = "jq-1.6.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a67154f150aaf76cc1294032ed588436eb002097dd4fd1e283824bf753a05080"},
@ -3715,7 +3758,7 @@ files = [
[[package]]
name = "langchain-core"
version = "0.1.45"
version = "0.1.46"
description = "Building applications with LLMs through composability"
optional = false
python-versions = ">=3.8.1,<4.0"
@ -4204,6 +4247,25 @@ requests = ">=2.0.0,<3"
[package.extras]
broker = ["pymsalruntime (>=0.13.2,<0.15)"]
[[package]]
name = "msal-extensions"
version = "1.1.0"
description = "Microsoft Authentication Library extensions (MSAL EX) provides a persistence API that can save your data on disk, encrypted on Windows, macOS and Linux. Concurrent data access will be coordinated by a file lock mechanism."
optional = true
python-versions = ">=3.7"
files = [
{file = "msal-extensions-1.1.0.tar.gz", hash = "sha256:6ab357867062db7b253d0bd2df6d411c7891a0ee7308d54d1e4317c1d1c54252"},
{file = "msal_extensions-1.1.0-py3-none-any.whl", hash = "sha256:01be9711b4c0b1a151450068eeb2c4f0997df3bba085ac299de3a66f585e382f"},
]
[package.dependencies]
msal = ">=0.4.1,<2.0.0"
packaging = "*"
portalocker = [
{version = ">=1.0,<3", markers = "platform_system != \"Windows\""},
{version = ">=1.6,<3", markers = "platform_system == \"Windows\""},
]
[[package]]
name = "multidict"
version = "6.0.5"
@ -5349,6 +5411,25 @@ files = [
dev = ["pre-commit", "tox"]
testing = ["pytest", "pytest-benchmark"]
[[package]]
name = "portalocker"
version = "2.8.2"
description = "Wraps the portalocker recipe for easy usage"
optional = true
python-versions = ">=3.8"
files = [
{file = "portalocker-2.8.2-py3-none-any.whl", hash = "sha256:cfb86acc09b9aa7c3b43594e19be1345b9d16af3feb08bf92f23d4dce513a28e"},
{file = "portalocker-2.8.2.tar.gz", hash = "sha256:2b035aa7828e46c58e9b31390ee1f169b98e1066ab10b9a6a861fe7e25ee4f33"},
]
[package.dependencies]
pywin32 = {version = ">=226", markers = "platform_system == \"Windows\""}
[package.extras]
docs = ["sphinx (>=1.7.1)"]
redis = ["redis"]
tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "pytest-timeout (>=2.1.0)", "redis", "sphinx (>=6.0.0)", "types-redis"]
[[package]]
name = "praw"
version = "7.7.1"
@ -5529,6 +5610,8 @@ files = [
{file = "psycopg2-2.9.9-cp310-cp310-win_amd64.whl", hash = "sha256:426f9f29bde126913a20a96ff8ce7d73fd8a216cfb323b1f04da402d452853c3"},
{file = "psycopg2-2.9.9-cp311-cp311-win32.whl", hash = "sha256:ade01303ccf7ae12c356a5e10911c9e1c51136003a9a1d92f7aa9d010fb98372"},
{file = "psycopg2-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:121081ea2e76729acfb0673ff33755e8703d45e926e416cb59bae3a86c6a4981"},
{file = "psycopg2-2.9.9-cp312-cp312-win32.whl", hash = "sha256:d735786acc7dd25815e89cc4ad529a43af779db2e25aa7c626de864127e5a024"},
{file = "psycopg2-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:a7653d00b732afb6fc597e29c50ad28087dcb4fbfb28e86092277a559ae4e693"},
{file = "psycopg2-2.9.9-cp37-cp37m-win32.whl", hash = "sha256:5e0d98cade4f0e0304d7d6f25bbfbc5bd186e07b38eac65379309c4ca3193efa"},
{file = "psycopg2-2.9.9-cp37-cp37m-win_amd64.whl", hash = "sha256:7e2dacf8b009a1c1e843b5213a87f7c544b2b042476ed7755be813eaf4e8347a"},
{file = "psycopg2-2.9.9-cp38-cp38-win32.whl", hash = "sha256:ff432630e510709564c01dafdbe996cb552e0b9f3f065eb89bdce5bd31fabf4c"},
@ -5571,6 +5654,7 @@ files = [
{file = "psycopg2_binary-2.9.9-cp311-cp311-win32.whl", hash = "sha256:dc4926288b2a3e9fd7b50dc6a1909a13bbdadfc67d93f3374d984e56f885579d"},
{file = "psycopg2_binary-2.9.9-cp311-cp311-win_amd64.whl", hash = "sha256:b76bedd166805480ab069612119ea636f5ab8f8771e640ae103e05a4aae3e417"},
{file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:8532fd6e6e2dc57bcb3bc90b079c60de896d2128c5d9d6f24a63875a95a088cf"},
{file = "psycopg2_binary-2.9.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b0605eaed3eb239e87df0d5e3c6489daae3f7388d455d0c0b4df899519c6a38d"},
{file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f8544b092a29a6ddd72f3556a9fcf249ec412e10ad28be6a0c0d948924f2212"},
{file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2d423c8d8a3c82d08fe8af900ad5b613ce3632a1249fd6a223941d0735fce493"},
{file = "psycopg2_binary-2.9.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e5afae772c00980525f6d6ecf7cbca55676296b580c0e6abb407f15f3706996"},
@ -5579,6 +5663,8 @@ files = [
{file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:cb16c65dcb648d0a43a2521f2f0a2300f40639f6f8c1ecbc662141e4e3e1ee07"},
{file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:911dda9c487075abd54e644ccdf5e5c16773470a6a5d3826fda76699410066fb"},
{file = "psycopg2_binary-2.9.9-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:57fede879f08d23c85140a360c6a77709113efd1c993923c59fde17aa27599fe"},
{file = "psycopg2_binary-2.9.9-cp312-cp312-win32.whl", hash = "sha256:64cf30263844fa208851ebb13b0732ce674d8ec6a0c86a4e160495d299ba3c93"},
{file = "psycopg2_binary-2.9.9-cp312-cp312-win_amd64.whl", hash = "sha256:81ff62668af011f9a48787564ab7eded4e9fb17a4a6a74af5ffa6a457400d2ab"},
{file = "psycopg2_binary-2.9.9-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:2293b001e319ab0d869d660a704942c9e2cce19745262a8aba2115ef41a0a42a"},
{file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ef7df18daf2c4c07e2695e8cfd5ee7f748a1d54d802330985a78d2a5a6dca9"},
{file = "psycopg2_binary-2.9.9-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a602ea5aff39bb9fac6308e9c9d82b9a35c2bf288e184a816002c9fae930b77"},
@ -6576,6 +6662,7 @@ files = [
{file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
{file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
{file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
{file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
{file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
@ -9229,9 +9316,9 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
[extras]
cli = ["typer"]
extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "azure-ai-documentintelligence", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "cloudpickle", "cloudpickle", "cohere", "databricks-vectorsearch", "datasets", "dgml-utils", "elasticsearch", "esprima", "faiss-cpu", "feedparser", "fireworks-ai", "friendli-client", "geopandas", "gitpython", "google-cloud-documentai", "gql", "gradientai", "hdbcli", "hologres-vector", "html2text", "httpx", "httpx-sse", "javelin-sdk", "jinja2", "jq", "jsonschema", "lxml", "markdownify", "motor", "msal", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "nvidia-riva-client", "oci", "openai", "openapi-pydantic", "oracle-ads", "pandas", "pdfminer-six", "pgvector", "praw", "premai", "psychicapi", "py-trello", "pyjwt", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "rdflib", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "tidb-vector", "timescale-vector", "tqdm", "tree-sitter", "tree-sitter-languages", "upstash-redis", "vdms", "xata", "xmltodict"]
extended-testing = ["aiosqlite", "aleph-alpha-client", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "azure-ai-documentintelligence", "azure-identity", "azure-search-documents", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "cloudpickle", "cloudpickle", "cohere", "databricks-vectorsearch", "datasets", "dgml-utils", "elasticsearch", "esprima", "faiss-cpu", "feedparser", "fireworks-ai", "friendli-client", "geopandas", "gitpython", "google-cloud-documentai", "gql", "gradientai", "hdbcli", "hologres-vector", "html2text", "httpx", "httpx-sse", "javelin-sdk", "jinja2", "jq", "jsonschema", "lxml", "markdownify", "motor", "msal", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "nvidia-riva-client", "oci", "openai", "openapi-pydantic", "oracle-ads", "pandas", "pdfminer-six", "pgvector", "praw", "premai", "psychicapi", "py-trello", "pyjwt", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "rdflib", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "tidb-vector", "timescale-vector", "tqdm", "tree-sitter", "tree-sitter-languages", "upstash-redis", "vdms", "xata", "xmltodict"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0"
content-hash = "48ea73a94d06ae90f8f089017ae1bbcf9d37b2cc9957a44fb617785be0fe3236"
content-hash = "b066cbf8a1f02cae88c6c099e916d805fe6eb8685fd15c093d66cf52ea363fa5"

@ -94,6 +94,8 @@ hdbcli = {version = "^2.19.21", optional = true}
oci = {version = "^2.119.1", optional = true}
rdflib = {version = "7.0.0", optional = true}
nvidia-riva-client = {version = "^2.14.0", optional = true}
azure-search-documents = {version = "11.4.0", optional = true}
azure-identity = {version = "^1.15.0", optional = true}
tidb-vector = {version = ">=0.0.3,<1.0.0", optional = true}
friendli-client = {version = "^1.2.4", optional = true}
premai = {version = "^0.3.25", optional = true}
@ -268,6 +270,8 @@ extended_testing = [
"hdbcli",
"oci",
"rdflib",
"azure-search-documents",
"azure-identity",
"tidb-vector",
"cloudpickle",
"friendli-client",

@ -0,0 +1,170 @@
import json
from typing import List, Optional
from unittest.mock import patch
import pytest
from langchain_community.vectorstores.azuresearch import AzureSearch
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
# Default length of the vectors produced by FakeEmbeddingsWithDimension.
DEFAULT_VECTOR_DIMENSION = 4
class FakeEmbeddingsWithDimension(FakeEmbeddings):
    """Deterministic fake embeddings whose vector length is configurable."""

    def __init__(self, dimension: int = DEFAULT_VECTOR_DIMENSION):
        super().__init__()
        self.dimension = dimension

    def embed_documents(self, embedding_texts: List[str]) -> List[List[float]]:
        """Return one vector per text: all ones, ending with the text's position."""
        leading_ones = [1.0] * (self.dimension - 1)
        text_count = len(embedding_texts)
        return [leading_ones + [float(position)] for position in range(text_count)]

    def embed_query(self, text: str) -> List[float]:
        """Return the query vector: all ones, ending with 0.0."""
        leading_ones = [1.0] * (self.dimension - 1)
        return leading_ones + [0.0]
# Settings passed to the AzureSearch constructor by create_vector_store().
# DEFAULT_INDEX_NAME is also the name of the index built by mock_default_index().
DEFAULT_INDEX_NAME = "langchain-index"
DEFAULT_ENDPOINT = "https://my-search-service.search.windows.net"
DEFAULT_KEY = "mykey"
# Single shared fake embedding model using the default vector dimension.
DEFAULT_EMBEDDING_MODEL = FakeEmbeddingsWithDimension()
def mock_default_index(*args, **kwargs):  # type: ignore[no-untyped-def]
    """Build the SearchIndex used as the existing / expected index in these tests.

    Any positional or keyword arguments are accepted and ignored so the
    function can be patched in place of ``SearchIndexClient.get_index``.

    Returns:
        A ``SearchIndex`` named ``DEFAULT_INDEX_NAME`` with ``id``, ``content``,
        ``content_vector`` and ``metadata`` fields plus HNSW and exhaustive-KNN
        vector search profiles.
    """
    # Imported lazily so the module can be collected without the Azure SDK.
    from azure.search.documents.indexes.models import (
        ExhaustiveKnnAlgorithmConfiguration,
        ExhaustiveKnnParameters,
        HnswAlgorithmConfiguration,
        HnswParameters,
        SearchField,
        SearchFieldDataType,
        SearchIndex,
        VectorSearch,
        VectorSearchAlgorithmMetric,
        VectorSearchProfile,
    )

    return SearchIndex(
        name=DEFAULT_INDEX_NAME,
        fields=[
            SearchField(
                name="id",
                type=SearchFieldDataType.String,
                key=True,
                hidden=False,
                searchable=False,
                filterable=True,
                sortable=False,
                facetable=False,
            ),
            SearchField(
                name="content",
                type=SearchFieldDataType.String,
                key=False,
                hidden=False,
                searchable=True,
                filterable=False,
                sortable=False,
                facetable=False,
            ),
            SearchField(
                name="content_vector",
                type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
                searchable=True,
                # Tied to the fake embedding model's default vector length so
                # the two cannot drift apart.
                vector_search_dimensions=DEFAULT_VECTOR_DIMENSION,
                vector_search_profile_name="myHnswProfile",
            ),
            SearchField(
                name="metadata",
                type="Edm.String",
                key=False,
                hidden=False,
                searchable=True,
                filterable=False,
                sortable=False,
                facetable=False,
            ),
        ],
        vector_search=VectorSearch(
            profiles=[
                VectorSearchProfile(
                    name="myHnswProfile", algorithm_configuration_name="default"
                ),
                VectorSearchProfile(
                    name="myExhaustiveKnnProfile",
                    algorithm_configuration_name="default_exhaustive_knn",
                ),
            ],
            algorithms=[
                HnswAlgorithmConfiguration(
                    name="default",
                    parameters=HnswParameters(
                        m=4,
                        ef_construction=400,
                        ef_search=500,
                        metric=VectorSearchAlgorithmMetric.COSINE,
                    ),
                ),
                ExhaustiveKnnAlgorithmConfiguration(
                    name="default_exhaustive_knn",
                    parameters=ExhaustiveKnnParameters(
                        metric=VectorSearchAlgorithmMetric.COSINE
                    ),
                ),
            ],
        ),
    )
def create_vector_store() -> AzureSearch:
    """Instantiate AzureSearch with the module-level default test settings."""
    store = AzureSearch(
        index_name=DEFAULT_INDEX_NAME,
        embedding_function=DEFAULT_EMBEDDING_MODEL,
        azure_search_endpoint=DEFAULT_ENDPOINT,
        azure_search_key=DEFAULT_KEY,
    )
    return store
@pytest.mark.requires("azure.search.documents")
def test_init_existing_index() -> None:
    """Constructing AzureSearch against an existing index must not create one."""
    from azure.search.documents.indexes import SearchIndexClient

    def mock_create_index(*args, **kwargs):  # type: ignore[no-untyped-def]
        # Patched onto the class, so a call arrives as a method call with the
        # instance and the index. Accept any arguments so the explicit failure
        # below is reported instead of a TypeError about the argument count.
        pytest.fail("Should not create index in this test")

    with patch.multiple(
        SearchIndexClient, get_index=mock_default_index, create_index=mock_create_index
    ):
        vector_store = create_vector_store()
        assert vector_store.client is not None
@pytest.mark.requires("azure.search.documents")
def test_init_new_index() -> None:
    """When the index lookup raises "not found", the default index is created."""
    from azure.core.exceptions import ResourceNotFoundError
    from azure.search.documents.indexes import SearchIndexClient
    from azure.search.documents.indexes.models import SearchIndex

    captured_index: Optional[SearchIndex] = None

    def raise_not_found(self, name: str):  # type: ignore[no-untyped-def]
        # Stand-in for get_index: report that no index exists.
        raise ResourceNotFoundError

    def record_index(self, index):  # type: ignore[no-untyped-def]
        # Stand-in for create_index: remember what would have been created.
        nonlocal captured_index
        captured_index = index

    replacements = {"get_index": raise_not_found, "create_index": record_index}
    with patch.multiple(SearchIndexClient, **replacements):
        store = create_vector_store()
        assert store.client is not None
        assert captured_index is not None
        # Compare serialized forms of the created index and the expected one.
        actual_json = json.dumps(captured_index.as_dict())
        expected_json = json.dumps(mock_default_index().as_dict())
        assert actual_json == expected_json
Loading…
Cancel
Save