mirror of https://github.com/hwchase17/langchain
community[patch]: Add initial tests for AzureSearch vector store (#17663)
**Description:** AzureSearch vector store has no tests. This PR adds initial tests to validate that the code can be imported and used.
**Issue:** N/A
**Dependencies:** azure-search-documents and azure-identity are added as optional dependencies for testing.
---------
Co-authored-by: Matt Gotteiner <[email protected]>
Co-authored-by: Bagatur <baskaryan@gmail.com>
pull/20437/head^2
parent
5f1d1666e3
commit
28df4750ef
@ -0,0 +1,170 @@
|
|||||||
|
import json
|
||||||
|
from typing import List, Optional
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from langchain_community.vectorstores.azuresearch import AzureSearch
|
||||||
|
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
|
||||||
|
|
||||||
|
# Default length of the vectors returned by FakeEmbeddingsWithDimension.
DEFAULT_VECTOR_DIMENSION = 4
|
||||||
|
|
||||||
|
|
||||||
|
class FakeEmbeddingsWithDimension(FakeEmbeddings):
    """Deterministic fake embedder whose vectors have a configurable length."""

    def __init__(self, dimension: int = DEFAULT_VECTOR_DIMENSION):
        super().__init__()
        self.dimension = dimension

    def embed_documents(self, embedding_texts: List[str]) -> List[List[float]]:
        """Return one vector per text: all ones, with the text's position last."""
        leading_ones = [1.0] * (self.dimension - 1)
        return [
            leading_ones + [float(position)]
            for position, _ in enumerate(embedding_texts)
        ]

    def embed_query(self, text: str) -> List[float]:
        """Return the query vector: all ones followed by a trailing zero."""
        return [1.0] * (self.dimension - 1) + [0.0]
|
||||||
|
|
||||||
|
|
||||||
|
# Index name shared by the mocked index and the vector store under test.
DEFAULT_INDEX_NAME = "langchain-index"
|
||||||
|
# Endpoint passed to AzureSearch as ``azure_search_endpoint`` in these tests.
DEFAULT_ENDPOINT = "https://my-search-service.search.windows.net"
|
||||||
|
# Placeholder value passed to AzureSearch as ``azure_search_key``.
DEFAULT_KEY = "mykey"
|
||||||
|
# Shared fake embedder (default dimension) passed as ``embedding_function``.
DEFAULT_EMBEDDING_MODEL = FakeEmbeddingsWithDimension()
|
||||||
|
|
||||||
|
|
||||||
|
def mock_default_index(*args, **kwargs):  # type: ignore[no-untyped-def]
    """Build the ``SearchIndex`` these tests treat as the default index.

    The function accepts and ignores any arguments so that it can be patched
    in place of ``SearchIndexClient.get_index`` (see
    ``test_init_existing_index``) and also be called directly to obtain the
    expected index definition (see ``test_init_new_index``).

    Returns:
        A ``SearchIndex`` named ``DEFAULT_INDEX_NAME`` with ``id``,
        ``content``, ``content_vector`` and ``metadata`` fields, plus a vector
        search configuration holding one HNSW and one exhaustive-KNN
        profile/algorithm pair, both using the cosine metric.
    """
    # Imported lazily so the module can be collected without the Azure SDK.
    from azure.search.documents.indexes.models import (
        ExhaustiveKnnAlgorithmConfiguration,
        ExhaustiveKnnParameters,
        HnswAlgorithmConfiguration,
        HnswParameters,
        SearchField,
        SearchFieldDataType,
        SearchIndex,
        VectorSearch,
        VectorSearchAlgorithmMetric,
        VectorSearchProfile,
    )

    return SearchIndex(
        name=DEFAULT_INDEX_NAME,
        fields=[
            SearchField(
                name="id",
                type=SearchFieldDataType.String,
                key=True,
                hidden=False,
                searchable=False,
                filterable=True,
                sortable=False,
                facetable=False,
            ),
            SearchField(
                name="content",
                type=SearchFieldDataType.String,
                key=False,
                hidden=False,
                searchable=True,
                filterable=False,
                sortable=False,
                facetable=False,
            ),
            SearchField(
                name="content_vector",
                type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
                searchable=True,
                # Tied to the fake embedder's default vector length so the
                # expected index cannot drift from the embedding dimension.
                vector_search_dimensions=DEFAULT_VECTOR_DIMENSION,
                vector_search_profile_name="myHnswProfile",
            ),
            SearchField(
                name="metadata",
                type="Edm.String",
                key=False,
                hidden=False,
                searchable=True,
                filterable=False,
                sortable=False,
                facetable=False,
            ),
        ],
        vector_search=VectorSearch(
            profiles=[
                VectorSearchProfile(
                    name="myHnswProfile", algorithm_configuration_name="default"
                ),
                VectorSearchProfile(
                    name="myExhaustiveKnnProfile",
                    algorithm_configuration_name="default_exhaustive_knn",
                ),
            ],
            algorithms=[
                HnswAlgorithmConfiguration(
                    name="default",
                    parameters=HnswParameters(
                        m=4,
                        ef_construction=400,
                        ef_search=500,
                        metric=VectorSearchAlgorithmMetric.COSINE,
                    ),
                ),
                ExhaustiveKnnAlgorithmConfiguration(
                    name="default_exhaustive_knn",
                    parameters=ExhaustiveKnnParameters(
                        metric=VectorSearchAlgorithmMetric.COSINE
                    ),
                ),
            ],
        ),
    )
|
||||||
|
|
||||||
|
|
||||||
|
def create_vector_store() -> AzureSearch:
    """Instantiate ``AzureSearch`` using the module-level default test settings."""
    store = AzureSearch(
        index_name=DEFAULT_INDEX_NAME,
        embedding_function=DEFAULT_EMBEDDING_MODEL,
        azure_search_endpoint=DEFAULT_ENDPOINT,
        azure_search_key=DEFAULT_KEY,
    )
    return store
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.requires("azure.search.documents")
def test_init_existing_index() -> None:
    """Constructing the store must not create an index when one already exists.

    ``SearchIndexClient.get_index`` is patched to return the default index and
    ``SearchIndexClient.create_index`` is patched to fail the test if called.
    """
    from azure.search.documents.indexes import SearchIndexClient

    # Accept any arguments: this is patched onto the class as a method, so a
    # call would arrive with ``self`` and the index. With a zero-argument
    # signature the call would raise TypeError before reaching ``pytest.fail``.
    def mock_create_index(*args, **kwargs) -> None:  # type: ignore[no-untyped-def]
        pytest.fail("Should not create index in this test")

    with patch.multiple(
        SearchIndexClient, get_index=mock_default_index, create_index=mock_create_index
    ):
        vector_store = create_vector_store()
        assert vector_store.client is not None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.requires("azure.search.documents")
def test_init_new_index() -> None:
    """When no index exists, the store must create one equal to the default.

    ``SearchIndexClient.get_index`` is patched to raise
    ``ResourceNotFoundError`` and ``SearchIndexClient.create_index`` is patched
    to record the index it receives, which is then compared (as JSON) with the
    index built by ``mock_default_index``.
    """
    from azure.core.exceptions import ResourceNotFoundError
    from azure.search.documents.indexes import SearchIndexClient
    from azure.search.documents.indexes.models import SearchIndex

    captured_index: Optional[SearchIndex] = None

    def raise_index_missing(self, name: str):  # type: ignore[no-untyped-def]
        raise ResourceNotFoundError

    def capture_created_index(self, index):  # type: ignore[no-untyped-def]
        nonlocal captured_index
        captured_index = index

    with patch.multiple(
        SearchIndexClient,
        get_index=raise_index_missing,
        create_index=capture_created_index,
    ):
        vector_store = create_vector_store()
        assert vector_store.client is not None
        assert captured_index is not None

        actual_json = json.dumps(captured_index.as_dict())
        expected_json = json.dumps(mock_default_index().as_dict())
        assert actual_json == expected_json
|
Loading…
Reference in New Issue