community[patch]: update AzureSearch class to work with azure-search-documents=11.4.0 (#15659)

- **Description:** Updates
`libs/community/langchain_community/vectorstores/azuresearch.py` to
support the stable version `azure-search-documents=11.4.0`
- **Issues:** https://github.com/langchain-ai/langchain/issues/14534,
https://github.com/langchain-ai/langchain/issues/15039,
https://github.com/langchain-ai/langchain/issues/15355
- **Dependencies:** azure-search-documents>=11.4.0

---------

Co-authored-by: Clément Tamines <Skar0@users.noreply.github.com>
Co-authored-by: Bagatur <baskaryan@gmail.com>
pull/16822/head
Lingzhen Chen 8 months ago committed by GitHub
parent e135dc70c3
commit 30af711c34
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -37,14 +37,10 @@ if TYPE_CHECKING:
CorsOptions,
ScoringProfile,
SearchField,
SemanticConfiguration,
VectorSearch,
)
try:
from azure.search.documents.indexes.models import SemanticSearch
except ImportError:
from azure.search.documents.indexes.models import SemanticSettings # <11.4.0
# Allow overriding field names for Azure Search
FIELDS_ID = get_from_env(
key="AZURESEARCH_FIELDS_ID", env_key="AZURESEARCH_FIELDS_ID", default="id"
@ -73,7 +69,7 @@ def _get_search_client(
semantic_configuration_name: Optional[str] = None,
fields: Optional[List[SearchField]] = None,
vector_search: Optional[VectorSearch] = None,
semantic_settings: Optional[Union[SemanticSearch, SemanticSettings]] = None,
semantic_configurations: Optional[SemanticConfiguration] = None,
scoring_profiles: Optional[List[ScoringProfile]] = None,
default_scoring_profile: Optional[str] = None,
default_fields: Optional[List[SearchField]] = None,
@ -86,30 +82,20 @@ def _get_search_client(
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
ExhaustiveKnnAlgorithmConfiguration,
ExhaustiveKnnParameters,
HnswAlgorithmConfiguration,
HnswParameters,
SearchIndex,
SemanticConfiguration,
SemanticField,
VectorSearch,
SemanticPrioritizedFields,
SemanticSearch,
VectorSearchAlgorithmKind,
VectorSearchAlgorithmMetric,
VectorSearchProfile,
)
# class names changed for versions >= 11.4.0
try:
from azure.search.documents.indexes.models import (
HnswAlgorithmConfiguration, # HnswVectorSearchAlgorithmConfiguration is old
SemanticPrioritizedFields, # PrioritizedFields outdated
SemanticSearch, # SemanticSettings outdated
)
NEW_VERSION = True
except ImportError:
from azure.search.documents.indexes.models import (
HnswVectorSearchAlgorithmConfiguration,
PrioritizedFields,
SemanticSettings,
)
NEW_VERSION = False
default_fields = default_fields or []
if key is None:
credential = DefaultAzureCredential()
@ -155,77 +141,55 @@ def _get_search_client(
fields = default_fields
# Vector search configuration
if vector_search is None:
if NEW_VERSION:
# >= 11.4.0:
# VectorSearch(algorithm_configuration) --> VectorSearch(algorithms)
# HnswVectorSearchAlgorithmConfiguration --> HnswAlgorithmConfiguration
vector_search = VectorSearch(
algorithms=[
HnswAlgorithmConfiguration(
name="default",
kind="hnsw",
parameters={ # type: ignore
"m": 4,
"efConstruction": 400,
"efSearch": 500,
"metric": "cosine",
},
)
]
)
else: # < 11.4.0
vector_search = VectorSearch(
algorithm_configurations=[
HnswVectorSearchAlgorithmConfiguration(
name="default",
kind="hnsw",
parameters={ # type: ignore
"m": 4,
"efConstruction": 400,
"efSearch": 500,
"metric": "cosine",
},
)
]
)
vector_search = VectorSearch(
algorithms=[
HnswAlgorithmConfiguration(
name="default",
kind=VectorSearchAlgorithmKind.HNSW,
parameters=HnswParameters(
m=4,
ef_construction=400,
ef_search=500,
metric=VectorSearchAlgorithmMetric.COSINE,
),
),
ExhaustiveKnnAlgorithmConfiguration(
name="default_exhaustive_knn",
kind=VectorSearchAlgorithmKind.EXHAUSTIVE_KNN,
parameters=ExhaustiveKnnParameters(
metric=VectorSearchAlgorithmMetric.COSINE
),
),
],
profiles=[
VectorSearchProfile(
name="myHnswProfile",
algorithm_configuration_name="default",
),
VectorSearchProfile(
name="myExhaustiveKnnProfile",
algorithm_configuration_name="default_exhaustive_knn",
),
],
)
# Create the semantic settings with the configuration
if semantic_settings is None and semantic_configuration_name is not None:
if NEW_VERSION:
# <=11.4.0: SemanticSettings --> SemanticSearch
# PrioritizedFields(prioritized_content_fields)
# --> SemanticPrioritizedFields(content_fields)
semantic_settings = SemanticSearch(
configurations=[
SemanticConfiguration(
name=semantic_configuration_name,
prioritized_fields=SemanticPrioritizedFields(
content_fields=[
SemanticField(field_name=FIELDS_CONTENT)
],
),
)
]
)
else: # < 11.4.0
semantic_settings = SemanticSettings(
configurations=[
SemanticConfiguration(
name=semantic_configuration_name,
prioritized_fields=PrioritizedFields(
prioritized_content_fields=[
SemanticField(field_name=FIELDS_CONTENT)
],
),
)
]
)
semantic_search = None
if semantic_configurations is None and semantic_configuration_name is not None:
semantic_configuration = SemanticConfiguration(
name=semantic_configuration_name,
prioritized_fields=SemanticPrioritizedFields(
content_fields=[SemanticField(field_name=FIELDS_CONTENT)],
),
)
semantic_search = SemanticSearch(configurations=[semantic_configuration])
# Create the search index with the semantic settings and vector search
index = SearchIndex(
name=index_name,
fields=fields,
vector_search=vector_search,
semantic_settings=semantic_settings,
semantic_search=semantic_search,
scoring_profiles=scoring_profiles,
default_scoring_profile=default_scoring_profile,
cors_options=cors_options,
@ -251,10 +215,9 @@ class AzureSearch(VectorStore):
embedding_function: Union[Callable, Embeddings],
search_type: str = "hybrid",
semantic_configuration_name: Optional[str] = None,
semantic_query_language: str = "en-us",
fields: Optional[List[SearchField]] = None,
vector_search: Optional[VectorSearch] = None,
semantic_settings: Optional[Union[SemanticSearch, SemanticSettings]] = None,
semantic_configurations: Optional[SemanticConfiguration] = None,
scoring_profiles: Optional[List[ScoringProfile]] = None,
default_scoring_profile: Optional[str] = None,
cors_options: Optional[CorsOptions] = None,
@ -309,7 +272,7 @@ class AzureSearch(VectorStore):
semantic_configuration_name=semantic_configuration_name,
fields=fields,
vector_search=vector_search,
semantic_settings=semantic_settings,
semantic_configurations=semantic_configurations,
scoring_profiles=scoring_profiles,
default_scoring_profile=default_scoring_profile,
default_fields=default_fields,
@ -318,7 +281,6 @@ class AzureSearch(VectorStore):
)
self.search_type = search_type
self.semantic_configuration_name = semantic_configuration_name
self.semantic_query_language = semantic_query_language
self.fields = fields if fields else default_fields
@property
@ -451,39 +413,30 @@ class AzureSearch(VectorStore):
Returns:
List of Documents most similar to the query and score for each
"""
from azure.search.documents.models import Vector
from azure.search.documents.models import VectorizedQuery
results = self.client.search(
search_text="",
vectors=[
Vector(
value=np.array(self.embed_query(query), dtype=np.float32).tolist(),
k=k,
vector_queries=[
VectorizedQuery(
vector=np.array(self.embed_query(query), dtype=np.float32).tolist(),
k_nearest_neighbors=k,
fields=FIELDS_CONTENT_VECTOR,
)
],
filter=filters,
top=k,
)
# Convert results to Document objects
docs = [
(
Document(
page_content=result.pop(FIELDS_CONTENT),
metadata={
**(
{FIELDS_ID: result.pop(FIELDS_ID)}
if FIELDS_ID in result
else {}
),
**(
json.loads(result[FIELDS_METADATA])
if FIELDS_METADATA in result
else {
k: v
for k, v in result.items()
if k != FIELDS_CONTENT_VECTOR
}
),
metadata=json.loads(result[FIELDS_METADATA])
if FIELDS_METADATA in result
else {
k: v for k, v in result.items() if k != FIELDS_CONTENT_VECTOR
},
),
float(result["@search.score"]),
@ -520,14 +473,14 @@ class AzureSearch(VectorStore):
Returns:
List of Documents most similar to the query and score for each
"""
from azure.search.documents.models import Vector
from azure.search.documents.models import VectorizedQuery
results = self.client.search(
search_text=query,
vectors=[
Vector(
value=np.array(self.embed_query(query), dtype=np.float32).tolist(),
k=k,
vector_queries=[
VectorizedQuery(
vector=np.array(self.embed_query(query), dtype=np.float32).tolist(),
k_nearest_neighbors=k,
fields=FIELDS_CONTENT_VECTOR,
)
],
@ -539,21 +492,10 @@ class AzureSearch(VectorStore):
(
Document(
page_content=result.pop(FIELDS_CONTENT),
metadata={
**(
{FIELDS_ID: result.pop(FIELDS_ID)}
if FIELDS_ID in result
else {}
),
**(
json.loads(result[FIELDS_METADATA])
if FIELDS_METADATA in result
else {
k: v
for k, v in result.items()
if k != FIELDS_CONTENT_VECTOR
}
),
metadata=json.loads(result[FIELDS_METADATA])
if FIELDS_METADATA in result
else {
k: v for k, v in result.items() if k != FIELDS_CONTENT_VECTOR
},
),
float(result["@search.score"]),
@ -610,20 +552,19 @@ class AzureSearch(VectorStore):
Returns:
List of Documents most similar to the query and score for each
"""
from azure.search.documents.models import Vector
from azure.search.documents.models import VectorizedQuery
results = self.client.search(
search_text=query,
vectors=[
Vector(
value=np.array(self.embed_query(query), dtype=np.float32).tolist(),
k=50,
vector_queries=[
VectorizedQuery(
vector=np.array(self.embed_query(query), dtype=np.float32).tolist(),
k_nearest_neighbors=k,
fields=FIELDS_CONTENT_VECTOR,
)
],
filter=filters,
query_type="semantic",
query_language=self.semantic_query_language,
semantic_configuration_name=self.semantic_configuration_name,
query_caption="extractive",
query_answer="extractive",
@ -643,11 +584,6 @@ class AzureSearch(VectorStore):
Document(
page_content=result.pop(FIELDS_CONTENT),
metadata={
**(
{FIELDS_ID: result.pop(FIELDS_ID)}
if FIELDS_ID in result
else {}
),
**(
json.loads(result[FIELDS_METADATA])
if FIELDS_METADATA in result
@ -667,9 +603,7 @@ class AzureSearch(VectorStore):
if result.get("@search.captions")
else {},
"answers": semantic_answers_dict.get(
json.loads(result[FIELDS_METADATA]).get("key")
if FIELDS_METADATA in result
else "",
json.loads(result["metadata"]).get("key"),
"",
),
},

Loading…
Cancel
Save