|
|
|
@ -37,14 +37,10 @@ if TYPE_CHECKING:
|
|
|
|
|
CorsOptions,
|
|
|
|
|
ScoringProfile,
|
|
|
|
|
SearchField,
|
|
|
|
|
SemanticConfiguration,
|
|
|
|
|
VectorSearch,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
from azure.search.documents.indexes.models import SemanticSearch
|
|
|
|
|
except ImportError:
|
|
|
|
|
from azure.search.documents.indexes.models import SemanticSettings # <11.4.0
|
|
|
|
|
|
|
|
|
|
# Allow overriding field names for Azure Search
|
|
|
|
|
FIELDS_ID = get_from_env(
|
|
|
|
|
key="AZURESEARCH_FIELDS_ID", env_key="AZURESEARCH_FIELDS_ID", default="id"
|
|
|
|
@ -73,7 +69,7 @@ def _get_search_client(
|
|
|
|
|
semantic_configuration_name: Optional[str] = None,
|
|
|
|
|
fields: Optional[List[SearchField]] = None,
|
|
|
|
|
vector_search: Optional[VectorSearch] = None,
|
|
|
|
|
semantic_settings: Optional[Union[SemanticSearch, SemanticSettings]] = None,
|
|
|
|
|
semantic_configurations: Optional[SemanticConfiguration] = None,
|
|
|
|
|
scoring_profiles: Optional[List[ScoringProfile]] = None,
|
|
|
|
|
default_scoring_profile: Optional[str] = None,
|
|
|
|
|
default_fields: Optional[List[SearchField]] = None,
|
|
|
|
@ -86,30 +82,20 @@ def _get_search_client(
|
|
|
|
|
from azure.search.documents import SearchClient
|
|
|
|
|
from azure.search.documents.indexes import SearchIndexClient
|
|
|
|
|
from azure.search.documents.indexes.models import (
|
|
|
|
|
ExhaustiveKnnAlgorithmConfiguration,
|
|
|
|
|
ExhaustiveKnnParameters,
|
|
|
|
|
HnswAlgorithmConfiguration,
|
|
|
|
|
HnswParameters,
|
|
|
|
|
SearchIndex,
|
|
|
|
|
SemanticConfiguration,
|
|
|
|
|
SemanticField,
|
|
|
|
|
VectorSearch,
|
|
|
|
|
SemanticPrioritizedFields,
|
|
|
|
|
SemanticSearch,
|
|
|
|
|
VectorSearchAlgorithmKind,
|
|
|
|
|
VectorSearchAlgorithmMetric,
|
|
|
|
|
VectorSearchProfile,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# class names changed for versions >= 11.4.0
|
|
|
|
|
try:
|
|
|
|
|
from azure.search.documents.indexes.models import (
|
|
|
|
|
HnswAlgorithmConfiguration, # HnswVectorSearchAlgorithmConfiguration is old
|
|
|
|
|
SemanticPrioritizedFields, # PrioritizedFields outdated
|
|
|
|
|
SemanticSearch, # SemanticSettings outdated
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
NEW_VERSION = True
|
|
|
|
|
except ImportError:
|
|
|
|
|
from azure.search.documents.indexes.models import (
|
|
|
|
|
HnswVectorSearchAlgorithmConfiguration,
|
|
|
|
|
PrioritizedFields,
|
|
|
|
|
SemanticSettings,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
NEW_VERSION = False
|
|
|
|
|
|
|
|
|
|
default_fields = default_fields or []
|
|
|
|
|
if key is None:
|
|
|
|
|
credential = DefaultAzureCredential()
|
|
|
|
@ -155,77 +141,55 @@ def _get_search_client(
|
|
|
|
|
fields = default_fields
|
|
|
|
|
# Vector search configuration
|
|
|
|
|
if vector_search is None:
|
|
|
|
|
if NEW_VERSION:
|
|
|
|
|
# >= 11.4.0:
|
|
|
|
|
# VectorSearch(algorithm_configuration) --> VectorSearch(algorithms)
|
|
|
|
|
# HnswVectorSearchAlgorithmConfiguration --> HnswAlgorithmConfiguration
|
|
|
|
|
vector_search = VectorSearch(
|
|
|
|
|
algorithms=[
|
|
|
|
|
HnswAlgorithmConfiguration(
|
|
|
|
|
name="default",
|
|
|
|
|
kind="hnsw",
|
|
|
|
|
parameters={ # type: ignore
|
|
|
|
|
"m": 4,
|
|
|
|
|
"efConstruction": 400,
|
|
|
|
|
"efSearch": 500,
|
|
|
|
|
"metric": "cosine",
|
|
|
|
|
},
|
|
|
|
|
)
|
|
|
|
|
]
|
|
|
|
|
)
|
|
|
|
|
else: # < 11.4.0
|
|
|
|
|
vector_search = VectorSearch(
|
|
|
|
|
algorithm_configurations=[
|
|
|
|
|
HnswVectorSearchAlgorithmConfiguration(
|
|
|
|
|
name="default",
|
|
|
|
|
kind="hnsw",
|
|
|
|
|
parameters={ # type: ignore
|
|
|
|
|
"m": 4,
|
|
|
|
|
"efConstruction": 400,
|
|
|
|
|
"efSearch": 500,
|
|
|
|
|
"metric": "cosine",
|
|
|
|
|
},
|
|
|
|
|
)
|
|
|
|
|
]
|
|
|
|
|
)
|
|
|
|
|
vector_search = VectorSearch(
|
|
|
|
|
algorithms=[
|
|
|
|
|
HnswAlgorithmConfiguration(
|
|
|
|
|
name="default",
|
|
|
|
|
kind=VectorSearchAlgorithmKind.HNSW,
|
|
|
|
|
parameters=HnswParameters(
|
|
|
|
|
m=4,
|
|
|
|
|
ef_construction=400,
|
|
|
|
|
ef_search=500,
|
|
|
|
|
metric=VectorSearchAlgorithmMetric.COSINE,
|
|
|
|
|
),
|
|
|
|
|
),
|
|
|
|
|
ExhaustiveKnnAlgorithmConfiguration(
|
|
|
|
|
name="default_exhaustive_knn",
|
|
|
|
|
kind=VectorSearchAlgorithmKind.EXHAUSTIVE_KNN,
|
|
|
|
|
parameters=ExhaustiveKnnParameters(
|
|
|
|
|
metric=VectorSearchAlgorithmMetric.COSINE
|
|
|
|
|
),
|
|
|
|
|
),
|
|
|
|
|
],
|
|
|
|
|
profiles=[
|
|
|
|
|
VectorSearchProfile(
|
|
|
|
|
name="myHnswProfile",
|
|
|
|
|
algorithm_configuration_name="default",
|
|
|
|
|
),
|
|
|
|
|
VectorSearchProfile(
|
|
|
|
|
name="myExhaustiveKnnProfile",
|
|
|
|
|
algorithm_configuration_name="default_exhaustive_knn",
|
|
|
|
|
),
|
|
|
|
|
],
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# Create the semantic settings with the configuration
|
|
|
|
|
if semantic_settings is None and semantic_configuration_name is not None:
|
|
|
|
|
if NEW_VERSION:
|
|
|
|
|
# >= 11.4.0: SemanticSettings --> SemanticSearch
|
|
|
|
|
# PrioritizedFields(prioritized_content_fields)
|
|
|
|
|
# --> SemanticPrioritizedFields(content_fields)
|
|
|
|
|
semantic_settings = SemanticSearch(
|
|
|
|
|
configurations=[
|
|
|
|
|
SemanticConfiguration(
|
|
|
|
|
name=semantic_configuration_name,
|
|
|
|
|
prioritized_fields=SemanticPrioritizedFields(
|
|
|
|
|
content_fields=[
|
|
|
|
|
SemanticField(field_name=FIELDS_CONTENT)
|
|
|
|
|
],
|
|
|
|
|
),
|
|
|
|
|
)
|
|
|
|
|
]
|
|
|
|
|
)
|
|
|
|
|
else: # < 11.4.0
|
|
|
|
|
semantic_settings = SemanticSettings(
|
|
|
|
|
configurations=[
|
|
|
|
|
SemanticConfiguration(
|
|
|
|
|
name=semantic_configuration_name,
|
|
|
|
|
prioritized_fields=PrioritizedFields(
|
|
|
|
|
prioritized_content_fields=[
|
|
|
|
|
SemanticField(field_name=FIELDS_CONTENT)
|
|
|
|
|
],
|
|
|
|
|
),
|
|
|
|
|
)
|
|
|
|
|
]
|
|
|
|
|
)
|
|
|
|
|
semantic_search = None
|
|
|
|
|
if semantic_configurations is None and semantic_configuration_name is not None:
|
|
|
|
|
semantic_configuration = SemanticConfiguration(
|
|
|
|
|
name=semantic_configuration_name,
|
|
|
|
|
prioritized_fields=SemanticPrioritizedFields(
|
|
|
|
|
content_fields=[SemanticField(field_name=FIELDS_CONTENT)],
|
|
|
|
|
),
|
|
|
|
|
)
|
|
|
|
|
semantic_search = SemanticSearch(configurations=[semantic_configuration])
|
|
|
|
|
|
|
|
|
|
# Create the search index with the semantic settings and vector search
|
|
|
|
|
index = SearchIndex(
|
|
|
|
|
name=index_name,
|
|
|
|
|
fields=fields,
|
|
|
|
|
vector_search=vector_search,
|
|
|
|
|
semantic_settings=semantic_settings,
|
|
|
|
|
semantic_search=semantic_search,
|
|
|
|
|
scoring_profiles=scoring_profiles,
|
|
|
|
|
default_scoring_profile=default_scoring_profile,
|
|
|
|
|
cors_options=cors_options,
|
|
|
|
@ -251,10 +215,9 @@ class AzureSearch(VectorStore):
|
|
|
|
|
embedding_function: Union[Callable, Embeddings],
|
|
|
|
|
search_type: str = "hybrid",
|
|
|
|
|
semantic_configuration_name: Optional[str] = None,
|
|
|
|
|
semantic_query_language: str = "en-us",
|
|
|
|
|
fields: Optional[List[SearchField]] = None,
|
|
|
|
|
vector_search: Optional[VectorSearch] = None,
|
|
|
|
|
semantic_settings: Optional[Union[SemanticSearch, SemanticSettings]] = None,
|
|
|
|
|
semantic_configurations: Optional[SemanticConfiguration] = None,
|
|
|
|
|
scoring_profiles: Optional[List[ScoringProfile]] = None,
|
|
|
|
|
default_scoring_profile: Optional[str] = None,
|
|
|
|
|
cors_options: Optional[CorsOptions] = None,
|
|
|
|
@ -309,7 +272,7 @@ class AzureSearch(VectorStore):
|
|
|
|
|
semantic_configuration_name=semantic_configuration_name,
|
|
|
|
|
fields=fields,
|
|
|
|
|
vector_search=vector_search,
|
|
|
|
|
semantic_settings=semantic_settings,
|
|
|
|
|
semantic_configurations=semantic_configurations,
|
|
|
|
|
scoring_profiles=scoring_profiles,
|
|
|
|
|
default_scoring_profile=default_scoring_profile,
|
|
|
|
|
default_fields=default_fields,
|
|
|
|
@ -318,7 +281,6 @@ class AzureSearch(VectorStore):
|
|
|
|
|
)
|
|
|
|
|
self.search_type = search_type
|
|
|
|
|
self.semantic_configuration_name = semantic_configuration_name
|
|
|
|
|
self.semantic_query_language = semantic_query_language
|
|
|
|
|
self.fields = fields if fields else default_fields
|
|
|
|
|
|
|
|
|
|
@property
|
|
|
|
@ -451,39 +413,30 @@ class AzureSearch(VectorStore):
|
|
|
|
|
Returns:
|
|
|
|
|
List of Documents most similar to the query and score for each
|
|
|
|
|
"""
|
|
|
|
|
from azure.search.documents.models import Vector
|
|
|
|
|
|
|
|
|
|
from azure.search.documents.models import VectorizedQuery
|
|
|
|
|
|
|
|
|
|
results = self.client.search(
|
|
|
|
|
search_text="",
|
|
|
|
|
vectors=[
|
|
|
|
|
Vector(
|
|
|
|
|
value=np.array(self.embed_query(query), dtype=np.float32).tolist(),
|
|
|
|
|
k=k,
|
|
|
|
|
vector_queries=[
|
|
|
|
|
VectorizedQuery(
|
|
|
|
|
vector=np.array(self.embed_query(query), dtype=np.float32).tolist(),
|
|
|
|
|
k_nearest_neighbors=k,
|
|
|
|
|
fields=FIELDS_CONTENT_VECTOR,
|
|
|
|
|
)
|
|
|
|
|
],
|
|
|
|
|
filter=filters,
|
|
|
|
|
top=k,
|
|
|
|
|
)
|
|
|
|
|
# Convert results to Document objects
|
|
|
|
|
docs = [
|
|
|
|
|
(
|
|
|
|
|
Document(
|
|
|
|
|
page_content=result.pop(FIELDS_CONTENT),
|
|
|
|
|
metadata={
|
|
|
|
|
**(
|
|
|
|
|
{FIELDS_ID: result.pop(FIELDS_ID)}
|
|
|
|
|
if FIELDS_ID in result
|
|
|
|
|
else {}
|
|
|
|
|
),
|
|
|
|
|
**(
|
|
|
|
|
json.loads(result[FIELDS_METADATA])
|
|
|
|
|
if FIELDS_METADATA in result
|
|
|
|
|
else {
|
|
|
|
|
k: v
|
|
|
|
|
for k, v in result.items()
|
|
|
|
|
if k != FIELDS_CONTENT_VECTOR
|
|
|
|
|
}
|
|
|
|
|
),
|
|
|
|
|
metadata=json.loads(result[FIELDS_METADATA])
|
|
|
|
|
if FIELDS_METADATA in result
|
|
|
|
|
else {
|
|
|
|
|
k: v for k, v in result.items() if k != FIELDS_CONTENT_VECTOR
|
|
|
|
|
},
|
|
|
|
|
),
|
|
|
|
|
float(result["@search.score"]),
|
|
|
|
@ -520,14 +473,14 @@ class AzureSearch(VectorStore):
|
|
|
|
|
Returns:
|
|
|
|
|
List of Documents most similar to the query and score for each
|
|
|
|
|
"""
|
|
|
|
|
from azure.search.documents.models import Vector
|
|
|
|
|
from azure.search.documents.models import VectorizedQuery
|
|
|
|
|
|
|
|
|
|
results = self.client.search(
|
|
|
|
|
search_text=query,
|
|
|
|
|
vectors=[
|
|
|
|
|
Vector(
|
|
|
|
|
value=np.array(self.embed_query(query), dtype=np.float32).tolist(),
|
|
|
|
|
k=k,
|
|
|
|
|
vector_queries=[
|
|
|
|
|
VectorizedQuery(
|
|
|
|
|
vector=np.array(self.embed_query(query), dtype=np.float32).tolist(),
|
|
|
|
|
k_nearest_neighbors=k,
|
|
|
|
|
fields=FIELDS_CONTENT_VECTOR,
|
|
|
|
|
)
|
|
|
|
|
],
|
|
|
|
@ -539,21 +492,10 @@ class AzureSearch(VectorStore):
|
|
|
|
|
(
|
|
|
|
|
Document(
|
|
|
|
|
page_content=result.pop(FIELDS_CONTENT),
|
|
|
|
|
metadata={
|
|
|
|
|
**(
|
|
|
|
|
{FIELDS_ID: result.pop(FIELDS_ID)}
|
|
|
|
|
if FIELDS_ID in result
|
|
|
|
|
else {}
|
|
|
|
|
),
|
|
|
|
|
**(
|
|
|
|
|
json.loads(result[FIELDS_METADATA])
|
|
|
|
|
if FIELDS_METADATA in result
|
|
|
|
|
else {
|
|
|
|
|
k: v
|
|
|
|
|
for k, v in result.items()
|
|
|
|
|
if k != FIELDS_CONTENT_VECTOR
|
|
|
|
|
}
|
|
|
|
|
),
|
|
|
|
|
metadata=json.loads(result[FIELDS_METADATA])
|
|
|
|
|
if FIELDS_METADATA in result
|
|
|
|
|
else {
|
|
|
|
|
k: v for k, v in result.items() if k != FIELDS_CONTENT_VECTOR
|
|
|
|
|
},
|
|
|
|
|
),
|
|
|
|
|
float(result["@search.score"]),
|
|
|
|
@ -610,20 +552,19 @@ class AzureSearch(VectorStore):
|
|
|
|
|
Returns:
|
|
|
|
|
List of Documents most similar to the query and score for each
|
|
|
|
|
"""
|
|
|
|
|
from azure.search.documents.models import Vector
|
|
|
|
|
from azure.search.documents.models import VectorizedQuery
|
|
|
|
|
|
|
|
|
|
results = self.client.search(
|
|
|
|
|
search_text=query,
|
|
|
|
|
vectors=[
|
|
|
|
|
Vector(
|
|
|
|
|
value=np.array(self.embed_query(query), dtype=np.float32).tolist(),
|
|
|
|
|
k=50,
|
|
|
|
|
vector_queries=[
|
|
|
|
|
VectorizedQuery(
|
|
|
|
|
vector=np.array(self.embed_query(query), dtype=np.float32).tolist(),
|
|
|
|
|
k_nearest_neighbors=k,
|
|
|
|
|
fields=FIELDS_CONTENT_VECTOR,
|
|
|
|
|
)
|
|
|
|
|
],
|
|
|
|
|
filter=filters,
|
|
|
|
|
query_type="semantic",
|
|
|
|
|
query_language=self.semantic_query_language,
|
|
|
|
|
semantic_configuration_name=self.semantic_configuration_name,
|
|
|
|
|
query_caption="extractive",
|
|
|
|
|
query_answer="extractive",
|
|
|
|
@ -643,11 +584,6 @@ class AzureSearch(VectorStore):
|
|
|
|
|
Document(
|
|
|
|
|
page_content=result.pop(FIELDS_CONTENT),
|
|
|
|
|
metadata={
|
|
|
|
|
**(
|
|
|
|
|
{FIELDS_ID: result.pop(FIELDS_ID)}
|
|
|
|
|
if FIELDS_ID in result
|
|
|
|
|
else {}
|
|
|
|
|
),
|
|
|
|
|
**(
|
|
|
|
|
json.loads(result[FIELDS_METADATA])
|
|
|
|
|
if FIELDS_METADATA in result
|
|
|
|
@ -667,9 +603,7 @@ class AzureSearch(VectorStore):
|
|
|
|
|
if result.get("@search.captions")
|
|
|
|
|
else {},
|
|
|
|
|
"answers": semantic_answers_dict.get(
|
|
|
|
|
json.loads(result[FIELDS_METADATA]).get("key")
|
|
|
|
|
if FIELDS_METADATA in result
|
|
|
|
|
else "",
|
|
|
|
|
json.loads(result["metadata"]).get("key"),
|
|
|
|
|
"",
|
|
|
|
|
),
|
|
|
|
|
},
|
|
|
|
|