Azure Search: Remove select field restrictions and expand metadata to other fields, also expose kwargs to searches (#9894)

Description: 
If the metadata field is returned in the results, the previous behavior is
unchanged. If the metadata field does not exist in the results, the metadata
is instead built from any fields returned outside of the content field.

There's precedent for this as well; see the retriever:
https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/retrievers/azure_cognitive_search.py#L96C46-L96C46

Issue: 
#9765 - Ameliorates the hard-coding for cases where you have already indexed
into Cognitive Search without a metadata field and instead placed metadata in
separate fields.

@hwchase17
pull/10805/head
Sam Chou 11 months ago committed by GitHub
parent 94cf71ecfa
commit 4f19ba3065
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -378,15 +378,18 @@ class AzureSearch(VectorStore):
fields=FIELDS_CONTENT_VECTOR,
)
],
select=[FIELDS_ID, FIELDS_CONTENT, FIELDS_METADATA],
filter=filters,
)
# Convert results to Document objects
docs = [
(
Document(
page_content=result[FIELDS_CONTENT],
metadata=json.loads(result[FIELDS_METADATA]),
page_content=result.pop(FIELDS_CONTENT),
metadata=json.loads(result[FIELDS_METADATA])
if FIELDS_METADATA in result
else {
k: v for k, v in result.items() if k != FIELDS_CONTENT_VECTOR
},
),
float(result["@search.score"]),
)
@ -435,7 +438,6 @@ class AzureSearch(VectorStore):
fields=FIELDS_CONTENT_VECTOR,
)
],
select=[FIELDS_ID, FIELDS_CONTENT, FIELDS_METADATA],
filter=filters,
top=k,
)
@ -443,8 +445,12 @@ class AzureSearch(VectorStore):
docs = [
(
Document(
page_content=result[FIELDS_CONTENT],
metadata=json.loads(result[FIELDS_METADATA]),
page_content=result.pop(FIELDS_CONTENT),
metadata=json.loads(result[FIELDS_METADATA])
if FIELDS_METADATA in result
else {
k: v for k, v in result.items() if k != FIELDS_CONTENT_VECTOR
},
),
float(result["@search.score"]),
)
@ -495,7 +501,6 @@ class AzureSearch(VectorStore):
fields=FIELDS_CONTENT_VECTOR,
)
],
select=[FIELDS_ID, FIELDS_CONTENT, FIELDS_METADATA],
filter=filters,
query_type="semantic",
query_language=self.semantic_query_language,
@ -516,9 +521,17 @@ class AzureSearch(VectorStore):
docs = [
(
Document(
page_content=result["content"],
page_content=result.pop(FIELDS_CONTENT),
metadata={
**json.loads(result["metadata"]),
**(
json.loads(result[FIELDS_METADATA])
if FIELDS_METADATA in result
else {
k: v
for k, v in result.items()
if k != FIELDS_CONTENT_VECTOR
}
),
**{
"captions": {
"text": result.get("@search.captions", [{}])[0].text,
@ -568,7 +581,7 @@ class AzureSearchVectorStoreRetriever(BaseRetriever):
vectorstore: AzureSearch
"""Azure Search instance used to find similar documents."""
search_type: str = "hybrid"
"""Type of search to perform. Options are "similarity", "hybrid",
"""Type of search to perform. Options are "similarity", "hybrid",
"semantic_hybrid"."""
k: int = 4
"""Number of documents to return."""
@ -590,15 +603,15 @@ class AzureSearchVectorStoreRetriever(BaseRetriever):
def _get_relevant_documents(
self,
query: str,
*,
run_manager: CallbackManagerForRetrieverRun,
**kwargs: Any,
) -> List[Document]:
if self.search_type == "similarity":
docs = self.vectorstore.vector_search(query, k=self.k)
docs = self.vectorstore.vector_search(query, k=self.k, **kwargs)
elif self.search_type == "hybrid":
docs = self.vectorstore.hybrid_search(query, k=self.k)
docs = self.vectorstore.hybrid_search(query, k=self.k, **kwargs)
elif self.search_type == "semantic_hybrid":
docs = self.vectorstore.semantic_hybrid_search(query, k=self.k)
docs = self.vectorstore.semantic_hybrid_search(query, k=self.k, **kwargs)
else:
raise ValueError(f"search_type of {self.search_type} not allowed.")
return docs

Loading…
Cancel
Save