Azure Search: Remove select field restrictions and expand metadata to other fields, also expose kwargs to searches (#9894)

Description: 
If the metadata field is returned in the results, the previous behavior is
unchanged. If the metadata field does not exist in the results, the metadata
is instead expanded to any fields returned outside of the content field.

There's precedent for this as well; see the retriever:
https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/retrievers/azure_cognitive_search.py#L96C46-L96C46

Issue: 
#9765 - Ameliorates the hard-coding for cases where you have already indexed
to Cognitive Search without a metadata field and instead placed the metadata
in separate fields.

@hwchase17
This commit is contained in:
Sam Chou 2023-09-19 16:10:29 -07:00 committed by GitHub
parent 94cf71ecfa
commit 4f19ba3065
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -378,15 +378,18 @@ class AzureSearch(VectorStore):
fields=FIELDS_CONTENT_VECTOR, fields=FIELDS_CONTENT_VECTOR,
) )
], ],
select=[FIELDS_ID, FIELDS_CONTENT, FIELDS_METADATA],
filter=filters, filter=filters,
) )
# Convert results to Document objects # Convert results to Document objects
docs = [ docs = [
( (
Document( Document(
page_content=result[FIELDS_CONTENT], page_content=result.pop(FIELDS_CONTENT),
metadata=json.loads(result[FIELDS_METADATA]), metadata=json.loads(result[FIELDS_METADATA])
if FIELDS_METADATA in result
else {
k: v for k, v in result.items() if k != FIELDS_CONTENT_VECTOR
},
), ),
float(result["@search.score"]), float(result["@search.score"]),
) )
@ -435,7 +438,6 @@ class AzureSearch(VectorStore):
fields=FIELDS_CONTENT_VECTOR, fields=FIELDS_CONTENT_VECTOR,
) )
], ],
select=[FIELDS_ID, FIELDS_CONTENT, FIELDS_METADATA],
filter=filters, filter=filters,
top=k, top=k,
) )
@ -443,8 +445,12 @@ class AzureSearch(VectorStore):
docs = [ docs = [
( (
Document( Document(
page_content=result[FIELDS_CONTENT], page_content=result.pop(FIELDS_CONTENT),
metadata=json.loads(result[FIELDS_METADATA]), metadata=json.loads(result[FIELDS_METADATA])
if FIELDS_METADATA in result
else {
k: v for k, v in result.items() if k != FIELDS_CONTENT_VECTOR
},
), ),
float(result["@search.score"]), float(result["@search.score"]),
) )
@ -495,7 +501,6 @@ class AzureSearch(VectorStore):
fields=FIELDS_CONTENT_VECTOR, fields=FIELDS_CONTENT_VECTOR,
) )
], ],
select=[FIELDS_ID, FIELDS_CONTENT, FIELDS_METADATA],
filter=filters, filter=filters,
query_type="semantic", query_type="semantic",
query_language=self.semantic_query_language, query_language=self.semantic_query_language,
@ -516,9 +521,17 @@ class AzureSearch(VectorStore):
docs = [ docs = [
( (
Document( Document(
page_content=result["content"], page_content=result.pop(FIELDS_CONTENT),
metadata={ metadata={
**json.loads(result["metadata"]), **(
json.loads(result[FIELDS_METADATA])
if FIELDS_METADATA in result
else {
k: v
for k, v in result.items()
if k != FIELDS_CONTENT_VECTOR
}
),
**{ **{
"captions": { "captions": {
"text": result.get("@search.captions", [{}])[0].text, "text": result.get("@search.captions", [{}])[0].text,
@ -590,15 +603,15 @@ class AzureSearchVectorStoreRetriever(BaseRetriever):
def _get_relevant_documents( def _get_relevant_documents(
self, self,
query: str, query: str,
*,
run_manager: CallbackManagerForRetrieverRun, run_manager: CallbackManagerForRetrieverRun,
**kwargs: Any,
) -> List[Document]: ) -> List[Document]:
if self.search_type == "similarity": if self.search_type == "similarity":
docs = self.vectorstore.vector_search(query, k=self.k) docs = self.vectorstore.vector_search(query, k=self.k, **kwargs)
elif self.search_type == "hybrid": elif self.search_type == "hybrid":
docs = self.vectorstore.hybrid_search(query, k=self.k) docs = self.vectorstore.hybrid_search(query, k=self.k, **kwargs)
elif self.search_type == "semantic_hybrid": elif self.search_type == "semantic_hybrid":
docs = self.vectorstore.semantic_hybrid_search(query, k=self.k) docs = self.vectorstore.semantic_hybrid_search(query, k=self.k, **kwargs)
else: else:
raise ValueError(f"search_type of {self.search_type} not allowed.") raise ValueError(f"search_type of {self.search_type} not allowed.")
return docs return docs