mirror of
https://github.com/hwchase17/langchain
synced 2024-11-04 06:00:26 +00:00
Azure Search: Remove select field restrictions and expand metadata to other fields, also expose kwargs to searches (#9894)
Description: If the metadata field is returned in the results, the previous behavior is unchanged. If the metadata field does not exist in the results, metadata is expanded to include any fields returned other than the content field. There is precedent for this as well; see the retriever: https://github.com/langchain-ai/langchain/blob/master/libs/langchain/langchain/retrievers/azure_cognitive_search.py#L96C46-L96C46 Issue: #9765 - Ameliorates the hard-coding for cases where you have already indexed to Cognitive Search without a metadata field and instead placed metadata in separate fields. @hwchase17
This commit is contained in:
parent
94cf71ecfa
commit
4f19ba3065
@ -378,15 +378,18 @@ class AzureSearch(VectorStore):
|
|||||||
fields=FIELDS_CONTENT_VECTOR,
|
fields=FIELDS_CONTENT_VECTOR,
|
||||||
)
|
)
|
||||||
],
|
],
|
||||||
select=[FIELDS_ID, FIELDS_CONTENT, FIELDS_METADATA],
|
|
||||||
filter=filters,
|
filter=filters,
|
||||||
)
|
)
|
||||||
# Convert results to Document objects
|
# Convert results to Document objects
|
||||||
docs = [
|
docs = [
|
||||||
(
|
(
|
||||||
Document(
|
Document(
|
||||||
page_content=result[FIELDS_CONTENT],
|
page_content=result.pop(FIELDS_CONTENT),
|
||||||
metadata=json.loads(result[FIELDS_METADATA]),
|
metadata=json.loads(result[FIELDS_METADATA])
|
||||||
|
if FIELDS_METADATA in result
|
||||||
|
else {
|
||||||
|
k: v for k, v in result.items() if k != FIELDS_CONTENT_VECTOR
|
||||||
|
},
|
||||||
),
|
),
|
||||||
float(result["@search.score"]),
|
float(result["@search.score"]),
|
||||||
)
|
)
|
||||||
@ -435,7 +438,6 @@ class AzureSearch(VectorStore):
|
|||||||
fields=FIELDS_CONTENT_VECTOR,
|
fields=FIELDS_CONTENT_VECTOR,
|
||||||
)
|
)
|
||||||
],
|
],
|
||||||
select=[FIELDS_ID, FIELDS_CONTENT, FIELDS_METADATA],
|
|
||||||
filter=filters,
|
filter=filters,
|
||||||
top=k,
|
top=k,
|
||||||
)
|
)
|
||||||
@ -443,8 +445,12 @@ class AzureSearch(VectorStore):
|
|||||||
docs = [
|
docs = [
|
||||||
(
|
(
|
||||||
Document(
|
Document(
|
||||||
page_content=result[FIELDS_CONTENT],
|
page_content=result.pop(FIELDS_CONTENT),
|
||||||
metadata=json.loads(result[FIELDS_METADATA]),
|
metadata=json.loads(result[FIELDS_METADATA])
|
||||||
|
if FIELDS_METADATA in result
|
||||||
|
else {
|
||||||
|
k: v for k, v in result.items() if k != FIELDS_CONTENT_VECTOR
|
||||||
|
},
|
||||||
),
|
),
|
||||||
float(result["@search.score"]),
|
float(result["@search.score"]),
|
||||||
)
|
)
|
||||||
@ -495,7 +501,6 @@ class AzureSearch(VectorStore):
|
|||||||
fields=FIELDS_CONTENT_VECTOR,
|
fields=FIELDS_CONTENT_VECTOR,
|
||||||
)
|
)
|
||||||
],
|
],
|
||||||
select=[FIELDS_ID, FIELDS_CONTENT, FIELDS_METADATA],
|
|
||||||
filter=filters,
|
filter=filters,
|
||||||
query_type="semantic",
|
query_type="semantic",
|
||||||
query_language=self.semantic_query_language,
|
query_language=self.semantic_query_language,
|
||||||
@ -516,9 +521,17 @@ class AzureSearch(VectorStore):
|
|||||||
docs = [
|
docs = [
|
||||||
(
|
(
|
||||||
Document(
|
Document(
|
||||||
page_content=result["content"],
|
page_content=result.pop(FIELDS_CONTENT),
|
||||||
metadata={
|
metadata={
|
||||||
**json.loads(result["metadata"]),
|
**(
|
||||||
|
json.loads(result[FIELDS_METADATA])
|
||||||
|
if FIELDS_METADATA in result
|
||||||
|
else {
|
||||||
|
k: v
|
||||||
|
for k, v in result.items()
|
||||||
|
if k != FIELDS_CONTENT_VECTOR
|
||||||
|
}
|
||||||
|
),
|
||||||
**{
|
**{
|
||||||
"captions": {
|
"captions": {
|
||||||
"text": result.get("@search.captions", [{}])[0].text,
|
"text": result.get("@search.captions", [{}])[0].text,
|
||||||
@ -590,15 +603,15 @@ class AzureSearchVectorStoreRetriever(BaseRetriever):
|
|||||||
def _get_relevant_documents(
|
def _get_relevant_documents(
|
||||||
self,
|
self,
|
||||||
query: str,
|
query: str,
|
||||||
*,
|
|
||||||
run_manager: CallbackManagerForRetrieverRun,
|
run_manager: CallbackManagerForRetrieverRun,
|
||||||
|
**kwargs: Any,
|
||||||
) -> List[Document]:
|
) -> List[Document]:
|
||||||
if self.search_type == "similarity":
|
if self.search_type == "similarity":
|
||||||
docs = self.vectorstore.vector_search(query, k=self.k)
|
docs = self.vectorstore.vector_search(query, k=self.k, **kwargs)
|
||||||
elif self.search_type == "hybrid":
|
elif self.search_type == "hybrid":
|
||||||
docs = self.vectorstore.hybrid_search(query, k=self.k)
|
docs = self.vectorstore.hybrid_search(query, k=self.k, **kwargs)
|
||||||
elif self.search_type == "semantic_hybrid":
|
elif self.search_type == "semantic_hybrid":
|
||||||
docs = self.vectorstore.semantic_hybrid_search(query, k=self.k)
|
docs = self.vectorstore.semantic_hybrid_search(query, k=self.k, **kwargs)
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"search_type of {self.search_type} not allowed.")
|
raise ValueError(f"search_type of {self.search_type} not allowed.")
|
||||||
return docs
|
return docs
|
||||||
|
Loading…
Reference in New Issue
Block a user