mirror of
https://github.com/hwchase17/langchain
synced 2024-11-04 06:00:26 +00:00
refactor: Code refactoring & simplification for Google Cloud Enterprise Search retriever (#8369)
Follow-up to https://github.com/langchain-ai/langchain/pull/7857 - Changes `_convert_search_response()` to use object attributes instead of converting the response to a dictionary - Simplifies the logic for readability
This commit is contained in:
parent
594f195e54
commit
d7e6770de8
@ -106,34 +106,23 @@ class GoogleCloudEnterpriseSearchRetriever(BaseRetriever):
|
|||||||
self, results: Sequence[SearchResult]
|
self, results: Sequence[SearchResult]
|
||||||
) -> List[Document]:
|
) -> List[Document]:
|
||||||
"""Converts a sequence of search results to a list of LangChain documents."""
|
"""Converts a sequence of search results to a list of LangChain documents."""
|
||||||
from google.protobuf.json_format import MessageToDict
|
documents: List[Document] = []
|
||||||
|
|
||||||
documents = []
|
|
||||||
for result in results:
|
for result in results:
|
||||||
document_dict = MessageToDict(result.document._pb)
|
derived_struct_data = result.document.derived_struct_data
|
||||||
derived_struct_data = document_dict.get("derivedStructData", None)
|
doc_metadata = result.document.struct_data
|
||||||
if derived_struct_data:
|
doc_metadata.source = derived_struct_data.link or ""
|
||||||
doc_metadata = document_dict.get("structData", {})
|
doc_metadata.id = result.document.id
|
||||||
chunk_type = (
|
|
||||||
"extractive_answers"
|
for chunk in (
|
||||||
if self.get_extractive_answers
|
derived_struct_data.extractive_answers
|
||||||
else "extractive_segments"
|
or derived_struct_data.extractive_segments
|
||||||
|
):
|
||||||
|
if hasattr(chunk, "page_number"):
|
||||||
|
doc_metadata.source += f":{chunk.page_number}"
|
||||||
|
documents.append(
|
||||||
|
Document(page_content=chunk.content, metadata=doc_metadata)
|
||||||
)
|
)
|
||||||
for chunk in derived_struct_data.get(chunk_type, []):
|
|
||||||
if chunk_type == "extractive_answers":
|
|
||||||
doc_metadata["source"] = (
|
|
||||||
f"{derived_struct_data.get('link', '')}"
|
|
||||||
f":{chunk.get('pageNumber', '')}"
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
doc_metadata[
|
|
||||||
"source"
|
|
||||||
] = f"{derived_struct_data.get('link', '')}"
|
|
||||||
doc_metadata["id"] = document_dict["id"]
|
|
||||||
document = Document(
|
|
||||||
page_content=chunk.get("content", ""), metadata=doc_metadata
|
|
||||||
)
|
|
||||||
documents.append(document)
|
|
||||||
|
|
||||||
return documents
|
return documents
|
||||||
|
|
||||||
@ -162,7 +151,7 @@ class GoogleCloudEnterpriseSearchRetriever(BaseRetriever):
|
|||||||
extractive_content_spec=extractive_content_spec,
|
extractive_content_spec=extractive_content_spec,
|
||||||
)
|
)
|
||||||
|
|
||||||
request = SearchRequest(
|
return SearchRequest(
|
||||||
query=query,
|
query=query,
|
||||||
filter=self.filter,
|
filter=self.filter,
|
||||||
serving_config=self._serving_config,
|
serving_config=self._serving_config,
|
||||||
@ -171,8 +160,6 @@ class GoogleCloudEnterpriseSearchRetriever(BaseRetriever):
|
|||||||
query_expansion_spec=query_expansion_spec,
|
query_expansion_spec=query_expansion_spec,
|
||||||
)
|
)
|
||||||
|
|
||||||
return request
|
|
||||||
|
|
||||||
def _get_relevant_documents(
|
def _get_relevant_documents(
|
||||||
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
|
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
|
||||||
) -> List[Document]:
|
) -> List[Document]:
|
||||||
|
Loading…
Reference in New Issue
Block a user