|
|
@ -36,7 +36,7 @@ class _BaseGoogleVertexAISearchRetriever(BaseModel):
|
|
|
|
""" Defines the Vertex AI Search data type
|
|
|
|
""" Defines the Vertex AI Search data type
|
|
|
|
0 - Unstructured data
|
|
|
|
0 - Unstructured data
|
|
|
|
1 - Structured data
|
|
|
|
1 - Structured data
|
|
|
|
2 - Website data (with Advanced Website Indexing)
|
|
|
|
2 - Website data
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
@root_validator(pre=True)
|
|
|
|
@root_validator(pre=True)
|
|
|
@ -154,7 +154,7 @@ class _BaseGoogleVertexAISearchRetriever(BaseModel):
|
|
|
|
return documents
|
|
|
|
return documents
|
|
|
|
|
|
|
|
|
|
|
|
def _convert_website_search_response(
|
|
|
|
def _convert_website_search_response(
|
|
|
|
self, results: Sequence[SearchResult]
|
|
|
|
self, results: Sequence[SearchResult], chunk_type: str
|
|
|
|
) -> List[Document]:
|
|
|
|
) -> List[Document]:
|
|
|
|
"""Converts a sequence of search results to a list of LangChain documents."""
|
|
|
|
"""Converts a sequence of search results to a list of LangChain documents."""
|
|
|
|
from google.protobuf.json_format import MessageToDict
|
|
|
|
from google.protobuf.json_format import MessageToDict
|
|
|
@ -173,24 +173,26 @@ class _BaseGoogleVertexAISearchRetriever(BaseModel):
|
|
|
|
doc_metadata["id"] = document_dict["id"]
|
|
|
|
doc_metadata["id"] = document_dict["id"]
|
|
|
|
doc_metadata["source"] = derived_struct_data.get("link", "")
|
|
|
|
doc_metadata["source"] = derived_struct_data.get("link", "")
|
|
|
|
|
|
|
|
|
|
|
|
chunk_type = "extractive_answers"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if chunk_type not in derived_struct_data:
|
|
|
|
if chunk_type not in derived_struct_data:
|
|
|
|
continue
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
text_field = "snippet" if chunk_type == "snippets" else "content"
|
|
|
|
|
|
|
|
|
|
|
|
for chunk in derived_struct_data[chunk_type]:
|
|
|
|
for chunk in derived_struct_data[chunk_type]:
|
|
|
|
documents.append(
|
|
|
|
documents.append(
|
|
|
|
Document(
|
|
|
|
Document(
|
|
|
|
page_content=chunk.get("content", ""), metadata=doc_metadata
|
|
|
|
page_content=chunk.get(text_field, ""), metadata=doc_metadata
|
|
|
|
)
|
|
|
|
)
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
if not documents:
|
|
|
|
if not documents:
|
|
|
|
print(
|
|
|
|
print(f"No {chunk_type} could be found.")
|
|
|
|
f"No {chunk_type} could be found.\n"
|
|
|
|
if chunk_type == "extractive_answers":
|
|
|
|
"Make sure that your data store is using Advanced Website Indexing.\n"
|
|
|
|
print(
|
|
|
|
"https://cloud.google.com/generative-ai-app-builder/docs/about-advanced-features#advanced-website-indexing" # noqa: E501
|
|
|
|
"Make sure that your data store is using Advanced Website "
|
|
|
|
)
|
|
|
|
"Indexing.\n"
|
|
|
|
|
|
|
|
"https://cloud.google.com/generative-ai-app-builder/docs/about-advanced-features#advanced-website-indexing" # noqa: E501
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
return documents
|
|
|
|
return documents
|
|
|
|
|
|
|
|
|
|
|
@ -206,7 +208,7 @@ class GoogleVertexAISearchRetriever(BaseRetriever, _BaseGoogleVertexAISearchRetr
|
|
|
|
filter: Optional[str] = None
|
|
|
|
filter: Optional[str] = None
|
|
|
|
"""Filter expression."""
|
|
|
|
"""Filter expression."""
|
|
|
|
get_extractive_answers: bool = False
|
|
|
|
get_extractive_answers: bool = False
|
|
|
|
"""If True return Extractive Answers, otherwise return Extractive Segments."""
|
|
|
|
"""If True return Extractive Answers, otherwise return Extractive Segments or Snippets.""" # noqa: E501
|
|
|
|
max_documents: int = Field(default=5, ge=1, le=100)
|
|
|
|
max_documents: int = Field(default=5, ge=1, le=100)
|
|
|
|
"""The maximum number of documents to return."""
|
|
|
|
"""The maximum number of documents to return."""
|
|
|
|
max_extractive_answer_count: int = Field(default=1, ge=1, le=5)
|
|
|
|
max_extractive_answer_count: int = Field(default=1, ge=1, le=5)
|
|
|
@ -307,12 +309,15 @@ class GoogleVertexAISearchRetriever(BaseRetriever, _BaseGoogleVertexAISearchRetr
|
|
|
|
content_search_spec = SearchRequest.ContentSearchSpec(
|
|
|
|
content_search_spec = SearchRequest.ContentSearchSpec(
|
|
|
|
extractive_content_spec=SearchRequest.ContentSearchSpec.ExtractiveContentSpec(
|
|
|
|
extractive_content_spec=SearchRequest.ContentSearchSpec.ExtractiveContentSpec(
|
|
|
|
max_extractive_answer_count=self.max_extractive_answer_count,
|
|
|
|
max_extractive_answer_count=self.max_extractive_answer_count,
|
|
|
|
)
|
|
|
|
),
|
|
|
|
|
|
|
|
snippet_spec=SearchRequest.ContentSearchSpec.SnippetSpec(
|
|
|
|
|
|
|
|
return_snippet=True
|
|
|
|
|
|
|
|
),
|
|
|
|
)
|
|
|
|
)
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
raise NotImplementedError(
|
|
|
|
raise NotImplementedError(
|
|
|
|
"Only data store type 0 (Unstructured), 1 (Structured),"
|
|
|
|
"Only data store type 0 (Unstructured), 1 (Structured),"
|
|
|
|
"or 2 (Website with Advanced Indexing) are supported currently."
|
|
|
|
"or 2 (Website) are supported currently."
|
|
|
|
+ f" Got {self.engine_data_type}"
|
|
|
|
+ f" Got {self.engine_data_type}"
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
@ -354,11 +359,16 @@ class GoogleVertexAISearchRetriever(BaseRetriever, _BaseGoogleVertexAISearchRetr
|
|
|
|
elif self.engine_data_type == 1:
|
|
|
|
elif self.engine_data_type == 1:
|
|
|
|
documents = self._convert_structured_search_response(response.results)
|
|
|
|
documents = self._convert_structured_search_response(response.results)
|
|
|
|
elif self.engine_data_type == 2:
|
|
|
|
elif self.engine_data_type == 2:
|
|
|
|
documents = self._convert_website_search_response(response.results)
|
|
|
|
chunk_type = (
|
|
|
|
|
|
|
|
"extractive_answers" if self.get_extractive_answers else "snippets"
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
documents = self._convert_website_search_response(
|
|
|
|
|
|
|
|
response.results, chunk_type
|
|
|
|
|
|
|
|
)
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
raise NotImplementedError(
|
|
|
|
raise NotImplementedError(
|
|
|
|
"Only data store type 0 (Unstructured), 1 (Structured),"
|
|
|
|
"Only data store type 0 (Unstructured), 1 (Structured),"
|
|
|
|
"or 2 (Website with Advanced Indexing) are supported currently."
|
|
|
|
"or 2 (Website) are supported currently."
|
|
|
|
+ f" Got {self.engine_data_type}"
|
|
|
|
+ f" Got {self.engine_data_type}"
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
@ -431,7 +441,9 @@ class GoogleVertexAIMultiTurnSearchRetriever(
|
|
|
|
response = self._client.converse_conversation(request)
|
|
|
|
response = self._client.converse_conversation(request)
|
|
|
|
|
|
|
|
|
|
|
|
if self.engine_data_type == 2:
|
|
|
|
if self.engine_data_type == 2:
|
|
|
|
return self._convert_website_search_response(response.search_results)
|
|
|
|
return self._convert_website_search_response(
|
|
|
|
|
|
|
|
response.search_results, "extractive_answers"
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
return self._convert_unstructured_search_response(
|
|
|
|
return self._convert_unstructured_search_response(
|
|
|
|
response.search_results, "extractive_answers"
|
|
|
|
response.search_results, "extractive_answers"
|
|
|
|