From b0d0338f21d605127df754016575ebea32646b95 Mon Sep 17 00:00:00 2001 From: Wilson Leao Neto Date: Sun, 6 Aug 2023 02:21:24 +0200 Subject: [PATCH] feat: expose Kendra result item id and document id as document metadata (#8796) - Description: we expose Kendra result item id and document id as document metadata. - Tag maintainer: @3coins @baskaryan - Twitter handle: wilsonleao **Why** The result item id and document id might be used to keep track of the retrieved resources. --- libs/langchain/langchain/retrievers/kendra.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/libs/langchain/langchain/retrievers/kendra.py b/libs/langchain/langchain/retrievers/kendra.py index 966edc3d79..344a2bf121 100644 --- a/libs/langchain/langchain/retrievers/kendra.py +++ b/libs/langchain/langchain/retrievers/kendra.py @@ -138,7 +138,7 @@ class ResultItem(BaseModel, ABC, extra=Extra.allow): """Abstract class that represents a result item.""" Id: Optional[str] - """The ID of the item.""" + """The ID of the relevant result item.""" DocumentId: Optional[str] """The document ID.""" DocumentURI: Optional[str] @@ -156,8 +156,13 @@ class ResultItem(BaseModel, ABC, extra=Extra.allow): def get_additional_metadata(self) -> dict: """Document additional metadata dict. - This returns any extra metadata except these values: - ['source', 'title', 'excerpt' and 'document_attributes']. + This returns any extra metadata except these: + * result_id + * document_id + * source + * title + * excerpt + * document_attributes """ return {} @@ -173,6 +178,8 @@ class ResultItem(BaseModel, ABC, extra=Extra.allow): metadata = self.get_additional_metadata() metadata.update( { + "result_id": self.Id, + "document_id": self.DocumentId, "source": self.DocumentURI, "title": self.get_title(), "excerpt": self.get_excerpt(),