From 8cf2ff0be0be0e0edb7f68260ccf4d3d11a03e05 Mon Sep 17 00:00:00 2001 From: Jelmer Borst Date: Sat, 29 Apr 2023 04:56:53 +0200 Subject: [PATCH] Confluence: Add page status filter for spaces (#3732) At the moment, all content in Confluence is retrieved by default, including archived content. This is often undesired because archived content is no longer relevant. **Notes** Fetching pages by label does not support excluding archived content, so archived pages may still be returned when loading by label. This may lead to unexpected results. --- langchain/document_loaders/confluence.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/langchain/document_loaders/confluence.py b/langchain/document_loaders/confluence.py index 9da00395..75490157 100644 --- a/langchain/document_loaders/confluence.py +++ b/langchain/document_loaders/confluence.py @@ -156,6 +156,7 @@ class ConfluenceLoader(BaseLoader): page_ids: Optional[List[str]] = None, label: Optional[str] = None, cql: Optional[str] = None, + include_archived_content: bool = False, include_attachments: bool = False, include_comments: bool = False, limit: Optional[int] = 50, @@ -170,6 +171,9 @@ class ConfluenceLoader(BaseLoader): :type label: Optional[str], optional :param cql: CQL Expression, defaults to None :type cql: Optional[str], optional + :param include_archived_content: Whether to include archived content, + defaults to False + :type include_archived_content: bool, optional :param include_attachments: defaults to False :type include_attachments: bool, optional :param include_comments: defaults to False @@ -197,6 +201,7 @@ class ConfluenceLoader(BaseLoader): space=space_key, limit=limit, max_pages=max_pages, + status="any" if include_archived_content else "current", expand="body.storage.value", ) for page in pages: @@ -221,6 +226,7 @@ class ConfluenceLoader(BaseLoader): cql=cql, limit=limit, max_pages=max_pages, + include_archived_spaces=include_archived_content, expand="body.storage.value", ) for page in pages: