From 232faba796e6fab1159c57e63a3cc999545f2089 Mon Sep 17 00:00:00 2001
From: xu0o0
Date: Mon, 12 Jun 2023 04:23:22 +0800
Subject: [PATCH] fix: TypeError when loading confluence pages by cql (#5878)

The Confluence loader uses the wrong API (`Confluence.cql()` provided by `atlassian-python-api`) to load pages by CQL.

`Confluence.cql()` is a wrapper of the `/rest/api/search` API which searches for entities in Confluence. To search for pages in Confluence, the loader can use the `/rest/api/content/search` API.

#### Who can review?

Tag maintainers/contributors who might be interested:
@eyurtsev

#### References

##### Cloud API
https://developer.atlassian.com/cloud/confluence/rest/v1/api-group-content/#api-wiki-rest-api-content-search-get
https://developer.atlassian.com/cloud/confluence/rest/v1/api-group-search/#api-wiki-rest-api-search-get

##### Server API
https://docs.atlassian.com/ConfluenceServer/rest/8.3.1/#api/content-search
https://docs.atlassian.com/ConfluenceServer/rest/8.3.1/#api/search
---
 langchain/document_loaders/confluence.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/langchain/document_loaders/confluence.py b/langchain/document_loaders/confluence.py
index 05806412..a90cfb80 100644
--- a/langchain/document_loaders/confluence.py
+++ b/langchain/document_loaders/confluence.py
@@ -1,7 +1,7 @@
 """Load Data from a Confluence Space"""
 import logging
 from io import BytesIO
-from typing import Any, Callable, List, Optional, Union
+from typing import Any, Callable, Dict, List, Optional, Union
 
 from tenacity import (
     before_sleep_log,
@@ -253,7 +253,7 @@ class ConfluenceLoader(BaseLoader):
 
         if cql:
             pages = self.paginate_request(
-                self.confluence.cql,
+                self._search_content_by_cql,
                 cql=cql,
                 limit=limit,
                 max_pages=max_pages,
@@ -292,6 +292,19 @@ class ConfluenceLoader(BaseLoader):
 
         return docs
 
+    def _search_content_by_cql(
+        self, cql: str, include_archived_spaces: Optional[bool] = None, **kwargs: Any
+    ) -> List[dict]:
+        url = "rest/api/content/search"
+
+        params: Dict[str, Any] = {"cql": cql}
+        params.update(kwargs)
+        if include_archived_spaces is not None:
+            params["includeArchivedSpaces"] = include_archived_spaces
+
+        response = self.confluence.get(url, params=params)
+        return response.get("results", [])
+
     def paginate_request(self, retrieval_method: Callable, **kwargs: Any) -> List:
         """Paginate the various methods to retrieve groups of pages.