mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
fix: TypeError when loading confluence pages by cql (#5878)
The Confluence loader uses the wrong API (`Confluence.cql()`, provided by `atlassian-python-api`) to load pages by CQL. `Confluence.cql()` is a wrapper around the `/rest/api/search` API, which searches for entities in Confluence. To search for pages in Confluence, the loader should use the `/rest/api/content/search` API instead.

#### Who can review?

Tag maintainers/contributors who might be interested: @eyurtsev

<!-- For a quicker response, figure out the right person to tag with @
@hwchase17 - project lead
Tracing / Callbacks - @agola11
Async - @agola11
DataLoaders - @eyurtsev
Models - @hwchase17 - @agola11
Agents / Tools / Toolkits - @vowelparrot
VectorStores / Retrievers / Memory - @dev2049
-->

#### References

##### Cloud API

https://developer.atlassian.com/cloud/confluence/rest/v1/api-group-content/#api-wiki-rest-api-content-search-get
https://developer.atlassian.com/cloud/confluence/rest/v1/api-group-search/#api-wiki-rest-api-search-get

##### Server API

https://docs.atlassian.com/ConfluenceServer/rest/8.3.1/#api/content-search
https://docs.atlassian.com/ConfluenceServer/rest/8.3.1/#api/search
This commit is contained in:
parent
d7d629911b
commit
232faba796
@ -1,7 +1,7 @@
|
||||
"""Load Data from a Confluence Space"""
|
||||
import logging
|
||||
from io import BytesIO
|
||||
from typing import Any, Callable, List, Optional, Union
|
||||
from typing import Any, Callable, Dict, List, Optional, Union
|
||||
|
||||
from tenacity import (
|
||||
before_sleep_log,
|
||||
@ -253,7 +253,7 @@ class ConfluenceLoader(BaseLoader):
|
||||
|
||||
if cql:
|
||||
pages = self.paginate_request(
|
||||
self.confluence.cql,
|
||||
self._search_content_by_cql,
|
||||
cql=cql,
|
||||
limit=limit,
|
||||
max_pages=max_pages,
|
||||
@ -292,6 +292,19 @@ class ConfluenceLoader(BaseLoader):
|
||||
|
||||
return docs
|
||||
|
||||
def _search_content_by_cql(
|
||||
self, cql: str, include_archived_spaces: Optional[bool] = None, **kwargs: Any
|
||||
) -> List[dict]:
|
||||
url = "rest/api/content/search"
|
||||
|
||||
params: Dict[str, Any] = {"cql": cql}
|
||||
params.update(kwargs)
|
||||
if include_archived_spaces is not None:
|
||||
params["includeArchivedSpaces"] = include_archived_spaces
|
||||
|
||||
response = self.confluence.get(url, params=params)
|
||||
return response.get("results", [])
|
||||
|
||||
def paginate_request(self, retrieval_method: Callable, **kwargs: Any) -> List:
|
||||
"""Paginate the various methods to retrieve groups of pages.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user