community[minor]: Add LarkSuite wiki document loader. (#21016)

**Description:** Add LarkSuite wiki document loader. Refer to the [LarkSuite API documentation](https://open.feishu.cn/document/server-docs/docs/wiki-v2/space-node/list) for details.
**Issue:** None
**Dependencies:** None
**Twitter handle:** None
pull/21023/head
Pengcheng Liu 2 months ago committed by GitHub
parent d36332476c
commit 1fad39be1c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -46,3 +46,33 @@ class LarkSuiteDocLoader(BaseLoader):
"title": metadata_json["data"]["document"]["title"],
}
yield Document(page_content=text, metadata=metadata)
class LarkSuiteWikiLoader(LarkSuiteDocLoader):
    """Document loader for a `LarkSuite` (`FeiShu`) wiki page."""

    def __init__(self, domain: str, access_token: str, wiki_id: str):
        """Set up the loader.

        Args:
            domain: The domain to load the LarkSuite.
            access_token: The access_token to use (tenant / user).
            wiki_id: The wiki_id to load.
        """
        self.domain = domain
        self.access_token = access_token
        self.wiki_id = wiki_id
        # Resolved lazily on first load; empty string means "not resolved yet".
        self.document_id = ""

    def lazy_load(self) -> Iterator[Document]:
        """Lazily load the wiki page as LarkSuite documents."""
        # A Feishu wiki id must first be translated into the underlying
        # document id; cache the result so repeated loads skip the lookup.
        if not self.document_id:
            node_url = (
                f"{self.domain}/open-apis/wiki/v2/spaces/get_node"
                f"?token={self.wiki_id}"
            )
            node_info = self._get_larksuite_api_json_data(node_url)
            self.document_id = node_info["data"]["node"]["obj_token"]
        yield from super().lazy_load()

@ -1,4 +1,7 @@
from langchain_community.document_loaders.larksuite import LarkSuiteDocLoader
from langchain_community.document_loaders.larksuite import (
LarkSuiteDocLoader,
LarkSuiteWikiLoader,
)
DOMAIN = ""
ACCESS_TOKEN = ""
@ -12,3 +15,12 @@ def test_larksuite_doc_loader() -> None:
assert len(docs) == 1
assert docs[0].page_content is not None
def test_larksuite_wiki_loader() -> None:
    """Smoke-test the LarkSuite (FeiShu) wiki loader end to end."""
    wiki_loader = LarkSuiteWikiLoader(
        domain=DOMAIN,
        access_token=ACCESS_TOKEN,
        wiki_id=DOCUMENT_ID,
    )
    documents = wiki_loader.load()
    assert len(documents) == 1
    assert documents[0].page_content is not None

Loading…
Cancel
Save