forked from Archives/langchain
709f26b69e
I've added a Bilibili loader. Bilibili is a very active video site in China, and I think we need this loader.

Example:

```python
from langchain.document_loaders.bilibili import BiliBiliLoader

loader = BiliBiliLoader(
    ["https://www.bilibili.com/video/BV1xt411o7Xu/", "https://www.bilibili.com/video/av330407025/"]
)
docs = loader.load()
```

Co-authored-by: 了空 <568250549@qq.com>
21 lines
554 B
Python
21 lines
554 B
Python
from langchain.document_loaders.bilibili import BiliBiliLoader
|
|
|
|
|
|
def test_bilibili_loader() -> None:
    """Test Bilibili Loader.

    Builds a ``BiliBiliLoader`` from two video URLs (one ``BV``-style id and
    one ``av``-style id), loads them, and checks that one document is
    returned per URL with the expected content and owner metadata.

    NOTE(review): ``loader.load()`` presumably fetches the videos over the
    network, which would make this an integration test -- confirm before
    running it in an offline environment.
    """
    video_urls = [
        "https://www.bilibili.com/video/BV1xt411o7Xu/",
        "https://www.bilibili.com/video/av330407025/",
    ]
    loader = BiliBiliLoader(video_urls)
    docs = loader.load()

    # One document is expected for each URL passed to the loader.
    assert len(docs) == len(video_urls)

    # The first video must yield non-empty text.
    assert len(docs[0].page_content) > 0

    # The second video is expected to yield empty text while still carrying
    # its owner metadata.
    # NOTE(review): the original asserted this owner id twice, once next to
    # the docs[0] check; the first copy may have been intended to verify
    # docs[0]'s owner instead -- confirm and add that assertion if so.
    assert docs[1].page_content == ""
    assert docs[1].metadata["owner"]["mid"] == 398095160
|