Remove unnecessary spaces from document object’s page_content of BiliBiliLoader (#4619)

- Remove unnecessary whitespace from the page_content of the Document objects returned by BiliBiliLoader
- Fix the BiliBiliLoader documentation notebook and test file
dynamic_agent_tools
了空 1 year ago committed by GitHub
parent f47ec5b4b6
commit f7e3d97b19
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -5,7 +5,7 @@
"id": "66a7777e", "id": "66a7777e",
"metadata": {}, "metadata": {},
"source": [ "source": [
"# Bilibili\n", "# BiliBili\n",
"\n", "\n",
">[Bilibili](https://www.bilibili.tv/) is one of the most beloved long-form video sites in China.\n", ">[Bilibili](https://www.bilibili.tv/) is one of the most beloved long-form video sites in China.\n",
"\n", "\n",
@@ -35,7 +35,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.document_loaders.bilibili import BiliBiliLoader" "from langchain.document_loaders import BiliBiliLoader"
] ]
}, },
{ {

@@ -60,11 +60,11 @@ class BiliBiliLoader(BaseLoader):
raw_sub_titles = json.loads(result.content)["body"] raw_sub_titles = json.loads(result.content)["body"]
raw_transcript = " ".join([c["content"] for c in raw_sub_titles]) raw_transcript = " ".join([c["content"] for c in raw_sub_titles])
raw_transcript_with_meta_info = f""" raw_transcript_with_meta_info = (
Video Title: {video_info['title']}, f"Video Title: {video_info['title']},"
description: {video_info['desc']}\n f"description: {video_info['desc']}\n\n"
Transcript: {raw_transcript} f"Transcript: {raw_transcript}"
""" )
return raw_transcript_with_meta_info, video_info return raw_transcript_with_meta_info, video_info
else: else:
raw_transcript = "" raw_transcript = ""

@@ -1,4 +1,4 @@
from langchain.document_loaders.bilibili import BiliBiliLoader from langchain.document_loaders import BiliBiliLoader
def test_bilibili_loader() -> None: def test_bilibili_loader() -> None:

Loading…
Cancel
Save