Fix web-base loader (#11135)

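Fix initialization: a `web_path` given as a single string is now wrapped in a one-element list (it previously matched the `Sequence` branch and was split into characters), and unsupported argument types now raise `TypeError`.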

https://github.com/langchain-ai/langchain/issues/11095
This commit is contained in:
Eugene Yurtsev 2023-09-28 22:36:46 -04:00 committed by GitHub
parent 3bc44b01c0
commit 2c114fcb5e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 14 additions and 1 deletion

View File

@@ -76,10 +76,15 @@ class WebBaseLoader(BaseLoader):
) )
if web_paths: if web_paths:
self.web_paths = list(web_paths) self.web_paths = list(web_paths)
elif isinstance(web_path, str):
self.web_paths = [web_path]
elif isinstance(web_path, Sequence): elif isinstance(web_path, Sequence):
self.web_paths = list(web_path) self.web_paths = list(web_path)
else: else:
self.web_paths = [web_path] raise TypeError(
f"web_path must be str or Sequence[str] got ({type(web_path)}) or"
f" web_paths must be Sequence[str] got ({type(web_paths)})"
)
self.requests_per_second = requests_per_second self.requests_per_second = requests_per_second
self.default_parser = default_parser self.default_parser = default_parser
self.requests_kwargs = requests_kwargs or {} self.requests_kwargs = requests_kwargs or {}

View File

@@ -11,3 +11,11 @@ class TestWebBaseLoader:
url = "https://www.example.com" url = "https://www.example.com"
loader = WebBaseLoader(url, header_template=header_template) loader = WebBaseLoader(url, header_template=header_template)
assert loader.session.headers["User-Agent"] == user_specified_user_agent assert loader.session.headers["User-Agent"] == user_specified_user_agent
def test_web_path_parameter(self) -> None:
    """Check that each accepted spelling of the path argument is normalised.

    Passing ``web_paths`` as a list, ``web_path`` as a list, or ``web_path``
    as a bare string must each leave ``web_paths`` holding a one-element
    list containing the URL.
    """
    url = "https://www.example.com"
    expected_paths = [url]

    # Plural keyword with a list of URLs.
    from_plural_list = WebBaseLoader(web_paths=[url])
    assert from_plural_list.web_paths == expected_paths

    # Singular keyword given a list of URLs.
    from_singular_list = WebBaseLoader(web_path=[url])
    assert from_singular_list.web_paths == expected_paths

    # Singular keyword given a single string; must not be split into characters.
    from_singular_str = WebBaseLoader(web_path=url)
    assert from_singular_str.web_paths == expected_paths