Validate the type of WebBaseLoader's `web_path` argument.

web_base.py: a `str` web_path is now wrapped into a one-element list by an
explicit branch placed ahead of the Sequence branch; any value that is neither
a str nor a Sequence raises TypeError instead of being silently wrapped in a
list. The str check has to come first because a str is itself a Sequence, so
the Sequence branch would otherwise split a URL into single characters
(presumably `Sequence` here is typing/collections.abc Sequence; its import is
outside this patch - confirm).

test_web_base.py: adds test_web_path_parameter, which checks that
`web_paths=[url]`, `web_path=[url]` and `web_path=url` all yield
`web_paths == [url]`. The TypeError branch is not exercised by this test.

NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Leading whitespace on the hunk lines below was
reconstructed from Python block structure - verify it against the target files
before applying.

diff --git a/libs/langchain/langchain/document_loaders/web_base.py b/libs/langchain/langchain/document_loaders/web_base.py
index 3c4677267a..5d1a9daa48 100644
--- a/libs/langchain/langchain/document_loaders/web_base.py
+++ b/libs/langchain/langchain/document_loaders/web_base.py
@@ -76,10 +76,15 @@ class WebBaseLoader(BaseLoader):
             )
         if web_paths:
             self.web_paths = list(web_paths)
+        elif isinstance(web_path, str):
+            self.web_paths = [web_path]
         elif isinstance(web_path, Sequence):
             self.web_paths = list(web_path)
         else:
-            self.web_paths = [web_path]
+            raise TypeError(
+                f"web_path must be str or Sequence[str] got ({type(web_path)}) or"
+                f" web_paths must be Sequence[str] got ({type(web_paths)})"
+            )
         self.requests_per_second = requests_per_second
         self.default_parser = default_parser
         self.requests_kwargs = requests_kwargs or {}
diff --git a/libs/langchain/tests/unit_tests/document_loaders/test_web_base.py b/libs/langchain/tests/unit_tests/document_loaders/test_web_base.py
index ecbf423dbf..41a81a15a5 100644
--- a/libs/langchain/tests/unit_tests/document_loaders/test_web_base.py
+++ b/libs/langchain/tests/unit_tests/document_loaders/test_web_base.py
@@ -11,3 +11,11 @@ class TestWebBaseLoader:
         url = "https://www.example.com"
         loader = WebBaseLoader(url, header_template=header_template)
         assert loader.session.headers["User-Agent"] == user_specified_user_agent
+
+    def test_web_path_parameter(self) -> None:
+        web_base_loader = WebBaseLoader(web_paths=["https://www.example.com"])
+        assert web_base_loader.web_paths == ["https://www.example.com"]
+        web_base_loader = WebBaseLoader(web_path=["https://www.example.com"])
+        assert web_base_loader.web_paths == ["https://www.example.com"]
+        web_base_loader = WebBaseLoader(web_path="https://www.example.com")
+        assert web_base_loader.web_paths == ["https://www.example.com"]