From 0ab364404ecfcda96a67c0fe81b24fc870617976 Mon Sep 17 00:00:00 2001
From: Johnny Lee <2618366+leecjohnny@users.noreply.github.com>
Date: Wed, 12 Apr 2023 00:12:39 -0400
Subject: [PATCH] add continue to fix 'continue_on_failure' parameter for URL doc loader (#2735)

Currently, the function still fails if `continue_on_failure` is set to
True, because `elements` is not set.

---------

Co-authored-by: leecjohnny
---
 langchain/document_loaders/url.py                |  1 +
 .../document_loaders/test_url.py                 | 16 ++++++++++++++++
 2 files changed, 17 insertions(+)
 create mode 100644 tests/integration_tests/document_loaders/test_url.py

diff --git a/langchain/document_loaders/url.py b/langchain/document_loaders/url.py
index a94c85e5..c0dca346 100644
--- a/langchain/document_loaders/url.py
+++ b/langchain/document_loaders/url.py
@@ -63,6 +63,7 @@ class UnstructuredURLLoader(BaseLoader):
             except Exception as e:
                 if self.continue_on_failure:
                     logger.error(f"Error fetching or processing {url}, exeption: {e}")
+                    continue
                 else:
                     raise e
             text = "\n\n".join([str(el) for el in elements])
diff --git a/tests/integration_tests/document_loaders/test_url.py b/tests/integration_tests/document_loaders/test_url.py
new file mode 100644
index 00000000..f61a8114
--- /dev/null
+++ b/tests/integration_tests/document_loaders/test_url.py
@@ -0,0 +1,16 @@
+import pytest
+
+from langchain.document_loaders import UnstructuredURLLoader
+
+
+def test_continue_on_failure_true() -> None:
+    """Test exception is not raised when continue_on_failure=True."""
+    loader = UnstructuredURLLoader(["badurl.foobar"])
+    loader.load()
+
+
+def test_continue_on_failure_false() -> None:
+    """Test exception is raised when continue_on_failure=False."""
+    loader = UnstructuredURLLoader(["badurl.foobar"], continue_on_failure=False)
+    with pytest.raises(Exception):
+        loader.load()