From b786335dd10902489f87a536ee074d747b6df370 Mon Sep 17 00:00:00 2001 From: Ryan Sloan Date: Thu, 3 Aug 2023 19:51:57 -0400 Subject: [PATCH] fix RecursiveUrlLoader (#8582) Description: the recursive URL loader does not crawl all URLs under the base URL, because it only recurses into links that end with "/"; this change recurses into every child link instead. Maintainer: @baskaryan --- .../langchain/document_loaders/recursive_url_loader.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/libs/langchain/langchain/document_loaders/recursive_url_loader.py b/libs/langchain/langchain/document_loaders/recursive_url_loader.py index be20a45447..dedd585bf3 100644 --- a/libs/langchain/langchain/document_loaders/recursive_url_loader.py +++ b/libs/langchain/langchain/document_loaders/recursive_url_loader.py @@ -92,9 +92,7 @@ class RecursiveUrlLoader(BaseLoader): yield from loaded_link else: yield loaded_link - # If the link is a directory (w/ children) then visit it - if link.endswith("/"): - yield from self.get_child_links_recursive(link, visited) + yield from self.get_child_links_recursive(link, visited) return visited