From f032609f8d9ef729423167f57557e4d638745cea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20=C5=81ukawski?= Date: Thu, 2 Mar 2023 16:06:26 +0100 Subject: [PATCH] Add `recursive` parameter to `DirectoryLoader` (#1389) This PR allows loading a directory recursively. --- langchain/document_loaders/directory.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/langchain/document_loaders/directory.py b/langchain/document_loaders/directory.py index 74f24dd2..7c77daa9 100644 --- a/langchain/document_loaders/directory.py +++ b/langchain/document_loaders/directory.py @@ -30,6 +30,7 @@ class DirectoryLoader(BaseLoader): silent_errors: bool = False, load_hidden: bool = False, loader_cls: FILE_LOADER_TYPE = UnstructuredFileLoader, + recursive: bool = False, ): """Initialize with path to directory and how to glob over it.""" self.path = path @@ -37,12 +38,14 @@ class DirectoryLoader(BaseLoader): self.load_hidden = load_hidden self.loader_cls = loader_cls self.silent_errors = silent_errors + self.recursive = recursive def load(self) -> List[Document]: """Load documents.""" p = Path(self.path) docs = [] - for i in p.glob(self.glob): + items = p.rglob(self.glob) if self.recursive else p.glob(self.glob) + for i in items: if i.is_file(): if _is_visible(i.relative_to(p)) or self.load_hidden: try: