From 52ac67c5d8df4b2a52a84e3307f395aa4e734974 Mon Sep 17 00:00:00 2001 From: Christophe Bornet Date: Wed, 6 Mar 2024 19:06:55 +0100 Subject: [PATCH] Merge pull request #18654 * Implement lazy_load() for ObsidianLoader --- .../langchain_community/document_loaders/obsidian.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/libs/community/langchain_community/document_loaders/obsidian.py b/libs/community/langchain_community/document_loaders/obsidian.py index e4b69341b6..bcd2659d65 100644 --- a/libs/community/langchain_community/document_loaders/obsidian.py +++ b/libs/community/langchain_community/document_loaders/obsidian.py @@ -2,7 +2,7 @@ import functools import logging import re from pathlib import Path -from typing import Any, Dict, List +from typing import Any, Dict, Iterator import yaml from langchain_core.documents import Document @@ -136,10 +136,8 @@ class ObsidianLoader(BaseLoader): return content return self.FRONT_MATTER_REGEX.sub("", content) - def load(self) -> List[Document]: - """Load documents.""" + def lazy_load(self) -> Iterator[Document]: paths = list(Path(self.file_path).glob("**/*.md")) - docs = [] for path in paths: with open(path, encoding=self.encoding) as f: text = f.read() @@ -163,6 +161,4 @@ class ObsidianLoader(BaseLoader): tags | set(front_matter.get("tags", []) or []) ) - docs.append(Document(page_content=text, metadata=metadata)) - - return docs + yield Document(page_content=text, metadata=metadata)