Merge pull request #18654

* Implement lazy_load() for ObsidianLoader
This commit is contained in:
Christophe Bornet 2024-03-06 19:06:55 +01:00 committed by GitHub
parent b9c0cf9025
commit 52ac67c5d8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -2,7 +2,7 @@ import functools
import logging
import re
from pathlib import Path
from typing import Any, Dict, List
from typing import Any, Dict, Iterator
import yaml
from langchain_core.documents import Document
@@ -136,10 +136,8 @@ class ObsidianLoader(BaseLoader):
return content
return self.FRONT_MATTER_REGEX.sub("", content)
def load(self) -> List[Document]:
"""Load documents."""
def lazy_load(self) -> Iterator[Document]:
paths = list(Path(self.file_path).glob("**/*.md"))
docs = []
for path in paths:
with open(path, encoding=self.encoding) as f:
text = f.read()
@@ -163,6 +161,4 @@ class ObsidianLoader(BaseLoader):
tags | set(front_matter.get("tags", []) or [])
)
docs.append(Document(page_content=text, metadata=metadata))
return docs
yield Document(page_content=text, metadata=metadata)