Merge pull request #18654

* Implement lazy_load() for ObsidianLoader
This commit is contained in:
Christophe Bornet 2024-03-06 19:06:55 +01:00 committed by GitHub
parent b9c0cf9025
commit 52ac67c5d8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -2,7 +2,7 @@ import functools
import logging import logging
import re import re
from pathlib import Path from pathlib import Path
from typing import Any, Dict, List from typing import Any, Dict, Iterator
import yaml import yaml
from langchain_core.documents import Document from langchain_core.documents import Document
@@ -136,10 +136,8 @@ class ObsidianLoader(BaseLoader):
return content return content
return self.FRONT_MATTER_REGEX.sub("", content) return self.FRONT_MATTER_REGEX.sub("", content)
def load(self) -> List[Document]: def lazy_load(self) -> Iterator[Document]:
"""Load documents."""
paths = list(Path(self.file_path).glob("**/*.md")) paths = list(Path(self.file_path).glob("**/*.md"))
docs = []
for path in paths: for path in paths:
with open(path, encoding=self.encoding) as f: with open(path, encoding=self.encoding) as f:
text = f.read() text = f.read()
@@ -163,6 +161,4 @@ class ObsidianLoader(BaseLoader):
tags | set(front_matter.get("tags", []) or []) tags | set(front_matter.get("tags", []) or [])
) )
docs.append(Document(page_content=text, metadata=metadata)) yield Document(page_content=text, metadata=metadata)
return docs