mirror of
https://github.com/hwchase17/langchain
synced 2024-11-13 19:10:52 +00:00
parent
b9c0cf9025
commit
52ac67c5d8
@ -2,7 +2,7 @@ import functools
|
|||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, List
|
from typing import Any, Dict, Iterator
|
||||||
|
|
||||||
import yaml
|
import yaml
|
||||||
from langchain_core.documents import Document
|
from langchain_core.documents import Document
|
||||||
@ -136,10 +136,8 @@ class ObsidianLoader(BaseLoader):
|
|||||||
return content
|
return content
|
||||||
return self.FRONT_MATTER_REGEX.sub("", content)
|
return self.FRONT_MATTER_REGEX.sub("", content)
|
||||||
|
|
||||||
def load(self) -> List[Document]:
|
def lazy_load(self) -> Iterator[Document]:
|
||||||
"""Load documents."""
|
|
||||||
paths = list(Path(self.file_path).glob("**/*.md"))
|
paths = list(Path(self.file_path).glob("**/*.md"))
|
||||||
docs = []
|
|
||||||
for path in paths:
|
for path in paths:
|
||||||
with open(path, encoding=self.encoding) as f:
|
with open(path, encoding=self.encoding) as f:
|
||||||
text = f.read()
|
text = f.read()
|
||||||
@ -163,6 +161,4 @@ class ObsidianLoader(BaseLoader):
|
|||||||
tags | set(front_matter.get("tags", []) or [])
|
tags | set(front_matter.get("tags", []) or [])
|
||||||
)
|
)
|
||||||
|
|
||||||
docs.append(Document(page_content=text, metadata=metadata))
|
yield Document(page_content=text, metadata=metadata)
|
||||||
|
|
||||||
return docs
|
|
||||||
|
Loading…
Reference in New Issue
Block a user