From 08f23c95d92f635b1cd2423cd6e1ff6f3e3b70c2 Mon Sep 17 00:00:00 2001 From: Huang Chongdi Date: Mon, 20 Mar 2023 00:48:31 +0800 Subject: [PATCH] add encoding parameter to ObsidianLoader (#1752) --- langchain/document_loaders/obsidian.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/langchain/document_loaders/obsidian.py b/langchain/document_loaders/obsidian.py index 1ad30db7..df5a5d7e 100644 --- a/langchain/document_loaders/obsidian.py +++ b/langchain/document_loaders/obsidian.py @@ -9,16 +9,17 @@ from langchain.document_loaders.base import BaseLoader class ObsidianLoader(BaseLoader): """Loader that loads Obsidian files from disk.""" - def __init__(self, path: str): + def __init__(self, path: str, encoding: str = "UTF-8"): """Initialize with path.""" self.file_path = path + self.encoding = encoding def load(self) -> List[Document]: """Load documents.""" ps = list(Path(self.file_path).glob("**/*.md")) docs = [] for p in ps: - with open(p) as f: + with open(p, encoding=self.encoding) as f: text = f.read() metadata = {"source": str(p)} docs.append(Document(page_content=text, metadata=metadata))