From 6402c33299de17d1765ddb193c56b3e7a29ffb03 Mon Sep 17 00:00:00 2001
From: Richard Wang
Date: Fri, 13 Oct 2023 03:13:41 +0800
Subject: [PATCH] Let Notion document loader support utf-8 and make it default. (#10613)

Use utf-8 encoding by default
---
 libs/langchain/langchain/document_loaders/notion.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/libs/langchain/langchain/document_loaders/notion.py b/libs/langchain/langchain/document_loaders/notion.py
index 15678ace9e..d1081301ad 100644
--- a/libs/langchain/langchain/document_loaders/notion.py
+++ b/libs/langchain/langchain/document_loaders/notion.py
@@ -8,16 +8,17 @@ from langchain.document_loaders.base import BaseLoader
 class NotionDirectoryLoader(BaseLoader):
     """Load `Notion directory` dump."""
 
-    def __init__(self, path: str):
+    def __init__(self, path: str, *, encoding: str = "utf-8") -> None:
         """Initialize with a file path."""
         self.file_path = path
+        self.encoding = encoding
 
     def load(self) -> List[Document]:
         """Load documents."""
-        ps = list(Path(self.file_path).glob("**/*.md"))
+        paths = list(Path(self.file_path).glob("**/*.md"))
         docs = []
-        for p in ps:
-            with open(p) as f:
+        for p in paths:
+            with open(p, encoding=self.encoding) as f:
                 text = f.read()
             metadata = {"source": str(p)}
             docs.append(Document(page_content=text, metadata=metadata))