diff --git a/libs/langchain/langchain/document_loaders/notion.py b/libs/langchain/langchain/document_loaders/notion.py
index 15678ace9e..d1081301ad 100644
--- a/libs/langchain/langchain/document_loaders/notion.py
+++ b/libs/langchain/langchain/document_loaders/notion.py
@@ -8,16 +8,17 @@ from langchain.document_loaders.base import BaseLoader
 class NotionDirectoryLoader(BaseLoader):
     """Load `Notion directory` dump."""
 
-    def __init__(self, path: str):
+    def __init__(self, path: str, *, encoding: str = "utf-8") -> None:
         """Initialize with a file path."""
         self.file_path = path
+        self.encoding = encoding
 
     def load(self) -> List[Document]:
         """Load documents."""
-        ps = list(Path(self.file_path).glob("**/*.md"))
+        paths = list(Path(self.file_path).glob("**/*.md"))
         docs = []
-        for p in ps:
-            with open(p) as f:
+        for p in paths:
+            with open(p, encoding=self.encoding) as f:
                 text = f.read()
             metadata = {"source": str(p)}
             docs.append(Document(page_content=text, metadata=metadata))