From c592b12043353997eb68de56e9cca9a678fb5a51 Mon Sep 17 00:00:00 2001 From: "Daniel Dror (Dubovski)" Date: Tue, 21 Mar 2023 01:03:00 -0400 Subject: [PATCH] Allow passing in encoding to csv_loader (#1836) --- langchain/document_loaders/csv_loader.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/langchain/document_loaders/csv_loader.py b/langchain/document_loaders/csv_loader.py index 7d5a40a8..9911f605 100644 --- a/langchain/document_loaders/csv_loader.py +++ b/langchain/document_loaders/csv_loader.py @@ -31,9 +31,11 @@ class CSVLoader(BaseLoader): file_path: str, source_column: Optional[str] = None, csv_args: Optional[Dict] = None, + encoding: Optional[str] = None, ): self.file_path = file_path self.source_column = source_column + self.encoding = encoding if csv_args is None: self.csv_args = { "delimiter": ",", @@ -45,7 +47,7 @@ class CSVLoader(BaseLoader): def load(self) -> List[Document]: docs = [] - with open(self.file_path, newline="") as csvfile: + with open(self.file_path, newline="", encoding=self.encoding) as csvfile: csv = DictReader(csvfile, **self.csv_args) # type: ignore for i, row in enumerate(csv): content = "\n".join(f"{k.strip()}: {v.strip()}" for k, v in row.items())