From e49284acde3c7b32dacd35f4f0ccac6d6c599c99 Mon Sep 17 00:00:00 2001
From: Travis Hammond <47340315+Tiger767@users.noreply.github.com>
Date: Sat, 1 Apr 2023 08:57:17 -0700
Subject: [PATCH] Add encoding parameter to TextLoader (#2250)

This merge request proposes changes to the TextLoader class to make it
more flexible and robust when handling text files with different
encodings. The current implementation of TextLoader does not provide a
way to specify the encoding of the text file being read. As a result, it
might lead to incorrect handling of files with non-default encodings,
causing issues with loading the content.

Benefits:
- The proposed changes will make the TextLoader class more flexible,
allowing it to handle text files with different encodings.
- The changes maintain backward compatibility, as the encoding parameter
is optional.
---
 langchain/document_loaders/text.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/langchain/document_loaders/text.py b/langchain/document_loaders/text.py
index 0284de88..ce7913d6 100644
--- a/langchain/document_loaders/text.py
+++ b/langchain/document_loaders/text.py
@@ -1,5 +1,4 @@
-"""Load text files."""
-from typing import List
+from typing import List, Optional
 
 from langchain.docstore.document import Document
 from langchain.document_loaders.base import BaseLoader
@@ -8,13 +7,14 @@ from langchain.document_loaders.base import BaseLoader
 class TextLoader(BaseLoader):
     """Load text files."""
 
-    def __init__(self, file_path: str):
+    def __init__(self, file_path: str, encoding: Optional[str] = None):
         """Initialize with file path."""
         self.file_path = file_path
+        self.encoding = encoding
 
     def load(self) -> List[Document]:
         """Load from file path."""
-        with open(self.file_path, encoding="utf-8") as f:
+        with open(self.file_path, encoding=self.encoding) as f:
             text = f.read()
         metadata = {"source": self.file_path}
         return [Document(page_content=text, metadata=metadata)]