Add encoding parameter to TextLoader (#2250)

This merge request proposes changes to the TextLoader class to make it
more flexible and robust when handling text files with different
encodings. The current implementation of TextLoader always opens the
file as UTF-8 and provides no way to specify a different encoding. As a
result, files stored in other encodings may be decoded incorrectly or
fail to load.

Benefits:
- The proposed changes will make the TextLoader class more flexible,
allowing it to handle text files with different encodings.
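- The changes keep existing call sites working, as the encoding parameter
is optional. Note that when it is omitted, the file is now opened with
the platform's default encoding rather than the previously hard-coded
UTF-8.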
doc
Travis Hammond 1 year ago committed by GitHub
parent 67dde7d893
commit e49284acde
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,5 +1,4 @@
"""Load text files."""
from typing import List
from typing import List, Optional
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader
@ -8,13 +7,14 @@ from langchain.document_loaders.base import BaseLoader
class TextLoader(BaseLoader):
"""Load text files."""
def __init__(self, file_path: str):
def __init__(self, file_path: str, encoding: Optional[str] = None):
"""Initialize with file path."""
self.file_path = file_path
self.encoding = encoding
def load(self) -> List[Document]:
"""Load from file path."""
with open(self.file_path, encoding="utf-8") as f:
with open(self.file_path, encoding=self.encoding) as f:
text = f.read()
metadata = {"source": self.file_path}
return [Document(page_content=text, metadata=metadata)]

Loading…
Cancel
Save