mirror of
https://github.com/hwchase17/langchain
synced 2024-11-04 06:00:26 +00:00
Add encoding parameter to TextLoader (#2250)
This merge request proposes changes to the TextLoader class to make it more flexible and robust when handling text files with different encodings. The current implementation of TextLoader does not provide a way to specify the encoding of the text file being read. As a result, files with non-default encodings might be handled incorrectly, causing issues with loading the content.

Benefits:
- The proposed changes will make the TextLoader class more flexible, allowing it to handle text files with different encodings.
- The changes maintain backward compatibility, as the encoding parameter is optional.
This commit is contained in:
parent
67dde7d893
commit
e49284acde
@ -1,5 +1,4 @@
|
|||||||
"""Load text files."""
|
from typing import List, Optional
|
||||||
from typing import List
|
|
||||||
|
|
||||||
from langchain.docstore.document import Document
|
from langchain.docstore.document import Document
|
||||||
from langchain.document_loaders.base import BaseLoader
|
from langchain.document_loaders.base import BaseLoader
|
||||||
@ -8,13 +7,14 @@ from langchain.document_loaders.base import BaseLoader
|
|||||||
class TextLoader(BaseLoader):
|
class TextLoader(BaseLoader):
|
||||||
"""Load text files."""
|
"""Load text files."""
|
||||||
|
|
||||||
def __init__(self, file_path: str):
|
def __init__(self, file_path: str, encoding: Optional[str] = None):
|
||||||
"""Initialize with file path."""
|
"""Initialize with file path."""
|
||||||
self.file_path = file_path
|
self.file_path = file_path
|
||||||
|
self.encoding = encoding
|
||||||
|
|
||||||
def load(self) -> List[Document]:
|
def load(self) -> List[Document]:
|
||||||
"""Load from file path."""
|
"""Load from file path."""
|
||||||
with open(self.file_path, encoding="utf-8") as f:
|
with open(self.file_path, encoding=self.encoding) as f:
|
||||||
text = f.read()
|
text = f.read()
|
||||||
metadata = {"source": self.file_path}
|
metadata = {"source": self.file_path}
|
||||||
return [Document(page_content=text, metadata=metadata)]
|
return [Document(page_content=text, metadata=metadata)]
|
||||||
|
Loading…
Reference in New Issue
Block a user