langchain/libs/community/langchain_community/document_loaders/toml.py
Erick Friis 3a2eb6e12b
infra: add print rule to ruff (#16221)
Added noqa for existing prints. Can slowly remove / will prevent more
being intro'd
2024-02-09 16:13:30 -08:00

48 lines
1.5 KiB
Python

import json
from pathlib import Path
from typing import Iterator, List, Union
from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseLoader
class TomlLoader(BaseLoader):
"""Load `TOML` files.
It can load a single source file or several files in a single
directory.
"""
def __init__(self, source: Union[str, Path]):
"""Initialize the TomlLoader with a source file or directory."""
self.source = Path(source)
def load(self) -> List[Document]:
"""Load and return all documents."""
return list(self.lazy_load())
def lazy_load(self) -> Iterator[Document]:
"""Lazily load the TOML documents from the source file or directory."""
import tomli
if self.source.is_file() and self.source.suffix == ".toml":
files = [self.source]
elif self.source.is_dir():
files = list(self.source.glob("**/*.toml"))
else:
raise ValueError("Invalid source path or file type")
for file_path in files:
with file_path.open("r", encoding="utf-8") as file:
content = file.read()
try:
data = tomli.loads(content)
doc = Document(
page_content=json.dumps(data),
metadata={"source": str(file_path)},
)
yield doc
except tomli.TOMLDecodeError as e:
print(f"Error parsing TOML file {file_path}: {e}") # noqa: T201