Harrison/toml loader (#4090)

Co-authored-by: Mika Ayenson <Mikaayenson@users.noreply.github.com>
fix_agent_callbacks
Harrison Chase 1 year ago committed by GitHub
parent d4cf1eb60a
commit a9c2450330
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -233,7 +233,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "7f6e0eae",
"id": "6a91a0bc",
"metadata": {},
"outputs": [],
"source": []
@ -255,7 +255,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
"version": "3.9.1"
}
},
"nbformat": 4,

@ -0,0 +1,22 @@
[internal]
creation_date = "2023-05-01"
updated_date = "2022-05-01"
release = ["release_type"]
min_endpoint_version = "some_semantic_version"
os_list = ["operating_system_list"]
[rule]
uuid = "some_uuid"
name = "Fake Rule Name"
description = "Fake description of rule"
query = '''
process where process.name : "somequery"
'''
[[rule.threat]]
framework = "MITRE ATT&CK"
[rule.threat.tactic]
name = "Execution"
id = "TA0002"
reference = "https://attack.mitre.org/tactics/TA0002/"

@ -0,0 +1,94 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "4284970b",
"metadata": {},
"source": [
"# TOML Loader\n",
"\n",
"If you need to load TOML files, use the `TomlLoader`."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "202fc42d",
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import TomlLoader"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "7ecae98c",
"metadata": {},
"outputs": [],
"source": [
"loader = TomlLoader('example_data/fake_rule.toml')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "eb08c26e",
"metadata": {},
"outputs": [],
"source": [
"rule = loader.load()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "405d36bc",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='{\"internal\": {\"creation_date\": \"2023-05-01\", \"updated_date\": \"2022-05-01\", \"release\": [\"release_type\"], \"min_endpoint_version\": \"some_semantic_version\", \"os_list\": [\"operating_system_list\"]}, \"rule\": {\"uuid\": \"some_uuid\", \"name\": \"Fake Rule Name\", \"description\": \"Fake description of rule\", \"query\": \"process where process.name : \\\\\"somequery\\\\\"\\\\n\", \"threat\": [{\"framework\": \"MITRE ATT&CK\", \"tactic\": {\"name\": \"Execution\", \"id\": \"TA0002\", \"reference\": \"https://attack.mitre.org/tactics/TA0002/\"}}]}}', metadata={'source': 'example_data/fake_rule.toml'})]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rule"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a896454d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

@ -78,6 +78,7 @@ from langchain.document_loaders.srt import SRTLoader
from langchain.document_loaders.stripe import StripeLoader
from langchain.document_loaders.telegram import TelegramChatLoader
from langchain.document_loaders.text import TextLoader
from langchain.document_loaders.toml import TomlLoader
from langchain.document_loaders.twitter import TwitterTweetLoader
from langchain.document_loaders.unstructured import (
UnstructuredAPIFileIOLoader,
@ -169,6 +170,7 @@ __all__ = [
"SlackDirectoryLoader",
"TelegramChatLoader",
"TextLoader",
"TomlLoader",
"TwitterTweetLoader",
"UnstructuredEPubLoader",
"UnstructuredEmailLoader",

@ -0,0 +1,47 @@
import datetime
import json
import logging
from pathlib import Path
from typing import Iterable, List, Union

from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader
logger = logging.getLogger(__name__)


def _toml_json_default(value: object) -> str:
    """Encode TOML date/time values that ``json`` cannot serialize natively.

    TOML has first-class date, time and datetime types; the parser returns
    them as ``datetime`` objects, which ``json.dumps`` rejects. They are
    rendered in ISO 8601 form. Any other unsupported type still raises
    ``TypeError`` so unexpected values are not silently stringified.
    """
    # ``datetime.datetime`` is a subclass of ``datetime.date``, so this
    # check covers all three TOML temporal types.
    if isinstance(value, (datetime.date, datetime.time)):
        return value.isoformat()
    raise TypeError(
        f"Object of type {type(value).__name__} is not JSON serializable"
    )


class TomlLoader(BaseLoader):
    """Load TOML files as documents.

    The loader accepts either a single ``.toml`` file or a directory, which
    is searched recursively for ``*.toml`` files. Each successfully parsed
    file becomes one ``Document`` whose ``page_content`` is the parsed TOML
    serialized as JSON and whose ``metadata["source"]`` is the file path.
    """

    def __init__(self, source: Union[str, Path]):
        """Initialize the loader.

        Args:
            source: Path to a ``.toml`` file or to a directory containing
                TOML files.
        """
        self.source = Path(source)

    def load(self) -> List[Document]:
        """Load all documents eagerly and return them as a list."""
        return list(self.lazy_load())

    def lazy_load(self) -> Iterable[Document]:
        """Lazily yield one document per parseable TOML file.

        Files that fail to parse are reported through ``logging`` and
        skipped, so one malformed file does not abort a directory load.

        Raises:
            ValueError: If ``source`` is neither a ``.toml`` file nor a
                directory.
        """
        # Imported here rather than at module level, so importing this
        # module does not itself require ``tomli``.
        import tomli

        candidates: Iterable[Path]
        if self.source.is_file() and self.source.suffix == ".toml":
            candidates = [self.source]
        elif self.source.is_dir():
            candidates = self.source.glob("**/*.toml")
        else:
            raise ValueError("Invalid source path or file type")

        for file_path in candidates:
            # The glob pattern also matches directories named "*.toml";
            # those cannot be opened as files, so skip them.
            if not file_path.is_file():
                continue

            # Read and close the file before yielding so no handle stays
            # open while the consumer processes the document.
            content = file_path.read_text(encoding="utf-8")

            # Keep the try body limited to the call that can raise.
            try:
                data = tomli.loads(content)
            except tomli.TOMLDecodeError as e:
                logger.warning("Error parsing TOML file %s: %s", file_path, e)
                continue

            yield Document(
                page_content=json.dumps(data, default=_toml_json_default),
                metadata={"source": str(file_path)},
            )
Loading…
Cancel
Save