mirror of https://github.com/hwchase17/langchain
Harrison/toml loader (#4090)
Co-authored-by: Mika Ayenson <Mikaayenson@users.noreply.github.com>pull/4091/head
parent
d4cf1eb60a
commit
a9c2450330
@ -0,0 +1,22 @@
|
||||
[internal]
|
||||
creation_date = "2023-05-01"
|
||||
updated_date = "2022-05-01"
|
||||
release = ["release_type"]
|
||||
min_endpoint_version = "some_semantic_version"
|
||||
os_list = ["operating_system_list"]
|
||||
|
||||
[rule]
|
||||
uuid = "some_uuid"
|
||||
name = "Fake Rule Name"
|
||||
description = "Fake description of rule"
|
||||
query = '''
|
||||
process where process.name : "somequery"
|
||||
'''
|
||||
|
||||
[[rule.threat]]
|
||||
framework = "MITRE ATT&CK"
|
||||
|
||||
[rule.threat.tactic]
|
||||
name = "Execution"
|
||||
id = "TA0002"
|
||||
reference = "https://attack.mitre.org/tactics/TA0002/"
|
@ -0,0 +1,94 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "4284970b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# TOML Loader\n",
|
||||
"\n",
|
||||
"If you need to load TOML files, use the `TomlLoader`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "202fc42d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import TomlLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "7ecae98c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = TomlLoader('example_data/fake_rule.toml')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "eb08c26e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"rule = loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "405d36bc",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='{\"internal\": {\"creation_date\": \"2023-05-01\", \"updated_date\": \"2022-05-01\", \"release\": [\"release_type\"], \"min_endpoint_version\": \"some_semantic_version\", \"os_list\": [\"operating_system_list\"]}, \"rule\": {\"uuid\": \"some_uuid\", \"name\": \"Fake Rule Name\", \"description\": \"Fake description of rule\", \"query\": \"process where process.name : \\\\\"somequery\\\\\"\\\\n\", \"threat\": [{\"framework\": \"MITRE ATT&CK\", \"tactic\": {\"name\": \"Execution\", \"id\": \"TA0002\", \"reference\": \"https://attack.mitre.org/tactics/TA0002/\"}}]}}', metadata={'source': 'example_data/fake_rule.toml'})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"rule"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a896454d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
@ -0,0 +1,47 @@
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Iterable, List, Union
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.document_loaders.base import BaseLoader
|
||||
|
||||
|
||||
class TomlLoader(BaseLoader):
    """Load TOML files as ``Document`` objects.

    Each TOML file is parsed and re-serialized as a JSON string, which becomes
    the document's ``page_content``; the path of the file is stored under the
    ``"source"`` metadata key.

    The loader can be initialized with either a single ``.toml`` file or a
    directory, which is searched recursively for ``*.toml`` files.
    """

    def __init__(self, source: Union[str, Path]):
        """Initialize the TomlLoader with a source file or directory.

        Args:
            source: Path to a ``.toml`` file, or to a directory containing
                TOML files.
        """
        self.source = Path(source)

    def load(self) -> List[Document]:
        """Load and return all documents."""
        return list(self.lazy_load())

    @staticmethod
    def _json_default(value: object) -> str:
        """Serialize TOML date/time values, which ``json`` cannot encode itself.

        Any other unsupported type is rejected with the same ``TypeError``
        that ``json.dumps`` would raise on its own.
        """
        import datetime

        # ``datetime.datetime`` is a subclass of ``datetime.date``.
        if isinstance(value, (datetime.date, datetime.time)):
            return value.isoformat()
        raise TypeError(
            f"Object of type {type(value).__name__} is not JSON serializable"
        )

    def lazy_load(self) -> Iterable[Document]:
        """Lazily load the TOML documents from the source file or directory.

        Files that fail to parse are reported on stdout and skipped.

        Yields:
            One ``Document`` per successfully parsed TOML file.

        Raises:
            ValueError: When iteration starts, if the source is neither a
                ``.toml`` file nor a directory.
            ImportError: If neither ``tomli`` nor the standard-library
                ``tomllib`` (Python 3.11+) is available.
        """
        try:
            import tomli as toml_parser
        except ImportError:
            # Python 3.11+ provides a compatible parser in the standard library.
            import tomllib as toml_parser

        if self.source.is_file() and self.source.suffix == ".toml":
            files = [self.source]
        elif self.source.is_dir():
            # Skip directories that merely end in ".toml", and sort so that
            # documents come back in a stable order regardless of platform.
            files = sorted(
                path for path in self.source.glob("**/*.toml") if path.is_file()
            )
        else:
            raise ValueError("Invalid source path or file type")

        for file_path in files:
            # Read the whole file up front so that no handle stays open while
            # the generator is suspended at ``yield``.
            content = file_path.read_text(encoding="utf-8")
            try:
                data = toml_parser.loads(content)
            except toml_parser.TOMLDecodeError as e:
                # Best effort: report the malformed file and keep going.
                print(f"Error parsing TOML file {file_path}: {e}")
                continue

            yield Document(
                # TOML has native date/time types; the default hook keeps
                # such files from crashing JSON serialization.
                page_content=json.dumps(data, default=self._json_default),
                metadata={"source": str(file_path)},
            )
|
Loading…
Reference in New Issue