mirror of https://github.com/hwchase17/langchain
Harrison/toml loader (#4090)
Co-authored-by: Mika Ayenson <Mikaayenson@users.noreply.github.com>pull/4091/head
parent
d4cf1eb60a
commit
a9c2450330
@ -0,0 +1,22 @@
|
|||||||
|
[internal]
|
||||||
|
creation_date = "2023-05-01"
|
||||||
|
updated_date = "2022-05-01"
|
||||||
|
release = ["release_type"]
|
||||||
|
min_endpoint_version = "some_semantic_version"
|
||||||
|
os_list = ["operating_system_list"]
|
||||||
|
|
||||||
|
[rule]
|
||||||
|
uuid = "some_uuid"
|
||||||
|
name = "Fake Rule Name"
|
||||||
|
description = "Fake description of rule"
|
||||||
|
query = '''
|
||||||
|
process where process.name : "somequery"
|
||||||
|
'''
|
||||||
|
|
||||||
|
[[rule.threat]]
|
||||||
|
framework = "MITRE ATT&CK"
|
||||||
|
|
||||||
|
[rule.threat.tactic]
|
||||||
|
name = "Execution"
|
||||||
|
id = "TA0002"
|
||||||
|
reference = "https://attack.mitre.org/tactics/TA0002/"
|
@ -0,0 +1,94 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "4284970b",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# TOML Loader\n",
|
||||||
|
"\n",
|
||||||
|
"If you need to load TOML files, use the `TomlLoader`."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"id": "202fc42d",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from langchain.document_loaders import TomlLoader"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"id": "7ecae98c",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"loader = TomlLoader('example_data/fake_rule.toml')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"id": "eb08c26e",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"rule = loader.load()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"id": "405d36bc",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"[Document(page_content='{\"internal\": {\"creation_date\": \"2023-05-01\", \"updated_date\": \"2022-05-01\", \"release\": [\"release_type\"], \"min_endpoint_version\": \"some_semantic_version\", \"os_list\": [\"operating_system_list\"]}, \"rule\": {\"uuid\": \"some_uuid\", \"name\": \"Fake Rule Name\", \"description\": \"Fake description of rule\", \"query\": \"process where process.name : \\\\\"somequery\\\\\"\\\\n\", \"threat\": [{\"framework\": \"MITRE ATT&CK\", \"tactic\": {\"name\": \"Execution\", \"id\": \"TA0002\", \"reference\": \"https://attack.mitre.org/tactics/TA0002/\"}}]}}', metadata={'source': 'example_data/fake_rule.toml'})]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"rule"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "a896454d",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.9.1"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 5
|
||||||
|
}
|
@ -0,0 +1,47 @@
|
|||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Iterable, List, Union
|
||||||
|
|
||||||
|
from langchain.docstore.document import Document
|
||||||
|
from langchain.document_loaders.base import BaseLoader
|
||||||
|
|
||||||
|
|
||||||
|
class TomlLoader(BaseLoader):
    """Load TOML files as documents whose content is the JSON-encoded TOML data.

    The loader can be initialized with either a single ``.toml`` file or a
    directory; a directory is searched recursively for ``*.toml`` files.
    Each successfully parsed file produces one ``Document`` whose
    ``page_content`` is the parsed data serialized with ``json.dumps`` and
    whose ``metadata["source"]`` is the file path as a string.
    """

    def __init__(self, source: Union[str, Path]):
        """Initialize the TomlLoader with a source file or directory.

        Args:
            source: Path to a ``.toml`` file or to a directory containing
                TOML files. The path is not validated until loading.
        """
        self.source = Path(source)

    def load(self) -> List[Document]:
        """Load and return all documents as a list."""
        return list(self.lazy_load())

    @staticmethod
    def _json_default(value: object) -> str:
        """Encode TOML date/time values that ``json`` cannot serialize natively.

        Native TOML dates, times and date-times are parsed into ``datetime``
        objects; they are written back as ISO 8601 strings. Any other
        unsupported type is rejected the same way ``json.dumps`` would.
        """
        # Function-scope import keeps this block self-contained.
        from datetime import date, time

        # datetime.datetime is a subclass of date, so it is covered as well.
        if isinstance(value, (date, time)):
            return value.isoformat()
        raise TypeError(
            f"Object of type {type(value).__name__} is not JSON serializable"
        )

    def lazy_load(self) -> Iterable[Document]:
        """Lazily load the TOML documents from the source file or directory.

        Yields:
            One ``Document`` per TOML file that parses successfully. Files
            that fail to parse are reported on stdout and skipped.

        Raises:
            ValueError: If the source is neither an existing ``.toml`` file
                nor a directory. Because this is a generator, the error is
                raised when iteration starts, not when the method is called.
        """
        # Imported lazily so the module can be imported without tomli installed.
        import tomli

        if self.source.is_file() and self.source.suffix == ".toml":
            files = [self.source]
        elif self.source.is_dir():
            files = list(self.source.glob("**/*.toml"))
        else:
            raise ValueError("Invalid source path or file type")

        for file_path in files:
            # Read and close the file before yielding, so no file handle is
            # held open while the generator is suspended by the consumer.
            content = file_path.read_text(encoding="utf-8")
            try:
                data = tomli.loads(content)
            except tomli.TOMLDecodeError as e:
                # Best effort: report the broken file and keep going.
                print(f"Error parsing TOML file {file_path}: {e}")
                continue

            yield Document(
                # default= handles native TOML date/time values, which would
                # otherwise make json.dumps raise TypeError.
                page_content=json.dumps(data, default=self._json_default),
                metadata={"source": str(file_path)},
            )
|
Loading…
Reference in New Issue