# Mirror of https://github.com/hwchase17/langchain
# Synced 2024-11-10 01:10:59 +00:00
# 298 lines, 9.5 KiB, Python
|
from __future__ import annotations
|
||
|
|
||
|
import abc
|
||
|
import json
|
||
|
import os
|
||
|
import re
|
||
|
from pathlib import Path
|
||
|
from typing import Any, Dict, Generic, List, Literal, Optional, Type, TypeVar, Union
|
||
|
|
||
|
import yaml
|
||
|
from pydantic import BaseModel, ConfigDict, Field, FilePath
|
||
|
|
||
|
T = TypeVar("T")
|
||
|
|
||
|
|
||
|
class SimpleModel(BaseModel, Generic[T]):
    """Generic pydantic model that wraps a single value of type ``T``."""

    # The wrapped value; its type is supplied by the ``T`` type parameter.
    item: T
|
||
|
|
||
|
|
||
|
class PropertySettings(BaseModel):
    """Settings describing one named property: its type, default and description.

    ``Prompty.inputs`` and ``Prompty.outputs`` in this file map property names
    to instances of this model.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # One of the five allowed type names.
    type: Literal["string", "number", "array", "object", "boolean"]
    # Optional default value. ``None`` is included in the annotation because
    # it is the declared default: without it, omitting the key is accepted
    # while an explicit ``default: null`` is rejected by validation.
    default: Optional[Union[str, int, float, List, Dict, bool]] = Field(
        default=None
    )
    # Free-text description; empty when not provided.
    description: str = Field(default="")
|
||
|
|
||
|
|
||
|
class ModelSettings(BaseModel):
    """Model section of a prompty: api name plus three free-form dicts."""

    api: str = Field(default="")
    configuration: dict = Field(default={})
    parameters: dict = Field(default={})
    response: dict = Field(default={})

    def model_dump_safe(self) -> dict:
        """Return ``model_dump()`` with sensitive configuration values masked.

        Any ``configuration`` entry whose key contains ``"key"`` or
        ``"secret"`` (case-insensitive) is replaced by a run of ``*``
        characters. All other entries, and all other fields, are returned
        unchanged.

        Returns:
            The dumped model as a dict with masked configuration values.
        """

        def mask(value: Any) -> str:
            # Sized values (strings, lists, ...) are masked by their length.
            try:
                return "*" * len(value)
            except TypeError:
                # Unsized values (numbers, None, booleans) have no len();
                # mask by the length of their string form instead of crashing.
                return "*" * len(str(value))

        dumped = self.model_dump()
        dumped["configuration"] = {
            name: (
                mask(value)
                if "key" in name.lower() or "secret" in name.lower()
                else value
            )
            for name, value in dumped["configuration"].items()
        }
        return dumped
|
||
|
|
||
|
|
||
|
class TemplateSettings(BaseModel):
    """Template section of a prompty: template type and parser name."""

    # Template type name; "mustache" unless overridden.
    type: str = "mustache"
    # Parser name; empty when not provided.
    parser: str = ""
|
||
|
|
||
|
|
||
|
class Prompty(BaseModel):
    """Pydantic model holding a prompty's metadata, model settings and template.

    Besides the declared fields it offers a secret-masking serialisation
    (``to_safe_dict`` / ``to_safe_json``) and ``normalize``, which resolves
    ``${VAR}`` / ``${VAR:default}`` environment references and ``file:``
    JSON references inside attribute values.
    """

    # metadata
    name: str = Field(default="")
    description: str = Field(default="")
    authors: List[str] = Field(default=[])
    tags: List[str] = Field(default=[])
    version: str = Field(default="")
    base: str = Field(default="")
    basePrompty: Optional[Prompty] = Field(default=None)
    # model
    model: ModelSettings = Field(default_factory=ModelSettings)

    # sample
    sample: dict = Field(default={})

    # input / output
    inputs: Dict[str, PropertySettings] = Field(default={})
    outputs: Dict[str, PropertySettings] = Field(default={})

    # template
    template: TemplateSettings

    file: FilePath = Field(default="")
    content: str = Field(default="")

    def to_safe_dict(self) -> Dict[str, Any]:
        """Return the non-empty fields as a plain dict with secrets masked.

        Fields equal to ``""``, ``{}``, ``[]`` or ``None`` are omitted, and
        ``basePrompty`` is never included. ``model`` is dumped through
        ``model_dump_safe()``, ``template`` and each ``inputs``/``outputs``
        entry through ``model_dump()``, and ``file`` is rendered as a POSIX
        path string when it is a ``Path``.
        """
        safe: Dict[str, Any] = {}
        for field_name, value in self:
            if value == "" or value == {} or value == [] or value is None:
                continue
            if field_name == "basePrompty":
                # no need to serialize basePrompty
                continue

            if field_name == "model":
                safe[field_name] = value.model_dump_safe()
            elif field_name == "template":
                safe[field_name] = value.model_dump()
            elif field_name in ("inputs", "outputs"):
                safe[field_name] = {
                    prop_name: prop.model_dump() for prop_name, prop in value.items()
                }
            elif field_name == "file":
                safe[field_name] = (
                    str(self.file.as_posix())
                    if isinstance(self.file, Path)
                    else self.file
                )
            else:
                safe[field_name] = value
        return safe

    # generate json representation of the prompty
    def to_safe_json(self) -> str:
        """Return ``to_safe_dict()`` serialised with ``json.dumps``."""
        return json.dumps(self.to_safe_dict())

    @staticmethod
    def normalize(attribute: Any, parent: Path, env_error: bool = True) -> Any:
        """Recursively resolve environment and file references in ``attribute``.

        Strings are stripped, then:

        * ``${VAR}`` / ``${VAR:default}`` is replaced by ``os.environ[VAR]``
          when set, otherwise by ``default`` when one is given.
        * ``file:<path>`` is replaced by the normalized JSON content of
          ``parent / <path>`` when that file exists.
        * Any other string is returned stripped.

        Lists and dicts are normalized element by element; every other value
        is returned unchanged.

        Args:
            attribute: The value to normalize.
            parent: Directory against which ``file:`` paths are resolved.
            env_error: When true, a missing variable without a default raises;
                when false it resolves to ``""``. The flag is applied to
                nested values as well.

        Returns:
            The normalized value.

        Raises:
            ValueError: If a referenced variable is unset, has no default and
                ``env_error`` is true.
        """
        if isinstance(attribute, list):
            return [
                Prompty.normalize(value, parent, env_error) for value in attribute
            ]
        if isinstance(attribute, dict):
            return {
                key: Prompty.normalize(value, parent, env_error)
                for key, value in attribute.items()
            }
        if not isinstance(attribute, str):
            return attribute

        attribute = attribute.strip()

        if attribute.startswith("${") and attribute.endswith("}"):
            # Split on the FIRST colon only, so a default that itself contains
            # colons (e.g. "https://host") is kept whole.
            name, has_default, default = attribute[2:-1].partition(":")
            if name in os.environ:
                return os.environ[name]
            if has_default:
                return default
            if env_error:
                raise ValueError(f"Variable {name} not found in environment")
            return ""

        if attribute.startswith("file:"):
            # Everything after the "file:" prefix is the path, including any
            # further colons it may contain.
            target = parent / attribute.split(":", 1)[1]
            if Path(target).exists():
                with open(target, "r", encoding="utf-8") as f:
                    items = json.load(f)
                if isinstance(items, list):
                    return [
                        Prompty.normalize(value, parent, env_error)
                        for value in items
                    ]
                if isinstance(items, dict):
                    return {
                        key: Prompty.normalize(value, parent, env_error)
                        for key, value in items.items()
                    }
                return items

        # Plain string, or a "file:" reference whose target does not exist.
        return attribute
|
||
|
|
||
|
|
||
|
def param_hoisting(
    top: Dict[str, Any], bottom: Dict[str, Any], top_key: Any = None
) -> Dict[str, Any]:
    """Merge ``bottom`` into a copy of ``top`` without overriding existing keys.

    Args:
        top: Dict whose entries take precedence.
        bottom: Dict supplying values for keys missing from the result.
        top_key: When truthy, the starting point is a copy of ``top[top_key]``
            (or an empty dict if that key is absent) instead of ``top``.

    Returns:
        A new dict; neither ``top`` nor ``bottom`` is modified.
    """
    if not top_key:
        merged = {**top}
    elif top_key in top:
        merged = {**top[top_key]}
    else:
        merged = {}

    # Only fill in what the higher-priority dict did not already define.
    for name, fallback in bottom.items():
        merged.setdefault(name, fallback)
    return merged
|
||
|
|
||
|
|
||
|
class Invoker(abc.ABC):
    """Abstract base for callables that are bound to a prompty.

    Subclasses implement ``invoke``; calling the instance delegates to it.
    """

    def __init__(self, prompty: Prompty) -> None:
        # Keep the prompty so subclasses can consult it from ``invoke``.
        self.prompty = prompty

    @abc.abstractmethod
    def invoke(self, data: BaseModel) -> BaseModel:
        """Process ``data`` and return the result (implemented by subclasses)."""

    def __call__(self, data: BaseModel) -> BaseModel:
        """Make the instance callable by forwarding ``data`` to ``invoke``."""
        return self.invoke(data)
|
||
|
|
||
|
|
||
|
class NoOpParser(Invoker):
    """Invoker that performs no work and hands its input straight back."""

    def invoke(self, data: BaseModel) -> BaseModel:
        """Return ``data`` unchanged."""
        return data
|
||
|
|
||
|
|
||
|
class InvokerFactory(object):
    """Singleton registry of invoker classes, grouped into four kinds.

    The registries are class-level dicts keyed by name. Creating the first
    instance registers ``NoOpParser`` under ``"NOOP"`` in each of them.
    """

    _instance = None
    _renderers: Dict[str, Type[Invoker]] = {}
    _parsers: Dict[str, Type[Invoker]] = {}
    _executors: Dict[str, Type[Invoker]] = {}
    _processors: Dict[str, Type[Invoker]] = {}

    def __new__(cls) -> InvokerFactory:
        """Return the shared instance, creating it on first use."""
        if cls._instance is not None:
            return cls._instance

        cls._instance = super(InvokerFactory, cls).__new__(cls)
        # Add NOOP invokers
        for registry in (
            cls._renderers,
            cls._parsers,
            cls._executors,
            cls._processors,
        ):
            registry["NOOP"] = NoOpParser
        return cls._instance

    def _registry_for(self, type: Any) -> Dict[str, Type[Invoker]]:
        """Return the registry dict for ``type`` or raise ``ValueError``."""
        candidates = (
            ("renderer", self._renderers),
            ("parser", self._parsers),
            ("executor", self._executors),
            ("processor", self._processors),
        )
        for kind, registry in candidates:
            if type == kind:
                return registry
        raise ValueError(f"Invalid type {type}")

    def register(
        self,
        type: Literal["renderer", "parser", "executor", "processor"],
        name: str,
        invoker: Type[Invoker],
    ) -> None:
        """Store ``invoker`` under ``name`` in the registry selected by ``type``.

        Raises:
            ValueError: If ``type`` is not one of the four known kinds.
        """
        self._registry_for(type)[name] = invoker

    def register_renderer(self, name: str, renderer_class: Any) -> None:
        """Shortcut for ``register("renderer", name, renderer_class)``."""
        self.register("renderer", name, renderer_class)

    def register_parser(self, name: str, parser_class: Any) -> None:
        """Shortcut for ``register("parser", name, parser_class)``."""
        self.register("parser", name, parser_class)

    def register_executor(self, name: str, executor_class: Any) -> None:
        """Shortcut for ``register("executor", name, executor_class)``."""
        self.register("executor", name, executor_class)

    def register_processor(self, name: str, processor_class: Any) -> None:
        """Shortcut for ``register("processor", name, processor_class)``."""
        self.register("processor", name, processor_class)

    def __call__(
        self,
        type: Literal["renderer", "parser", "executor", "processor"],
        name: str,
        prompty: Prompty,
        data: BaseModel,
    ) -> Any:
        """Instantiate the registered invoker with ``prompty`` and call it on ``data``.

        Raises:
            ValueError: If ``type`` is not one of the four known kinds.
            KeyError: If nothing is registered under ``name`` for that kind.
        """
        invoker_class = self._registry_for(type)[name]
        return invoker_class(prompty)(data)

    def to_dict(self) -> Dict[str, Any]:
        """Return each registry as a ``{name: "module.ClassName"}`` mapping."""

        def qualified_names(registry: Dict[str, Type[Invoker]]) -> Dict[str, str]:
            return {
                key: f"{klass.__module__}.{klass.__name__}"
                for key, klass in registry.items()
            }

        return {
            "renderers": qualified_names(self._renderers),
            "parsers": qualified_names(self._parsers),
            "executors": qualified_names(self._executors),
            "processors": qualified_names(self._processors),
        }

    def to_json(self) -> str:
        """Return ``to_dict()`` serialised with ``json.dumps``."""
        return json.dumps(self.to_dict())
|
||
|
|
||
|
|
||
|
class Frontmatter:
    """Splits a document into its YAML front matter and the body below it.

    The front matter is the text between two ``---`` or ``+++`` delimiters
    at the start of a line; everything after the closing delimiter is the body.
    """

    _yaml_delim = r"(?:---|\+\+\+)"
    _yaml = r"(.*?)"
    _content = r"\s*(.+)$"
    _re_pattern = r"^\s*" + _yaml_delim + _yaml + _yaml_delim + _content
    _regex = re.compile(_re_pattern, re.S | re.M)

    @classmethod
    def read_file(cls, path: str) -> dict[str, Any]:
        """Read the UTF-8 file at ``path`` and split off its front matter.

        See ``read()`` for the layout of the returned dict.
        """
        with open(path, encoding="utf-8") as handle:
            text = handle.read()
        return cls.read(text)

    @classmethod
    def read(cls, string: str) -> dict[str, Any]:
        """Split ``string`` into front matter and body.

        Returned dict keys:
        attributes -- the front matter parsed with ``yaml.load``
        body -- string contents below the YAML separators
        frontmatter -- string representation of YAML

        When the pattern does not match, ``body`` and ``frontmatter`` are
        empty strings and ``attributes`` is whatever ``yaml.load`` yields for
        an empty document.
        """
        match = cls._regex.search(string)
        fmatter, body = (match.group(1), match.group(2)) if match else ("", "")

        # NOTE(review): FullLoader can construct more than plain data types;
        # if prompty files may come from untrusted sources, consider
        # yaml.safe_load instead -- confirm with callers first.
        attributes = yaml.load(fmatter, Loader=yaml.FullLoader)
        return {
            "attributes": attributes,
            "body": body,
            "frontmatter": fmatter,
        }
|