mirror of https://github.com/hwchase17/langchain
feat: Yaml output parser (#14496)
## Description

New YAML output parser as a drop-in replacement for the Pydantic output parser. YAML is a much more token-efficient format than JSON, proving to be **~35% faster and using the same percentage fewer completion tokens**.

☑️ Formatted
☑️ Linted
☑️ Tested (analogous to the existing `test_pydantic_parser.py`)

The YAML parser excels in situations where a list of objects is required, where the root object needs no key:

```python
class Products(BaseModel):
    __root__: list[Product]
```

I ran the prompt `Generate 10 healthy, organic products` 10 times on two chains, one using the `PydanticOutputParser`, the other one using the `YamlOutputParser`, with `Products` (see below) being the targeted model to be created. LLMs used were Fireworks' `llama-v2-34b-code-instruct` and OpenAI `gpt-3.5-turbo`. All runs succeeded without validation errors.

```python
class Nutrition(BaseModel):
    sugar: int = Field(description="Sugar in grams")
    fat: float = Field(description="% of daily fat intake")

class Product(BaseModel):
    name: str = Field(description="Product name")
    stats: Nutrition

class Products(BaseModel):
    """A list of products"""
    products: list[Product]  # Used `__root__` for the yaml chain
```

Stats after 10 runs each were as follows:

### JSON
ø time: 7.75s
ø tokens: 380.8

### YAML
ø time: 5.12s
ø tokens: 242.2

Looking forward to feedback, tips and contributions!

pull/14158/head^2
parent
d31ff30df6
commit
b4e3e47c92
@ -0,0 +1,58 @@
|
||||
import json
|
||||
import re
|
||||
from typing import Type, TypeVar
|
||||
|
||||
import yaml
|
||||
from langchain_core.exceptions import OutputParserException
|
||||
from langchain_core.output_parsers import BaseOutputParser
|
||||
from langchain_core.pydantic_v1 import BaseModel, ValidationError
|
||||
|
||||
from langchain.output_parsers.format_instructions import YAML_FORMAT_INSTRUCTIONS
|
||||
|
||||
T = TypeVar("T", bound=BaseModel)
|
||||
|
||||
|
||||
class YamlOutputParser(BaseOutputParser[T]):
    """Parse YAML output using a pydantic model.

    The completion may carry the YAML inside a triple-backtick fence
    (optionally tagged ``yaml`` or ``yml``) or as bare YAML text. The loaded
    data is validated by ``pydantic_object.parse_obj``.
    """

    pydantic_object: Type[T]
    """The pydantic model to parse."""
    pattern: re.Pattern = re.compile(
        r"^```(?:ya?ml)?(?P<yaml>[^`]*)", re.MULTILINE | re.DOTALL
    )
    """Regex pattern to match yaml code blocks
    within triple backticks with optional yaml or yml prefix."""

    def parse(self, text: str) -> T:
        """Extract YAML from ``text`` and build a ``pydantic_object`` instance.

        Args:
            text: Raw completion, with or without a fenced YAML block.

        Returns:
            The model instance produced by ``pydantic_object.parse_obj``.

        Raises:
            OutputParserException: If the YAML cannot be loaded or the loaded
                data fails model validation. The original error is chained
                as ``__cause__``.
        """
        stripped = text.strip()
        try:
            # Use the first fenced candidate; the capture stops at the next
            # backtick.
            match = re.search(self.pattern, stripped)
            # Without a fence, treat the whole completion as YAML instead of
            # loading an empty string, which could never validate.
            yaml_str = match.group("yaml") if match else stripped

            json_object = yaml.safe_load(yaml_str)
            return self.pydantic_object.parse_obj(json_object)

        except (yaml.YAMLError, ValidationError) as e:
            name = self.pydantic_object.__name__
            msg = f"Failed to parse {name} from completion {text}. Got: {e}"
            # Chain the cause so the underlying YAML/validation traceback
            # stays visible to callers.
            raise OutputParserException(msg, llm_output=text) from e

    def get_format_instructions(self) -> str:
        """Return prompt instructions that embed the model's schema.

        The schema is taken from ``pydantic_object.schema()``, stripped of its
        top-level ``title`` and ``type`` entries, serialized with
        ``json.dumps`` and substituted into ``YAML_FORMAT_INSTRUCTIONS``.
        """
        schema = self.pydantic_object.schema()

        # Remove extraneous fields. Build a new dict rather than deleting
        # keys in place: the object returned by ``schema()`` may be shared
        # with later callers, and must not lose entries as a side effect.
        reduced_schema = {
            key: value
            for key, value in schema.items()
            if key not in ("title", "type")
        }
        # Ensure yaml in context is well-formed with double quotes.
        schema_str = json.dumps(reduced_schema)

        return YAML_FORMAT_INSTRUCTIONS.format(schema=schema_str)

    @property
    def _type(self) -> str:
        """Identifier string for this parser type."""
        return "yaml"
|
@ -0,0 +1,83 @@
|
||||
"""Test yamlOutputParser"""
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
from langchain_core.exceptions import OutputParserException
|
||||
from langchain_core.pydantic_v1 import BaseModel, Field
|
||||
|
||||
from langchain.output_parsers.yaml import YamlOutputParser
|
||||
|
||||
|
||||
class Actions(Enum):
    # Closed set of action names used by the parser tests. The values are
    # capitalized, so a lowercase spelling such as "update" is not a member.
    SEARCH = "Search"
    CREATE = "Create"
    UPDATE = "Update"
    DELETE = "Delete"
|
||||
|
||||
|
||||
class TestModel(BaseModel):
    # Target model for the YAML parser tests: an enum-typed field, a plain
    # string, an optional string defaulting to None, and a string used to
    # check multi-line values.
    action: Actions = Field(description="Action to be performed")
    action_input: str = Field(description="Input to be used in the action")
    additional_fields: Optional[str] = Field(
        default=None, description="Additional fields"
    )
    for_new_lines: str = Field(description="To be used to test newlines")
|
||||
|
||||
|
||||
# Prevent pytest from trying to run tests on TestModel
TestModel.__test__ = False  # type: ignore[attr-defined]


# Well-formed completion: a fenced YAML document whose `for_new_lines` value is
# a literal block scalar. The lines are spelled out one by one because the
# indentation and the trailing space after "escape_newline:" are significant;
# they are what makes the parsed value equal DEF_EXPECTED_RESULT.for_new_lines.
DEF_RESULT = (
    "```yaml\n"
    "---\n"
    "\n"
    "action: Update\n"
    "action_input: The yamlOutputParser class is powerful\n"
    "additional_fields: null\n"
    "for_new_lines: |\n"
    "  not_escape_newline:\n"
    "   escape_newline: \n"
    "\n"
    "```"
)

# action 'update' with a lowercase 'u' to test schema validation failure.
DEF_RESULT_FAIL = (
    "```yaml\n"
    "action: update\n"
    "action_input: The yamlOutputParser class is powerful\n"
    "additional_fields: null\n"
    "```"
)

# Model instance that parsing DEF_RESULT is expected to produce.
DEF_EXPECTED_RESULT = TestModel(
    action=Actions.UPDATE,
    action_input="The yamlOutputParser class is powerful",
    additional_fields=None,
    for_new_lines="not_escape_newline:\n escape_newline: \n",
)
|
||||
|
||||
|
||||
def test_yaml_output_parser() -> None:
    """Check that the fenced YAML in DEF_RESULT parses to DEF_EXPECTED_RESULT."""
    parser: YamlOutputParser[TestModel] = YamlOutputParser(pydantic_object=TestModel)

    parsed = parser.parse(DEF_RESULT)
    print("parse_result:", parsed)

    assert DEF_EXPECTED_RESULT == parsed
|
||||
|
||||
|
||||
def test_yaml_output_parser_fail() -> None:
    """Check that a completion failing validation raises OutputParserException."""
    parser: YamlOutputParser[TestModel] = YamlOutputParser(pydantic_object=TestModel)

    # Keep the exception under a separate name: the `except ... as` target is
    # unbound once the handler exits.
    raised = None
    try:
        parser.parse(DEF_RESULT_FAIL)
    except OutputParserException as exc:
        raised = exc

    assert raised is not None, "Expected OutputParserException"
    print("parse_result:", raised)
    assert "Failed to parse TestModel from completion" in str(raised)
|
Loading…
Reference in New Issue