Added param to return only a structured json from the get_format_instructions method (#5848)

I added an `only_json` parameter to the `get_format_instructions` method
so that it can return the JSON instructions directly, without the leading
instruction sentence. I'm planning to use the output of
`get_format_instructions(only_json=True)` to define the structure of a
JSON object passed as input.

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
SvMax 2023-07-08 08:57:26 +02:00 committed by GitHub
parent 362bc301df
commit 1d649b127e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 57 additions and 3 deletions

View File

@ -8,6 +8,14 @@ STRUCTURED_FORMAT_INSTRUCTIONS = """The output should be a markdown code snippet
}} }}
```""" ```"""
# Bare variant of STRUCTURED_FORMAT_INSTRUCTIONS: only the fenced JSON schema,
# with no introductory sentence. `{format}` is substituted via str.format(), so
# literal braces are doubled. The closing fence keeps the snippet well-formed
# markdown, matching the way STRUCTURED_FORMAT_INSTRUCTIONS ends.
STRUCTURED_FORMAT_SIMPLE_INSTRUCTIONS = """
```json
{{
{format}
}}
```"""
PYDANTIC_FORMAT_INSTRUCTIONS = """The output should be formatted as a JSON instance that conforms to the JSON schema below. PYDANTIC_FORMAT_INSTRUCTIONS = """The output should be formatted as a JSON instance that conforms to the JSON schema below.
As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}}} As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}}}

View File

@ -4,7 +4,10 @@ from typing import Any, List
from pydantic import BaseModel from pydantic import BaseModel
from langchain.output_parsers.format_instructions import STRUCTURED_FORMAT_INSTRUCTIONS from langchain.output_parsers.format_instructions import (
STRUCTURED_FORMAT_INSTRUCTIONS,
STRUCTURED_FORMAT_SIMPLE_INSTRUCTIONS,
)
from langchain.output_parsers.json import parse_and_check_json_markdown from langchain.output_parsers.json import parse_and_check_json_markdown
from langchain.schema import BaseOutputParser from langchain.schema import BaseOutputParser
@ -32,11 +35,54 @@ class StructuredOutputParser(BaseOutputParser):
) -> StructuredOutputParser: ) -> StructuredOutputParser:
return cls(response_schemas=response_schemas) return cls(response_schemas=response_schemas)
def get_format_instructions(self) -> str: def get_format_instructions(self, only_json: bool = False) -> str:
"""
Method to get the format instructions for the output parser.
example:
```python
from langchain.output_parsers.structured import (
StructuredOutputParser, ResponseSchema
)
response_schemas = [
ResponseSchema(
name="foo",
description="a list of strings",
type="List[string]"
),
ResponseSchema(
name="bar",
description="a string",
type="string"
),
]
parser = StructuredOutputParser.from_response_schemas(response_schemas)
print(parser.get_format_instructions())
# output:
# The output should be a markdown code snippet formatted in the following
# schema, including the leading and trailing "```json" and "```":
#
# ```json
# {
# "foo": List[string] // a list of strings
# "bar": string // a string
# }
# ```
```
Args:
only_json (bool): If True, only the json in the markdown code snippet
will be returned, without the introducing text. Defaults to False.
"""
schema_str = "\n".join( schema_str = "\n".join(
[_get_sub_string(schema) for schema in self.response_schemas] [_get_sub_string(schema) for schema in self.response_schemas]
) )
return STRUCTURED_FORMAT_INSTRUCTIONS.format(format=schema_str) if only_json:
return STRUCTURED_FORMAT_SIMPLE_INSTRUCTIONS.format(format=schema_str)
else:
return STRUCTURED_FORMAT_INSTRUCTIONS.format(format=schema_str)
def parse(self, text: str) -> Any: def parse(self, text: str) -> Any:
expected_keys = [rs.name for rs in self.response_schemas] expected_keys = [rs.name for rs in self.response_schemas]