From 1d649b127eb10c426f9b9a67cbd1fe6ec8e6befa Mon Sep 17 00:00:00 2001 From: SvMax Date: Sat, 8 Jul 2023 08:57:26 +0200 Subject: [PATCH] Added param to return only a structured json from the get_format_instructions method (#5848) I added a parameter to the get_format_instructions method so that it can return the JSON instructions directly, without the leading instruction sentence. I'm planning to use the output of get_format_instructions() to define the structure of a JSON object passed as input. --------- Co-authored-by: Bagatur --- .../output_parsers/format_instructions.py | 8 +++ langchain/output_parsers/structured.py | 52 +++++++++++++++++-- 2 files changed, 57 insertions(+), 3 deletions(-) diff --git a/langchain/output_parsers/format_instructions.py b/langchain/output_parsers/format_instructions.py index d9e5b6b69c..23c8328178 100644 --- a/langchain/output_parsers/format_instructions.py +++ b/langchain/output_parsers/format_instructions.py @@ -8,6 +8,14 @@ STRUCTURED_FORMAT_INSTRUCTIONS = """The output should be a markdown code snippet }} ```""" +STRUCTURED_FORMAT_SIMPLE_INSTRUCTIONS = """ +```json +{{ +{format} +}} +""" + + PYDANTIC_FORMAT_INSTRUCTIONS = """The output should be formatted as a JSON instance that conforms to the JSON schema below. 
As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}}} diff --git a/langchain/output_parsers/structured.py b/langchain/output_parsers/structured.py index a7b5c3910d..40ff75174a 100644 --- a/langchain/output_parsers/structured.py +++ b/langchain/output_parsers/structured.py @@ -4,7 +4,10 @@ from typing import Any, List from pydantic import BaseModel -from langchain.output_parsers.format_instructions import STRUCTURED_FORMAT_INSTRUCTIONS +from langchain.output_parsers.format_instructions import ( + STRUCTURED_FORMAT_INSTRUCTIONS, + STRUCTURED_FORMAT_SIMPLE_INSTRUCTIONS, +) from langchain.output_parsers.json import parse_and_check_json_markdown from langchain.schema import BaseOutputParser @@ -32,11 +35,54 @@ class StructuredOutputParser(BaseOutputParser): ) -> StructuredOutputParser: return cls(response_schemas=response_schemas) - def get_format_instructions(self) -> str: + def get_format_instructions(self, only_json: bool = False) -> str: + """ + Method to get the format instructions for the output parser. + + example: + ```python + from langchain.output_parsers.structured import ( + StructuredOutputParser, ResponseSchema + ) + + response_schemas = [ + ResponseSchema( + name="foo", + description="a list of strings", + type="List[string]" + ), + ResponseSchema( + name="bar", + description="a string", + type="string" + ), + ] + + parser = StructuredOutputParser.from_response_schemas(response_schemas) + + print(parser.get_format_instructions()) + + output: + # The output should be a markdown code snippet formatted in the following + # schema, including the leading and trailing "```json" and "```": + # + # ```json + # { + # "foo": List[string] // a list of strings + # "bar": string // a string + # } + + Args: + only_json (bool): If True, only the json in the markdown code snippet + will be returned, without the introducing text. 
Defaults to False. + """ schema_str = "\n".join( [_get_sub_string(schema) for schema in self.response_schemas] ) - return STRUCTURED_FORMAT_INSTRUCTIONS.format(format=schema_str) + if only_json: + return STRUCTURED_FORMAT_SIMPLE_INSTRUCTIONS.format(format=schema_str) + else: + return STRUCTURED_FORMAT_INSTRUCTIONS.format(format=schema_str) def parse(self, text: str) -> Any: expected_keys = [rs.name for rs in self.response_schemas]