Added param to return only a structured json from the get_format_instructions method (#5848)

I added a parameter to the `get_format_instructions` method so that it can return the JSON instructions directly, without the leading instruction sentence. I'm planning to use it, via `get_format_instructions(only_json=True)`, to define the structure of a JSON object passed as input. --------- Co-authored-by: Bagatur <baskaryan@gmail.com>
2024-11-06 03:20:49 +00:00 · 2023-07-08 08:57:26 +02:00 · 2023-07-08 08:57:26 +02:00 · 1d649b127e
commit 1d649b127e
parent 362bc301df
2 changed files with 57 additions and 3 deletions
--- a/langchain/output_parsers/format_instructions.py
+++ b/langchain/output_parsers/format_instructions.py
@ -8,6 +8,14 @@ STRUCTURED_FORMAT_INSTRUCTIONS = """The output should be a markdown code snippet
 }}
 ```"""
 STRUCTURED_FORMAT_SIMPLE_INSTRUCTIONS = """
 ```json
 {{
 {format}
 }}
 """
 PYDANTIC_FORMAT_INSTRUCTIONS = """The output should be formatted as a JSON instance that conforms to the JSON schema below.
 As an example, for the schema {{"properties": {{"foo": {{"title": "Foo", "description": "a list of strings", "type": "array", "items": {{"type": "string"}}}}}}, "required": ["foo"]}}}}
--- a/langchain/output_parsers/structured.py
+++ b/langchain/output_parsers/structured.py
@ -4,7 +4,10 @@ from typing import Any, List
 from pydantic import BaseModel
-from langchain.output_parsers.format_instructions import STRUCTURED_FORMAT_INSTRUCTIONS
+from langchain.output_parsers.format_instructions import (
    STRUCTURED_FORMAT_INSTRUCTIONS,
    STRUCTURED_FORMAT_SIMPLE_INSTRUCTIONS,
 )
 from langchain.output_parsers.json import parse_and_check_json_markdown
 from langchain.schema import BaseOutputParser
@ -32,11 +35,54 @@ class StructuredOutputParser(BaseOutputParser):
    ) -> StructuredOutputParser:
        return cls(response_schemas=response_schemas)
-    def get_format_instructions(self) -> str:
+    def get_format_instructions(self, only_json: bool = False) -> str:
        """
        Method to get the format instructions for the output parser.
        example:
        ```python
        from langchain.output_parsers.structured import (
            StructuredOutputParser, ResponseSchema
        )
        response_schemas = [
            ResponseSchema(
                name="foo",
                description="a list of strings",
                type="List[string]"
                ),
            ResponseSchema(
                name="bar",
                description="a string",
                type="string"
                ),
        ]
        parser = StructuredOutputParser.from_response_schemas(response_schemas)
        print(parser.get_format_instructions())
        output:
        # The output should be a markdown code snippet formatted in the following
        # schema, including the leading and trailing "```json" and "```":
        #
        # ```json
        # {
        #     "foo": List[string]  // a list of strings
        #     "bar": string  // a string
        # }
        Args:
            only_json (bool): If True, only the json in the markdown code snippet
                will be returned, without the introducing text. Defaults to False.
        """
        schema_str = "\n".join(
            [_get_sub_string(schema) for schema in self.response_schemas]
        )
-        return STRUCTURED_FORMAT_INSTRUCTIONS.format(format=schema_str)
+        if only_json:
            return STRUCTURED_FORMAT_SIMPLE_INSTRUCTIONS.format(format=schema_str)
        else:
            return STRUCTURED_FORMAT_INSTRUCTIONS.format(format=schema_str)
    def parse(self, text: str) -> Any:
        expected_keys = [rs.name for rs in self.response_schemas]