core[patch]: add additionalProperties recursively to oai function if strict (#25169)

Hello. 
First of all, thank you for maintaining such a great project.

## Description
https://github.com/langchain-ai/langchain/pull/25123 added support for
structured output. However, when the schema contains nested objects,
`"additionalProperties": false` must be set on every object level, not
only on the outermost one.

error from current code:
https://gist.github.com/fufufukakaka/e9b475300e6934853d119428e390f204
```
BadRequestError: Error code: 400 - {'error': {'message': "Invalid schema for response_format 'JokeWithEvaluation': In context=('properties', 'self_evaluation'), 'additionalProperties' is required to be supplied and to be false", 'type': 'invalid_request_error', 'param': 'response_format', 'code': None}}
```

Reference: [Introducing Structured Outputs in the
API](https://openai.com/index/introducing-structured-outputs-in-the-api/)

```json
{
  "model": "gpt-4o-2024-08-06",
  "messages": [
    {
      "role": "system",
      "content": "You are a helpful math tutor."
    },
    {
      "role": "user",
      "content": "solve 8x + 31 = 2"
    }
  ],
  "response_format": {
    "type": "json_schema",
    "json_schema": {
      "name": "math_response",
      "strict": true,
      "schema": {
        "type": "object",
        "properties": {
          "steps": {
            "type": "array",
            "items": {
              "type": "object",
              "properties": {
                "explanation": {
                  "type": "string"
                },
                "output": {
                  "type": "string"
                }
              },
              "required": ["explanation", "output"],
              "additionalProperties": false
            }
          },
          "final_answer": {
            "type": "string"
          }
        },
        "required": ["steps", "final_answer"],
        "additionalProperties": false
      }
    }
  }
}
```

In the current code, `"additionalProperties": false` is only added at
the top level of the schema.
This PR introduces the `_recursive_set_additional_properties_false`
function, which recursively adds `"additionalProperties": false` to every
nested object in the JSON schema sent with the request.

Twitter handle: `@fukkaa1225`

Thank you!

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
Yusuke Fukasawa 2024-08-23 09:08:58 +09:00 committed by GitHub
parent b35ee09b3f
commit 0258cb96fa
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 103 additions and 6 deletions

View File

@ -389,9 +389,13 @@ def convert_to_openai_function(
if strict is not None:
oai_function["strict"] = strict
# As of 08/06/24, OpenAI requires that additionalProperties be supplied and set
# to False if strict is True.
oai_function["parameters"]["additionalProperties"] = False
if strict:
# As of 08/06/24, OpenAI requires that additionalProperties be supplied and
# set to False if strict is True.
# All properties layer needs 'additionalProperties=False'
oai_function["parameters"] = _recursive_set_additional_properties_false(
oai_function["parameters"]
)
return oai_function
@ -592,3 +596,20 @@ def _py_38_safe_origin(origin: Type) -> Type:
**origin_union_type_map,
}
return cast(Type, origin_map.get(origin, origin))
def _recursive_set_additional_properties_false(
schema: Dict[str, Any],
) -> Dict[str, Any]:
if isinstance(schema, dict):
# Check if 'required' is a key at the current level
if "required" in schema:
schema["additionalProperties"] = False
# Recursively check 'properties' and 'items' if they exist
if "properties" in schema:
for value in schema["properties"].values():
_recursive_set_additional_properties_false(value)
if "items" in schema:
_recursive_set_additional_properties_false(schema["items"])
return schema

View File

@ -341,9 +341,7 @@ def test_convert_to_openai_function_nested() -> None:
"required": ["nested_arg1", "nested_arg2"],
},
},
"required": [
"arg1",
],
"required": ["arg1"],
},
}
@ -351,6 +349,47 @@ def test_convert_to_openai_function_nested() -> None:
assert actual == expected
def test_convert_to_openai_function_nested_strict() -> None:
    """Strict conversion of a function whose argument is a nested model.

    Both the top-level parameters object and the nested ``arg1`` object are
    expected to carry ``"additionalProperties": False``, and the result is
    expected to be flagged ``"strict": True``.
    """

    class Nested(BaseModel):
        nested_arg1: int = Field(..., description="foo")
        nested_arg2: Literal["bar", "baz"] = Field(
            ..., description="one of 'bar', 'baz'"
        )

    def my_function(arg1: Nested) -> None:
        """dummy function"""

    # Expected schema for the nested model, built separately for readability.
    nested_schema = {
        "type": "object",
        "properties": {
            "nested_arg1": {"type": "integer", "description": "foo"},
            "nested_arg2": {
                "type": "string",
                "enum": ["bar", "baz"],
                "description": "one of 'bar', 'baz'",
            },
        },
        "required": ["nested_arg1", "nested_arg2"],
        "additionalProperties": False,
    }
    want = {
        "name": "my_function",
        "description": "dummy function",
        "parameters": {
            "type": "object",
            "properties": {"arg1": nested_schema},
            "required": ["arg1"],
            "additionalProperties": False,
        },
        "strict": True,
    }

    got = convert_to_openai_function(my_function, strict=True)
    assert got == want
@pytest.mark.xfail(reason="Pydantic converts Optional[str] to str in .schema()")
def test_function_optional_param() -> None:
@tool

View File

@ -868,6 +868,43 @@ def test_structured_output_strict(
next(chat.stream("Tell me a joke about cats."))
@pytest.mark.parametrize(
    ("model", "method", "strict"), [("gpt-4o-2024-08-06", "json_schema", None)]
)
def test_nested_structured_output_strict(
    model: str, method: Literal["json_schema"], strict: Optional[bool]
) -> None:
    """Check structured output for a schema that contains a nested object.

    The ``strict`` value comes from the parametrisation and is forwarded
    unchanged to ``with_structured_output``. Both ``invoke`` and ``stream``
    must yield dicts with the expected outer and nested keys.
    """
    from typing import TypedDict

    class SelfEvaluation(TypedDict):
        score: int
        text: str

    class JokeWithEvaluation(TypedDict):
        """Joke to tell user."""

        setup: str
        punchline: str
        self_evaluation: SelfEvaluation

    outer_keys = {"setup", "punchline", "self_evaluation"}
    evaluation_keys = {"score", "text"}
    prompt = "Tell me a joke about cats."

    structured_llm = ChatOpenAI(model=model, temperature=0).with_structured_output(
        JokeWithEvaluation, method=method, strict=strict
    )

    # Single-shot call.
    response = structured_llm.invoke(prompt)
    assert isinstance(response, dict)
    assert set(response.keys()) == outer_keys
    assert set(response["self_evaluation"].keys()) == evaluation_keys

    # Streaming call: every chunk is a dict; the last one is checked in full.
    for chunk in structured_llm.stream(prompt):
        assert isinstance(chunk, dict)
    assert isinstance(chunk, dict)  # for mypy
    assert set(chunk.keys()) == outer_keys
    assert set(chunk["self_evaluation"].keys()) == evaluation_keys
def test_json_mode() -> None:
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
response = llm.invoke(