community: Resolve refs recursively when generating openai_fn from OpenAPI spec (#19002)

- **Description:** This PR is intended to improve the generation of
payloads for OpenAI functions when converting from an OpenAPI spec file.
The solution is to recursively resolve `$refs`.
Currently when converting OpenAPI specs into OpenAI functions using
`openapi_spec_to_openai_fn`, if the schemas have nested references, the
generated functions contain `$ref` that causes the LLM to generate
payloads with an incorrect schema.

For example, for the for OpenAPI spec:

```
text = """
{
  "openapi": "3.0.3",
  "info": {
    "title": "Swagger Petstore - OpenAPI 3.0",
    "termsOfService": "http://swagger.io/terms/",
    "contact": {
      "email": "apiteam@swagger.io"
    },
    "license": {
      "name": "Apache 2.0",
      "url": "http://www.apache.org/licenses/LICENSE-2.0.html"
    },
    "version": "1.0.11"
  },
  "externalDocs": {
    "description": "Find out more about Swagger",
    "url": "http://swagger.io"
  },
  "servers": [
    {
      "url": "https://petstore3.swagger.io/api/v3"
    }
  ],
  "tags": [
    {
      "name": "pet",
      "description": "Everything about your Pets",
      "externalDocs": {
        "description": "Find out more",
        "url": "http://swagger.io"
      }
    },
    {
      "name": "store",
      "description": "Access to Petstore orders",
      "externalDocs": {
        "description": "Find out more about our store",
        "url": "http://swagger.io"
      }
    },
    {
      "name": "user",
      "description": "Operations about user"
    }
  ],
  "paths": {
    "/pet": {
      "post": {
        "tags": [
          "pet"
        ],
        "summary": "Add a new pet to the store",
        "description": "Add a new pet to the store",
        "operationId": "addPet",
        "requestBody": {
          "description": "Create a new pet in the store",
          "content": {
            "application/json": {
              "schema": {
                "$ref": "#/components/schemas/Pet"
              }
            }
          },
          "required": true
        },
        "responses": {
          "200": {
            "description": "Successful operation",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/Pet"
                }
              }
            }
          }
        }
      }
    }
  },
  "components": {
    "schemas": {
      "Tag": {
        "type": "object",
        "properties": {
          "id": {
            "type": "integer",
            "format": "int64"
          },
          "model_type": {
            "type": "number"
          }
        }
      },
      "Category": {
        "type": "object",
        "required": [
          "model",
          "year",
          "age"
        ],
        "properties": {
          "year": {
            "type": "integer",
            "format": "int64",
            "example": 1
          },
          "model": {
            "type": "string",
            "example": "Ford"
          },
          "age": {
            "type": "integer",
            "example": 42
          }
        }
      },
      "Pet": {
        "required": [
          "name"
        ],
        "type": "object",
        "properties": {
          "id": {
            "type": "integer",
            "format": "int64",
            "example": 10
          },
          "name": {
            "type": "string",
            "example": "doggie"
          },
          "category": {
            "$ref": "#/components/schemas/Category"
          },
          "tags": {
            "type": "array",
            "items": {
              "$ref": "#/components/schemas/Tag"
            }
          },
          "status": {
            "type": "string",
            "description": "pet status in the store",
            "enum": [
              "available",
              "pending",
              "sold"
            ]
          }
        }
      }
    }
  }
}
"""
```

Executing:
```
spec = OpenAPISpec.from_text(text)
pet_openai_functions, pet_callables = openapi_spec_to_openai_fn(spec)
response = model.invoke("Create a pet named Scott", functions=pet_openai_functions)
```

`pet_open_functions` contains unresolved `$refs`:

```
[
  {
    "name": "addPet",
    "description": "Add a new pet to the store",
    "parameters": {
      "type": "object",
      "properties": {
        "json": {
          "properties": {
            "id": {
              "type": "integer",
              "schema_format": "int64",
              "example": 10
            },
            "name": {
              "type": "string",
              "example": "doggie"
            },
            "category": {
              "ref": "#/components/schemas/Category"
            },
            "tags": {
              "items": {
                "ref": "#/components/schemas/Tag"
              },
              "type": "array"
            },
            "status": {
              "type": "string",
              "enum": [
                "available",
                "pending",
                "sold"
              ],
              "description": "pet status in the store"
            }
          },
          "type": "object",
          "required": [
            "name",
            "photoUrls"
          ]
        }
      }
    }
  }
]
```

and the generated JSON has an incorrect schema (e.g. category is filled
with `id` and `name` instead of `model`, `year` and `age`:

```
{
  "id": 1,
  "name": "Scott",
  "category": {
    "id": 1,
    "name": "Dogs"
  },
  "tags": [
    {
      "id": 1,
      "name": "tag1"
    }
  ],
  "status": "available"
}
```

With this change, the generated JSON by the LLM becomes,
`pet_openai_functions` becomes:

```
[
  {
    "name": "addPet",
    "description": "Add a new pet to the store",
    "parameters": {
      "type": "object",
      "properties": {
        "json": {
          "properties": {
            "id": {
              "type": "integer",
              "schema_format": "int64",
              "example": 10
            },
            "name": {
              "type": "string",
              "example": "doggie"
            },
            "category": {
              "properties": {
                "year": {
                  "type": "integer",
                  "schema_format": "int64",
                  "example": 1
                },
                "model": {
                  "type": "string",
                  "example": "Ford"
                },
                "age": {
                  "type": "integer",
                  "example": 42
                }
              },
              "type": "object",
              "required": [
                "model",
                "year",
                "age"
              ]
            },
            "tags": {
              "items": {
                "properties": {
                  "id": {
                    "type": "integer",
                    "schema_format": "int64"
                  },
                  "model_type": {
                    "type": "number"
                  }
                },
                "type": "object"
              },
              "type": "array"
            },
            "status": {
              "type": "string",
              "enum": [
                "available",
                "pending",
                "sold"
              ],
              "description": "pet status in the store"
            }
          },
          "type": "object",
          "required": [
            "name"
          ]
        }
      }
    }
  }
]
```

and the JSON generated by the LLM is:
```
{
  "id": 1,
  "name": "Scott",
  "category": {
    "year": 2022,
    "model": "Dog",
    "age": 42
  },
  "tags": [
    {
      "id": 1,
      "model_type": 1
    }
  ],
  "status": "available"
}
```

which has the intended schema.

    - **Twitter handle:**: @brunoalvisio

---------

Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
Bruno Alvisio 2024-09-02 13:17:39 -07:00 committed by GitHub
parent 464dae8ac2
commit ab527027ac
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 141 additions and 2 deletions

View File

@ -129,11 +129,31 @@ class OpenAPISpec(OpenAPI):
raise ValueError(f"No schema found for {ref_name}")
return schemas[ref_name]
def get_schema(self, schema: Union[Reference, Schema]) -> Schema:
def get_schema(
self,
schema: Union[Reference, Schema],
depth: int = 0,
max_depth: Optional[int] = None,
) -> Schema:
if max_depth is not None and depth >= max_depth:
raise RecursionError(
f"Max depth of {max_depth} has been exceeded when resolving references."
)
from openapi_pydantic import Reference
if isinstance(schema, Reference):
return self.get_referenced_schema(schema)
schema = self.get_referenced_schema(schema)
# TODO: Resolve references on all fields of Schema ?
# (e.g. patternProperties, etc...)
if schema.properties is not None:
for p_name, p in schema.properties.items():
schema.properties[p_name] = self.get_schema(p, depth + 1, max_depth)
if schema.items is not None:
schema.items = self.get_schema(schema.items, depth + 1, max_depth)
return schema
def _get_root_referenced_schema(self, ref: Reference) -> Schema:

View File

@ -0,0 +1,70 @@
{
"openapi": "3.0.3",
"info": {
"title": "Swagger Petstore - OpenAPI 3.0",
"license": {
"name": "Apache 2.0",
"url": "http://www.apache.org/licenses/LICENSE-2.0.html"
},
"version": "1.0.11"
},
"paths": {
"/pet": {
"post": {
"summary": "Add a new pet to the store",
"description": "Add a new pet to the store",
"operationId": "addPet",
"requestBody": {
"description": "Create a new pet in the store",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Pet"
}
}
},
"required": true
}
}
}
},
"components": {
"schemas": {
"Tag": {
"type": "object",
"properties": {
"id": {
"type": "integer",
"format": "int64"
},
"model_type": {
"type": "number"
}
}
},
"Pet": {
"required": [
"name"
],
"type": "object",
"properties": {
"id": {
"type": "integer",
"format": "int64",
"example": 10
},
"name": {
"type": "string",
"example": "doggie"
},
"tags": {
"type": "array",
"items": {
"$ref": "#/components/schemas/Tag"
}
}
}
}
}
}
}

View File

@ -42,3 +42,52 @@ def test_header_param() -> None:
openai_functions, _ = openapi_spec_to_openai_fn(spec)
assert openai_functions == EXPECTED_OPENAI_FUNCTIONS_HEADER_PARAM
EXPECTED_OPENAI_FUNCTIONS_NESTED_REF = [
{
"name": "addPet",
"description": "Add a new pet to the store",
"parameters": {
"type": "object",
"properties": {
"json": {
"properties": {
"id": {
"type": "integer",
"schema_format": "int64",
"example": 10,
},
"name": {"type": "string", "example": "doggie"},
"tags": {
"items": {
"properties": {
"id": {"type": "integer", "schema_format": "int64"},
"model_type": {"type": "number"},
},
"type": "object",
},
"type": "array",
},
},
"type": "object",
"required": ["name"],
}
},
},
}
]
@pytest.mark.requires("openapi_pydantic")
def test_nested_ref_in_openapi_spec() -> None:
spec = OpenAPISpec.from_file(
Path(__file__).parent.parent
/ "data"
/ "openapi_specs"
/ "openapi_spec_nested_ref.json",
)
openai_functions, _ = openapi_spec_to_openai_fn(spec)
assert openai_functions == EXPECTED_OPENAI_FUNCTIONS_NESTED_REF