You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
langchain/libs/community/langchain_community/tools/openapi/utils/api_models.py

632 lines
21 KiB
Python

community[major], core[patch], langchain[patch], experimental[patch]: Create langchain-community (#14463) Moved the following modules to new package langchain-community in a backwards compatible fashion: ``` mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community mv langchain/langchain/adapters community/langchain_community mv langchain/langchain/callbacks community/langchain_community/callbacks mv langchain/langchain/chat_loaders community/langchain_community mv langchain/langchain/chat_models community/langchain_community mv langchain/langchain/document_loaders community/langchain_community mv langchain/langchain/docstore community/langchain_community mv langchain/langchain/document_transformers community/langchain_community mv langchain/langchain/embeddings community/langchain_community mv langchain/langchain/graphs community/langchain_community mv langchain/langchain/llms community/langchain_community mv langchain/langchain/memory/chat_message_histories community/langchain_community mv langchain/langchain/retrievers community/langchain_community mv langchain/langchain/storage community/langchain_community mv langchain/langchain/tools community/langchain_community mv langchain/langchain/utilities community/langchain_community mv langchain/langchain/vectorstores community/langchain_community mv langchain/langchain/agents/agent_toolkits community/langchain_community mv langchain/langchain/cache.py community/langchain_community ``` Moved the following to core ``` mv langchain/langchain/utils/json_schema.py core/langchain_core/utils mv langchain/langchain/utils/html.py core/langchain_core/utils mv langchain/langchain/utils/strings.py core/langchain_core/utils cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py rm langchain/langchain/utils/env.py ``` See .scripts/community_split/script_integrations.sh for all changes
10 months ago
"""Pydantic models for parsing an OpenAPI spec."""
from __future__ import annotations
import logging
from enum import Enum
from typing import (
TYPE_CHECKING,
Any,
Dict,
List,
Optional,
Sequence,
Tuple,
Type,
Union,
)
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_community.tools.openapi.utils.openapi_utils import HTTPVerb, OpenAPISpec
logger = logging.getLogger(__name__)
PRIMITIVE_TYPES = {
"integer": int,
"number": float,
"string": str,
"boolean": bool,
"array": List,
"object": Dict,
"null": None,
}
# See https://github.com/OAI/OpenAPI-Specification/blob/main/versions/3.1.0.md#parameterIn
# for more info.
class APIPropertyLocation(Enum):
"""The location of the property."""
QUERY = "query"
PATH = "path"
HEADER = "header"
COOKIE = "cookie" # Not yet supported
@classmethod
def from_str(cls, location: str) -> "APIPropertyLocation":
"""Parse an APIPropertyLocation."""
try:
return cls(location)
except ValueError:
raise ValueError(
f"Invalid APIPropertyLocation. Valid values are {cls.__members__}"
)
_SUPPORTED_MEDIA_TYPES = ("application/json",)
SUPPORTED_LOCATIONS = {
APIPropertyLocation.QUERY,
APIPropertyLocation.PATH,
}
INVALID_LOCATION_TEMPL = (
'Unsupported APIPropertyLocation "{location}"'
" for parameter {name}. "
+ f"Valid values are {[loc.value for loc in SUPPORTED_LOCATIONS]}"
)
SCHEMA_TYPE = Union[str, Type, tuple, None, Enum]
class APIPropertyBase(BaseModel):
"""Base model for an API property."""
# The name of the parameter is required and is case-sensitive.
# If "in" is "path", the "name" field must correspond to a template expression
# within the path field in the Paths Object.
# If "in" is "header" and the "name" field is "Accept", "Content-Type",
# or "Authorization", the parameter definition is ignored.
# For all other cases, the "name" corresponds to the parameter
# name used by the "in" property.
name: str = Field(alias="name")
"""The name of the property."""
required: bool = Field(alias="required")
"""Whether the property is required."""
type: SCHEMA_TYPE = Field(alias="type")
"""The type of the property.
Either a primitive type, a component/parameter type,
or an array or 'object' (dict) of the above."""
default: Optional[Any] = Field(alias="default", default=None)
"""The default value of the property."""
description: Optional[str] = Field(alias="description", default=None)
"""The description of the property."""
if TYPE_CHECKING:
from openapi_pydantic import (
MediaType,
Parameter,
RequestBody,
Schema,
)
class APIProperty(APIPropertyBase):
"""A model for a property in the query, path, header, or cookie params."""
location: APIPropertyLocation = Field(alias="location")
"""The path/how it's being passed to the endpoint."""
@staticmethod
def _cast_schema_list_type(
schema: Schema,
) -> Optional[Union[str, Tuple[str, ...]]]:
type_ = schema.type
if not isinstance(type_, list):
return type_
else:
return tuple(type_)
@staticmethod
def _get_schema_type_for_enum(parameter: Parameter, schema: Schema) -> Enum:
"""Get the schema type when the parameter is an enum."""
param_name = f"{parameter.name}Enum"
return Enum(param_name, {str(v): v for v in schema.enum})
@staticmethod
def _get_schema_type_for_array(
schema: Schema,
) -> Optional[Union[str, Tuple[str, ...]]]:
from openapi_pydantic import (
Reference,
Schema,
)
items = schema.items
if isinstance(items, Schema):
schema_type = APIProperty._cast_schema_list_type(items)
elif isinstance(items, Reference):
ref_name = items.ref.split("/")[-1]
schema_type = ref_name # TODO: Add ref definitions to make his valid
else:
raise ValueError(f"Unsupported array items: {items}")
if isinstance(schema_type, str):
# TODO: recurse
schema_type = (schema_type,)
return schema_type
@staticmethod
def _get_schema_type(parameter: Parameter, schema: Optional[Schema]) -> SCHEMA_TYPE:
if schema is None:
return None
schema_type: SCHEMA_TYPE = APIProperty._cast_schema_list_type(schema)
if schema_type == "array":
schema_type = APIProperty._get_schema_type_for_array(schema)
elif schema_type == "object":
# TODO: Resolve array and object types to components.
raise NotImplementedError("Objects not yet supported")
elif schema_type in PRIMITIVE_TYPES:
if schema.enum:
schema_type = APIProperty._get_schema_type_for_enum(parameter, schema)
else:
# Directly use the primitive type
pass
else:
raise NotImplementedError(f"Unsupported type: {schema_type}")
return schema_type
@staticmethod
def _validate_location(location: APIPropertyLocation, name: str) -> None:
if location not in SUPPORTED_LOCATIONS:
raise NotImplementedError(
INVALID_LOCATION_TEMPL.format(location=location, name=name)
)
@staticmethod
def _validate_content(content: Optional[Dict[str, MediaType]]) -> None:
if content:
raise ValueError(
"API Properties with media content not supported. "
"Media content only supported within APIRequestBodyProperty's"
)
@staticmethod
def _get_schema(parameter: Parameter, spec: OpenAPISpec) -> Optional[Schema]:
from openapi_pydantic import (
Reference,
Schema,
)
schema = parameter.param_schema
if isinstance(schema, Reference):
schema = spec.get_referenced_schema(schema)
elif schema is None:
return None
elif not isinstance(schema, Schema):
raise ValueError(f"Error dereferencing schema: {schema}")
return schema
@staticmethod
def is_supported_location(location: str) -> bool:
"""Return whether the provided location is supported."""
try:
return APIPropertyLocation.from_str(location) in SUPPORTED_LOCATIONS
except ValueError:
return False
@classmethod
def from_parameter(cls, parameter: Parameter, spec: OpenAPISpec) -> "APIProperty":
"""Instantiate from an OpenAPI Parameter."""
location = APIPropertyLocation.from_str(parameter.param_in)
cls._validate_location(
location,
parameter.name,
)
cls._validate_content(parameter.content)
schema = cls._get_schema(parameter, spec)
schema_type = cls._get_schema_type(parameter, schema)
default_val = schema.default if schema is not None else None
return cls(
name=parameter.name,
location=location,
default=default_val,
description=parameter.description,
required=parameter.required,
type=schema_type,
)
class APIRequestBodyProperty(APIPropertyBase):
"""A model for a request body property."""
properties: List["APIRequestBodyProperty"] = Field(alias="properties")
"""The sub-properties of the property."""
# This is useful for handling nested property cycles.
# We can define separate types in that case.
references_used: List[str] = Field(alias="references_used")
"""The references used by the property."""
@classmethod
def _process_object_schema(
cls, schema: Schema, spec: OpenAPISpec, references_used: List[str]
) -> Tuple[Union[str, List[str], None], List["APIRequestBodyProperty"]]:
from openapi_pydantic import (
Reference,
)
properties = []
required_props = schema.required or []
if schema.properties is None:
raise ValueError(
f"No properties found when processing object schema: {schema}"
)
for prop_name, prop_schema in schema.properties.items():
if isinstance(prop_schema, Reference):
ref_name = prop_schema.ref.split("/")[-1]
if ref_name not in references_used:
references_used.append(ref_name)
prop_schema = spec.get_referenced_schema(prop_schema)
else:
continue
properties.append(
cls.from_schema(
schema=prop_schema,
name=prop_name,
required=prop_name in required_props,
spec=spec,
references_used=references_used,
)
)
return schema.type, properties
@classmethod
def _process_array_schema(
cls,
schema: Schema,
name: str,
spec: OpenAPISpec,
references_used: List[str],
) -> str:
from openapi_pydantic import Reference, Schema
items = schema.items
if items is not None:
if isinstance(items, Reference):
ref_name = items.ref.split("/")[-1]
if ref_name not in references_used:
references_used.append(ref_name)
items = spec.get_referenced_schema(items)
else:
pass
return f"Array<{ref_name}>"
else:
pass
if isinstance(items, Schema):
array_type = cls.from_schema(
schema=items,
name=f"{name}Item",
required=True, # TODO: Add required
spec=spec,
references_used=references_used,
)
return f"Array<{array_type.type}>"
return "array"
@classmethod
def from_schema(
cls,
schema: Schema,
name: str,
required: bool,
spec: OpenAPISpec,
references_used: Optional[List[str]] = None,
) -> "APIRequestBodyProperty":
"""Recursively populate from an OpenAPI Schema."""
if references_used is None:
references_used = []
schema_type = schema.type
properties: List[APIRequestBodyProperty] = []
if schema_type == "object" and schema.properties:
schema_type, properties = cls._process_object_schema(
schema, spec, references_used
)
elif schema_type == "array":
schema_type = cls._process_array_schema(schema, name, spec, references_used)
elif schema_type in PRIMITIVE_TYPES:
# Use the primitive type directly
pass
elif schema_type is None:
# No typing specified/parsed. WIll map to 'any'
pass
else:
raise ValueError(f"Unsupported type: {schema_type}")
return cls(
name=name,
required=required,
type=schema_type,
default=schema.default,
description=schema.description,
properties=properties,
references_used=references_used,
)
# class APIRequestBodyProperty(APIPropertyBase):
class APIRequestBody(BaseModel):
"""A model for a request body."""
description: Optional[str] = Field(alias="description")
"""The description of the request body."""
properties: List[APIRequestBodyProperty] = Field(alias="properties")
# E.g., application/json - we only support JSON at the moment.
media_type: str = Field(alias="media_type")
"""The media type of the request body."""
@classmethod
def _process_supported_media_type(
cls,
media_type_obj: MediaType,
spec: OpenAPISpec,
) -> List[APIRequestBodyProperty]:
"""Process the media type of the request body."""
from openapi_pydantic import Reference
references_used = []
schema = media_type_obj.media_type_schema
if isinstance(schema, Reference):
references_used.append(schema.ref.split("/")[-1])
schema = spec.get_referenced_schema(schema)
if schema is None:
raise ValueError(
f"Could not resolve schema for media type: {media_type_obj}"
)
api_request_body_properties = []
required_properties = schema.required or []
if schema.type == "object" and schema.properties:
for prop_name, prop_schema in schema.properties.items():
if isinstance(prop_schema, Reference):
prop_schema = spec.get_referenced_schema(prop_schema)
api_request_body_properties.append(
APIRequestBodyProperty.from_schema(
schema=prop_schema,
name=prop_name,
required=prop_name in required_properties,
spec=spec,
)
)
else:
api_request_body_properties.append(
APIRequestBodyProperty(
name="body",
required=True,
type=schema.type,
default=schema.default,
description=schema.description,
properties=[],
references_used=references_used,
)
)
return api_request_body_properties
@classmethod
def from_request_body(
cls, request_body: RequestBody, spec: OpenAPISpec
) -> "APIRequestBody":
"""Instantiate from an OpenAPI RequestBody."""
properties = []
for media_type, media_type_obj in request_body.content.items():
if media_type not in _SUPPORTED_MEDIA_TYPES:
continue
api_request_body_properties = cls._process_supported_media_type(
media_type_obj,
spec,
)
properties.extend(api_request_body_properties)
return cls(
description=request_body.description,
properties=properties,
media_type=media_type,
)
# class APIRequestBodyProperty(APIPropertyBase):
# class APIRequestBody(BaseModel):
class APIOperation(BaseModel):
"""A model for a single API operation."""
operation_id: str = Field(alias="operation_id")
"""The unique identifier of the operation."""
description: Optional[str] = Field(alias="description")
"""The description of the operation."""
base_url: str = Field(alias="base_url")
"""The base URL of the operation."""
path: str = Field(alias="path")
"""The path of the operation."""
method: HTTPVerb = Field(alias="method")
"""The HTTP method of the operation."""
properties: Sequence[APIProperty] = Field(alias="properties")
# TODO: Add parse in used components to be able to specify what type of
# referenced object it is.
# """The properties of the operation."""
# components: Dict[str, BaseModel] = Field(alias="components")
request_body: Optional[APIRequestBody] = Field(alias="request_body")
"""The request body of the operation."""
@staticmethod
def _get_properties_from_parameters(
parameters: List[Parameter], spec: OpenAPISpec
) -> List[APIProperty]:
"""Get the properties of the operation."""
properties = []
for param in parameters:
if APIProperty.is_supported_location(param.param_in):
properties.append(APIProperty.from_parameter(param, spec))
elif param.required:
raise ValueError(
INVALID_LOCATION_TEMPL.format(
location=param.param_in, name=param.name
)
)
else:
logger.warning(
INVALID_LOCATION_TEMPL.format(
location=param.param_in, name=param.name
)
+ " Ignoring optional parameter"
)
pass
return properties
@classmethod
def from_openapi_url(
cls,
spec_url: str,
path: str,
method: str,
) -> "APIOperation":
"""Create an APIOperation from an OpenAPI URL."""
spec = OpenAPISpec.from_url(spec_url)
return cls.from_openapi_spec(spec, path, method)
@classmethod
def from_openapi_spec(
cls,
spec: OpenAPISpec,
path: str,
method: str,
) -> "APIOperation":
"""Create an APIOperation from an OpenAPI spec."""
operation = spec.get_operation(path, method)
parameters = spec.get_parameters_for_operation(operation)
properties = cls._get_properties_from_parameters(parameters, spec)
operation_id = OpenAPISpec.get_cleaned_operation_id(operation, path, method)
request_body = spec.get_request_body_for_operation(operation)
api_request_body = (
APIRequestBody.from_request_body(request_body, spec)
if request_body is not None
else None
)
description = operation.description or operation.summary
if not description and spec.paths is not None:
description = spec.paths[path].description or spec.paths[path].summary
return cls(
operation_id=operation_id,
description=description or "",
base_url=spec.base_url,
path=path,
method=method,
properties=properties,
request_body=api_request_body,
)
@staticmethod
def ts_type_from_python(type_: SCHEMA_TYPE) -> str:
if type_ is None:
# TODO: Handle Nones better. These often result when
# parsing specs that are < v3
return "any"
elif isinstance(type_, str):
return {
"str": "string",
"integer": "number",
"float": "number",
"date-time": "string",
}.get(type_, type_)
elif isinstance(type_, tuple):
return f"Array<{APIOperation.ts_type_from_python(type_[0])}>"
elif isinstance(type_, type) and issubclass(type_, Enum):
return " | ".join([f"'{e.value}'" for e in type_])
else:
return str(type_)
def _format_nested_properties(
self, properties: List[APIRequestBodyProperty], indent: int = 2
) -> str:
"""Format nested properties."""
formatted_props = []
for prop in properties:
prop_name = prop.name
prop_type = self.ts_type_from_python(prop.type)
prop_required = "" if prop.required else "?"
prop_desc = f"/* {prop.description} */" if prop.description else ""
if prop.properties:
nested_props = self._format_nested_properties(
prop.properties, indent + 2
)
prop_type = f"{{\n{nested_props}\n{' ' * indent}}}"
formatted_props.append(
f"{prop_desc}\n{' ' * indent}{prop_name}"
f"{prop_required}: {prop_type},"
)
return "\n".join(formatted_props)
def to_typescript(self) -> str:
"""Get typescript string representation of the operation."""
operation_name = self.operation_id
params = []
if self.request_body:
formatted_request_body_props = self._format_nested_properties(
self.request_body.properties
)
params.append(formatted_request_body_props)
for prop in self.properties:
prop_name = prop.name
prop_type = self.ts_type_from_python(prop.type)
prop_required = "" if prop.required else "?"
prop_desc = f"/* {prop.description} */" if prop.description else ""
params.append(f"{prop_desc}\n\t\t{prop_name}{prop_required}: {prop_type},")
formatted_params = "\n".join(params).strip()
description_str = f"/* {self.description} */" if self.description else ""
typescript_definition = f"""
{description_str}
type {operation_name} = (_: {{
{formatted_params}
}}) => any;
"""
return typescript_definition.strip()
@property
def query_params(self) -> List[str]:
return [
property.name
for property in self.properties
if property.location == APIPropertyLocation.QUERY
]
@property
def path_params(self) -> List[str]:
return [
property.name
for property in self.properties
if property.location == APIPropertyLocation.PATH
]
@property
def body_params(self) -> List[str]:
if self.request_body is None:
return []
return [prop.name for prop in self.request_body.properties]