langchain[minor], core[minor]: update json, pydantic parser. add openai-json structured output runnable (#16914)

5 months ago · 852973d616
parent e22c4d4eb0
commit 852973d616
8 changed files with 370 additions and 270 deletions
--- a/libs/core/langchain_core/output_parsers/json.py
+++ b/libs/core/langchain_core/output_parsers/json.py
@ -35,7 +35,7 @@ def _custom_parser(multiline_string: str) -> str:
        multiline_string = multiline_string.decode()

    multiline_string = re.sub(
-        r'("action_input"\:\s*")(.*)(")',
+        r'("action_input"\:\s*")(.*?)(")',
        _replace_new_line,
        multiline_string,
        flags=re.DOTALL,
@ -138,7 +138,7 @@ def parse_json_markdown(
        The parsed JSON object as a Python dictionary.
    """
    # Try to find JSON string within triple backticks
-    match = re.search(r"```(json)?(.*)(```)?", json_string, re.DOTALL)
+    match = re.search(r"```(json)?(.*)", json_string, re.DOTALL)

    # If no match found, assume the entire string is a JSON string
    if match is None:
@ -148,7 +148,7 @@ def parse_json_markdown(
        json_str = match.group(2)

    # Strip whitespace and newlines from the start and end
-    json_str = json_str.strip()
+    json_str = json_str.strip().strip("`")

    # handle newlines and other special characters inside the returned value
    json_str = _custom_parser(json_str)
@ -211,7 +211,8 @@ class JsonOutputParser(BaseCumulativeTransformOutputParser[Any]):
            try:
                return parse_json_markdown(text)
            except JSONDecodeError as e:
-                raise OutputParserException(f"Invalid json output: {text}") from e
+                msg = f"Invalid json output: {text}"
+                raise OutputParserException(msg, llm_output=text) from e

    def parse(self, text: str) -> Any:
        return self.parse_result([Generation(text=text)])
--- a/libs/core/tests/unit_tests/output_parsers/test_json.py
+++ b/libs/core/tests/unit_tests/output_parsers/test_json.py
@ -70,21 +70,7 @@ JSON_WITH_MARKDOWN_CODE_BLOCK = """```json
 JSON_WITH_MARKDOWN_CODE_BLOCK_AND_NEWLINES = """```json
 {
    "action": "Final Answer",
-    "action_input": "```bar\n<div id="1" class=\"value\">\n\ttext\n</div>```"
-}
-```"""
-
-JSON_WITH_UNESCAPED_QUOTES_IN_NESTED_JSON = """```json
-{
-    "action": "Final Answer",
-    "action_input": "{"foo": "bar", "bar": "foo"}"
-}
-```"""
-
-JSON_WITH_ESCAPED_QUOTES_IN_NESTED_JSON = """```json
-{
-    "action": "Final Answer",
-    "action_input": "{\"foo\": \"bar\", \"bar\": \"foo\"}"
+    "action_input": "```bar\n<div id=\\"1\\" class=\\"value\\">\n\ttext\n</div>```"
 }
 ```"""

@ -202,6 +188,8 @@ def test_parse_json_with_code_blocks() -> None:
    parsed = parse_json_markdown(JSON_WITH_MARKDOWN_CODE_BLOCK)
    assert parsed == {"foo": "```bar```"}

+
+def test_parse_json_with_code_blocks_and_newlines() -> None:
    parsed = parse_json_markdown(JSON_WITH_MARKDOWN_CODE_BLOCK_AND_NEWLINES)

    assert parsed == {
@ -211,8 +199,6 @@ def test_parse_json_with_code_blocks() -> None:


 TEST_CASES_ESCAPED_QUOTES = [
-    JSON_WITH_UNESCAPED_QUOTES_IN_NESTED_JSON,
-    JSON_WITH_ESCAPED_QUOTES_IN_NESTED_JSON,
    JSON_WITH_ESCAPED_DOUBLE_QUOTES_IN_NESTED_JSON,
 ]

--- a/libs/langchain/langchain/chains/openai_functions/init.py
+++ b/libs/langchain/langchain/chains/openai_functions/init.py
@ -1,10 +1,7 @@
 from langchain.chains.openai_functions.base import (
    convert_to_openai_function,
    create_openai_fn_chain,
-    create_openai_fn_runnable,
    create_structured_output_chain,
-    create_structured_output_runnable,
-    get_openai_output_parser,
 )
 from langchain.chains.openai_functions.citation_fuzzy_match import (
    create_citation_fuzzy_match_chain,
@ -21,6 +18,11 @@ from langchain.chains.openai_functions.tagging import (
    create_tagging_chain,
    create_tagging_chain_pydantic,
 )
+from langchain.chains.structured_output.base import (
+    create_openai_fn_runnable,
+    create_structured_output_runnable,
+    get_openai_output_parser,
+)

 __all__ = [
    "convert_to_openai_function",
@ -33,7 +35,7 @@ __all__ = [
    "create_qa_with_sources_chain",
    "create_structured_output_chain",
    "create_openai_fn_chain",
-    "create_structured_output_runnable",
-    "create_openai_fn_runnable",
-    "get_openai_output_parser",
+    "create_structured_output_runnable",  # backwards compatibility
+    "create_openai_fn_runnable",  # backwards compatibility
+    "get_openai_output_parser",  # backwards compatibility
 ]
--- a/libs/langchain/langchain/chains/openai_functions/base.py
+++ b/libs/langchain/langchain/chains/openai_functions/base.py
@ -12,229 +12,34 @@ from typing import (
 from langchain_core._api import deprecated
 from langchain_core.language_models import BaseLanguageModel
 from langchain_core.output_parsers import (
-    BaseGenerationOutputParser,
    BaseLLMOutputParser,
-    BaseOutputParser,
 )
 from langchain_core.prompts import BasePromptTemplate
 from langchain_core.pydantic_v1 import BaseModel
-from langchain_core.runnables import Runnable
 from langchain_core.utils.function_calling import (
    PYTHON_TO_JSON_TYPES,
    convert_to_openai_function,
 )

 from langchain.chains import LLMChain
+from langchain.chains.structured_output.base import (
+    create_openai_fn_runnable,
+    create_structured_output_runnable,
+    get_openai_output_parser,
+)
 from langchain.output_parsers.openai_functions import (
-    JsonOutputFunctionsParser,
    PydanticAttrOutputFunctionsParser,
-    PydanticOutputFunctionsParser,
 )

-
-def get_openai_output_parser(
-    functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
-) -> Union[BaseOutputParser, BaseGenerationOutputParser]:
-    """Get the appropriate function output parser given the user functions.
-
-    Args:
-        functions: Sequence where element is a dictionary, a pydantic.BaseModel class,
-            or a Python function. If a dictionary is passed in, it is assumed to
-            already be a valid OpenAI function.
-
-    Returns:
-        A PydanticOutputFunctionsParser if functions are Pydantic classes, otherwise
-            a JsonOutputFunctionsParser. If there's only one function and it is
-            not a Pydantic class, then the output parser will automatically extract
-            only the function arguments and not the function name.
-    """
-    function_names = [convert_to_openai_function(f)["name"] for f in functions]
-    if isinstance(functions[0], type) and issubclass(functions[0], BaseModel):
-        if len(functions) > 1:
-            pydantic_schema: Union[Dict, Type[BaseModel]] = {
-                name: fn for name, fn in zip(function_names, functions)
-            }
-        else:
-            pydantic_schema = functions[0]
-        output_parser: Union[
-            BaseOutputParser, BaseGenerationOutputParser
-        ] = PydanticOutputFunctionsParser(pydantic_schema=pydantic_schema)
-    else:
-        output_parser = JsonOutputFunctionsParser(args_only=len(functions) <= 1)
-    return output_parser
-
-
-def create_openai_fn_runnable(
-    functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
-    llm: Runnable,
-    prompt: BasePromptTemplate,
-    *,
-    enforce_single_function_usage: bool = True,
-    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
-    **kwargs: Any,
-) -> Runnable:
-    """Create a runnable sequence that uses OpenAI functions.
-
-    Args:
-        functions: A sequence of either dictionaries, pydantic.BaseModels classes, or
-            Python functions. If dictionaries are passed in, they are assumed to
-            already be a valid OpenAI functions. If only a single
-            function is passed in, then it will be enforced that the model use that
-            function. pydantic.BaseModels and Python functions should have docstrings
-            describing what the function does. For best results, pydantic.BaseModels
-            should have descriptions of the parameters and Python functions should have
-            Google Python style args descriptions in the docstring. Additionally,
-            Python functions should only use primitive types (str, int, float, bool) or
-            pydantic.BaseModels for arguments.
-        llm: Language model to use, assumed to support the OpenAI function-calling API.
-        prompt: BasePromptTemplate to pass to the model.
-        enforce_single_function_usage: only used if a single function is passed in. If
-            True, then the model will be forced to use the given function. If False,
-            then the model will be given the option to use the given function or not.
-        output_parser: BaseLLMOutputParser to use for parsing model outputs. By default
-            will be inferred from the function types. If pydantic.BaseModels are passed
-            in, then the OutputParser will try to parse outputs using those. Otherwise
-            model outputs will simply be parsed as JSON. If multiple functions are
-            passed in and they are not pydantic.BaseModels, the chain output will
-            include both the name of the function that was returned and the arguments
-            to pass to the function.
-
-    Returns:
-        A runnable sequence that will pass in the given functions to the model when run.
-
-    Example:
-        .. code-block:: python
-
-                from typing import Optional
-
-                from langchain.chains.openai_functions import create_openai_fn_runnable
-                from langchain_community.chat_models import ChatOpenAI
-                from langchain_core.prompts import ChatPromptTemplate
-                from langchain_core.pydantic_v1 import BaseModel, Field
-
-
-                class RecordPerson(BaseModel):
-                    \"\"\"Record some identifying information about a person.\"\"\"
-
-                    name: str = Field(..., description="The person's name")
-                    age: int = Field(..., description="The person's age")
-                    fav_food: Optional[str] = Field(None, description="The person's favorite food")
-
-
-                class RecordDog(BaseModel):
-                    \"\"\"Record some identifying information about a dog.\"\"\"
-
-                    name: str = Field(..., description="The dog's name")
-                    color: str = Field(..., description="The dog's color")
-                    fav_food: Optional[str] = Field(None, description="The dog's favorite food")
-
-
-                llm = ChatOpenAI(model="gpt-4", temperature=0)
-                prompt = ChatPromptTemplate.from_messages(
-                    [
-                        ("system", "You are a world class algorithm for recording entities."),
-                        ("human", "Make calls to the relevant function to record the entities in the following input: {input}"),
-                        ("human", "Tip: Make sure to answer in the correct format"),
-                    ]
-                )
-                chain = create_openai_fn_runnable([RecordPerson, RecordDog], llm, prompt)
-                chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
-                # -> RecordDog(name="Harry", color="brown", fav_food="chicken")
-    """  # noqa: E501
-    if not functions:
-        raise ValueError("Need to pass in at least one function. Received zero.")
-    openai_functions = [convert_to_openai_function(f) for f in functions]
-    llm_kwargs: Dict[str, Any] = {"functions": openai_functions, **kwargs}
-    if len(openai_functions) == 1 and enforce_single_function_usage:
-        llm_kwargs["function_call"] = {"name": openai_functions[0]["name"]}
-    output_parser = output_parser or get_openai_output_parser(functions)
-    return prompt | llm.bind(**llm_kwargs) | output_parser
-
-
-def create_structured_output_runnable(
-    output_schema: Union[Dict[str, Any], Type[BaseModel]],
-    llm: Runnable,
-    prompt: BasePromptTemplate,
-    *,
-    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
-    **kwargs: Any,
-) -> Runnable:
-    """Create a runnable that uses an OpenAI function to get a structured output.
-
-    Args:
-        output_schema: Either a dictionary or pydantic.BaseModel class. If a dictionary
-            is passed in, it's assumed to already be a valid JsonSchema.
-            For best results, pydantic.BaseModels should have docstrings describing what
-            the schema represents and descriptions for the parameters.
-        llm: Language model to use, assumed to support the OpenAI function-calling API.
-        prompt: BasePromptTemplate to pass to the model.
-        output_parser: BaseLLMOutputParser to use for parsing model outputs. By default
-            will be inferred from the function types. If pydantic.BaseModels are passed
-            in, then the OutputParser will try to parse outputs using those. Otherwise
-            model outputs will simply be parsed as JSON.
-
-    Returns:
-        A runnable sequence that will pass the given function to the model when run.
-
-    Example:
-        .. code-block:: python
-
-                from typing import Optional
-
-                from langchain.chains.openai_functions import create_structured_output_runnable
-                from langchain_community.chat_models import ChatOpenAI
-                from langchain_core.prompts import ChatPromptTemplate
-                from langchain_core.pydantic_v1 import BaseModel, Field
-
-                class Dog(BaseModel):
-                    \"\"\"Identifying information about a dog.\"\"\"
-
-                    name: str = Field(..., description="The dog's name")
-                    color: str = Field(..., description="The dog's color")
-                    fav_food: Optional[str] = Field(None, description="The dog's favorite food")
-
-                llm = ChatOpenAI(model="gpt-3.5-turbo-0613", temperature=0)
-                prompt = ChatPromptTemplate.from_messages(
-                    [
-                        ("system", "You are a world class algorithm for extracting information in structured formats."),
-                        ("human", "Use the given format to extract information from the following input: {input}"),
-                        ("human", "Tip: Make sure to answer in the correct format"),
-                    ]
-                )
-                chain = create_structured_output_runnable(Dog, llm, prompt)
-                chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
-                # -> Dog(name="Harry", color="brown", fav_food="chicken")
-    """  # noqa: E501
-    if isinstance(output_schema, dict):
-        function: Any = {
-            "name": "output_formatter",
-            "description": (
-                "Output formatter. Should always be used to format your response to the"
-                " user."
-            ),
-            "parameters": output_schema,
-        }
-    else:
-
-        class _OutputFormatter(BaseModel):
-            """Output formatter. Should always be used to format your response to the user."""  # noqa: E501
-
-            output: output_schema  # type: ignore
-
-        function = _OutputFormatter
-        output_parser = output_parser or PydanticAttrOutputFunctionsParser(
-            pydantic_schema=_OutputFormatter, attr_name="output"
-        )
-    return create_openai_fn_runnable(
-        [function],
-        llm,
-        prompt,
-        output_parser=output_parser,
-        **kwargs,
-    )
-
-
-""" --- Legacy --- """
+__all__ = [
+    "get_openai_output_parser",
+    "create_openai_fn_runnable",
+    "create_structured_output_runnable",
+    "create_openai_fn_chain",  # deprecated
+    "create_structured_output_chain",  # deprecated
+    "PYTHON_TO_JSON_TYPES",  # backwards compatibility
+    "convert_to_openai_function",  # backwards compatibility
+]


@deprecated(since="0.1.1", removal="0.2.0", alternative="create_openai_fn_runnable")
@ -426,14 +231,3 @@ def create_structured_output_chain(
        output_parser=output_parser,
        **kwargs,
    )
-
-
-__all__ = [
-    "create_openai_fn_chain",
-    "create_openai_fn_runnable",
-    "create_structured_output_chain",
-    "create_structured_output_runnable",
-    "get_openai_output_parser",
-    "PYTHON_TO_JSON_TYPES",
-    "convert_to_openai_function",
-]
--- a/libs/langchain/langchain/chains/structured_output/init.py
+++ b/libs/langchain/langchain/chains/structured_output/init.py
@ -0,0 +1,6 @@
+from langchain.chains.structured_output.base import (
+    create_openai_fn_runnable,
+    create_structured_output_runnable,
+)
+
+__all__ = ["create_structured_output_runnable", "create_openai_fn_runnable"]
--- a/libs/langchain/langchain/chains/structured_output/base.py
+++ b/libs/langchain/langchain/chains/structured_output/base.py
@ -0,0 +1,321 @@
+import json
+from typing import Any, Callable, Dict, Literal, Optional, Sequence, Type, Union
+
+from langchain_core.output_parsers import (
+    BaseGenerationOutputParser,
+    BaseOutputParser,
+    JsonOutputParser,
+)
+from langchain_core.prompts import BasePromptTemplate
+from langchain_core.pydantic_v1 import BaseModel
+from langchain_core.runnables import Runnable
+from langchain_core.utils.function_calling import convert_to_openai_function
+
+from langchain.output_parsers import PydanticOutputParser
+from langchain.output_parsers.openai_functions import (
+    JsonOutputFunctionsParser,
+    PydanticAttrOutputFunctionsParser,
+    PydanticOutputFunctionsParser,
+)
+
+
+def create_openai_fn_runnable(
+    functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
+    llm: Runnable,
+    prompt: BasePromptTemplate,
+    *,
+    enforce_single_function_usage: bool = True,
+    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
+    **kwargs: Any,
+) -> Runnable:
+    """Compose a prompt, a model bound to OpenAI functions, and an output parser.
+
+    Args:
+        functions: Non-empty sequence of function specs. Each element is either a
+            dictionary (assumed to already be a valid OpenAI function), a
+            pydantic.BaseModel class, or a Python function. pydantic.BaseModels
+            and Python functions should have docstrings describing what the
+            function does. For best results, pydantic.BaseModels should describe
+            their parameters and Python functions should use Google Python style
+            args descriptions and only primitive (str, int, float, bool) or
+            pydantic.BaseModel argument types.
+        llm: Language model to use, assumed to support the OpenAI function-calling
+            API.
+        prompt: BasePromptTemplate to pass to the model.
+        enforce_single_function_usage: Only has an effect when exactly one
+            function is passed in. If True, the model is forced to call that
+            function. If False, the model may decide whether to call it.
+        output_parser: Parser applied to the model output. When omitted, it is
+            chosen by ``get_openai_output_parser`` based on ``functions``.
+        **kwargs: Extra keyword arguments bound to the model together with
+            ``functions``.
+
+    Returns:
+        A runnable sequence ``prompt | bound llm | output parser``.
+
+    Raises:
+        ValueError: If ``functions`` is empty.
+
+    Example:
+        .. code-block:: python
+
+                from typing import Optional
+
+                from langchain.chains.structured_output import create_openai_fn_runnable
+                from langchain_openai import ChatOpenAI
+                from langchain_core.prompts import ChatPromptTemplate
+                from langchain_core.pydantic_v1 import BaseModel, Field
+
+
+                class RecordPerson(BaseModel):
+                    '''Record some identifying information about a person.'''
+
+                    name: str = Field(..., description="The person's name")
+                    age: int = Field(..., description="The person's age")
+                    fav_food: Optional[str] = Field(None, description="The person's favorite food")
+
+
+                class RecordDog(BaseModel):
+                    '''Record some identifying information about a dog.'''
+
+                    name: str = Field(..., description="The dog's name")
+                    color: str = Field(..., description="The dog's color")
+                    fav_food: Optional[str] = Field(None, description="The dog's favorite food")
+
+
+                llm = ChatOpenAI(model="gpt-4", temperature=0)
+                prompt = ChatPromptTemplate.from_messages(
+                    [
+                        ("system", "You are a world class algorithm for recording entities."),
+                        ("human", "Make calls to the relevant function to record the entities in the following input: {input}"),
+                        ("human", "Tip: Make sure to answer in the correct format"),
+                    ]
+                )
+                chain = create_openai_fn_runnable([RecordPerson, RecordDog], llm, prompt)
+                chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
+                # -> RecordDog(name="Harry", color="brown", fav_food="chicken")
+    """  # noqa: E501
+    if not functions:
+        raise ValueError("Need to pass in at least one function. Received zero.")
+
+    fn_schemas = [convert_to_openai_function(fn) for fn in functions]
+    bind_kwargs: Dict[str, Any] = {"functions": fn_schemas, **kwargs}
+
+    # With a single function the model can be pinned to it by name.
+    only_one_function = len(fn_schemas) == 1
+    if only_one_function and enforce_single_function_usage:
+        bind_kwargs["function_call"] = {"name": fn_schemas[0]["name"]}
+
+    parser = output_parser if output_parser else get_openai_output_parser(functions)
+    return prompt | llm.bind(**bind_kwargs) | parser
+
+
+# TODO: implement mode='openai-tools'.
+def create_structured_output_runnable(
+    output_schema: Union[Dict[str, Any], Type[BaseModel]],
+    llm: Runnable,
+    prompt: BasePromptTemplate,
+    *,
+    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
+    mode: Literal["openai-functions", "openai-json"] = "openai-functions",
+    enforce_single_function_usage: bool = True,
+    **kwargs: Any,
+) -> Runnable:
+    """Build a runnable that extracts structured output matching a schema.
+
+    Args:
+        output_schema: Either a dictionary or pydantic.BaseModel class. If a
+            dictionary is passed in, it's assumed to already be a valid JsonSchema.
+            For best results, pydantic.BaseModels should have docstrings describing
+            what the schema represents and descriptions for the parameters.
+        llm: Language model to use. Assumed to support the OpenAI function-calling
+            API if mode is 'openai-functions'. Assumed to support the OpenAI
+            response_format parameter if mode is 'openai-json'.
+        prompt: BasePromptTemplate to pass to the model. If mode is 'openai-json'
+            and the prompt has an input variable 'output_schema', the given
+            output_schema is rendered as a JsonSchema and inserted in the prompt.
+        output_parser: Output parser to use for parsing model outputs. By default
+            it is inferred from output_schema: a pydantic.BaseModel class is
+            parsed into that class, anything else is parsed as JSON.
+        mode: How structured outputs are extracted from the model. With
+            'openai-functions', OpenAI function calling is used. With
+            'openai-json', the model is called with response_format set to JSON.
+        enforce_single_function_usage: Only used if mode is 'openai-functions'.
+            If True, the model is forced to use the output function. If False,
+            the model is given the option to use it or not.
+        **kwargs: Additional named arguments forwarded to the mode-specific
+            builder.
+
+    Returns:
+        A runnable sequence that will return a structured output matching the
+            given output_schema.
+
+    Raises:
+        ValueError: If mode is neither 'openai-functions' nor 'openai-json'.
+
+    OpenAI functions example:
+        .. code-block:: python
+
+                from typing import Optional
+
+                from langchain.chains.structured_output import create_structured_output_runnable
+                from langchain_openai import ChatOpenAI
+                from langchain_core.prompts import ChatPromptTemplate
+                from langchain_core.pydantic_v1 import BaseModel, Field
+
+                class Dog(BaseModel):
+                    '''Identifying information about a dog.'''
+
+                    name: str = Field(..., description="The dog's name")
+                    color: str = Field(..., description="The dog's color")
+                    fav_food: Optional[str] = Field(None, description="The dog's favorite food")
+
+                llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
+                prompt = ChatPromptTemplate.from_messages(
+                    [
+                        ("system", "You are a world class algorithm for extracting information in structured formats."),
+                        ("human", "Use the given format to extract information from the following input: {input}"),
+                        ("human", "Tip: Make sure to answer in the correct format"),
+                    ]
+                )
+                chain = create_structured_output_runnable(Dog, llm, prompt, mode="openai-functions")
+                chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
+                # -> Dog(name="Harry", color="brown", fav_food="chicken")
+
+    OpenAI json response format example:
+        .. code-block:: python
+
+                from typing import Optional
+
+                from langchain.chains.structured_output import create_structured_output_runnable
+                from langchain_openai import ChatOpenAI
+                from langchain_core.prompts import ChatPromptTemplate
+                from langchain_core.pydantic_v1 import BaseModel, Field
+
+                class Dog(BaseModel):
+                    '''Identifying information about a dog.'''
+
+                    name: str = Field(..., description="The dog's name")
+                    color: str = Field(..., description="The dog's color")
+                    fav_food: Optional[str] = Field(None, description="The dog's favorite food")
+
+                llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
+                system = '''You are a world class assistant for extracting information in structured JSON formats.
+
+                Extract a valid JSON blob from the user input that matches the following JSON Schema:
+
+                {output_schema}'''
+                prompt = ChatPromptTemplate.from_messages(
+                    [
+                        ("system", system),
+                        ("human", "{input}"),
+                    ]
+                )
+                chain = create_structured_output_runnable(Dog, llm, prompt, mode="openai-json")
+                chain.invoke({"input": "Harry was a chubby brown beagle who loved chicken"})
+                # -> Dog(name="Harry", color="brown", fav_food="chicken")
+    """  # noqa: E501
+    if mode == "openai-functions":
+        return _create_openai_functions_structured_output_runnable(
+            output_schema,
+            llm,
+            prompt,
+            output_parser=output_parser,
+            enforce_single_function_usage=enforce_single_function_usage,
+            **kwargs,
+        )
+
+    if mode == "openai-json":
+        # `enforce_single_function_usage` has no meaning in JSON mode and is
+        # deliberately not forwarded.
+        return _create_openai_json_runnable(
+            output_schema, llm, prompt, output_parser=output_parser, **kwargs
+        )
+
+    raise ValueError(
+        f"Invalid mode {mode}. Expected one of 'openai-functions', "
+        f"'openai-json'."
+    )
+
+
+def get_openai_output_parser(
+    functions: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable]],
+) -> Union[BaseOutputParser, BaseGenerationOutputParser]:
+    """Pick the function-output parser that matches the given user functions.
+
+    The decision is made from the first element only: if it is a
+    pydantic.BaseModel class, a pydantic-based parser is used, otherwise a
+    JSON-based one.
+
+    Args:
+        functions: Sequence where each element is a dictionary, a
+            pydantic.BaseModel class, or a Python function. A dictionary is
+            assumed to already be a valid OpenAI function.
+
+    Returns:
+        A PydanticOutputFunctionsParser if the functions are Pydantic classes
+            (keyed by function name when more than one is given), otherwise a
+            JsonOutputFunctionsParser. With a single non-Pydantic function the
+            JSON parser returns only the function arguments, not the function
+            name.
+    """
+    # Converted eagerly so that every entry is run through the converter up front.
+    names = [convert_to_openai_function(fn)["name"] for fn in functions]
+    first = functions[0]
+
+    if not (isinstance(first, type) and issubclass(first, BaseModel)):
+        return JsonOutputFunctionsParser(args_only=len(functions) <= 1)
+
+    schema: Union[Dict, Type[BaseModel]] = (
+        dict(zip(names, functions)) if len(functions) > 1 else first
+    )
+    return PydanticOutputFunctionsParser(pydantic_schema=schema)
+
+
+def _create_openai_json_runnable(
+    output_schema: Union[Dict[str, Any], Type[BaseModel]],
+    llm: Runnable,
+    prompt: BasePromptTemplate,
+    *,
+    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
+    **kwargs: Any,
+) -> Runnable:
+    """Create a runnable that extracts structured output via OpenAI JSON mode.
+
+    Args:
+        output_schema: Either a JSON Schema dictionary or a pydantic.BaseModel
+            class describing the desired output.
+        llm: Language model to use. Assumed to support the OpenAI
+            ``response_format`` parameter.
+        prompt: Prompt to pass to the model. If it declares an ``output_schema``
+            input variable, that variable is pre-filled with the schema rendered
+            as indented JSON.
+        output_parser: Parser for the model output. Defaults to a
+            PydanticOutputParser when ``output_schema`` is a pydantic.BaseModel
+            class and to a JsonOutputParser otherwise.
+        **kwargs: Additional keyword arguments bound to the model together with
+            ``response_format``. Accepted because the public
+            ``create_structured_output_runnable`` forwards its ``**kwargs`` here.
+
+    Returns:
+        A runnable sequence ``prompt | llm | output_parser``.
+    """
+    if isinstance(output_schema, type) and issubclass(output_schema, BaseModel):
+        output_parser = output_parser or PydanticOutputParser(
+            pydantic_object=output_schema,
+        )
+        schema_as_dict = convert_to_openai_function(output_schema)["parameters"]
+    else:
+        output_parser = output_parser or JsonOutputParser()
+        schema_as_dict = output_schema
+
+    if "output_schema" in prompt.input_variables:
+        prompt = prompt.partial(output_schema=json.dumps(schema_as_dict, indent=2))
+
+    # `response_format` is merged last so caller kwargs can neither override JSON
+    # mode nor trigger a duplicate-keyword error.
+    llm = llm.bind(**{**kwargs, "response_format": {"type": "json_object"}})
+    return prompt | llm | output_parser
+
+
+def _create_openai_functions_structured_output_runnable(
+    output_schema: Union[Dict[str, Any], Type[BaseModel]],
+    llm: Runnable,
+    prompt: BasePromptTemplate,
+    *,
+    output_parser: Optional[Union[BaseOutputParser, BaseGenerationOutputParser]] = None,
+    **kwargs: Any,
+) -> Runnable:
+    """Create a structured-output runnable backed by one OpenAI function.
+
+    The schema is wrapped in a single "output formatter" function which is then
+    handed to ``create_openai_fn_runnable``.
+
+    Args:
+        output_schema: Either a JSON Schema dictionary, used directly as the
+            function parameters, or a class that is wrapped as the ``output``
+            field of a generated pydantic model.
+        llm: Language model to bind the function to.
+        prompt: Prompt to pass to the model.
+        output_parser: Parser for the model output. For non-dict schemas it
+            defaults to a PydanticAttrOutputFunctionsParser that returns the
+            ``output`` attribute; for dict schemas it is left for
+            ``create_openai_fn_runnable`` to infer.
+        **kwargs: Forwarded unchanged to ``create_openai_fn_runnable``.
+
+    Returns:
+        The runnable produced by ``create_openai_fn_runnable``.
+    """
+    function: Any
+    if not isinstance(output_schema, dict):
+
+        class _OutputFormatter(BaseModel):
+            """Output formatter. Should always be used to format your response to the user."""  # noqa: E501
+
+            output: output_schema  # type: ignore
+
+        function = _OutputFormatter
+        if not output_parser:
+            # Unwrap the `output` field so callers get the schema instance itself.
+            output_parser = PydanticAttrOutputFunctionsParser(
+                pydantic_schema=_OutputFormatter, attr_name="output"
+            )
+    else:
+        function = {
+            "name": "output_formatter",
+            "description": (
+                "Output formatter. Should always be used to format your "
+                "response to the user."
+            ),
+            "parameters": output_schema,
+        }
+
+    return create_openai_fn_runnable(
+        [function], llm, prompt, output_parser=output_parser, **kwargs
+    )
--- a/libs/langchain/langchain/output_parsers/pydantic.py
+++ b/libs/langchain/langchain/output_parsers/pydantic.py
@ -1,42 +1,32 @@
 import json
-import re
-from typing import Type, TypeVar
+from typing import Any, List, Type

 from langchain_core.exceptions import OutputParserException
-from langchain_core.output_parsers import BaseOutputParser
+from langchain_core.output_parsers import JsonOutputParser
+from langchain_core.outputs import Generation
 from langchain_core.pydantic_v1 import BaseModel, ValidationError

 from langchain.output_parsers.format_instructions import PYDANTIC_FORMAT_INSTRUCTIONS

-T = TypeVar("T", bound=BaseModel)

-
-class PydanticOutputParser(BaseOutputParser[T]):
+class PydanticOutputParser(JsonOutputParser):
    """Parse an output using a pydantic model."""

-    pydantic_object: Type[T]
+    pydantic_object: Type[BaseModel]
    """The pydantic model to parse.
    
    Attention: To avoid potential compatibility issues, it's recommended to use
        pydantic <2 or leverage the v1 namespace in pydantic >= 2.
    """

-    def parse(self, text: str) -> T:
+    def parse_result(self, result: List[Generation], *, partial: bool = False) -> Any:
+        """Parse the generations as JSON, then validate against the pydantic model.
+
+        Args:
+            result: Generations to parse; forwarded to the parent JSON parser.
+            partial: Currently unused.
+                NOTE(review): not forwarded to the parent parser -- confirm
+                whether partial parsing should be supported here.
+
+        Returns:
+            An instance of ``pydantic_object`` built from the parsed JSON.
+
+        Raises:
+            OutputParserException: If the parsed JSON fails model validation.
+        """
+        json_object = super().parse_result(result)
        try:
-            # Greedy search for 1st json candidate.
-            match = re.search(
-                r"\{.*\}", text.strip(), re.MULTILINE | re.IGNORECASE | re.DOTALL
-            )
-            json_str = ""
-            if match:
-                json_str = match.group()
-            json_object = json.loads(json_str, strict=False)
            return self.pydantic_object.parse_obj(json_object)
-
-        except (json.JSONDecodeError, ValidationError) as e:
+        except ValidationError as e:
            name = self.pydantic_object.__name__
-            msg = f"Failed to parse {name} from completion {text}. Got: {e}"
-            raise OutputParserException(msg, llm_output=text)
+            msg = f"Failed to parse {name} from completion {json_object}. Got: {e}"
+            # `llm_output` is a text field: serialise the parsed object instead of
+            # passing the raw Python structure through.
+            llm_output = json.dumps(json_object)
+            raise OutputParserException(msg, llm_output=llm_output) from e

    def get_format_instructions(self) -> str:
        schema = self.pydantic_object.schema()
@ -57,6 +47,6 @@ class PydanticOutputParser(BaseOutputParser[T]):
        return "pydantic"

    @property
-    def OutputType(self) -> Type[T]:
+    def OutputType(self) -> Type[BaseModel]:
        """Return the pydantic model."""
        return self.pydantic_object
--- a/libs/langchain/tests/unit_tests/output_parsers/test_pydantic_parser.py
+++ b/libs/langchain/tests/unit_tests/output_parsers/test_pydantic_parser.py
@ -53,7 +53,7 @@ DEF_EXPECTED_RESULT = TestModel(
 def test_pydantic_output_parser() -> None:
    """Test PydanticOutputParser."""

-    pydantic_parser: PydanticOutputParser[TestModel] = PydanticOutputParser(
+    pydantic_parser: PydanticOutputParser = PydanticOutputParser(
        pydantic_object=TestModel
    )

@ -65,7 +65,7 @@ def test_pydantic_output_parser() -> None:
 def test_pydantic_output_parser_fail() -> None:
    """Test PydanticOutputParser where completion result fails schema validation."""

-    pydantic_parser: PydanticOutputParser[TestModel] = PydanticOutputParser(
+    pydantic_parser: PydanticOutputParser = PydanticOutputParser(
        pydantic_object=TestModel
    )