Wfh/json schema evaluation (#12389)

Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
11 months ago · 922d7910ef
parent afcc12d99e
commit 922d7910ef
10 changed files with 289 additions and 42 deletions
--- a/docs/docs/guides/evaluation/string/json.ipynb
+++ b/docs/docs/guides/evaluation/string/json.ipynb
@ -221,7 +221,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 8,
   "id": "7a8f3ec5-1cde-4b0e-80cd-ac0ac290d375",
   "metadata": {},
   "outputs": [
@ -261,11 +261,102 @@
    "print(result)"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "6b15d18e-9b97-434f-905c-70acd4c35aea",
+   "metadata": {},
+   "source": [
+    "## JsonSchemaEvaluator\n",
+    "\n",
+    "The `JsonSchemaEvaluator` validates a JSON prediction against a provided JSON schema. If the prediction conforms to the schema, it returns a score of True (indicating no errors). Otherwise, it returns a score of False (indicating an error).\n",
+    "\n",
+    "### Overview:\n",
+    "- **Requires Input?**: No\n",
+    "- **Requires Reference?**: Yes (A JSON schema)\n",
+    "- **Score**: True (No errors) or False (Error occurred)"
+   ]
+  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 12,
   "id": "85afcf33-d2f4-406e-9d8f-15dc0a4772f2",
   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'score': True}\n"
+     ]
+    }
+   ],
+   "source": [
+    "from langchain.evaluation import JsonSchemaEvaluator\n",
+    "\n",
+    "evaluator = JsonSchemaEvaluator()\n",
+    "# Equivalently\n",
+    "# evaluator = load_evaluator(\"json_schema_validation\")\n",
+    "\n",
+    "result = evaluator.evaluate_strings(\n",
+    "    prediction='{\"name\": \"John\", \"age\": 30}',\n",
+    "    reference={\n",
+    "        \"type\": \"object\",\n",
+    "        \"properties\": {\"name\": {\"type\": \"string\"}, \"age\": {\"type\": \"integer\"}},\n",
+    "    },\n",
+    ")\n",
+    "print(result)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "bb5b89f6-0c87-4335-9091-55fd67a0565f",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'score': True}\n"
+     ]
+    }
+   ],
+   "source": [
+    "result = evaluator.evaluate_strings(\n",
+    "    prediction='{\"name\": \"John\", \"age\": 30}',\n",
+    "    reference='{\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"age\": {\"type\": \"integer\"}}}',\n",
+    ")\n",
+    "print(result)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "ff914d24-36bc-482a-a9ba-259cd0dd2a52",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "{'score': False, 'reasoning': \"<ValidationError: '30 is less than the minimum of 66'>\"}\n"
+     ]
+    }
+   ],
+   "source": [
+    "result = evaluator.evaluate_strings(\n",
+    "    prediction='{\"name\": \"John\", \"age\": 30}',\n",
+    "    reference='{\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"},'\n",
+    "    '\"age\": {\"type\": \"integer\", \"minimum\": 66}}}',\n",
+    ")\n",
+    "print(result)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b073f12d-4603-481c-8081-fab1af6bfcfe",
+   "metadata": {},
   "outputs": [],
   "source": []
  }
--- a/libs/langchain/langchain/evaluation/__init__.py
+++ b/libs/langchain/langchain/evaluation/__init__.py
@ -74,6 +74,7 @@ from langchain.evaluation.parsing.base import (
    JsonValidityEvaluator,
 )
 from langchain.evaluation.parsing.json_distance import JsonEditDistanceEvaluator
+from langchain.evaluation.parsing.json_schema import JsonSchemaEvaluator
 from langchain.evaluation.qa import ContextQAEvalChain, CotQAEvalChain, QAEvalChain
 from langchain.evaluation.regex_match.base import RegexMatchStringEvaluator
 from langchain.evaluation.schema import (
@ -122,4 +123,5 @@ __all__ = [
    "JsonValidityEvaluator",
    "JsonEqualityEvaluator",
    "JsonEditDistanceEvaluator",
+    "JsonSchemaEvaluator",
 ]
--- a/libs/langchain/langchain/evaluation/loading.py
+++ b/libs/langchain/langchain/evaluation/loading.py
@ -20,6 +20,7 @@ from langchain.evaluation.parsing.base import (
    JsonValidityEvaluator,
 )
 from langchain.evaluation.parsing.json_distance import JsonEditDistanceEvaluator
+from langchain.evaluation.parsing.json_schema import JsonSchemaEvaluator
 from langchain.evaluation.qa import ContextQAEvalChain, CotQAEvalChain, QAEvalChain
 from langchain.evaluation.regex_match.base import RegexMatchStringEvaluator
 from langchain.evaluation.schema import EvaluatorType, LLMEvalChain, StringEvaluator
@ -88,6 +89,7 @@ _EVALUATOR_MAP: Dict[
    EvaluatorType.JSON_VALIDITY: JsonValidityEvaluator,
    EvaluatorType.JSON_EQUALITY: JsonEqualityEvaluator,
    EvaluatorType.JSON_EDIT_DISTANCE: JsonEditDistanceEvaluator,
+    EvaluatorType.JSON_SCHEMA_VALIDATION: JsonSchemaEvaluator,
    EvaluatorType.REGEX_MATCH: RegexMatchStringEvaluator,
    EvaluatorType.EXACT_MATCH: ExactMatchStringEvaluator,
 }
--- a/libs/langchain/langchain/evaluation/parsing/base.py
+++ b/libs/langchain/langchain/evaluation/parsing/base.py
@ -51,7 +51,7 @@ class JsonValidityEvaluator(StringEvaluator):
        prediction: str,
        input: Optional[str] = None,
        reference: Optional[str] = None,
-        **kwargs: Any
+        **kwargs: Any,
    ) -> dict:
        """Evaluate the prediction string.

@ -134,7 +134,7 @@ class JsonEqualityEvaluator(StringEvaluator):
        prediction: str,
        input: Optional[str] = None,
        reference: Optional[str] = None,
-        **kwargs: Any
+        **kwargs: Any,
    ) -> dict:
        """Evaluate the prediction string.

--- a/libs/langchain/langchain/evaluation/parsing/json_distance.py
+++ b/libs/langchain/langchain/evaluation/parsing/json_distance.py
@ -38,7 +38,7 @@ class JsonEditDistanceEvaluator(StringEvaluator):
        self,
        string_distance: Optional[Callable[[str, str], float]] = None,
        canonicalize: Optional[Callable[[Any], Any]] = None,
-        **kwargs: Any
+        **kwargs: Any,
    ) -> None:
        super().__init__()
        if string_distance is not None:
@ -58,7 +58,9 @@ class JsonEditDistanceEvaluator(StringEvaluator):
            self._canonicalize = canonicalize
        else:
            self._canonicalize = lambda x: json.dumps(
-                x, separators=(",", ":"), sort_keys=True  # eliminate whitespace
+                x,
+                separators=(",", ":"),  # eliminate whitespace
+                sort_keys=True,
            )

    @property
@ -83,7 +85,7 @@ class JsonEditDistanceEvaluator(StringEvaluator):
        prediction: str,
        input: Optional[str] = None,
        reference: Optional[str] = None,
-        **kwargs: Any
+        **kwargs: Any,
    ) -> dict:
        parsed = self._canonicalize(self._parse_json(prediction))
        label = self._canonicalize(self._parse_json(reference))
--- a/libs/langchain/langchain/evaluation/parsing/json_schema.py
+++ b/libs/langchain/langchain/evaluation/parsing/json_schema.py
@ -0,0 +1,95 @@
+from typing import Any, Union
+
+from langchain.evaluation.schema import StringEvaluator
+from langchain.output_parsers.json import parse_json_markdown
+
+
+class JsonSchemaEvaluator(StringEvaluator):
+    """An evaluator that validates a JSON prediction against a JSON schema reference.
+
+    This evaluator checks if a given JSON prediction conforms to the provided JSON schema.
+    If the prediction is valid, the score is True (no errors). Otherwise, the score is False (error occurred).
+
+    Attributes:
+        requires_input (bool): Whether the evaluator requires input.
+        requires_reference (bool): Whether the evaluator requires reference.
+        evaluation_name (str): The name of the evaluation.
+
+    Examples:
+        evaluator = JsonSchemaEvaluator()
+        result = evaluator.evaluate_strings(
+            prediction='{"name": "John", "age": 30}',
+            reference={
+                "type": "object",
+                "properties": {
+                    "name": {"type": "string"},
+                    "age": {"type": "integer"}
+                }
+            }
+        )
+        assert result["score"] is not None
+
+    """  # noqa: E501
+
+    def __init__(self, **kwargs: Any) -> None:
+        """Initializes the JsonSchemaEvaluator.
+
+        Args:
+            **kwargs: Additional keyword arguments.
+
+        Raises:
+            ImportError: If the jsonschema package is not installed.
+        """
+        super().__init__()
+        try:
+            import jsonschema  # noqa: F401
+        except ImportError:
+            raise ImportError(
+                "The JsonSchemaEvaluator requires the jsonschema package."
+                " Please install it with `pip install jsonschema`."
+            )
+
+    @property
+    def requires_input(self) -> bool:
+        """Returns whether the evaluator requires input."""
+        return False
+
+    @property
+    def requires_reference(self) -> bool:
+        """Returns whether the evaluator requires reference."""
+        return True
+
+    @property
+    def evaluation_name(self) -> str:
+        """Returns the name of the evaluation."""
+        return "json_schema_validation"
+
+    def _parse_json(self, node: Any) -> Union[dict, list, None, float, bool, int, str]:
+        if isinstance(node, str):
+            return parse_json_markdown(node)
+        elif hasattr(node, "schema") and callable(getattr(node, "schema")):
+            # Pydantic model
+            return getattr(node, "schema")()
+        return node
+
+    def _validate(self, prediction: Any, schema: Any) -> dict:
+        from jsonschema import ValidationError, validate  # noqa: F401
+
+        try:
+            validate(instance=prediction, schema=schema)
+            return {
+                "score": True,
+            }
+        except ValidationError as e:
+            return {"score": False, "reasoning": repr(e)}
+
+    def _evaluate_strings(
+        self,
+        prediction: Union[str, Any],
+        input: Union[str, Any] = None,
+        reference: Union[str, Any] = None,
+        **kwargs: Any,
+    ) -> dict:
+        parsed_prediction = self._parse_json(prediction)
+        schema = self._parse_json(reference)
+        return self._validate(parsed_prediction, schema)
--- a/libs/langchain/langchain/evaluation/schema.py
+++ b/libs/langchain/langchain/evaluation/schema.py
@ -6,7 +6,7 @@ import logging
 from abc import ABC, abstractmethod
 from enum import Enum
 from functools import partial
-from typing import Any, Optional, Sequence, Tuple
+from typing import Any, Optional, Sequence, Tuple, Union
 from warnings import warn

 from langchain.chains.base import Chain
@ -66,6 +66,8 @@ class EvaluatorType(str, Enum):
    """Check if a prediction is equal to a reference JSON."""
    JSON_EDIT_DISTANCE = "json_edit_distance"
    """Compute the edit distance between two JSON strings after canonicalization."""
+    JSON_SCHEMA_VALIDATION = "json_schema_validation"
+    """Check if a prediction is valid JSON according to a JSON schema."""


 class LLMEvalChain(Chain):
@ -144,9 +146,9 @@ class StringEvaluator(_EvalArgsMixin, ABC):
    def _evaluate_strings(
        self,
        *,
-        prediction: str,
-        reference: Optional[str] = None,
-        input: Optional[str] = None,
+        prediction: Union[str, Any],
+        reference: Optional[Union[str, Any]] = None,
+        input: Optional[Union[str, Any]] = None,
        **kwargs: Any,
    ) -> dict:
        """Evaluate Chain or LLM output, based on optional input and label.
@ -167,9 +169,9 @@ class StringEvaluator(_EvalArgsMixin, ABC):
    async def _aevaluate_strings(
        self,
        *,
-        prediction: str,
-        reference: Optional[str] = None,
-        input: Optional[str] = None,
+        prediction: Union[str, Any],
+        reference: Optional[Union[str, Any]] = None,
+        input: Optional[Union[str, Any]] = None,
        **kwargs: Any,
    ) -> dict:
        """Asynchronously evaluate Chain or LLM output, based on optional input and label.
--- a/libs/langchain/poetry.lock
+++ b/libs/langchain/poetry.lock
@ -3790,7 +3790,6 @@ optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*"
 files = [
    {file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"},
-    {file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"},
 ]

 [[package]]
@ -4598,16 +4597,6 @@ files = [
    {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"},
    {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"},
    {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"},
    {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"},
    {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"},
    {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"},
@ -7728,7 +7717,6 @@ files = [
    {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"},
    {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"},
    {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"},
-    {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"},
    {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"},
    {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"},
    {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"},
@ -7736,15 +7724,8 @@ files = [
    {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"},
    {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"},
    {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"},
-    {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"},
    {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"},
    {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
-    {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
-    {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
-    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
-    {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
-    {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
-    {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"},
    {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"},
    {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"},
    {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"},
@ -7761,7 +7742,6 @@ files = [
    {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"},
    {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"},
    {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"},
-    {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"},
    {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"},
    {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"},
    {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"},
@ -7769,7 +7749,6 @@ files = [
    {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"},
    {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"},
    {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"},
-    {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"},
    {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"},
    {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"},
    {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
@ -8733,11 +8712,6 @@ files = [
    {file = "scikit_learn-1.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f66eddfda9d45dd6cadcd706b65669ce1df84b8549875691b1f403730bdef217"},
    {file = "scikit_learn-1.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6448c37741145b241eeac617028ba6ec2119e1339b1385c9720dae31367f2be"},
    {file = "scikit_learn-1.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:c413c2c850241998168bbb3bd1bb59ff03b1195a53864f0b80ab092071af6028"},
-    {file = "scikit_learn-1.3.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ef540e09873e31569bc8b02c8a9f745ee04d8e1263255a15c9969f6f5caa627f"},
-    {file = "scikit_learn-1.3.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:9147a3a4df4d401e618713880be023e36109c85d8569b3bf5377e6cd3fecdeac"},
-    {file = "scikit_learn-1.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2cd3634695ad192bf71645702b3df498bd1e246fc2d529effdb45a06ab028b4"},
-    {file = "scikit_learn-1.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c275a06c5190c5ce00af0acbb61c06374087949f643ef32d355ece12c4db043"},
-    {file = "scikit_learn-1.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:0e1aa8f206d0de814b81b41d60c1ce31f7f2c7354597af38fae46d9c47c45122"},
    {file = "scikit_learn-1.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:52b77cc08bd555969ec5150788ed50276f5ef83abb72e6f469c5b91a0009bbca"},
    {file = "scikit_learn-1.3.1-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:a683394bc3f80b7c312c27f9b14ebea7766b1f0a34faf1a2e9158d80e860ec26"},
    {file = "scikit_learn-1.3.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a15d964d9eb181c79c190d3dbc2fff7338786bf017e9039571418a1d53dab236"},
@ -11041,7 +11015,7 @@ cli = ["typer"]
 cohere = ["cohere"]
 docarray = ["docarray"]
 embeddings = ["sentence-transformers"]
-extended-testing = ["aiosqlite", "amazon-textract-caller", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "dashvector", "esprima", "faiss-cpu", "feedparser", "geopandas", "gitpython", "google-cloud-documentai", "gql", "html2text", "jinja2", "jq", "lxml", "markdownify", "motor", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "openai", "openai", "openapi-pydantic", "pandas", "pdfminer-six", "pgvector", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "upstash-redis", "xata", "xmltodict"]
+extended-testing = ["aiosqlite", "amazon-textract-caller", "anthropic", "arxiv", "assemblyai", "atlassian-python-api", "beautifulsoup4", "bibtexparser", "cassio", "chardet", "dashvector", "esprima", "faiss-cpu", "feedparser", "geopandas", "gitpython", "google-cloud-documentai", "gql", "html2text", "jinja2", "jq", "jsonschema", "lxml", "markdownify", "motor", "mwparserfromhell", "mwxml", "newspaper3k", "numexpr", "openai", "openai", "openapi-pydantic", "pandas", "pdfminer-six", "pgvector", "psychicapi", "py-trello", "pymupdf", "pypdf", "pypdfium2", "pyspark", "rank-bm25", "rapidfuzz", "rapidocr-onnxruntime", "requests-toolbelt", "rspace_client", "scikit-learn", "sqlite-vss", "streamlit", "sympy", "telethon", "timescale-vector", "tqdm", "upstash-redis", "xata", "xmltodict"]
 javascript = ["esprima"]
 llms = ["clarifai", "cohere", "huggingface_hub", "manifest-ml", "nlpcloud", "openai", "openlm", "torch", "transformers"]
 openai = ["openai", "tiktoken"]
@ -11051,4 +11025,4 @@ text-helpers = ["chardet"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8.1,<4.0"
-content-hash = "6bf06e81190f228675452f1a7581614898c983d27f2d56ae9ddd92119c114b03"
+content-hash = "19dcb9abd0bda24034e36b571e7ac04d432f47281a80fdc5d4a9810add60966b"
--- a/libs/langchain/pyproject.toml
+++ b/libs/langchain/pyproject.toml
@ -114,6 +114,7 @@ cassio = {version = "^0.1.0", optional = true}
 rdflib = {version = "^6.3.2", optional = true}
 sympy = {version = "^1.12", optional = true}
 rapidfuzz = {version = "^3.1.1", optional = true}
+jsonschema = {version = ">1", optional = true}
 langsmith = "~0.0.52"
 rank-bm25 = {version = "^0.2.2", optional = true}
 amadeus = {version = ">=8.1.0", optional = true}
@ -350,6 +351,7 @@ extended_testing = [
 "openai",
 "sympy",
 "rapidfuzz",
+ "jsonschema",
 "openai",
 "rank-bm25",
 "geopandas",
--- a/libs/langchain/tests/unit_tests/evaluation/parsing/test_json_schema.py
+++ b/libs/langchain/tests/unit_tests/evaluation/parsing/test_json_schema.py
@ -0,0 +1,77 @@
+import pytest
+
+from langchain.evaluation.parsing.json_schema import JsonSchemaEvaluator
+
+
+@pytest.fixture
+def json_schema_evaluator() -> JsonSchemaEvaluator:
+    return JsonSchemaEvaluator()
+
+
+@pytest.mark.requires("jsonschema")
+def test_json_schema_evaluator_requires_input(
+    json_schema_evaluator: JsonSchemaEvaluator,
+) -> None:
+    assert json_schema_evaluator.requires_input is False
+
+
+@pytest.mark.requires("jsonschema")
+def test_json_schema_evaluator_requires_reference(
+    json_schema_evaluator: JsonSchemaEvaluator,
+) -> None:
+    assert json_schema_evaluator.requires_reference is True
+
+
+@pytest.mark.requires("jsonschema")
+def test_json_schema_evaluator_evaluation_name(
+    json_schema_evaluator: JsonSchemaEvaluator,
+) -> None:
+    assert json_schema_evaluator.evaluation_name == "json_schema_validation"
+
+
+@pytest.mark.requires("jsonschema")
+def test_json_schema_evaluator_valid_prediction(
+    json_schema_evaluator: JsonSchemaEvaluator,
+) -> None:
+    prediction = '{"name": "John", "age": 30}'
+    reference = {
+        "type": "object",
+        "properties": {"name": {"type": "string"}, "age": {"type": "integer"}},
+    }
+    result = json_schema_evaluator._evaluate_strings(
+        prediction=prediction, reference=reference
+    )
+    assert result["score"] is True
+
+
+@pytest.mark.requires("jsonschema")
+def test_json_schema_evaluator_invalid_prediction(
+    json_schema_evaluator: JsonSchemaEvaluator,
+) -> None:
+    prediction = '{"name": "John", "age": "30"}'  # age is a string instead of integer
+    reference = {
+        "type": "object",
+        "properties": {"name": {"type": "string"}, "age": {"type": "integer"}},
+    }
+    result = json_schema_evaluator._evaluate_strings(
+        prediction=prediction, reference=reference
+    )
+    assert result["score"] is False
+    assert "reasoning" in result
+
+
+@pytest.mark.requires("jsonschema")
+def test_json_schema_evaluator_missing_property(
+    json_schema_evaluator: JsonSchemaEvaluator,
+) -> None:
+    prediction = '{"name": "John"}'  # age property is missing
+    reference = {
+        "type": "object",
+        "properties": {"name": {"type": "string"}, "age": {"type": "integer"}},
+        "required": ["name", "age"],
+    }
+    result = json_schema_evaluator._evaluate_strings(
+        prediction=prediction, reference=reference
+    )
+    assert result["score"] is False
+    assert "reasoning" in result