Criteria Parser Improvements (#10824)

2024-11-04 06:00:26 +00:00 · 2023-09-20 11:18:33 -07:00 · 2023-09-20 11:18:33 -07:00 · f421af8b80
commit f421af8b80
parent 095f300bf6
2 changed files with 59 additions and 28 deletions
--- a/libs/langchain/langchain/evaluation/criteria/eval_chain.py
+++ b/libs/langchain/langchain/evaluation/criteria/eval_chain.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 import re
 from enum import Enum
 from typing import Any, Dict, List, Mapping, Optional, Union
@@ -73,15 +74,36 @@ class CriteriaResultOutputParser(BaseOutputParser[dict]):
        Returns:
            Dict: The parsed output.
        """
-        parsed = text.strip().rsplit("\n", maxsplit=1)
+        verdict = None
-        if len(parsed) == 1:
+        score = None
-            reasoning = ""
+        match_last = re.search(r"\s*(Y|N)\s*$", text, re.IGNORECASE)
-            verdict = parsed[0]
+        match_first = re.search(r"^\s*(Y|N)\s*", text, re.IGNORECASE)
        match_end = re.search(r"\b(Y|N)\b\s*$", text, re.IGNORECASE)
        if match_last:
            verdict = match_last.group(1).strip()
            text = text[: match_last.start()].strip()
        elif match_first:
            verdict = match_first.group(1).strip()
            text = text[match_first.end() :].strip()
        elif match_end:
            verdict = match_end.group(1).strip()
            text = text[: match_end.start()].strip()
        else:
-            reasoning, verdict = parsed
+            splits = text.strip().rsplit("\n", maxsplit=1)
-        score = 1 if verdict.upper() == "Y" else (0 if verdict.upper() == "N" else None)
+            if len(splits) == 1:
                reasoning = ""
                verdict = splits[0]
            else:
                reasoning, verdict = splits
        if verdict:
            score = (
                1 if verdict.upper() == "Y" else (0 if verdict.upper() == "N" else None)
            )
        return {
-            "reasoning": reasoning.strip(),
+            "reasoning": text.strip(),
            "value": verdict,
            "score": score,
        }
--- a/libs/langchain/tests/unit_tests/evaluation/criteria/test_eval_chain.py
+++ b/libs/langchain/tests/unit_tests/evaluation/criteria/test_eval_chain.py
@@ -24,29 +24,38 @@ def test_resolve_criteria_str() -> None:
    }
-def test_CriteriaResultOutputParser_parse() -> None:
+@pytest.mark.parametrize(
-    output_parser = CriteriaResultOutputParser()
+    "text,want",
-    text = """Here is my step-by-step reasoning for the given criteria:
+    [
        ("Y", {"reasoning": "", "value": "Y", "score": 1}),
        (
            """Here is my step-by-step reasoning for the given criteria:
 The criterion is: "Do you like cake?" I like cake.
-Y"""
+Y""",
            {
                "reasoning": """Here is my step-by-step reasoning for the given criteria:
 The criterion is: "Do you like cake?" I like cake.""",  # noqa: E501
                "value": "Y",
                "score": 1,
            },
        ),
        (
            " NThe submission N is correct, accurate, and factual. It accurately"
            " identifies the specific effects of knowledge and interest on"
            " these factors. Therefore, the submission Y meets the criteria. Y",
            {
                "reasoning": "NThe submission N is correct, accurate, and factual. It"
                " accurately identifies the specific effects of knowledge and interest"
                " on these factors. Therefore, the submission Y meets the criteria.",
                "value": "Y",
                "score": 1,
            },
        ),
    ],
 )
 def test_CriteriaResultOutputParser_parse(text: str, want: dict) -> None:
    output_parser = CriteriaResultOutputParser()
    got = output_parser.parse(text)
    want = {
        "reasoning": """Here is my step-by-step reasoning for the given criteria:
 The criterion is: "Do you like cake?" I like cake.""",
        "value": "Y",
        "score": 1,
    }
    assert got.get("reasoning") == want["reasoning"]
    assert got.get("value") == want["value"]
    assert got.get("score") == want["score"]
    text = "Y"
    got = output_parser.parse(text)
    want = {
        "reasoning": "",
        "value": "Y",
        "score": 1,
    }
    assert got.get("reasoning") == want["reasoning"]
    assert got.get("value") == want["value"]
    assert got.get("score") == want["score"]