From f421af8b80e04a39c2888e1c77ead1d2e24d960d Mon Sep 17 00:00:00 2001 From: William FH <13333726+hinthornw@users.noreply.github.com> Date: Wed, 20 Sep 2023 11:18:33 -0700 Subject: [PATCH] Criteria Parser Improvements (#10824) --- .../evaluation/criteria/eval_chain.py | 36 ++++++++++--- .../evaluation/criteria/test_eval_chain.py | 51 +++++++++++-------- 2 files changed, 59 insertions(+), 28 deletions(-) diff --git a/libs/langchain/langchain/evaluation/criteria/eval_chain.py b/libs/langchain/langchain/evaluation/criteria/eval_chain.py index dbd34cb6e8..c351217f43 100644 --- a/libs/langchain/langchain/evaluation/criteria/eval_chain.py +++ b/libs/langchain/langchain/evaluation/criteria/eval_chain.py @@ -1,5 +1,6 @@ from __future__ import annotations +import re from enum import Enum from typing import Any, Dict, List, Mapping, Optional, Union @@ -73,15 +74,36 @@ class CriteriaResultOutputParser(BaseOutputParser[dict]): Returns: Dict: The parsed output. """ - parsed = text.strip().rsplit("\n", maxsplit=1) - if len(parsed) == 1: - reasoning = "" - verdict = parsed[0] + verdict = None + score = None + match_last = re.search(r"\s*(Y|N)\s*$", text, re.IGNORECASE) + match_first = re.search(r"^\s*(Y|N)\s*", text, re.IGNORECASE) + match_end = re.search(r"\b(Y|N)\b\s*$", text, re.IGNORECASE) + + if match_last: + verdict = match_last.group(1).strip() + text = text[: match_last.start()].strip() + elif match_first: + verdict = match_first.group(1).strip() + text = text[match_first.end() :].strip() + elif match_end: + verdict = match_end.group(1).strip() + text = text[: match_end.start()].strip() else: - reasoning, verdict = parsed - score = 1 if verdict.upper() == "Y" else (0 if verdict.upper() == "N" else None) + splits = text.strip().rsplit("\n", maxsplit=1) + if len(splits) == 1: + reasoning = "" + verdict = splits[0] + else: + reasoning, verdict = splits + + if verdict: + score = ( + 1 if verdict.upper() == "Y" else (0 if verdict.upper() == "N" else None) + ) + return { - "reasoning": reasoning.strip(), + "reasoning": text.strip(), "value": verdict, "score": score, } diff --git a/libs/langchain/tests/unit_tests/evaluation/criteria/test_eval_chain.py b/libs/langchain/tests/unit_tests/evaluation/criteria/test_eval_chain.py index cc1833f42b..f843854aff 100644 --- a/libs/langchain/tests/unit_tests/evaluation/criteria/test_eval_chain.py +++ b/libs/langchain/tests/unit_tests/evaluation/criteria/test_eval_chain.py @@ -24,29 +24,38 @@ def test_resolve_criteria_str() -> None: } -def test_CriteriaResultOutputParser_parse() -> None: - output_parser = CriteriaResultOutputParser() - text = """Here is my step-by-step reasoning for the given criteria: +@pytest.mark.parametrize( + "text,want", + [ + ("Y", {"reasoning": "", "value": "Y", "score": 1}), + ( + """Here is my step-by-step reasoning for the given criteria: The criterion is: "Do you like cake?" I like cake. -Y""" +Y""", + { + "reasoning": """Here is my step-by-step reasoning for the given criteria: +The criterion is: "Do you like cake?" I like cake.""", # noqa: E501 + "value": "Y", + "score": 1, + }, + ), + ( + " NThe submission N is correct, accurate, and factual. It accurately" + " identifies the specific effects of knowledge and interest on" + " these factors. Therefore, the submission Y meets the criteria. Y", + { + "reasoning": "NThe submission N is correct, accurate, and factual. It" + " accurately identifies the specific effects of knowledge and interest" + " on these factors. Therefore, the submission Y meets the criteria.", + "value": "Y", + "score": 1, + }, + ), + ], +) +def test_CriteriaResultOutputParser_parse(text: str, want: dict) -> None: + output_parser = CriteriaResultOutputParser() got = output_parser.parse(text) - want = { - "reasoning": """Here is my step-by-step reasoning for the given criteria: -The criterion is: "Do you like cake?" I like cake.""", - "value": "Y", - "score": 1, - } - assert got.get("reasoning") == want["reasoning"] - assert got.get("value") == want["value"] - assert got.get("score") == want["score"] - - text = "Y" - got = output_parser.parse(text) - want = { - "reasoning": "", - "value": "Y", - "score": 1, - } assert got.get("reasoning") == want["reasoning"] assert got.get("value") == want["value"] assert got.get("score") == want["score"]