fix evaluation parse test (#8859)

# What
- fix evaluation parse test

<!-- Thank you for contributing to LangChain!

Replace this comment with:
  - Description: Fix evaluation parse test
  - Issue: None
  - Dependencies: None
  - Tag maintainer: @baskaryan
  - Twitter handle: @MLOpsJ

Please make sure your PR is passing linting and testing before
submitting. Run `make format`, `make lint` and `make test` to check this
locally.

If you're adding a new integration, please include:
1. a test for the integration, preferably unit tests that do not rely on
network access,
  2. an example notebook showing its use.

Maintainer responsibilities:
  - General / Misc / if you don't know who to tag: @baskaryan
  - DataLoaders / VectorStores / Retrievers: @rlancemartin, @eyurtsev
  - Models / Prompts: @hwchase17, @baskaryan
  - Memory: @hwchase17
  - Agents / Tools / Toolkits: @hinthornw
  - Tracing / Callbacks: @agola11
  - Async: @agola11

If no one reviews your PR within a few days, feel free to @-mention the
same people again.

See contribution guidelines for more information on how to write/run
tests, lint, etc:
https://github.com/hwchase17/langchain/blob/master/.github/CONTRIBUTING.md
 -->
pull/8878/head
shibuiwilliam 1 year ago committed by GitHub
parent 40096c73cd
commit ab47557db3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -100,14 +100,14 @@ class PairwiseStringResultOutputParser(BaseOutputParser[dict]):
"""
return "pairwise_string_result"
def parse(self, text: str) -> Any:
def parse(self, text: str) -> Dict[str, Any]:
"""Parse the output text.
Args:
text (str): The output text to parse.
Returns:
Any: The parsed output.
Dict: The parsed output.
Raises:
ValueError: If the verdict is invalid.

@ -65,14 +65,14 @@ class CriteriaResultOutputParser(BaseOutputParser[dict]):
def _type(self) -> str:
return "criteria_result"
def parse(self, text: str) -> Any:
def parse(self, text: str) -> Dict[str, Any]:
"""Parse the output text.
Args:
text (str): The output text to parse.
Returns:
Any: The parsed output.
Dict: The parsed output.
"""
parsed = text.strip().rsplit("\n", maxsplit=1)
if len(parsed) == 1:

@ -8,6 +8,7 @@ import pytest
from langchain.evaluation.comparison.eval_chain import (
LabeledPairwiseStringEvalChain,
PairwiseStringEvalChain,
PairwiseStringResultOutputParser,
resolve_pairwise_criteria,
)
from langchain.evaluation.criteria.eval_chain import Criteria
@ -27,6 +28,45 @@ def test_resolve_criteria_list_enum() -> None:
assert set(val.keys()) == set(c.value for c in list(Criteria))
def test_PairwiseStringResultOutputParser_parse() -> None:
    """Exercise ``PairwiseStringResultOutputParser.parse`` on three verdicts.

    Every case supplies one line of reasoning followed by a bracketed verdict
    on the final line, and lists the ``reasoning``, ``value`` and ``score``
    entries the parsed result is expected to expose.
    """
    parser = PairwiseStringResultOutputParser()

    # (raw model output, expected entries of the parsed result)
    cases = [
        (
            """I like pie better than cake.
[[A]]""",
            {
                "reasoning": "I like pie better than cake.",
                "value": "A",
                "score": 1,
            },
        ),
        (
            """I like cake better than pie.
[[B]]""",
            {
                "reasoning": "I like cake better than pie.",
                "value": "B",
                "score": 0,
            },
        ),
        (
            """I like cake and pie.
[[C]]""",
            {
                "reasoning": "I like cake and pie.",
                "value": None,
                "score": 0.5,
            },
        ),
    ]

    for raw_output, expected in cases:
        parsed = parser.parse(raw_output)
        # Compare key by key, in the same order as the expectations are listed.
        for key in ("reasoning", "value", "score"):
            assert parsed.get(key) == expected[key]
def test_pairwise_string_comparison_chain() -> None:
llm = FakeLLM(
queries={

@ -7,6 +7,7 @@ from langchain.evaluation.criteria.eval_chain import (
_SUPPORTED_CRITERIA,
Criteria,
CriteriaEvalChain,
CriteriaResultOutputParser,
LabeledCriteriaEvalChain,
)
from langchain.evaluation.schema import StringEvaluator
@ -23,6 +24,34 @@ def test_resolve_criteria_str() -> None:
}
def test_CriteriaResultOutputParser_parse() -> None:
    """Exercise ``CriteriaResultOutputParser.parse`` with and without reasoning.

    The first case has two lines of reasoning before the verdict on the final
    line; the second case is a bare verdict, for which the expected reasoning
    is the empty string.
    """
    parser = CriteriaResultOutputParser()

    # (raw model output, expected entries of the parsed result)
    cases = [
        (
            """Here is my step-by-step reasoning for the given criteria:
The criterion is: "Do you like cake?" I like cake.
Y""",
            {
                "reasoning": """Here is my step-by-step reasoning for the given criteria:
The criterion is: "Do you like cake?" I like cake.""",
                "value": "Y",
                "score": 1,
            },
        ),
        (
            "Y",
            {
                "reasoning": "",
                "value": "Y",
                "score": 1,
            },
        ),
    ]

    for raw_output, expected in cases:
        parsed = parser.parse(raw_output)
        # Compare key by key, in the same order as the expectations are listed.
        for key in ("reasoning", "value", "score"):
            assert parsed.get(key) == expected[key]
@pytest.mark.parametrize("criterion", list(Criteria))
def test_resolve_criteria_enum(criterion: Criteria) -> None:
assert CriteriaEvalChain.resolve_criteria(criterion) == {

Loading…
Cancel
Save