langchain/langchain/output_parsers/regex_dict.py

from __future__ import annotations

import re
from typing import Dict, Optional

from pydantic import BaseModel

from langchain.schema import BaseOutputParser


class RegexDictParser(BaseOutputParser, BaseModel):
    """Class to parse the output into a dictionary."""

    regex_pattern: str = r"{}:\s?([^.'\n']*)\.?"  # : :meta private:
    output_key_to_format: Dict[str, str]
    no_update_value: Optional[str] = None

    @property
    def _type(self) -> str:
        """Return the type key."""
        return "regex_dict_parser"

    def parse(self, text: str) -> Dict[str, str]:
        """Parse the output of an LLM call."""
        result = {}
        for output_key, expected_format in self.output_key_to_format.items():
            specific_regex = self.regex_pattern.format(re.escape(expected_format))
            matches = re.findall(specific_regex, text)
            if not matches:
                raise ValueError(
                    f"No match found for output key: {output_key} with expected format \
                        {expected_format} on text {text}"
                )
            elif len(matches) > 1:
                raise ValueError(
                    f"Multiple matches found for output key: {output_key} with \
                        expected format {expected_format} on text {text}"
                )
            elif (
                self.no_update_value is not None and matches[0] == self.no_update_value
            ):
                continue
            else:
                result[output_key] = matches[0]
        return result