mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
move output parsing (#1605)
This commit is contained in:
parent
cb04ba0136
commit
c9b5a30b37
@ -635,7 +635,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.prompts.base import RegexParser\n",
|
||||
"from langchain.output_parsers import RegexParser\n",
|
||||
"\n",
|
||||
"output_parser = RegexParser(\n",
|
||||
" regex=r\"(.*?)\\nScore: (.*)\",\n",
|
||||
|
@ -635,7 +635,7 @@
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from langchain.prompts.base import RegexParser\n",
|
||||
"from langchain.output_parsers import RegexParser\n",
|
||||
"\n",
|
||||
"output_parser = RegexParser(\n",
|
||||
" regex=r\"(.*?)\\nScore: (.*)\",\n",
|
||||
|
@ -9,7 +9,7 @@ from pydantic import BaseModel, Extra, root_validator
|
||||
from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.prompts.base import RegexParser
|
||||
from langchain.output_parsers.regex import RegexParser
|
||||
|
||||
|
||||
class MapRerankDocumentsChain(BaseCombineDocumentsChain, BaseModel):
|
||||
|
@ -1,6 +1,6 @@
|
||||
# flake8: noqa
|
||||
from langchain.prompts import PromptTemplate
|
||||
from langchain.prompts.base import RegexParser
|
||||
from langchain.output_parsers.regex import RegexParser
|
||||
|
||||
output_parser = RegexParser(
|
||||
regex=r"(.*?)\nScore: (.*)",
|
||||
|
@ -1,5 +1,5 @@
|
||||
# flake8: noqa
|
||||
from langchain.prompts.base import CommaSeparatedListOutputParser
|
||||
from langchain.output_parsers.list import CommaSeparatedListOutputParser
|
||||
from langchain.prompts.prompt import PromptTemplate
|
||||
|
||||
_DEFAULT_TEMPLATE = """Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer. Unless the user specifies in his question a specific number of examples he wishes to obtain, always limit your query to at most {top_k} results. You can order the results by a relevant column to return the most interesting examples in the database.
|
||||
|
@ -1,6 +1,6 @@
|
||||
# flake8: noqa
|
||||
from langchain.prompts import PromptTemplate
|
||||
from langchain.prompts.base import RegexParser
|
||||
from langchain.output_parsers.regex import RegexParser
|
||||
|
||||
template = """You are a teacher coming up with questions to ask on a quiz.
|
||||
Given the following document, please generate a question and answer based on that document.
|
||||
|
13
langchain/output_parsers/__init__.py
Normal file
13
langchain/output_parsers/__init__.py
Normal file
@ -0,0 +1,13 @@
|
||||
from langchain.output_parsers.base import BaseOutputParser
|
||||
from langchain.output_parsers.list import (
|
||||
CommaSeparatedListOutputParser,
|
||||
ListOutputParser,
|
||||
)
|
||||
from langchain.output_parsers.regex import RegexParser
|
||||
|
||||
__all__ = [
|
||||
"RegexParser",
|
||||
"ListOutputParser",
|
||||
"CommaSeparatedListOutputParser",
|
||||
"BaseOutputParser",
|
||||
]
|
25
langchain/output_parsers/base.py
Normal file
25
langchain/output_parsers/base.py
Normal file
@ -0,0 +1,25 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class BaseOutputParser(BaseModel, ABC):
|
||||
"""Class to parse the output of an LLM call."""
|
||||
|
||||
@abstractmethod
|
||||
def parse(self, text: str) -> Any:
|
||||
"""Parse the output of an LLM call."""
|
||||
|
||||
@property
|
||||
def _type(self) -> str:
|
||||
"""Return the type key."""
|
||||
raise NotImplementedError
|
||||
|
||||
def dict(self, **kwargs: Any) -> Dict:
|
||||
"""Return dictionary representation of output parser."""
|
||||
output_parser_dict = super().dict()
|
||||
output_parser_dict["_type"] = self._type
|
||||
return output_parser_dict
|
22
langchain/output_parsers/list.py
Normal file
22
langchain/output_parsers/list.py
Normal file
@ -0,0 +1,22 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import abstractmethod
|
||||
from typing import List
|
||||
|
||||
from langchain.output_parsers.base import BaseOutputParser
|
||||
|
||||
|
||||
class ListOutputParser(BaseOutputParser):
|
||||
"""Class to parse the output of an LLM call to a list."""
|
||||
|
||||
@abstractmethod
|
||||
def parse(self, text: str) -> List[str]:
|
||||
"""Parse the output of an LLM call."""
|
||||
|
||||
|
||||
class CommaSeparatedListOutputParser(ListOutputParser):
|
||||
"""Parse out comma separated lists."""
|
||||
|
||||
def parse(self, text: str) -> List[str]:
|
||||
"""Parse the output of an LLM call."""
|
||||
return text.strip().split(", ")
|
15
langchain/output_parsers/loading.py
Normal file
15
langchain/output_parsers/loading.py
Normal file
@ -0,0 +1,15 @@
|
||||
from langchain.output_parsers.regex import RegexParser
|
||||
|
||||
|
||||
def load_output_parser(config: dict) -> dict:
|
||||
"""Load output parser."""
|
||||
if "output_parsers" in config:
|
||||
if config["output_parsers"] is not None:
|
||||
_config = config["output_parsers"]
|
||||
output_parser_type = _config["_type"]
|
||||
if output_parser_type == "regex_parser":
|
||||
output_parser = RegexParser(**_config)
|
||||
else:
|
||||
raise ValueError(f"Unsupported output parser {output_parser_type}")
|
||||
config["output_parsers"] = output_parser
|
||||
return config
|
35
langchain/output_parsers/regex.py
Normal file
35
langchain/output_parsers/regex.py
Normal file
@ -0,0 +1,35 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from langchain.output_parsers.base import BaseOutputParser
|
||||
|
||||
|
||||
class RegexParser(BaseOutputParser, BaseModel):
|
||||
"""Class to parse the output into a dictionary."""
|
||||
|
||||
regex: str
|
||||
output_keys: List[str]
|
||||
default_output_key: Optional[str] = None
|
||||
|
||||
@property
|
||||
def _type(self) -> str:
|
||||
"""Return the type key."""
|
||||
return "regex_parser"
|
||||
|
||||
def parse(self, text: str) -> Dict[str, str]:
|
||||
"""Parse the output of an LLM call."""
|
||||
match = re.search(self.regex, text)
|
||||
if match:
|
||||
return {key: match.group(i + 1) for i, key in enumerate(self.output_keys)}
|
||||
else:
|
||||
if self.default_output_key is None:
|
||||
raise ValueError(f"Could not parse output: {text}")
|
||||
else:
|
||||
return {
|
||||
key: text if key == self.default_output_key else ""
|
||||
for key in self.output_keys
|
||||
}
|
@ -2,7 +2,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, List, Mapping, Optional, Union
|
||||
@ -11,6 +10,12 @@ import yaml
|
||||
from pydantic import BaseModel, Extra, Field, root_validator
|
||||
|
||||
from langchain.formatting import formatter
|
||||
from langchain.output_parsers.base import BaseOutputParser
|
||||
from langchain.output_parsers.list import ( # noqa: F401
|
||||
CommaSeparatedListOutputParser,
|
||||
ListOutputParser,
|
||||
)
|
||||
from langchain.output_parsers.regex import RegexParser # noqa: F401
|
||||
from langchain.schema import BaseMessage, HumanMessage, PromptValue
|
||||
|
||||
|
||||
@ -54,68 +59,6 @@ def check_valid_template(
|
||||
)
|
||||
|
||||
|
||||
class BaseOutputParser(BaseModel, ABC):
|
||||
"""Class to parse the output of an LLM call."""
|
||||
|
||||
@abstractmethod
|
||||
def parse(self, text: str) -> Union[str, List[str], Dict[str, str]]:
|
||||
"""Parse the output of an LLM call."""
|
||||
|
||||
@property
|
||||
def _type(self) -> str:
|
||||
"""Return the type key."""
|
||||
raise NotImplementedError
|
||||
|
||||
def dict(self, **kwargs: Any) -> Dict:
|
||||
"""Return dictionary representation of output parser."""
|
||||
output_parser_dict = super().dict()
|
||||
output_parser_dict["_type"] = self._type
|
||||
return output_parser_dict
|
||||
|
||||
|
||||
class ListOutputParser(BaseOutputParser):
|
||||
"""Class to parse the output of an LLM call to a list."""
|
||||
|
||||
@abstractmethod
|
||||
def parse(self, text: str) -> List[str]:
|
||||
"""Parse the output of an LLM call."""
|
||||
|
||||
|
||||
class CommaSeparatedListOutputParser(ListOutputParser):
|
||||
"""Parse out comma separated lists."""
|
||||
|
||||
def parse(self, text: str) -> List[str]:
|
||||
"""Parse the output of an LLM call."""
|
||||
return text.strip().split(", ")
|
||||
|
||||
|
||||
class RegexParser(BaseOutputParser, BaseModel):
|
||||
"""Class to parse the output into a dictionary."""
|
||||
|
||||
regex: str
|
||||
output_keys: List[str]
|
||||
default_output_key: Optional[str] = None
|
||||
|
||||
@property
|
||||
def _type(self) -> str:
|
||||
"""Return the type key."""
|
||||
return "regex_parser"
|
||||
|
||||
def parse(self, text: str) -> Dict[str, str]:
|
||||
"""Parse the output of an LLM call."""
|
||||
match = re.search(self.regex, text)
|
||||
if match:
|
||||
return {key: match.group(i + 1) for i, key in enumerate(self.output_keys)}
|
||||
else:
|
||||
if self.default_output_key is None:
|
||||
raise ValueError(f"Could not parse output: {text}")
|
||||
else:
|
||||
return {
|
||||
key: text if key == self.default_output_key else ""
|
||||
for key in self.output_keys
|
||||
}
|
||||
|
||||
|
||||
class StringPromptValue(PromptValue):
|
||||
text: str
|
||||
|
||||
|
@ -7,7 +7,8 @@ from typing import Union
|
||||
|
||||
import yaml
|
||||
|
||||
from langchain.prompts.base import BasePromptTemplate, RegexParser
|
||||
from langchain.output_parsers.regex import RegexParser
|
||||
from langchain.prompts.base import BasePromptTemplate
|
||||
from langchain.prompts.few_shot import FewShotPromptTemplate
|
||||
from langchain.prompts.prompt import PromptTemplate
|
||||
from langchain.utilities.loading import try_load_from_hub
|
||||
@ -73,15 +74,15 @@ def _load_examples(config: dict) -> dict:
|
||||
|
||||
def _load_output_parser(config: dict) -> dict:
|
||||
"""Load output parser."""
|
||||
if "output_parser" in config:
|
||||
if config["output_parser"] is not None:
|
||||
_config = config["output_parser"]
|
||||
if "output_parsers" in config:
|
||||
if config["output_parsers"] is not None:
|
||||
_config = config["output_parsers"]
|
||||
output_parser_type = _config["_type"]
|
||||
if output_parser_type == "regex_parser":
|
||||
output_parser = RegexParser(**_config)
|
||||
else:
|
||||
raise ValueError(f"Unsupported output parser {output_parser_type}")
|
||||
config["output_parser"] = output_parser
|
||||
config["output_parsers"] = output_parser
|
||||
return config
|
||||
|
||||
|
||||
|
@ -7,7 +7,7 @@ import pytest
|
||||
|
||||
from langchain.chains.llm import LLMChain
|
||||
from langchain.chains.loading import load_chain
|
||||
from langchain.prompts.base import BaseOutputParser
|
||||
from langchain.output_parsers.base import BaseOutputParser
|
||||
from langchain.prompts.prompt import PromptTemplate
|
||||
from tests.unit_tests.llms.fake_llm import FakeLLM
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user