langchain/docs/extras/modules/model_io/output_parsers/pydantic.ipynb
Joshua Sundance Bailey 0a1dc04875
PydanticOutputParser doc nb: use langchain.pydantic_v1; remove unused imports (#10651)
Description: This PR changes the import section of the
`PydanticOutputParser` notebook.
* Import from `langchain.pydantic_v1` instead of `pydantic`
* Remove unused imports

Issue: running the notebook as written, when pydantic v2 is installed,
results in the following:
```python
PydanticDeprecatedSince20: Pydantic V1 style `@validator` validators are deprecated. You should migrate to Pydantic V2 style `@field_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.3/migration/
```
[...]
```python
PydanticUserError: The `field` and `config` parameters are not available in Pydantic V2, please use the `info` parameter instead.

For further information visit https://errors.pydantic.dev/2.3/u/validator-field-config-info
```
2023-09-15 14:05:01 -07:00

157 lines
4.7 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"id": "a1ae632a",
"metadata": {},
"source": [
"# Pydantic (JSON) parser\n",
"This output parser allows users to specify an arbitrary JSON schema and query LLMs for JSON outputs that conform to that schema.\n",
"\n",
"Keep in mind that large language models are leaky abstractions! You'll have to use an LLM with sufficient capacity to generate well-formed JSON. In the OpenAI family, DaVinci can do this reliably, but Curie's ability already drops off dramatically.\n",
"\n",
"Use Pydantic to declare your data model. Pydantic's BaseModel is like a Python dataclass, but with actual type checking + coercion."
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "cba6d8e3",
"metadata": {},
"outputs": [],
"source": [
"from typing import List\n",
"\n",
"from langchain.llms import OpenAI\n",
"from langchain.output_parsers import PydanticOutputParser\n",
"from langchain.prompts import PromptTemplate\n",
"from langchain.pydantic_v1 import BaseModel, Field, validator"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "0a203100",
"metadata": {},
"outputs": [],
"source": [
"# LLM settings for this notebook; both values are handed to the OpenAI wrapper as-is.\n",
"model_name, temperature = \"text-davinci-003\", 0.0\n",
"model = OpenAI(temperature=temperature, model_name=model_name)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "b3f16168",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Joke(setup='Why did the chicken cross the road?', punchline='To get to the other side!')"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Define your desired data structure.\n",
"class Joke(BaseModel):\n",
"    # A two-part joke: `setup` holds the question, `punchline` the answer.\n",
"    setup: str = Field(description=\"question to set up a joke\")\n",
"    punchline: str = Field(description=\"answer to resolve the joke\")\n",
"\n",
"    # You can add custom validation logic easily with Pydantic.\n",
"    @validator(\"setup\")\n",
"    def question_ends_with_question_mark(cls, field):\n",
"        # Use `endswith` rather than `field[-1]`: indexing an empty string raises\n",
"        # IndexError, which would escape instead of being reported as a\n",
"        # validation failure like the ValueError below.\n",
"        if not field.endswith(\"?\"):\n",
"            raise ValueError(\"Badly formed question!\")\n",
"        return field\n",
"\n",
"\n",
"# And a query intended to prompt a language model to populate the data structure.\n",
"joke_query = \"Tell me a joke.\"\n",
"\n",
"# Build a parser for `Joke` and embed its format instructions in the prompt.\n",
"parser = PydanticOutputParser(pydantic_object=Joke)\n",
"\n",
"prompt = PromptTemplate(\n",
"    input_variables=[\"query\"],\n",
"    partial_variables={\"format_instructions\": parser.get_format_instructions()},\n",
"    template=\"Answer the user query.\\n{format_instructions}\\n{query}\\n\",\n",
")\n",
"\n",
"# Render the prompt, send it to the model, then parse the raw completion.\n",
"_input = prompt.format_prompt(query=joke_query)\n",
"output = model(_input.to_string())\n",
"\n",
"parser.parse(output)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "03049f88",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Actor(name='Tom Hanks', film_names=['Forrest Gump', 'Saving Private Ryan', 'The Green Mile', 'Cast Away', 'Toy Story'])"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Here's another example, but with a compound typed field.\n",
"class Actor(BaseModel):\n",
"    # An actor's name plus a list of film titles (the compound-typed field).\n",
"    name: str = Field(description=\"name of an actor\")\n",
"    film_names: List[str] = Field(\n",
"        description=\"list of names of films they starred in\"\n",
"    )\n",
"\n",
"\n",
"actor_query = \"Generate the filmography for a random actor.\"\n",
"\n",
"# Rebind `parser` and `prompt` so they target the `Actor` model.\n",
"parser = PydanticOutputParser(pydantic_object=Actor)\n",
"\n",
"prompt = PromptTemplate(\n",
"    input_variables=[\"query\"],\n",
"    partial_variables={\"format_instructions\": parser.get_format_instructions()},\n",
"    template=\"Answer the user query.\\n{format_instructions}\\n{query}\\n\",\n",
")\n",
"\n",
"# Render the prompt, send it to the model, then parse the raw completion.\n",
"_input = prompt.format_prompt(query=actor_query)\n",
"output = model(_input.to_string())\n",
"\n",
"parser.parse(output)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}