mirror of
https://github.com/hwchase17/langchain
synced 2024-10-29 17:07:25 +00:00
87e502c6bc
Co-authored-by: jacoblee93 <jacoblee93@gmail.com> Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
170 lines
4.9 KiB
Plaintext
170 lines
4.9 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "a1ae632a",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Pydantic (JSON) parser\n",
|
|
"This output parser allows users to specify an arbitrary JSON schema and query LLMs for JSON outputs that conform to that schema.\n",
|
|
"\n",
|
|
"Keep in mind that large language models are leaky abstractions! You'll have to use an LLM with sufficient capacity to generate well-formed JSON. In the OpenAI family, DaVinci can do this reliably, but Curie's ability already drops off dramatically.\n",
|
|
"\n",
|
|
"Use Pydantic to declare your data model. Pydantic's BaseModel is like a Python dataclass, but with actual type checking + coercion."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "b322c447",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from langchain.prompts import (\n",
|
|
" PromptTemplate,\n",
|
|
" ChatPromptTemplate,\n",
|
|
" HumanMessagePromptTemplate,\n",
|
|
")\n",
|
|
"from langchain.llms import OpenAI\n",
|
|
"from langchain.chat_models import ChatOpenAI"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "cba6d8e3",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from langchain.output_parsers import PydanticOutputParser\n",
|
|
"from pydantic import BaseModel, Field, validator\n",
|
|
"from typing import List"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "0a203100",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"model_name = \"text-davinci-003\"\n",
|
|
"temperature = 0.0\n",
|
|
"model = OpenAI(model_name=model_name, temperature=temperature)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "b3f16168",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"Joke(setup='Why did the chicken cross the road?', punchline='To get to the other side!')"
|
|
]
|
|
},
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Define your desired data structure.\n",
|
|
"class Joke(BaseModel):\n",
|
|
" setup: str = Field(description=\"question to set up a joke\")\n",
|
|
" punchline: str = Field(description=\"answer to resolve the joke\")\n",
|
|
"\n",
|
|
" # You can add custom validation logic easily with Pydantic.\n",
|
|
" @validator(\"setup\")\n",
|
|
" def question_ends_with_question_mark(cls, field):\n",
|
|
" if field[-1] != \"?\":\n",
|
|
" raise ValueError(\"Badly formed question!\")\n",
|
|
" return field\n",
|
|
"\n",
|
|
"\n",
|
|
"# And a query intended to prompt a language model to populate the data structure.\n",
|
|
"joke_query = \"Tell me a joke.\"\n",
|
|
"\n",
|
|
"# Set up a parser + inject instructions into the prompt template.\n",
|
|
"parser = PydanticOutputParser(pydantic_object=Joke)\n",
|
|
"\n",
|
|
"prompt = PromptTemplate(\n",
|
|
" template=\"Answer the user query.\\n{format_instructions}\\n{query}\\n\",\n",
|
|
" input_variables=[\"query\"],\n",
|
|
" partial_variables={\"format_instructions\": parser.get_format_instructions()},\n",
|
|
")\n",
|
|
"\n",
|
|
"_input = prompt.format_prompt(query=joke_query)\n",
|
|
"\n",
|
|
"output = model(_input.to_string())\n",
|
|
"\n",
|
|
"parser.parse(output)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "03049f88",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"Actor(name='Tom Hanks', film_names=['Forrest Gump', 'Saving Private Ryan', 'The Green Mile', 'Cast Away', 'Toy Story'])"
|
|
]
|
|
},
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Here's another example, but with a compound typed field.\n",
|
|
"class Actor(BaseModel):\n",
|
|
" name: str = Field(description=\"name of an actor\")\n",
|
|
" film_names: List[str] = Field(description=\"list of names of films they starred in\")\n",
|
|
"\n",
|
|
"\n",
|
|
"actor_query = \"Generate the filmography for a random actor.\"\n",
|
|
"\n",
|
|
"parser = PydanticOutputParser(pydantic_object=Actor)\n",
|
|
"\n",
|
|
"prompt = PromptTemplate(\n",
|
|
" template=\"Answer the user query.\\n{format_instructions}\\n{query}\\n\",\n",
|
|
" input_variables=[\"query\"],\n",
|
|
" partial_variables={\"format_instructions\": parser.get_format_instructions()},\n",
|
|
")\n",
|
|
"\n",
|
|
"_input = prompt.format_prompt(query=actor_query)\n",
|
|
"\n",
|
|
"output = model(_input.to_string())\n",
|
|
"\n",
|
|
"parser.parse(output)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.3"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|