mirror of
https://github.com/hwchase17/langchain
synced 2024-11-04 06:00:26 +00:00
Use Case: Extraction set temperature to 0, qualify a statement (#18672)
Minor changes: 1) Set temperature to 0 (important) 2) Better qualify one of the statements with confidence
This commit is contained in:
parent
a4a6978224
commit
b9f3c7a0c9
@ -430,14 +430,6 @@
|
||||
" }\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d18bb013",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@ -456,7 +448,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -394,14 +394,6 @@
|
||||
"* Large chunk overlap may cause the same information to be extracted twice, so be prepared to de-duplicate!\n",
|
||||
"* LLMs can make up data. If looking for a single fact across a large text and using a brute-force approach, you may end up with more made-up data."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "b5f9685f-9d68-4155-a78c-0cb50821e21f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@ -420,7 +412,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -32,7 +32,7 @@
|
||||
"source": [
|
||||
"from langchain_anthropic.chat_models import ChatAnthropic\n",
|
||||
"\n",
|
||||
"model = ChatAnthropic(model_name=\"claude-3-sonnet-20240229\")"
|
||||
"model = ChatAnthropic(model_name=\"claude-3-sonnet-20240229\", temperature=0)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -59,7 +59,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 2,
|
||||
"id": "497eb023-c043-443d-ac62-2d4ea85fe1b0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -111,7 +111,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 3,
|
||||
"id": "20b99ffb-a114-49a9-a7be-154c525f8ada",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -121,7 +121,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 4,
|
||||
"id": "4f3a66ce-de19-4571-9e54-67504ae3fba7",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -149,7 +149,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 5,
|
||||
"id": "3a46b5fd-9242-4b8c-a4e2-3f04fc19b3a4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -159,7 +159,7 @@
|
||||
"People(people=[Person(name='Anna', height_in_meters=1.83)])"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -183,7 +183,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 6,
|
||||
"id": "b1f11912-c1bb-4a2a-a482-79bf3996961f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -253,7 +253,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": 7,
|
||||
"id": "cda52ef5-a354-47a7-9c25-45153c2389e2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -261,7 +261,7 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"System: Answer the user query. Output your answer as JSON that matches the given schema: ```json\n",
|
||||
"System: Answer the user query. Output your answer as JSON that matches the given schema: ```json\n",
|
||||
"{'title': 'People', 'description': 'Identifying information about all people in a text.', 'type': 'object', 'properties': {'people': {'title': 'People', 'type': 'array', 'items': {'$ref': '#/definitions/Person'}}}, 'required': ['people'], 'definitions': {'Person': {'title': 'Person', 'description': 'Information about a person.', 'type': 'object', 'properties': {'name': {'title': 'Name', 'description': 'The name of the person', 'type': 'string'}, 'height_in_meters': {'title': 'Height In Meters', 'description': 'The height of the person expressed in meters.', 'type': 'number'}}, 'required': ['name', 'height_in_meters']}}}\n",
|
||||
"```. Make sure to wrap the answer in ```json and ``` tags\n",
|
||||
"Human: Anna is 23 years old and she is 6 feet tall\n"
|
||||
@ -275,7 +275,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": 8,
|
||||
"id": "993dc61a-229d-4795-a746-0d17df86b5c0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -285,7 +285,7 @@
|
||||
"[{'people': [{'name': 'Anna', 'height_in_meters': 1.83}]}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -34,7 +34,7 @@
|
||||
"\n",
|
||||
"- **Tool/Function Calling** Mode: Some LLMs support a *tool or function calling* mode. These LLMs can structure output according to a given **schema**. Generally, this approach is the easiest to work with and is expected to yield good results.\n",
|
||||
"\n",
|
||||
"- **JSON Mode**: Some LLMs are can be forced to output valid JSON. This is similar to **tool/function Calling** approach, except that the schema is provided as part of the prompt. Generally, our intuition is that this performs worse than a **tool/function calling** approach.\n",
|
||||
"- **JSON Mode**: Some LLMs can be forced to output valid JSON. This is similar to the **tool/function calling** approach, except that the schema is provided as part of the prompt. Generally, our intuition is that this performs worse than a **tool/function calling** approach, but don't trust us and verify for your own use case!\n",
|
||||
"\n",
|
||||
"- **Prompting Based**: LLMs that can follow instructions well can be instructed to generate text in a desired format. The generated text can be parsed downstream using existing [Output Parsers](/docs/modules/model_io/output_parsers/) or using [custom parsers](/docs/modules/model_io/output_parsers/custom) into a structured format like JSON. This approach can be used with LLMs that **do not support** JSON mode or tool/function calling modes. This approach is more broadly applicable, though may yield worse results than models that have been fine-tuned for extraction or function calling.\n",
|
||||
"\n",
|
||||
@ -71,14 +71,6 @@
|
||||
"* [OpenAI's function and tool calling](https://platform.openai.com/docs/guides/function-calling)\n",
|
||||
"* For example, see [OpenAI's JSON mode](https://platform.openai.com/docs/guides/text-generation/json-mode)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6e171cab",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@ -97,7 +89,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -68,7 +68,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 7,
|
||||
"id": "c141084c-fb94-4093-8d6a-81175d688e40",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -91,9 +91,11 @@
|
||||
" # Having a good description can help improve extraction results.\n",
|
||||
" name: Optional[str] = Field(..., description=\"The name of the person\")\n",
|
||||
" hair_color: Optional[str] = Field(\n",
|
||||
" ..., description=\"The color of the peron's eyes if known\"\n",
|
||||
" ..., description=\"The color of the peron's hair if known\"\n",
|
||||
" )\n",
|
||||
" height_in_meters: Optional[str] = Field(..., description=\"Height in METERs\")"
|
||||
" height_in_meters: Optional[str] = Field(\n",
|
||||
" ..., description=\"Height measured in meters\"\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -117,14 +119,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 8,
|
||||
"id": "a5e490f6-35ad-455e-8ae4-2bae021583ff",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from typing import Optional\n",
|
||||
"\n",
|
||||
"from langchain.chains import create_structured_output_runnable\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n",
|
||||
"from langchain_core.pydantic_v1 import BaseModel, Field\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
@ -162,14 +163,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 31,
|
||||
"id": "04d846a6-d5cb-4009-ac19-61e3aac0177e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain_mistralai import ChatMistralAI\n",
|
||||
"\n",
|
||||
"llm = ChatMistralAI(model=\"mistral-large-latest\")\n",
|
||||
"llm = ChatMistralAI(model=\"mistral-large-latest\", temperature=0)\n",
|
||||
"\n",
|
||||
"runnable = prompt | llm.with_structured_output(schema=Person)"
|
||||
]
|
||||
@ -184,17 +185,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 32,
|
||||
"id": "13165ac8-a1dc-44ce-a6ed-f52b577473e4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Person(name='Alan Smith', hair_color='blond', height_in_meters='1.83')"
|
||||
"Person(name='Alan Smith', hair_color='blond', height_in_meters='1.8288')"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -232,7 +233,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 33,
|
||||
"id": "591a0c16-7a17-4883-91ee-0d6d2fdb265c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -255,9 +256,11 @@
|
||||
" # Having a good description can help improve extraction results.\n",
|
||||
" name: Optional[str] = Field(..., description=\"The name of the person\")\n",
|
||||
" hair_color: Optional[str] = Field(\n",
|
||||
" ..., description=\"The color of the peron's eyes if known\"\n",
|
||||
" ..., description=\"The color of the peron's hair if known\"\n",
|
||||
" )\n",
|
||||
" height_in_meters: Optional[str] = Field(\n",
|
||||
" ..., description=\"Height measured in meters\"\n",
|
||||
" )\n",
|
||||
" height_in_meters: Optional[str] = Field(..., description=\"Height in meters\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class Data(BaseModel):\n",
|
||||
@ -267,26 +270,36 @@
|
||||
" people: List[Person]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5f5cda33-fd7b-481e-956a-703f45e40e1d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
":::{.callout-important}\n",
|
||||
"Extraction might not be perfect here. Read on to see how to use **Reference Examples** to improve the quality of extraction, and see the **guidelines** section!\n",
|
||||
":::"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 34,
|
||||
"id": "cf7062cc-1d1d-4a37-9122-509d1b87f0a6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"Data(people=[Person(name='Jeff', hair_color='black', height_in_meters='2'), Person(name='Anna', hair_color=None, height_in_meters=None)])"
|
||||
"Data(people=[Person(name='Jeff', hair_color=None, height_in_meters=None), Person(name='Anna', hair_color=None, height_in_meters=None)])"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"runnable = prompt | llm.with_structured_output(schema=Data)\n",
|
||||
"text = \"My name is Jeff and I am 2 meters. I have black hair. Anna has the same color hair as me.\"\n",
|
||||
"text = \"My name is Jeff, my hair is black and i am 6 feet tall. Anna has the same color hair as me.\"\n",
|
||||
"runnable.invoke({\"text\": text})"
|
||||
]
|
||||
},
|
||||
@ -318,14 +331,6 @@
|
||||
"- [Use a Parsing Approach](/docs/use_cases/extraction/how_to/parse): Use a prompt based approach to extract with models that do not support **tool/function calling**.\n",
|
||||
"- [Guidelines](/docs/use_cases/extraction/guidelines): Guidelines for getting good performance on extraction tasks."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "082fc1af",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@ -344,7 +349,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.1"
|
||||
"version": "3.11.2"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
Loading…
Reference in New Issue
Block a user