mirror of
https://github.com/hwchase17/langchain
synced 2024-11-08 07:10:35 +00:00
180 lines
5.0 KiB
Plaintext
180 lines
5.0 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "9b5c258f",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Citing retrieval sources\n",
|
|
"\n",
|
|
"This notebook shows how to use OpenAI functions ability to extract citations from text."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "eae4ca3e",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"/Users/harrisonchase/.pyenv/versions/3.9.1/envs/langchain/lib/python3.9/site-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.6.4) is available. It's recommended that you update to the latest version using `pip install -U deeplake`.\n",
|
|
" warnings.warn(\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"from langchain.chains import create_citation_fuzzy_match_chain\n",
|
|
"from langchain.chat_models import ChatOpenAI"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "2c6e62ee",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"question = \"What did the author do during college?\"\n",
|
|
"context = \"\"\"\n",
|
|
"My name is Jason Liu, and I grew up in Toronto Canada but I was born in China.\n",
|
|
"I went to an arts highschool but in university I studied Computational Mathematics and physics. \n",
|
|
"As part of coop I worked at many companies including Stitchfix, Facebook.\n",
|
|
"I also started the Data Science club at the University of Waterloo and I was the president of the club for 2 years.\n",
|
|
"\"\"\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "078e0300",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "02cad6d0",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"chain = create_citation_fuzzy_match_chain(llm)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "e3c6e7ba",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"result = chain.run(question=question, context=context)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "6f7615f2",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"question='What did the author do during college?' answer=[FactWithEvidence(fact='The author studied Computational Mathematics and physics in university.', substring_quote=['in university I studied Computational Mathematics and physics']), FactWithEvidence(fact='The author started the Data Science club at the University of Waterloo and was the president of the club for 2 years.', substring_quote=['started the Data Science club at the University of Waterloo', 'president of the club for 2 years'])]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"print(result)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "3be6f366",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def highlight(text, span):\n",
|
|
" return (\n",
|
|
" \"...\"\n",
|
|
" + text[span[0] - 20 : span[0]]\n",
|
|
" + \"*\"\n",
|
|
" + \"\\033[91m\"\n",
|
|
" + text[span[0] : span[1]]\n",
|
|
" + \"\\033[0m\"\n",
|
|
" + \"*\"\n",
|
|
" + text[span[1] : span[1] + 20]\n",
|
|
" + \"...\"\n",
|
|
" )"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"id": "636c4528",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Statement: The author studied Computational Mathematics and physics in university.\n",
|
|
"Citation: ...arts highschool but *\u001b[91min university I studied Computational Mathematics and physics\u001b[0m*. \n",
|
|
"As part of coop I...\n",
|
|
"\n",
|
|
"Statement: The author started the Data Science club at the University of Waterloo and was the president of the club for 2 years.\n",
|
|
"Citation: ...x, Facebook.\n",
|
|
"I also *\u001b[91mstarted the Data Science club at the University of Waterloo\u001b[0m* and I was the presi...\n",
|
|
"Citation: ...erloo and I was the *\u001b[91mpresident of the club for 2 years\u001b[0m*.\n",
|
|
"...\n",
|
|
"\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"for fact in result.answer:\n",
|
|
" print(\"Statement:\", fact.fact)\n",
|
|
" for span in fact.get_spans(context):\n",
|
|
" print(\"Citation:\", highlight(context, span))\n",
|
|
" print()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "8409cab0",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.1"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|