{ "cells": [ { "cell_type": "markdown", "id": "9b5c258f", "metadata": {}, "source": [ "# Citing retrieval sources\n", "\n", "This notebook shows how to use OpenAI's function-calling ability to extract citations from text." ] }, { "cell_type": "code", "execution_count": 1, "id": "eae4ca3e", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/harrisonchase/.pyenv/versions/3.9.1/envs/langchain/lib/python3.9/site-packages/deeplake/util/check_latest_version.py:32: UserWarning: A newer version of deeplake (3.6.4) is available. It's recommended that you update to the latest version using `pip install -U deeplake`.\n", " warnings.warn(\n" ] } ], "source": [ "from langchain.chains import create_citation_fuzzy_match_chain\n", "from langchain_openai import ChatOpenAI" ] }, { "cell_type": "code", "execution_count": 2, "id": "2c6e62ee", "metadata": {}, "outputs": [], "source": [ "question = \"What did the author do during college?\"\n", "context = \"\"\"\n", "My name is Jason Liu, and I grew up in Toronto Canada but I was born in China.\n", "I went to an arts highschool but in university I studied Computational Mathematics and physics. 
\n", "As part of coop I worked at many companies including Stitchfix, Facebook.\n", "I also started the Data Science club at the University of Waterloo and I was the president of the club for 2 years.\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": 3, "id": "078e0300", "metadata": {}, "outputs": [], "source": [ "llm = ChatOpenAI(temperature=0, model=\"gpt-3.5-turbo-0613\")" ] }, { "cell_type": "code", "execution_count": 4, "id": "02cad6d0", "metadata": {}, "outputs": [], "source": [ "chain = create_citation_fuzzy_match_chain(llm)" ] }, { "cell_type": "code", "execution_count": 5, "id": "e3c6e7ba", "metadata": {}, "outputs": [], "source": [ "result = chain.run(question=question, context=context)" ] }, { "cell_type": "code", "execution_count": 6, "id": "6f7615f2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "question='What did the author do during college?' answer=[FactWithEvidence(fact='The author studied Computational Mathematics and physics in university.', substring_quote=['in university I studied Computational Mathematics and physics']), FactWithEvidence(fact='The author started the Data Science club at the University of Waterloo and was the president of the club for 2 years.', substring_quote=['started the Data Science club at the University of Waterloo', 'president of the club for 2 years'])]\n" ] } ], "source": [ "print(result)" ] }, { "cell_type": "code", "execution_count": 7, "id": "3be6f366", "metadata": {}, "outputs": [], "source": [ "def highlight(text, span):\n", " return (\n", " \"...\"\n", " + text[span[0] - 20 : span[0]]\n", " + \"*\"\n", " + \"\\033[91m\"\n", " + text[span[0] : span[1]]\n", " + \"\\033[0m\"\n", " + \"*\"\n", " + text[span[1] : span[1] + 20]\n", " + \"...\"\n", " )" ] }, { "cell_type": "code", "execution_count": 8, "id": "636c4528", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Statement: The author studied Computational Mathematics and physics 
# Report each extracted statement followed by every place its supporting
# quotes were located in `context`, one "Citation:" line per located span.
for evidence in result.answer:
    print("Statement:", evidence.fact)
    # Lazily render each span so highlighting and printing stay interleaved.
    rendered = (
        highlight(context, location) for location in evidence.get_spans(context)
    )
    for citation in rendered:
        print("Citation:", citation)
    # Blank line separates one statement's citations from the next.
    print()