langchain/docs/modules/indexes/getting_started.ipynb

{
"cells": [
{
"cell_type": "markdown",
"id": "07c1e3b9",
"metadata": {},
"source": [
"# Getting Started\n",
"\n",
"This example showcases question answering over a vector database.\n",
"We have chosen this as the example for getting started because it nicely combines a lot of different elements (Text splitters, embeddings, vectorstores) and then also shows how to use them in a chain."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "82525493",
"metadata": {},
"outputs": [],
"source": [
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.vectorstores import Chroma\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain import OpenAI, VectorDBQA"
]
},
{
"cell_type": "markdown",
"id": "0b7adc54",
"metadata": {},
"source": [
"Here we load in the documents we want to use to create our index."
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "611e0c19",
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import TextLoader\n",
"loader = TextLoader('../state_of_the_union.txt')\n",
"documents = loader.load()"
]
},
{
"cell_type": "markdown",
"id": "9fdc0fc2",
"metadata": {},
"source": [
"Next, we will split the documents into chunks."
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "afecb8cf",
"metadata": {},
"outputs": [],
"source": [
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"texts = text_splitter.split_documents(documents)"
]
},
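{
"cell_type": "markdown",
"id": "3f2a9c1e",
"metadata": {},
"source": [
"As an optional sanity check, you can look at what the splitter produced: each entry in `texts` is a `Document`, so you can count the chunks and peek at the first one (the 200-character preview below is just an illustrative choice)."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b7d4e0f2",
"metadata": {},
"outputs": [],
"source": [
"# Optional: inspect the chunks produced by the splitter.\n",
"print(len(texts))\n",
"print(texts[0].page_content[:200])"
]
},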
{
"cell_type": "markdown",
"id": "4bebc041",
"metadata": {},
"source": [
"We will then select which embeddings we want to use."
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "9eaaa735",
"metadata": {},
"outputs": [],
"source": [
"embeddings = OpenAIEmbeddings()"
]
},
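{
"cell_type": "markdown",
"id": "a9c3517b",
"metadata": {},
"source": [
"Optionally, you can sanity-check the embedding model by embedding a short string. `embed_query` returns a list of floats whose length is the embedding dimension; note that this call (like the rest of the notebook) requires an OpenAI API key."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1d6f8e2a",
"metadata": {},
"outputs": [],
"source": [
"# Optional: embed a short string to confirm the embedding model is reachable.\n",
"vector = embeddings.embed_query(\"hello world\")\n",
"len(vector)"
]
},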
{
"cell_type": "markdown",
"id": "24612905",
"metadata": {},
"source": [
"We now create the vectorstore to use as the index."
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "5c7049db",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Running Chroma using direct local API.\n",
"Using DuckDB in-memory for database. Data will be transient.\n"
]
}
],
"source": [
"db = Chroma.from_documents(texts, embeddings)"
]
},
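{
"cell_type": "markdown",
"id": "c4b9d7e1",
"metadata": {},
"source": [
"Before wiring the index into a chain, it can help to query it directly. This is an optional sketch: `similarity_search` returns the chunks closest to the query, and the `k=2` below is just an illustrative choice."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8e2f5a6c",
"metadata": {},
"outputs": [],
"source": [
"# Optional: query the index directly to see which chunks it retrieves.\n",
"docs = db.similarity_search(\"What did the president say about Ketanji Brown Jackson\", k=2)\n",
"print(docs[0].page_content[:200])"
]
},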
{
"cell_type": "markdown",
"id": "30c4e5c6",
"metadata": {},
"source": [
"Finally, we create a chain and use it to answer questions!"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "3018f865",
"metadata": {},
"outputs": [],
"source": [
"qa = VectorDBQA.from_chain_type(llm=OpenAI(), chain_type=\"stuff\", vectorstore=db)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "032a47f8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"\" The President said that Ketanji Brown Jackson is one of the nation's top legal minds, a former top litigator in private practice, a former federal public defender, and from a family of public school educators and police officers. He said that she is a consensus builder and has received a broad range of support from the Fraternal Order of Police to former judges appointed by Democrats and Republicans.\""
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
"qa.run(query)"
]
},
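{
"cell_type": "markdown",
"id": "f0a3b8d5",
"metadata": {},
"source": [
"If you also want to see which chunks the answer was based on, the chain can be asked to return its source documents. The sketch below is hedged: the `return_source_documents` flag and the dict-style call reflect the `VectorDBQA` API at the time of writing and may differ in later versions."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "62e1c9ab",
"metadata": {},
"outputs": [],
"source": [
"# Sketch: rebuild the chain so it also returns the retrieved source documents.\n",
"qa_with_sources = VectorDBQA.from_chain_type(\n",
"    llm=OpenAI(), chain_type=\"stuff\", vectorstore=db, return_source_documents=True\n",
")\n",
"result = qa_with_sources({\"query\": query})\n",
"result[\"result\"], len(result[\"source_documents\"])"
]
},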
{
"cell_type": "code",
"execution_count": null,
"id": "8b403637",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
},
"vscode": {
"interpreter": {
"hash": "b1677b440931f40d89ef8be7bf03acb108ce003de0ac9b18e8d43753ea2e7103"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}