From 4eee789dd37a474e9e83275c0978f99a90d9d23d Mon Sep 17 00:00:00 2001 From: Greg Richardson Date: Fri, 22 Sep 2023 09:18:56 -0600 Subject: [PATCH] Docs: Using SupabaseVectorStore with existing documents (#10907) ## Description Adds additional docs on how to use `SupabaseVectorStore` with existing data in your DB (vs inserting new documents each time). --- .../integrations/vectorstores/supabase.ipynb | 91 ++++++++++++++----- 1 file changed, 66 insertions(+), 25 deletions(-) diff --git a/docs/extras/integrations/vectorstores/supabase.ipynb b/docs/extras/integrations/vectorstores/supabase.ipynb index 9a5f583adb..961ac208da 100644 --- a/docs/extras/integrations/vectorstores/supabase.ipynb +++ b/docs/extras/integrations/vectorstores/supabase.ipynb @@ -92,7 +92,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "19846a7b-99bc-47a7-8e1c-f13c2497f1ae", "metadata": {}, "outputs": [], @@ -105,7 +105,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "c71c3901-d44b-4d09-92c5-3018628c28fa", "metadata": {}, "outputs": [], @@ -115,7 +115,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "8b91ecfa-f61b-489a-a337-dff1f12f6ab2", "metadata": {}, "outputs": [], @@ -138,67 +138,108 @@ "load_dotenv()" ] }, + { + "cell_type": "markdown", + "id": "924d4df5", + "metadata": {}, + "source": [ + "First we'll create a Supabase client and instantiate an OpenAI embeddings class." 
+ ] + }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 19, "id": "5ce44f7c", "metadata": {}, "outputs": [], "source": [ "import os\n", "from supabase.client import Client, create_client\n", + "from langchain.embeddings.openai import OpenAIEmbeddings\n", + "from langchain.vectorstores import SupabaseVectorStore\n", "\n", "supabase_url = os.environ.get(\"SUPABASE_URL\")\n", "supabase_key = os.environ.get(\"SUPABASE_SERVICE_KEY\")\n", - "supabase: Client = create_client(supabase_url, supabase_key)" + "supabase: Client = create_client(supabase_url, supabase_key)\n", + "\n", + "embeddings = OpenAIEmbeddings()" + ] + }, + { + "cell_type": "markdown", + "id": "0c707d4c", + "metadata": {}, + "source": [ + "Next we'll load and parse some data for our vector store (skip if you already have documents with embeddings stored in your DB)." ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 20, "id": "aac9563e", "metadata": { "tags": [] }, "outputs": [], "source": [ - "from langchain.embeddings.openai import OpenAIEmbeddings\n", + "\n", "from langchain.text_splitter import CharacterTextSplitter\n", - "from langchain.vectorstores import SupabaseVectorStore\n", - "from langchain.document_loaders import TextLoader" + "from langchain.document_loaders import TextLoader\n", + "\n", + "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "documents = loader.load()\n", + "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", + "docs = text_splitter.split_documents(documents)" + ] + }, + { + "cell_type": "markdown", + "id": "5abb9b93", + "metadata": {}, + "source": [ + "Insert the above documents into the database. Embeddings will automatically be generated for each document." 
] }, { "cell_type": "code", - "execution_count": 5, - "id": "a3c3999a", + "execution_count": 6, + "id": "efec97f8", "metadata": {}, "outputs": [], "source": [ - "from langchain.document_loaders import TextLoader\n", "\n", - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", - "documents = loader.load()\n", - "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", - "docs = text_splitter.split_documents(documents)\n", - "\n", - "embeddings = OpenAIEmbeddings()" + "vector_store = SupabaseVectorStore.from_documents(docs, embeddings, client=supabase, table_name=\"documents\", query_name=\"match_documents\")" + ] + }, + { + "cell_type": "markdown", + "id": "e169345d", + "metadata": {}, + "source": [ + "Alternatively if you already have documents with embeddings in your database, simply instantiate a new `SupabaseVectorStore` directly:" ] }, { "cell_type": "code", - "execution_count": 6, - "id": "efec97f8", + "execution_count": 10, + "id": "397e3e7d", "metadata": {}, "outputs": [], "source": [ - "# We're using the default `documents` table here. You can modify this by passing in a `table_name` argument to the `from_documents` method.\n", - "vector_store = SupabaseVectorStore.from_documents(docs, embeddings, client=supabase)" + "vector_store = SupabaseVectorStore(embedding=embeddings, client=supabase, table_name=\"documents\", query_name=\"match_documents\")" + ] + }, + { + "cell_type": "markdown", + "id": "e28ce092", + "metadata": {}, + "source": [ + "Finally, test it out by performing a similarity search:" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "5eabdb75", "metadata": {}, "outputs": [], @@ -209,7 +250,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "4b172de8", "metadata": {}, "outputs": [ @@ -431,7 +472,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.11.5" } }, "nbformat": 4,