|
|
|
@ -92,7 +92,7 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"execution_count": 15,
|
|
|
|
|
"id": "19846a7b-99bc-47a7-8e1c-f13c2497f1ae",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
@ -105,7 +105,7 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"execution_count": 16,
|
|
|
|
|
"id": "c71c3901-d44b-4d09-92c5-3018628c28fa",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
@ -115,7 +115,7 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"execution_count": 17,
|
|
|
|
|
"id": "8b91ecfa-f61b-489a-a337-dff1f12f6ab2",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
@ -138,67 +138,108 @@
|
|
|
|
|
"load_dotenv()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "924d4df5",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"First, we'll create a Supabase client and instantiate an OpenAI embeddings class."
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 3,
|
|
|
|
|
"execution_count": 19,
|
|
|
|
|
"id": "5ce44f7c",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"import os\n",
|
|
|
|
|
"from supabase.client import Client, create_client\n",
|
|
|
|
|
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
|
|
|
|
"from langchain.vectorstores import SupabaseVectorStore\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"supabase_url = os.environ.get(\"SUPABASE_URL\")\n",
|
|
|
|
|
"supabase_key = os.environ.get(\"SUPABASE_SERVICE_KEY\")\n",
|
|
|
|
|
"supabase: Client = create_client(supabase_url, supabase_key)"
|
|
|
|
|
"supabase: Client = create_client(supabase_url, supabase_key)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"embeddings = OpenAIEmbeddings()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "0c707d4c",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"Next, we'll load and parse some data for our vector store (skip if you already have documents with embeddings stored in your DB)."
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 3,
|
|
|
|
|
"execution_count": 20,
|
|
|
|
|
"id": "aac9563e",
|
|
|
|
|
"metadata": {
|
|
|
|
|
"tags": []
|
|
|
|
|
},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"from langchain.text_splitter import CharacterTextSplitter\n",
|
|
|
|
|
"from langchain.vectorstores import SupabaseVectorStore\n",
|
|
|
|
|
"from langchain.document_loaders import TextLoader"
|
|
|
|
|
"from langchain.document_loaders import TextLoader\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
|
|
|
|
|
"documents = loader.load()\n",
|
|
|
|
|
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
|
|
|
|
"docs = text_splitter.split_documents(documents)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "5abb9b93",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"Insert the above documents into the database. Embeddings will automatically be generated for each document."
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 5,
|
|
|
|
|
"id": "a3c3999a",
|
|
|
|
|
"execution_count": 6,
|
|
|
|
|
"id": "efec97f8",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"from langchain.document_loaders import TextLoader\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
|
|
|
|
|
"documents = loader.load()\n",
|
|
|
|
|
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
|
|
|
|
"docs = text_splitter.split_documents(documents)\n",
|
|
|
|
|
"\n",
|
|
|
|
|
"embeddings = OpenAIEmbeddings()"
|
|
|
|
|
"vector_store = SupabaseVectorStore.from_documents(docs, embeddings, client=supabase, table_name=\"documents\", query_name=\"match_documents\")"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "e169345d",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"Alternatively, if you already have documents with embeddings in your database, simply instantiate a new `SupabaseVectorStore` directly:"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 6,
|
|
|
|
|
"id": "efec97f8",
|
|
|
|
|
"execution_count": 10,
|
|
|
|
|
"id": "397e3e7d",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"# We're using the default `documents` table here. You can modify this by passing in a different `table_name` argument when creating the vector store.\n",
|
|
|
|
|
"vector_store = SupabaseVectorStore.from_documents(docs, embeddings, client=supabase)"
|
|
|
|
|
"vector_store = SupabaseVectorStore(embedding=embeddings, client=supabase, table_name=\"documents\", query_name=\"match_documents\")"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "markdown",
|
|
|
|
|
"id": "e28ce092",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"source": [
|
|
|
|
|
"Finally, test it out by performing a similarity search:"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 7,
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"id": "5eabdb75",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
@ -209,7 +250,7 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 8,
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"id": "4b172de8",
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
@ -431,7 +472,7 @@
|
|
|
|
|
"name": "python",
|
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
|
"version": "3.10.12"
|
|
|
|
|
"version": "3.11.5"
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
"nbformat": 4,
|
|
|
|
|