Revised notebook and add delete to MyScale vector store (#11848)

- **Description:** 
  - Add `.delete` to myscale vector store. 
  - Revised vector store notebooks
- **Tag maintainer:** @baskaryan 
- **Twitter handle:** @myscaledb @mpsk_liu
This commit is contained in:
刘 方瑞 2023-10-18 02:42:21 +08:00 committed by GitHub
parent 3fb5e4d185
commit 0a24ac7388
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
44 changed files with 275 additions and 194 deletions

View File

@ -1259,7 +1259,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"with open(\"../../../state_of_the_union.txt\") as f:\n", "with open(\"../../modules/state_of_the_union.txt\") as f:\n",
" state_of_the_union = f.read()\n", " state_of_the_union = f.read()\n",
"texts = text_splitter.split_text(state_of_the_union)" "texts = text_splitter.split_text(state_of_the_union)"
] ]

View File

@ -117,7 +117,7 @@
} }
], ],
"source": [ "source": [
"with open(\"../../../state_of_the_union.txt\") as f:\n", "with open(\"../../modules/state_of_the_union.txt\") as f:\n",
" state_of_the_union = f.read()\n", " state_of_the_union = f.read()\n",
"mp_chain.run(state_of_the_union)" "mp_chain.run(state_of_the_union)"
] ]

View File

@ -51,6 +51,7 @@ supported functions:
- `similarity_search_by_vector` - `similarity_search_by_vector`
- `asimilarity_search_by_vector` - `asimilarity_search_by_vector`
- `similarity_search_with_relevance_scores` - `similarity_search_with_relevance_scores`
- `delete`
### VectorStore ### VectorStore

View File

@ -330,7 +330,7 @@
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"from langchain.vectorstores import FAISS\n", "from langchain.vectorstores import FAISS\n",
"\n", "\n",
"documents = TextLoader(\"../../../state_of_the_union.txt\").load()\n", "documents = TextLoader(\"../../modules/state_of_the_union.txt\").load()\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)\n", "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)\n",
"texts = text_splitter.split_documents(documents)\n", "texts = text_splitter.split_documents(documents)\n",
"retriever = FAISS.from_documents(texts, OpenAIEmbeddings()).as_retriever(\n", "retriever = FAISS.from_documents(texts, OpenAIEmbeddings()).as_retriever(\n",

View File

@ -30,7 +30,8 @@
"from langchain.embeddings.openai import OpenAIEmbeddings\n", "from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.vectorstores import Chroma\n", "from langchain.vectorstores import Chroma\n",
"from langchain.text_splitter import CharacterTextSplitter\n", "from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain.llms import OpenAI\nfrom langchain.chains import VectorDBQA\n", "from langchain.llms import OpenAI\n",
"from langchain.chains import VectorDBQA\n",
"\n", "\n",
"llm = OpenAI(temperature=0)" "llm = OpenAI(temperature=0)"
] ]
@ -55,7 +56,7 @@
"source": [ "source": [
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"\n", "\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"texts = text_splitter.split_documents(documents)\n", "texts = text_splitter.split_documents(documents)\n",

View File

@ -43,7 +43,7 @@
"source": [ "source": [
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"\n", "\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n", "docs = text_splitter.split_documents(documents)\n",

View File

@ -151,7 +151,7 @@
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"from langchain.text_splitter import CharacterTextSplitter\n", "from langchain.text_splitter import CharacterTextSplitter\n",
"\n", "\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txtn.txtn.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)" "docs = text_splitter.split_documents(documents)"

View File

@ -103,7 +103,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = SpacyTextSplitter(separator=\"|\")\n", "text_splitter = SpacyTextSplitter(separator=\"|\")\n",
"texts = []\n", "texts = []\n",

View File

@ -40,7 +40,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)" "docs = text_splitter.split_documents(documents)"
@ -112,7 +112,7 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"(Document(page_content='And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt'}), 0.561813814013747)\n" "(Document(page_content='And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': '../../modules/state_of_the_union.txt'}), 0.561813814013747)\n"
] ]
} }
], ],

View File

@ -2,16 +2,16 @@
"cells": [ "cells": [
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [ "source": [
"# Azure Cognitive Search\n", "# Azure Cognitive Search\n",
"\n", "\n",
"[Azure Cognitive Search](https://learn.microsoft.com/azure/search/search-what-is-azure-search) (formerly known as `Azure Search`) is a cloud search service that gives developers infrastructure, APIs, and tools for building a rich search experience over private, heterogeneous content in web, mobile, and enterprise applications.\n", "[Azure Cognitive Search](https://learn.microsoft.com/azure/search/search-what-is-azure-search) (formerly known as `Azure Search`) is a cloud search service that gives developers infrastructure, APIs, and tools for building a rich search experience over private, heterogeneous content in web, mobile, and enterprise applications.\n",
"\n", "\n",
"Vector search is currently in public preview. It's available through the Azure portal, preview REST API and beta client libraries. [More info](https://learn.microsoft.com/en-us/azure/search/vector-search-overview) Beta client libraries are subject to potential breaking changes, please be sure to use the SDK package version identified below. azure-search-documents==11.4.0b8" "Vector search is currently in public preview. It's available through the Azure portal, preview REST API and beta client libraries. [More info](https://learn.microsoft.com/en-us/azure/search/vector-search-overview) Beta client libraries are subject to potential breaking changes, please be sure to use the SDK package version identified below. azure-search-documents==11.4.0b8"
], ]
"metadata": {
"collapsed": false
}
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
@ -134,7 +134,7 @@
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"from langchain.text_splitter import CharacterTextSplitter\n", "from langchain.text_splitter import CharacterTextSplitter\n",
"\n", "\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\", encoding=\"utf-8\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\", encoding=\"utf-8\")\n",
"\n", "\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",

View File

@ -111,7 +111,7 @@
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"from langchain.text_splitter import CharacterTextSplitter\n", "from langchain.text_splitter import CharacterTextSplitter\n",
"\n", "\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)[:10]" "docs = text_splitter.split_documents(documents)[:10]"

View File

@ -79,7 +79,7 @@
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"\n", "\n",
"# load the document and split it into chunks\n", "# load the document and split it into chunks\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"\n", "\n",
"# split it into chunks\n", "# split it into chunks\n",
@ -319,7 +319,7 @@
"\n", "\n",
"# update the metadata for a document\n", "# update the metadata for a document\n",
"docs[0].metadata = {\n", "docs[0].metadata = {\n",
" \"source\": \"../../../state_of_the_union.txt\",\n", " \"source\": \"../../modules/state_of_the_union.txt\",\n",
" \"new_value\": \"hello world\",\n", " \"new_value\": \"hello world\",\n",
"}\n", "}\n",
"example_db.update_document(ids[0], docs[0])\n", "example_db.update_document(ids[0], docs[0])\n",

View File

@ -208,7 +208,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)" "docs = text_splitter.split_documents(documents)"

View File

@ -121,7 +121,7 @@
"source": [ "source": [
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"\n", "\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n", "docs = text_splitter.split_documents(documents)\n",
@ -306,7 +306,7 @@
"from langchain.vectorstores import Clickhouse, ClickhouseSettings\n", "from langchain.vectorstores import Clickhouse, ClickhouseSettings\n",
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"\n", "\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n", "docs = text_splitter.split_documents(documents)\n",

View File

@ -85,7 +85,7 @@
"source": [ "source": [
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"\n", "\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n", "docs = text_splitter.split_documents(documents)\n",

View File

@ -88,7 +88,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"documents = TextLoader(\"../../../state_of_the_union.txt\").load()\n", "documents = TextLoader(\"../../modules/state_of_the_union.txt\").load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n", "docs = text_splitter.split_documents(documents)\n",
"\n", "\n",

View File

@ -85,7 +85,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"documents = TextLoader(\"../../../state_of_the_union.txt\").load()\n", "documents = TextLoader(\"../../modules/state_of_the_union.txt\").load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n", "docs = text_splitter.split_documents(documents)\n",
"\n", "\n",

View File

@ -157,7 +157,7 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"(Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt'}),\n", "(Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': '../../modules/state_of_the_union.txt'}),\n",
" 0.36913747)" " 0.36913747)"
] ]
}, },

View File

@ -53,7 +53,7 @@
"source": [ "source": [
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"\n", "\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n", "docs = text_splitter.split_documents(documents)\n",

View File

@ -41,7 +41,7 @@
}, },
"outputs": [ "outputs": [
{ {
"name": "stdin", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"OpenAI API Key: ········\n" "OpenAI API Key: ········\n"
@ -78,7 +78,7 @@
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"from langchain.text_splitter import CharacterTextSplitter\n", "from langchain.text_splitter import CharacterTextSplitter\n",
"\n", "\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"\n", "\n",
"documents = CharacterTextSplitter().split_documents(documents)\n", "documents = CharacterTextSplitter().split_documents(documents)\n",

View File

@ -52,7 +52,7 @@
"source": [ "source": [
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"\n", "\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)" "docs = text_splitter.split_documents(documents)"
@ -489,7 +489,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"with open(\"../../../state_of_the_union.txt\") as f:\n", "with open(\"../../modules/state_of_the_union.txt\") as f:\n",
" state_of_the_union = f.read()\n", " state_of_the_union = f.read()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"texts = text_splitter.split_text(state_of_the_union)" "texts = text_splitter.split_text(state_of_the_union)"

View File

@ -139,7 +139,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"with open(\"../../../state_of_the_union.txt\") as f:\n", "with open(\"../../modules/state_of_the_union.txt\") as f:\n",
" state_of_the_union = f.read()\n", " state_of_the_union = f.read()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"texts = text_splitter.split_text(state_of_the_union)" "texts = text_splitter.split_text(state_of_the_union)"
@ -180,7 +180,7 @@
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"\n", "\n",
"# Load text\n", "# Load text\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"\n", "\n",

View File

@ -83,7 +83,7 @@
"source": [ "source": [
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"\n", "\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n", "docs = text_splitter.split_documents(documents)\n",

View File

@ -98,7 +98,7 @@
"from langchain.vectorstores import MongoDBAtlasVectorSearch\n", "from langchain.vectorstores import MongoDBAtlasVectorSearch\n",
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"\n", "\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n", "docs = text_splitter.split_documents(documents)\n",

View File

@ -45,7 +45,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 1,
"id": "91003ea5-0c8c-436c-a5de-aaeaeef2f458", "id": "91003ea5-0c8c-436c-a5de-aaeaeef2f458",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -53,7 +53,12 @@
"import os\n", "import os\n",
"import getpass\n", "import getpass\n",
"\n", "\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")" "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
"os.environ[\"OPENAI_API_BASE\"] = getpass.getpass(\"OpenAI Base:\")\n",
"os.environ[\"MYSCALE_HOST\"] = getpass.getpass(\"MyScale Host:\")\n",
"os.environ[\"MYSCALE_PORT\"] = getpass.getpass(\"MyScale Port:\")\n",
"os.environ[\"MYSCALE_USERNAME\"] = getpass.getpass(\"MyScale Username:\")\n",
"os.environ[\"MYSCALE_PASSWORD\"] = getpass.getpass(\"MyScale Password:\")"
] ]
}, },
{ {
@ -86,7 +91,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 3,
"id": "aac9563e", "id": "aac9563e",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -101,7 +106,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 4,
"id": "a3c3999a", "id": "a3c3999a",
"metadata": { "metadata": {
"tags": [] "tags": []
@ -110,7 +115,7 @@
"source": [ "source": [
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"\n", "\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n", "docs = text_splitter.split_documents(documents)\n",
@ -120,10 +125,18 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 5,
"id": "6e104aee", "id": "6e104aee",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Inserting data...: 100%|██████████| 42/42 [00:15<00:00, 2.66it/s]\n"
]
}
],
"source": [ "source": [
"for d in docs:\n", "for d in docs:\n",
" d.metadata = {\"some\": \"metadata\"}\n", " d.metadata = {\"some\": \"metadata\"}\n",
@ -135,10 +148,24 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 7,
"id": "9c608226", "id": "9c608226",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
"\n",
"Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
"\n",
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
"\n",
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.\n"
]
}
],
"source": [ "source": [
"print(docs[0].page_content)" "print(docs[0].page_content)"
] ]
@ -179,15 +206,23 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 7,
"id": "232055f6", "id": "232055f6",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Inserting data...: 100%|██████████| 42/42 [00:15<00:00, 2.68it/s]\n"
]
}
],
"source": [ "source": [
"from langchain.vectorstores import MyScale, MyScaleSettings\n", "from langchain.vectorstores import MyScale, MyScaleSettings\n",
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"\n", "\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n", "docs = text_splitter.split_documents(documents)\n",
@ -220,10 +255,21 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 8,
"id": "ddbcee77", "id": "ddbcee77",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.229655921459198 {'doc_id': 0} Madam Speaker, Madam...\n",
"0.24506962299346924 {'doc_id': 8} And so many families...\n",
"0.24786919355392456 {'doc_id': 1} Groups of citizens b...\n",
"0.24875116348266602 {'doc_id': 6} And Im taking robus...\n"
]
}
],
"source": [ "source": [
"meta = docsearch.metadata_column\n", "meta = docsearch.metadata_column\n",
"output = docsearch.similarity_search_with_relevance_scores(\n", "output = docsearch.similarity_search_with_relevance_scores(\n",
@ -241,12 +287,44 @@
"id": "a359ed74", "id": "a359ed74",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Deleting your data" "## Deleting your data\n",
"\n",
"You can either drop the table with `.drop()` method or partially delete your data with `.delete()` method."
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 9,
"id": "3a0cc43b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.24506962299346924 {'doc_id': 8} And so many families...\n",
"0.24875116348266602 {'doc_id': 6} And Im taking robus...\n",
"0.26027143001556396 {'doc_id': 7} We see the unity amo...\n",
"0.26390212774276733 {'doc_id': 9} And unlike the $2 Tr...\n"
]
}
],
"source": [
"# use directly a `where_str` to delete\n",
"docsearch.delete(where_str=f\"{docsearch.metadata_column}.doc_id < 5\")\n",
"meta = docsearch.metadata_column\n",
"output = docsearch.similarity_search_with_relevance_scores(\n",
" \"What did the president say about Ketanji Brown Jackson?\",\n",
" k=4,\n",
" where_str=f\"{meta}.doc_id<10\",\n",
")\n",
"for d, dist in output:\n",
" print(dist, d.metadata, d.page_content[:20] + \"...\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "fb6a9d36", "id": "fb6a9d36",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -279,7 +357,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.8" "version": "3.11.3"
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@ -85,7 +85,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"\n", "\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",

View File

@ -83,7 +83,7 @@
"source": [ "source": [
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"\n", "\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n", "docs = text_splitter.split_documents(documents)\n",

View File

@ -119,7 +119,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n", "docs = text_splitter.split_documents(documents)\n",

View File

@ -94,7 +94,7 @@
"source": [ "source": [
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"\n", "\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n", "docs = text_splitter.split_documents(documents)\n",
@ -128,7 +128,6 @@
" dimension=1536 \n", " dimension=1536 \n",
")\n", ")\n",
"# The OpenAI embedding model `text-embedding-ada-002 uses 1536 dimensions`\n", "# The OpenAI embedding model `text-embedding-ada-002 uses 1536 dimensions`\n",
"docsearch = Pinecone.from_documents(docs, embeddings, index_name=index_name)\n", "docsearch = Pinecone.from_documents(docs, embeddings, index_name=index_name)\n",
"\n", "\n",
"# if you already have an index, you can load it like this\n", "# if you already have an index, you can load it like this\n",

View File

@ -54,7 +54,7 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"OpenAI API Key: \u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\n" "OpenAI API Key: ········\n"
] ]
} }
], ],
@ -97,7 +97,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n", "docs = text_splitter.split_documents(documents)\n",
@ -326,13 +326,13 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you\u2019re at it, pass the Disclose Act so Americans can know who is funding our elections. \n", "Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
"\n", "\n",
"Tonight, I\u2019d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer\u2014an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n", "Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
"\n", "\n",
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
"\n", "\n",
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation\u2019s top legal minds, who will continue Justice Breyer\u2019s legacy of excellence.\n" "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.\n"
] ]
} }
], ],
@ -383,13 +383,13 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you\u2019re at it, pass the Disclose Act so Americans can know who is funding our elections. \n", "Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
"\n", "\n",
"Tonight, I\u2019d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer\u2014an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n", "Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
"\n", "\n",
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
"\n", "\n",
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation\u2019s top legal minds, who will continue Justice Breyer\u2019s legacy of excellence.\n", "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.\n",
"\n", "\n",
"Score: 0.8153784913324512\n" "Score: 0.8153784913324512\n"
] ]
@ -473,15 +473,15 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"1. Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you\u2019re at it, pass the Disclose Act so Americans can know who is funding our elections. \n", "1. Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \n",
"\n", "\n",
"Tonight, I\u2019d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer\u2014an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n", "Tonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n",
"\n", "\n",
"One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n",
"\n", "\n",
"And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation\u2019s top legal minds, who will continue Justice Breyer\u2019s legacy of excellence. \n", "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence. \n",
"\n", "\n",
"2. We can\u2019t change how divided we\u2019ve been. But we can change how we move forward\u2014on COVID-19 and other issues we must face together. \n", "2. We cant change how divided weve been. But we can change how we move forward—on COVID-19 and other issues we must face together. \n",
"\n", "\n",
"I recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. \n", "I recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. \n",
"\n", "\n",
@ -491,13 +491,13 @@
"\n", "\n",
"Officer Rivera was 22. \n", "Officer Rivera was 22. \n",
"\n", "\n",
"Both Dominican Americans who\u2019d grown up on the same streets they later chose to patrol as police officers. \n", "Both Dominican Americans whod grown up on the same streets they later chose to patrol as police officers. \n",
"\n", "\n",
"I spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. \n", "I spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. \n",
"\n", "\n",
"I\u2019ve worked on these issues a long time. \n", "Ive worked on these issues a long time. \n",
"\n", "\n",
"I know what works: Investing in crime prevention and community police officers who\u2019ll walk the beat, who\u2019ll know the neighborhood, and who can restore trust and safety. \n", "I know what works: Investing in crime prevention and community police officers wholl walk the beat, wholl know the neighborhood, and who can restore trust and safety. \n",
"\n" "\n"
] ]
} }
@ -595,7 +595,7 @@
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you\u2019re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I\u2019d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer\u2014an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation\u2019s top legal minds, who will continue Justice Breyer\u2019s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt'})" "Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt'})"
] ]
}, },
"execution_count": 17, "execution_count": 17,
@ -626,6 +626,10 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "1f11adf8",
"metadata": {
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"Qdrant.from_documents(\n", "Qdrant.from_documents(\n",
@ -635,24 +639,24 @@
" collection_name=\"my_documents_2\",\n", " collection_name=\"my_documents_2\",\n",
" vector_name=\"custom_vector\",\n", " vector_name=\"custom_vector\",\n",
")" ")"
], ]
"metadata": {
"collapsed": false
},
"id": "1f11adf8"
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "b34f5230",
"metadata": {
"collapsed": false
},
"source": [ "source": [
"As a Langchain user, you won't see any difference whether you use named vectors or not. Qdrant integration will handle the conversion under the hood." "As a Langchain user, you won't see any difference whether you use named vectors or not. Qdrant integration will handle the conversion under the hood."
], ]
"metadata": {
"collapsed": false
},
"id": "b34f5230"
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "b2350093",
"metadata": {
"collapsed": false
},
"source": [ "source": [
"### Metadata\n", "### Metadata\n",
"\n", "\n",
@ -670,11 +674,7 @@
"```\n", "```\n",
"\n", "\n",
"You can, however, decide to use different keys for the page content and metadata. That's useful if you already have a collection that you'd like to reuse." "You can, however, decide to use different keys for the page content and metadata. That's useful if you already have a collection that you'd like to reuse."
], ]
"metadata": {
"collapsed": false
},
"id": "b2350093"
}, },
{ {
"cell_type": "code", "cell_type": "code",

View File

@ -115,7 +115,7 @@
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"from langchain.vectorstores import Rockset\n", "from langchain.vectorstores import Rockset\n",
"\n", "\n",
"loader = TextLoader('../../../state_of_the_union.txt')\n", "loader = TextLoader('../../modules/state_of_the_union.txt')\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)" "docs = text_splitter.split_documents(documents)"

View File

@ -60,7 +60,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# Load text samples\n", "# Load text samples\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n", "docs = text_splitter.split_documents(documents)\n",

View File

@ -65,7 +65,7 @@
"from langchain.vectorstores import SKLearnVectorStore\n", "from langchain.vectorstores import SKLearnVectorStore\n",
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"\n", "\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n", "docs = text_splitter.split_documents(documents)\n",

View File

@ -2,45 +2,54 @@
"cells": [ "cells": [
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [ "source": [
"# sqlite-vss\n", "# sqlite-vss\n",
"\n", "\n",
">[sqlite-vss](https://alexgarcia.xyz/sqlite-vss/) is an SQLite extension designed for vector search, emphasizing local-first operations and easy integration into applications without external servers. Leveraging the Faiss library, it offers efficient similarity search and clustering capabilities.\n", ">[sqlite-vss](https://alexgarcia.xyz/sqlite-vss/) is an SQLite extension designed for vector search, emphasizing local-first operations and easy integration into applications without external servers. Leveraging the Faiss library, it offers efficient similarity search and clustering capabilities.\n",
"\n", "\n",
"This notebook shows how to use the `SQLiteVSS` vector database." "This notebook shows how to use the `SQLiteVSS` vector database."
], ]
"metadata": {
"collapsed": false
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# You need to install sqlite-vss as a dependency.\n", "# You need to install sqlite-vss as a dependency.\n",
"%pip install sqlite-vss" "%pip install sqlite-vss"
], ]
"metadata": {
"collapsed": false
}
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"### Quickstart"
],
"metadata": { "metadata": {
"collapsed": false "collapsed": false
} },
"source": [
"### Quickstart"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2023-09-06T14:55:55.370351Z",
"start_time": "2023-09-06T14:55:53.547755Z"
},
"collapsed": false
},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": "'Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.'" "text/plain": [
"'Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while youre at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, Id like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nations top legal minds, who will continue Justice Breyers legacy of excellence.'"
]
}, },
"execution_count": 2, "execution_count": 2,
"metadata": {}, "metadata": {},
@ -54,7 +63,7 @@
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"\n", "\n",
"# load the document and split it into chunks\n", "# load the document and split it into chunks\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"\n", "\n",
"# split it into chunks\n", "# split it into chunks\n",
@ -83,31 +92,33 @@
"\n", "\n",
"# print results\n", "# print results\n",
"data[0].page_content" "data[0].page_content"
], ]
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-09-06T14:55:55.370351Z",
"start_time": "2023-09-06T14:55:53.547755Z"
}
}
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"source": [
"### Using existing sqlite connection"
],
"metadata": { "metadata": {
"collapsed": false "collapsed": false
} },
"source": [
"### Using existing sqlite connection"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 7,
"metadata": {
"ExecuteTime": {
"end_time": "2023-09-06T14:59:22.086252Z",
"start_time": "2023-09-06T14:59:21.693237Z"
},
"collapsed": false
},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": "'Ketanji Brown Jackson is awesome'" "text/plain": [
"'Ketanji Brown Jackson is awesome'"
]
}, },
"execution_count": 7, "execution_count": 7,
"metadata": {}, "metadata": {},
@ -121,7 +132,7 @@
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"\n", "\n",
"# load the document and split it into chunks\n", "# load the document and split it into chunks\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"\n", "\n",
"# split it into chunks\n", "# split it into chunks\n",
@ -147,40 +158,33 @@
"\n", "\n",
"# print results\n", "# print results\n",
"data[0].page_content" "data[0].page_content"
], ]
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-09-06T14:59:22.086252Z",
"start_time": "2023-09-06T14:59:21.693237Z"
}
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 13, "execution_count": 13,
"metadata": {
"ExecuteTime": {
"end_time": "2023-09-06T15:01:15.550318Z",
"start_time": "2023-09-06T15:01:15.546428Z"
},
"collapsed": false
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# Cleaning up\n", "# Cleaning up\n",
"import os\n", "import os\n",
"os.remove(\"/tmp/vss.db\")" "os.remove(\"/tmp/vss.db\")"
], ]
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2023-09-06T15:01:15.550318Z",
"start_time": "2023-09-06T15:01:15.546428Z"
}
}
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"outputs": [],
"source": [],
"metadata": { "metadata": {
"collapsed": false "collapsed": false
} },
"outputs": [],
"source": []
} }
], ],
"metadata": { "metadata": {

View File

@ -186,7 +186,7 @@
"from langchain.text_splitter import CharacterTextSplitter\n", "from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"\n", "\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)" "docs = text_splitter.split_documents(documents)"

View File

@ -16,7 +16,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 24, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -27,30 +27,13 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 30, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"ename": "RuntimeError",
"evalue": "Error loading ../../../state_of_the_union.txt",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
"File \u001b[0;32m/opt/homebrew/lib/python3.10/site-packages/langchain/document_loaders/text.py:40\u001b[0m, in \u001b[0;36mTextLoader.load\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m---> 40\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39;49m(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mfile_path, encoding\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mencoding) \u001b[39mas\u001b[39;00m f:\n\u001b[1;32m 41\u001b[0m text \u001b[39m=\u001b[39m f\u001b[39m.\u001b[39mread()\n",
"\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '../../../state_of_the_union.txt'",
"\nThe above exception was the direct cause of the following exception:\n",
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[30], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mlangchain\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mdocument_loaders\u001b[39;00m \u001b[39mimport\u001b[39;00m TextLoader\n\u001b[1;32m 3\u001b[0m loader \u001b[39m=\u001b[39m TextLoader(\u001b[39m\"\u001b[39m\u001b[39m../../../state_of_the_union.txt\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m----> 4\u001b[0m documents \u001b[39m=\u001b[39m loader\u001b[39m.\u001b[39;49mload()\n\u001b[1;32m 5\u001b[0m text_splitter \u001b[39m=\u001b[39m CharacterTextSplitter(chunk_size\u001b[39m=\u001b[39m\u001b[39m1000\u001b[39m, chunk_overlap\u001b[39m=\u001b[39m\u001b[39m0\u001b[39m)\n\u001b[1;32m 6\u001b[0m docs \u001b[39m=\u001b[39m text_splitter\u001b[39m.\u001b[39msplit_documents(documents)\n",
"File \u001b[0;32m/opt/homebrew/lib/python3.10/site-packages/langchain/document_loaders/text.py:56\u001b[0m, in \u001b[0;36mTextLoader.load\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mError loading \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfile_path\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m) \u001b[39mfrom\u001b[39;00m \u001b[39me\u001b[39;00m\n\u001b[1;32m 55\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[0;32m---> 56\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mError loading \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfile_path\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m) \u001b[39mfrom\u001b[39;00m \u001b[39me\u001b[39;00m\n\u001b[1;32m 58\u001b[0m metadata \u001b[39m=\u001b[39m {\u001b[39m\"\u001b[39m\u001b[39msource\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfile_path}\n\u001b[1;32m 59\u001b[0m \u001b[39mreturn\u001b[39;00m [Document(page_content\u001b[39m=\u001b[39mtext, metadata\u001b[39m=\u001b[39mmetadata)]\n",
"\u001b[0;31mRuntimeError\u001b[0m: Error loading ../../../state_of_the_union.txt"
]
}
],
"source": [ "source": [
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"\n", "\n",
"loader = TextLoader(\"../../../extras/modules/state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n", "docs = text_splitter.split_documents(documents)\n",
@ -74,21 +57,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"ename": "NameError",
"evalue": "name 'docs' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[6], line 6\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[39m# drop first if index already exists\u001b[39;00m\n\u001b[1;32m 4\u001b[0m Tair\u001b[39m.\u001b[39mdrop_index(tair_url\u001b[39m=\u001b[39mtair_url)\n\u001b[0;32m----> 6\u001b[0m vector_store \u001b[39m=\u001b[39m Tair\u001b[39m.\u001b[39mfrom_documents(docs, embeddings, tair_url\u001b[39m=\u001b[39mtair_url)\n",
"\u001b[0;31mNameError\u001b[0m: name 'docs' is not defined"
]
}
],
"source": [ "source": [
"tair_url = \"redis://localhost:6379\"\n", "tair_url = \"redis://localhost:6379\"\n",
"\n", "\n",
@ -107,20 +78,9 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"data": {
"text/plain": [
"Document(page_content='Were going after the criminals who stole billions in relief money meant for small businesses and millions of Americans. \\n\\nAnd tonight, Im announcing that the Justice Department will name a chief prosecutor for pandemic fraud. \\n\\nBy the end of this year, the deficit will be down to less than half what it was before I took office. \\n\\nThe only president ever to cut the deficit by more than one trillion dollars in a single year. \\n\\nLowering your costs also means demanding more competition. \\n\\nIm a capitalist, but capitalism without competition isnt capitalism. \\n\\nIts exploitation—and it drives up prices. \\n\\nWhen corporations dont have to compete, their profits go up, your prices go up, and small businesses and family farmers and ranchers go under. \\n\\nWe see it happening with ocean carriers moving goods in and out of America. \\n\\nDuring the pandemic, these foreign-owned companies raised prices by as much as 1,000% and made record profits.', metadata={'source': '../../../state_of_the_union.txt'})"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"query = \"What did the president say about Ketanji Brown Jackson\"\n", "query = \"What did the president say about Ketanji Brown Jackson\"\n",
"docs = vector_store.similarity_search(query)\n", "docs = vector_store.similarity_search(query)\n",

View File

@ -46,7 +46,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n", "docs = text_splitter.split_documents(documents)\n",

View File

@ -112,7 +112,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n", "docs = text_splitter.split_documents(documents)\n",

View File

@ -114,7 +114,7 @@
"source": [ "source": [
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"\n", "\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n", "docs = text_splitter.split_documents(documents)\n",
@ -342,7 +342,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"with open(\"../../../state_of_the_union.txt\") as f:\n", "with open(\"../../modules/state_of_the_union.txt\") as f:\n",
" state_of_the_union = f.read()\n", " state_of_the_union = f.read()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"texts = text_splitter.split_text(state_of_the_union)" "texts = text_splitter.split_text(state_of_the_union)"

View File

@ -132,7 +132,7 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n", "docs = text_splitter.split_documents(documents)\n",

View File

@ -91,7 +91,7 @@
"source": [ "source": [
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"\n", "\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"docs = text_splitter.split_documents(documents)\n", "docs = text_splitter.split_documents(documents)\n",

View File

@ -60,7 +60,7 @@
" * document addition by id (`add_documents` method with `ids` argument)\n", " * document addition by id (`add_documents` method with `ids` argument)\n",
" * delete by id (`delete` method with)\n", " * delete by id (`delete` method with)\n",
"\n", "\n",
"Compatible Vectorstores: `AnalyticDB`, `AwaDB`, `Bagel`, `Cassandra`, `Chroma`, `DashVector`, `DeepLake`, `Dingo`, `ElasticVectorSearch`, `ElasticsearchStore`, `FAISS`, `PGVector`, `Pinecone`, `Qdrant`, `Redis`, `ScaNN`, `SupabaseVectorStore`, `TimescaleVector`, `Vald`, `Vearch`, `VespaStore`, `Weaviate`, `ZepVectorStore`.\n", "Compatible Vectorstores: `AnalyticDB`, `AwaDB`, `Bagel`, `Cassandra`, `Chroma`, `DashVector`, `DeepLake`, `Dingo`, `ElasticVectorSearch`, `ElasticsearchStore`, `FAISS`, `MyScale`, `PGVector`, `Pinecone`, `Qdrant`, `Redis`, `ScaNN`, `SupabaseVectorStore`, `TimescaleVector`, `Vald`, `Vearch`, `VespaStore`, `Weaviate`, `ZepVectorStore`.\n",
" \n", " \n",
"## Caution\n", "## Caution\n",
"\n", "\n",

View File

@ -450,6 +450,43 @@ class MyScale(VectorStore):
f"DROP TABLE IF EXISTS {self.config.database}.{self.config.table}" f"DROP TABLE IF EXISTS {self.config.database}.{self.config.table}"
) )
def delete(
self,
ids: Optional[List[str]] = None,
where_str: Optional[str] = None,
**kwargs: Any,
) -> Optional[bool]:
"""Delete by vector ID or other criteria.
Args:
ids: List of ids to delete.
**kwargs: Other keyword arguments that subclasses might use.
Returns:
Optional[bool]: True if deletion is successful,
False otherwise, None if not implemented.
"""
assert not (
ids is None and where_str is None
), "You need to specify where to be deleted! Either with `ids` or `where_str`"
conds = []
if ids:
conds.extend([f"{self.config.column_map['id']} = '{id}'" for id in ids])
if where_str:
conds.append(where_str)
assert len(conds) > 0
where_str_final = " AND ".join(conds)
qstr = (
f"DELETE FROM {self.config.database}.{self.config.table} "
f"WHERE {where_str_final}"
)
try:
self.client.command(qstr)
return True
except Exception as e:
logger.error(str(e))
return False
@property @property
def metadata_column(self) -> str: def metadata_column(self) -> str:
return self.config.column_map["metadata"] return self.config.column_map["metadata"]

View File

@ -1137,6 +1137,7 @@ def test_compatible_vectorstore_documentation() -> None:
"ElasticsearchStore", "ElasticsearchStore",
"FAISS", "FAISS",
"MomentoVectorIndex", "MomentoVectorIndex",
"MyScale",
"PGVector", "PGVector",
"Pinecone", "Pinecone",
"Qdrant", "Qdrant",