diff --git a/docs/docs/integrations/llms/llm_caching.ipynb b/docs/docs/integrations/llms/llm_caching.ipynb index 4683177d94..d164678eaa 100644 --- a/docs/docs/integrations/llms/llm_caching.ipynb +++ b/docs/docs/integrations/llms/llm_caching.ipynb @@ -1259,7 +1259,7 @@ "metadata": {}, "outputs": [], "source": [ - "with open(\"../../../state_of_the_union.txt\") as f:\n", + "with open(\"../../modules/state_of_the_union.txt\") as f:\n", " state_of_the_union = f.read()\n", "texts = text_splitter.split_text(state_of_the_union)" ] diff --git a/docs/docs/integrations/llms/manifest.ipynb b/docs/docs/integrations/llms/manifest.ipynb index 514c3f8472..5656247900 100644 --- a/docs/docs/integrations/llms/manifest.ipynb +++ b/docs/docs/integrations/llms/manifest.ipynb @@ -117,7 +117,7 @@ } ], "source": [ - "with open(\"../../../state_of_the_union.txt\") as f:\n", + "with open(\"../../modules/state_of_the_union.txt\") as f:\n", " state_of_the_union = f.read()\n", "mp_chain.run(state_of_the_union)" ] diff --git a/docs/docs/integrations/providers/myscale.mdx b/docs/docs/integrations/providers/myscale.mdx index c4eec626d4..27a53b8002 100644 --- a/docs/docs/integrations/providers/myscale.mdx +++ b/docs/docs/integrations/providers/myscale.mdx @@ -51,6 +51,7 @@ supported functions: - `similarity_search_by_vector` - `asimilarity_search_by_vector` - `similarity_search_with_relevance_scores` +- `delete` ### VectorStore diff --git a/docs/docs/integrations/retrievers/cohere-reranker.ipynb b/docs/docs/integrations/retrievers/cohere-reranker.ipynb index 7f18cd00f0..5a11bef51e 100644 --- a/docs/docs/integrations/retrievers/cohere-reranker.ipynb +++ b/docs/docs/integrations/retrievers/cohere-reranker.ipynb @@ -330,7 +330,7 @@ "from langchain.document_loaders import TextLoader\n", "from langchain.vectorstores import FAISS\n", "\n", - "documents = TextLoader(\"../../../state_of_the_union.txt\").load()\n", + "documents = TextLoader(\"../../modules/state_of_the_union.txt\").load()\n", 
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)\n", "texts = text_splitter.split_documents(documents)\n", "retriever = FAISS.from_documents(texts, OpenAIEmbeddings()).as_retriever(\n", diff --git a/docs/docs/integrations/toolkits/vectorstore.ipynb b/docs/docs/integrations/toolkits/vectorstore.ipynb index 75909b6406..22322494d8 100644 --- a/docs/docs/integrations/toolkits/vectorstore.ipynb +++ b/docs/docs/integrations/toolkits/vectorstore.ipynb @@ -30,7 +30,8 @@ "from langchain.embeddings.openai import OpenAIEmbeddings\n", "from langchain.vectorstores import Chroma\n", "from langchain.text_splitter import CharacterTextSplitter\n", - "from langchain.llms import OpenAI\nfrom langchain.chains import VectorDBQA\n", + "from langchain.llms import OpenAI\n", + "from langchain.chains import VectorDBQA\n", "\n", "llm = OpenAI(temperature=0)" ] @@ -55,7 +56,7 @@ "source": [ "from langchain.document_loaders import TextLoader\n", "\n", - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "texts = text_splitter.split_documents(documents)\n", diff --git a/docs/docs/integrations/vectorstores/analyticdb.ipynb b/docs/docs/integrations/vectorstores/analyticdb.ipynb index 86894ce8dd..6d33565d20 100644 --- a/docs/docs/integrations/vectorstores/analyticdb.ipynb +++ b/docs/docs/integrations/vectorstores/analyticdb.ipynb @@ -43,7 +43,7 @@ "source": [ "from langchain.document_loaders import TextLoader\n", "\n", - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)\n", diff --git a/docs/docs/integrations/vectorstores/annoy.ipynb 
b/docs/docs/integrations/vectorstores/annoy.ipynb index 7f71d0c9e5..11b7a47ab6 100644 --- a/docs/docs/integrations/vectorstores/annoy.ipynb +++ b/docs/docs/integrations/vectorstores/annoy.ipynb @@ -151,7 +151,7 @@ "from langchain.document_loaders import TextLoader\n", "from langchain.text_splitter import CharacterTextSplitter\n", "\n", - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)" diff --git a/docs/docs/integrations/vectorstores/atlas.ipynb b/docs/docs/integrations/vectorstores/atlas.ipynb index 0f761a8dc5..162ac705a9 100644 --- a/docs/docs/integrations/vectorstores/atlas.ipynb +++ b/docs/docs/integrations/vectorstores/atlas.ipynb @@ -103,7 +103,7 @@ }, "outputs": [], "source": [ - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = SpacyTextSplitter(separator=\"|\")\n", "texts = []\n", diff --git a/docs/docs/integrations/vectorstores/awadb.ipynb b/docs/docs/integrations/vectorstores/awadb.ipynb index 9760010d8e..a0cad5d480 100644 --- a/docs/docs/integrations/vectorstores/awadb.ipynb +++ b/docs/docs/integrations/vectorstores/awadb.ipynb @@ -40,7 +40,7 @@ "metadata": {}, "outputs": [], "source": [ - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)" @@ -112,7 +112,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "(Document(page_content='And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. 
One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt'}), 0.561813814013747)\n" + "(Document(page_content='And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../modules/state_of_the_union.txt'}), 0.561813814013747)\n" ] } ], diff --git a/docs/docs/integrations/vectorstores/azuresearch.ipynb b/docs/docs/integrations/vectorstores/azuresearch.ipynb index d7de279e16..54c028e177 100644 --- a/docs/docs/integrations/vectorstores/azuresearch.ipynb +++ b/docs/docs/integrations/vectorstores/azuresearch.ipynb @@ -2,16 +2,16 @@ "cells": [ { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "# Azure Cognitive Search\n", "\n", "[Azure Cognitive Search](https://learn.microsoft.com/azure/search/search-what-is-azure-search) (formerly known as `Azure Search`) is a cloud search service that gives developers infrastructure, APIs, and tools for building a rich search experience over private, heterogeneous content in web, mobile, and enterprise applications.\n", "\n", "Vector search is currently in public preview. It's available through the Azure portal, preview REST API and beta client libraries. [More info](https://learn.microsoft.com/en-us/azure/search/vector-search-overview) Beta client libraries are subject to potential breaking changes, please be sure to use the SDK package version identified below. 
azure-search-documents==11.4.0b8" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", @@ -134,7 +134,7 @@ "from langchain.document_loaders import TextLoader\n", "from langchain.text_splitter import CharacterTextSplitter\n", "\n", - "loader = TextLoader(\"../../../state_of_the_union.txt\", encoding=\"utf-8\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\", encoding=\"utf-8\")\n", "\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", diff --git a/docs/docs/integrations/vectorstores/bageldb.ipynb b/docs/docs/integrations/vectorstores/bageldb.ipynb index 7f65486569..6f846d5442 100644 --- a/docs/docs/integrations/vectorstores/bageldb.ipynb +++ b/docs/docs/integrations/vectorstores/bageldb.ipynb @@ -111,7 +111,7 @@ "from langchain.document_loaders import TextLoader\n", "from langchain.text_splitter import CharacterTextSplitter\n", "\n", - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)[:10]" diff --git a/docs/docs/integrations/vectorstores/chroma.ipynb b/docs/docs/integrations/vectorstores/chroma.ipynb index 4a1411f7b6..a87d1ef55d 100644 --- a/docs/docs/integrations/vectorstores/chroma.ipynb +++ b/docs/docs/integrations/vectorstores/chroma.ipynb @@ -79,7 +79,7 @@ "from langchain.document_loaders import TextLoader\n", "\n", "# load the document and split it into chunks\n", - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "\n", "# split it into chunks\n", @@ -319,7 +319,7 @@ "\n", "# update the metadata for a document\n", "docs[0].metadata = {\n", - " \"source\": \"../../../state_of_the_union.txt\",\n", + " 
\"source\": \"../../modules/state_of_the_union.txt\",\n", " \"new_value\": \"hello world\",\n", "}\n", "example_db.update_document(ids[0], docs[0])\n", diff --git a/docs/docs/integrations/vectorstores/clarifai.ipynb b/docs/docs/integrations/vectorstores/clarifai.ipynb index 9c48f1917b..c9fb0c604f 100644 --- a/docs/docs/integrations/vectorstores/clarifai.ipynb +++ b/docs/docs/integrations/vectorstores/clarifai.ipynb @@ -208,7 +208,7 @@ "metadata": {}, "outputs": [], "source": [ - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)" diff --git a/docs/docs/integrations/vectorstores/clickhouse.ipynb b/docs/docs/integrations/vectorstores/clickhouse.ipynb index 97cdcccf0a..a750530710 100644 --- a/docs/docs/integrations/vectorstores/clickhouse.ipynb +++ b/docs/docs/integrations/vectorstores/clickhouse.ipynb @@ -121,7 +121,7 @@ "source": [ "from langchain.document_loaders import TextLoader\n", "\n", - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)\n", @@ -306,7 +306,7 @@ "from langchain.vectorstores import Clickhouse, ClickhouseSettings\n", "from langchain.document_loaders import TextLoader\n", "\n", - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)\n", diff --git a/docs/docs/integrations/vectorstores/dingo.ipynb b/docs/docs/integrations/vectorstores/dingo.ipynb index 
0369f1ecb0..6deb9d4dea 100644 --- a/docs/docs/integrations/vectorstores/dingo.ipynb +++ b/docs/docs/integrations/vectorstores/dingo.ipynb @@ -85,7 +85,7 @@ "source": [ "from langchain.document_loaders import TextLoader\n", "\n", - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)\n", diff --git a/docs/docs/integrations/vectorstores/docarray_hnsw.ipynb b/docs/docs/integrations/vectorstores/docarray_hnsw.ipynb index 1537668b2e..99fc5bab9b 100644 --- a/docs/docs/integrations/vectorstores/docarray_hnsw.ipynb +++ b/docs/docs/integrations/vectorstores/docarray_hnsw.ipynb @@ -88,7 +88,7 @@ }, "outputs": [], "source": [ - "documents = TextLoader(\"../../../state_of_the_union.txt\").load()\n", + "documents = TextLoader(\"../../modules/state_of_the_union.txt\").load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)\n", "\n", diff --git a/docs/docs/integrations/vectorstores/docarray_in_memory.ipynb b/docs/docs/integrations/vectorstores/docarray_in_memory.ipynb index 220c69ad0e..ac12f01dcc 100644 --- a/docs/docs/integrations/vectorstores/docarray_in_memory.ipynb +++ b/docs/docs/integrations/vectorstores/docarray_in_memory.ipynb @@ -85,7 +85,7 @@ }, "outputs": [], "source": [ - "documents = TextLoader(\"../../../state_of_the_union.txt\").load()\n", + "documents = TextLoader(\"../../modules/state_of_the_union.txt\").load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)\n", "\n", diff --git a/docs/docs/integrations/vectorstores/faiss.ipynb b/docs/docs/integrations/vectorstores/faiss.ipynb index 7969982ca8..1dd14edd47 100644 --- a/docs/docs/integrations/vectorstores/faiss.ipynb +++ 
b/docs/docs/integrations/vectorstores/faiss.ipynb @@ -157,7 +157,7 @@ { "data": { "text/plain": [ - "(Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt'}),\n", + "(Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. 
One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../modules/state_of_the_union.txt'}),\n", " 0.36913747)" ] }, diff --git a/docs/docs/integrations/vectorstores/hologres.ipynb b/docs/docs/integrations/vectorstores/hologres.ipynb index de28c27136..1289205680 100644 --- a/docs/docs/integrations/vectorstores/hologres.ipynb +++ b/docs/docs/integrations/vectorstores/hologres.ipynb @@ -53,7 +53,7 @@ "source": [ "from langchain.document_loaders import TextLoader\n", "\n", - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)\n", diff --git a/docs/docs/integrations/vectorstores/lancedb.ipynb b/docs/docs/integrations/vectorstores/lancedb.ipynb index fc12cdf287..5d2735411c 100644 --- a/docs/docs/integrations/vectorstores/lancedb.ipynb +++ b/docs/docs/integrations/vectorstores/lancedb.ipynb @@ -41,7 +41,7 @@ }, "outputs": [ { - "name": "stdin", + "name": "stdout", "output_type": "stream", "text": [ "OpenAI API Key: ········\n" @@ -78,7 +78,7 @@ "from langchain.document_loaders import TextLoader\n", "from langchain.text_splitter import CharacterTextSplitter\n", "\n", - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "\n", "documents = CharacterTextSplitter().split_documents(documents)\n", diff --git a/docs/docs/integrations/vectorstores/marqo.ipynb b/docs/docs/integrations/vectorstores/marqo.ipynb index 01c88627ff..3b8f296cb7 100644 --- a/docs/docs/integrations/vectorstores/marqo.ipynb +++ b/docs/docs/integrations/vectorstores/marqo.ipynb @@ -52,7 +52,7 @@ "source": [ "from langchain.document_loaders import TextLoader\n", "\n", - "loader = 
TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)" @@ -489,7 +489,7 @@ "metadata": {}, "outputs": [], "source": [ - "with open(\"../../../state_of_the_union.txt\") as f:\n", + "with open(\"../../modules/state_of_the_union.txt\") as f:\n", " state_of_the_union = f.read()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "texts = text_splitter.split_text(state_of_the_union)" diff --git a/docs/docs/integrations/vectorstores/meilisearch.ipynb b/docs/docs/integrations/vectorstores/meilisearch.ipynb index 572c29e1c1..817aeeb598 100644 --- a/docs/docs/integrations/vectorstores/meilisearch.ipynb +++ b/docs/docs/integrations/vectorstores/meilisearch.ipynb @@ -139,7 +139,7 @@ "metadata": {}, "outputs": [], "source": [ - "with open(\"../../../state_of_the_union.txt\") as f:\n", + "with open(\"../../modules/state_of_the_union.txt\") as f:\n", " state_of_the_union = f.read()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "texts = text_splitter.split_text(state_of_the_union)" @@ -180,7 +180,7 @@ "from langchain.document_loaders import TextLoader\n", "\n", "# Load text\n", - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "\n", diff --git a/docs/docs/integrations/vectorstores/milvus.ipynb b/docs/docs/integrations/vectorstores/milvus.ipynb index 3364985666..a932d68221 100644 --- a/docs/docs/integrations/vectorstores/milvus.ipynb +++ b/docs/docs/integrations/vectorstores/milvus.ipynb @@ -83,7 +83,7 @@ "source": [ "from langchain.document_loaders import TextLoader\n", "\n", - "loader = 
TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)\n", diff --git a/docs/docs/integrations/vectorstores/mongodb_atlas.ipynb b/docs/docs/integrations/vectorstores/mongodb_atlas.ipynb index 5c4475d029..ecd57f3e0b 100644 --- a/docs/docs/integrations/vectorstores/mongodb_atlas.ipynb +++ b/docs/docs/integrations/vectorstores/mongodb_atlas.ipynb @@ -98,7 +98,7 @@ "from langchain.vectorstores import MongoDBAtlasVectorSearch\n", "from langchain.document_loaders import TextLoader\n", "\n", - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)\n", diff --git a/docs/docs/integrations/vectorstores/myscale.ipynb b/docs/docs/integrations/vectorstores/myscale.ipynb index 0eb98f88d9..c3090836f4 100644 --- a/docs/docs/integrations/vectorstores/myscale.ipynb +++ b/docs/docs/integrations/vectorstores/myscale.ipynb @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "91003ea5-0c8c-436c-a5de-aaeaeef2f458", "metadata": {}, "outputs": [], @@ -53,7 +53,12 @@ "import os\n", "import getpass\n", "\n", - "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")" + "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n", + "os.environ[\"OPENAI_API_BASE\"] = getpass.getpass(\"OpenAI Base:\")\n", + "os.environ[\"MYSCALE_HOST\"] = getpass.getpass(\"MyScale Host:\")\n", + "os.environ[\"MYSCALE_PORT\"] = getpass.getpass(\"MyScale Port:\")\n", + "os.environ[\"MYSCALE_USERNAME\"] = getpass.getpass(\"MyScale Username:\")\n", + "os.environ[\"MYSCALE_PASSWORD\"] = 
getpass.getpass(\"MyScale Password:\")" ] }, { @@ -86,7 +91,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "aac9563e", "metadata": { "tags": [] @@ -101,7 +106,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "a3c3999a", "metadata": { "tags": [] @@ -110,7 +115,7 @@ "source": [ "from langchain.document_loaders import TextLoader\n", "\n", - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)\n", @@ -120,10 +125,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "6e104aee", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Inserting data...: 100%|██████████| 42/42 [00:15<00:00, 2.66it/s]\n" + ] + } + ], "source": [ "for d in docs:\n", " d.metadata = {\"some\": \"metadata\"}\n", @@ -135,10 +148,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "9c608226", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n", + "\n", + "Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n", + "\n", + "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. 
\n", + "\n", + "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n" + ] + } + ], "source": [ "print(docs[0].page_content)" ] @@ -179,15 +206,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "232055f6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Inserting data...: 100%|██████████| 42/42 [00:15<00:00, 2.68it/s]\n" + ] + } + ], "source": [ "from langchain.vectorstores import MyScale, MyScaleSettings\n", "from langchain.document_loaders import TextLoader\n", "\n", - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)\n", @@ -220,10 +255,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "ddbcee77", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.229655921459198 {'doc_id': 0} Madam Speaker, Madam...\n", + "0.24506962299346924 {'doc_id': 8} And so many families...\n", + "0.24786919355392456 {'doc_id': 1} Groups of citizens b...\n", + "0.24875116348266602 {'doc_id': 6} And I’m taking robus...\n" + ] + } + ], "source": [ "meta = docsearch.metadata_column\n", "output = docsearch.similarity_search_with_relevance_scores(\n", @@ -241,12 +287,44 @@ "id": "a359ed74", "metadata": {}, "source": [ - "## Deleting your data" + "## Deleting your data\n", + "\n", + "You can either drop the table with the `.drop()` method or partially delete your data with the `.delete()` method." 
] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, + "id": "3a0cc43b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.24506962299346924 {'doc_id': 8} And so many families...\n", + "0.24875116348266602 {'doc_id': 6} And I’m taking robus...\n", + "0.26027143001556396 {'doc_id': 7} We see the unity amo...\n", + "0.26390212774276733 {'doc_id': 9} And unlike the $2 Tr...\n" + ] + } + ], + "source": [ + "# use directly a `where_str` to delete\n", + "docsearch.delete(where_str=f\"{docsearch.metadata_column}.doc_id < 5\")\n", + "meta = docsearch.metadata_column\n", + "output = docsearch.similarity_search_with_relevance_scores(\n", + " \"What did the president say about Ketanji Brown Jackson?\",\n", + " k=4,\n", + " where_str=f\"{meta}.doc_id<10\",\n", + ")\n", + "for d, dist in output:\n", + " print(dist, d.metadata, d.page_content[:20] + \"...\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, "id": "fb6a9d36", "metadata": {}, "outputs": [], @@ -279,7 +357,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.8" + "version": "3.11.3" } }, "nbformat": 4, diff --git a/docs/docs/integrations/vectorstores/neo4jvector.ipynb b/docs/docs/integrations/vectorstores/neo4jvector.ipynb index 4a807a6043..db5ca55639 100644 --- a/docs/docs/integrations/vectorstores/neo4jvector.ipynb +++ b/docs/docs/integrations/vectorstores/neo4jvector.ipynb @@ -85,7 +85,7 @@ "metadata": {}, "outputs": [], "source": [ - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", diff --git a/docs/docs/integrations/vectorstores/opensearch.ipynb b/docs/docs/integrations/vectorstores/opensearch.ipynb index 3ed96707d9..935a37b4c5 100644 --- 
a/docs/docs/integrations/vectorstores/opensearch.ipynb +++ b/docs/docs/integrations/vectorstores/opensearch.ipynb @@ -83,7 +83,7 @@ "source": [ "from langchain.document_loaders import TextLoader\n", "\n", - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)\n", diff --git a/docs/docs/integrations/vectorstores/pgvector.ipynb b/docs/docs/integrations/vectorstores/pgvector.ipynb index 4e34060d3e..7e9b944469 100644 --- a/docs/docs/integrations/vectorstores/pgvector.ipynb +++ b/docs/docs/integrations/vectorstores/pgvector.ipynb @@ -119,7 +119,7 @@ }, "outputs": [], "source": [ - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)\n", diff --git a/docs/docs/integrations/vectorstores/pinecone.ipynb b/docs/docs/integrations/vectorstores/pinecone.ipynb index 8d5cb9d423..2f10711479 100644 --- a/docs/docs/integrations/vectorstores/pinecone.ipynb +++ b/docs/docs/integrations/vectorstores/pinecone.ipynb @@ -94,7 +94,7 @@ "source": [ "from langchain.document_loaders import TextLoader\n", "\n", - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)\n", @@ -128,7 +128,6 @@ " dimension=1536 \n", ")\n", "# The OpenAI embedding model `text-embedding-ada-002 uses 1536 dimensions`\n", - "docsearch = Pinecone.from_documents(docs, embeddings, index_name=index_name)\n", "\n", "# if you already 
have an index, you can load it like this\n", diff --git a/docs/docs/integrations/vectorstores/qdrant.ipynb b/docs/docs/integrations/vectorstores/qdrant.ipynb index e82388ac39..f5f381da12 100644 --- a/docs/docs/integrations/vectorstores/qdrant.ipynb +++ b/docs/docs/integrations/vectorstores/qdrant.ipynb @@ -54,7 +54,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "OpenAI API Key: \u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\u00b7\n" + "OpenAI API Key: ········\n" ] } ], @@ -97,7 +97,7 @@ }, "outputs": [], "source": [ - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)\n", @@ -326,13 +326,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you\u2019re at it, pass the Disclose Act so Americans can know who is funding our elections. \n", + "Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n", "\n", - "Tonight, I\u2019d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer\u2014an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n", + "Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. 
\n", "\n", "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", "\n", - "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation\u2019s top legal minds, who will continue Justice Breyer\u2019s legacy of excellence.\n" + "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n" ] } ], @@ -383,13 +383,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you\u2019re at it, pass the Disclose Act so Americans can know who is funding our elections. \n", + "Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n", "\n", - "Tonight, I\u2019d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer\u2014an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n", + "Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n", "\n", "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", "\n", - "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. 
One of our nation\u2019s top legal minds, who will continue Justice Breyer\u2019s legacy of excellence.\n", + "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n", "\n", "Score: 0.8153784913324512\n" ] @@ -473,15 +473,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "1. Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you\u2019re at it, pass the Disclose Act so Americans can know who is funding our elections. \n", + "1. Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n", "\n", - "Tonight, I\u2019d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer\u2014an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n", + "Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n", "\n", "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", "\n", - "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation\u2019s top legal minds, who will continue Justice Breyer\u2019s legacy of excellence. \n", + "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence. \n", "\n", - "2. 
We can\u2019t change how divided we\u2019ve been. But we can change how we move forward\u2014on COVID-19 and other issues we must face together. \n", + "2. We can’t change how divided we’ve been. But we can change how we move forward—on COVID-19 and other issues we must face together. \n", "\n", "I recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. \n", "\n", @@ -491,13 +491,13 @@ "\n", "Officer Rivera was 22. \n", "\n", - "Both Dominican Americans who\u2019d grown up on the same streets they later chose to patrol as police officers. \n", + "Both Dominican Americans who’d grown up on the same streets they later chose to patrol as police officers. \n", "\n", "I spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. \n", "\n", - "I\u2019ve worked on these issues a long time. \n", + "I’ve worked on these issues a long time. \n", "\n", - "I know what works: Investing in crime prevention and community police officers who\u2019ll walk the beat, who\u2019ll know the neighborhood, and who can restore trust and safety. \n", + "I know what works: Investing in crime prevention and community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and safety. \n", "\n" ] } @@ -595,7 +595,7 @@ { "data": { "text/plain": [ - "Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you\u2019re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I\u2019d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer\u2014an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. 
\\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation\u2019s top legal minds, who will continue Justice Breyer\u2019s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt'})" + "Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': '../../../state_of_the_union.txt'})" ] }, "execution_count": 17, @@ -626,6 +626,10 @@ { "cell_type": "code", "execution_count": null, + "id": "1f11adf8", + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "Qdrant.from_documents(\n", @@ -635,24 +639,24 @@ " collection_name=\"my_documents_2\",\n", " vector_name=\"custom_vector\",\n", ")" - ], - "metadata": { - "collapsed": false - }, - "id": "1f11adf8" + ] }, { "cell_type": "markdown", - "source": [ - "As a Langchain user, you won't see any difference whether you use named vectors or not. Qdrant integration will handle the conversion under the hood." 
- ], + "id": "b34f5230", "metadata": { "collapsed": false }, - "id": "b34f5230" + "source": [ + "As a Langchain user, you won't see any difference whether you use named vectors or not. Qdrant integration will handle the conversion under the hood." + ] }, { "cell_type": "markdown", + "id": "b2350093", + "metadata": { + "collapsed": false + }, "source": [ "### Metadata\n", "\n", @@ -670,11 +674,7 @@ "```\n", "\n", "You can, however, decide to use different keys for the page content and metadata. That's useful if you already have a collection that you'd like to reuse." - ], - "metadata": { - "collapsed": false - }, - "id": "b2350093" + ] }, { "cell_type": "code", @@ -739,4 +739,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/docs/docs/integrations/vectorstores/rockset.ipynb b/docs/docs/integrations/vectorstores/rockset.ipynb index a8a8ca864d..497c3657d5 100644 --- a/docs/docs/integrations/vectorstores/rockset.ipynb +++ b/docs/docs/integrations/vectorstores/rockset.ipynb @@ -115,7 +115,7 @@ "from langchain.document_loaders import TextLoader\n", "from langchain.vectorstores import Rockset\n", "\n", - "loader = TextLoader('../../../state_of_the_union.txt')\n", + "loader = TextLoader('../../modules/state_of_the_union.txt')\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)" diff --git a/docs/docs/integrations/vectorstores/singlestoredb.ipynb b/docs/docs/integrations/vectorstores/singlestoredb.ipynb index 1276a8213c..85a0beff2f 100644 --- a/docs/docs/integrations/vectorstores/singlestoredb.ipynb +++ b/docs/docs/integrations/vectorstores/singlestoredb.ipynb @@ -60,7 +60,7 @@ "outputs": [], "source": [ "# Load text samples\n", - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = 
CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)\n", diff --git a/docs/docs/integrations/vectorstores/sklearn.ipynb b/docs/docs/integrations/vectorstores/sklearn.ipynb index ce397fa647..319ddf9a51 100644 --- a/docs/docs/integrations/vectorstores/sklearn.ipynb +++ b/docs/docs/integrations/vectorstores/sklearn.ipynb @@ -65,7 +65,7 @@ "from langchain.vectorstores import SKLearnVectorStore\n", "from langchain.document_loaders import TextLoader\n", "\n", - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)\n", diff --git a/docs/docs/integrations/vectorstores/sqlitevss.ipynb b/docs/docs/integrations/vectorstores/sqlitevss.ipynb index e670d5683f..0d1e7b97e3 100644 --- a/docs/docs/integrations/vectorstores/sqlitevss.ipynb +++ b/docs/docs/integrations/vectorstores/sqlitevss.ipynb @@ -2,45 +2,54 @@ "cells": [ { "cell_type": "markdown", + "metadata": { + "collapsed": false + }, "source": [ "# sqlite-vss\n", "\n", ">[sqlite-vss](https://alexgarcia.xyz/sqlite-vss/) is an SQLite extension designed for vector search, emphasizing local-first operations and easy integration into applications without external servers. Leveraging the Faiss library, it offers efficient similarity search and clustering capabilities.\n", "\n", "This notebook shows how to use the `SQLiteVSS` vector database." 
- ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "code", "execution_count": null, + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "# You need to install sqlite-vss as a dependency.\n", "%pip install sqlite-vss" - ], - "metadata": { - "collapsed": false - } + ] }, { "cell_type": "markdown", - "source": [ - "### Quickstart" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "### Quickstart" + ] }, { "cell_type": "code", "execution_count": 2, + "metadata": { + "ExecuteTime": { + "end_time": "2023-09-06T14:55:55.370351Z", + "start_time": "2023-09-06T14:55:53.547755Z" + }, + "collapsed": false + }, "outputs": [ { "data": { - "text/plain": "'Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.'" + "text/plain": [ + "'Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \\n\\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. 
\\n\\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \\n\\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.'" + ] }, "execution_count": 2, "metadata": {}, @@ -54,7 +63,7 @@ "from langchain.document_loaders import TextLoader\n", "\n", "# load the document and split it into chunks\n", - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "\n", "# split it into chunks\n", @@ -83,31 +92,33 @@ "\n", "# print results\n", "data[0].page_content" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-09-06T14:55:55.370351Z", - "start_time": "2023-09-06T14:55:53.547755Z" - } - } + ] }, { "cell_type": "markdown", - "source": [ - "### Using existing sqlite connection" - ], "metadata": { "collapsed": false - } + }, + "source": [ + "### Using existing sqlite connection" + ] }, { "cell_type": "code", "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2023-09-06T14:59:22.086252Z", + "start_time": "2023-09-06T14:59:21.693237Z" + }, + "collapsed": false + }, "outputs": [ { "data": { - "text/plain": "'Ketanji Brown Jackson is awesome'" + "text/plain": [ + "'Ketanji Brown Jackson is awesome'" + ] }, "execution_count": 7, "metadata": {}, @@ -121,7 +132,7 @@ "from langchain.document_loaders import TextLoader\n", "\n", "# load the document and split it into chunks\n", - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "\n", "# split it into chunks\n", @@ -147,40 +158,33 @@ "\n", "# print results\n", "data[0].page_content" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - 
"end_time": "2023-09-06T14:59:22.086252Z", - "start_time": "2023-09-06T14:59:21.693237Z" - } - } + ] }, { "cell_type": "code", "execution_count": 13, + "metadata": { + "ExecuteTime": { + "end_time": "2023-09-06T15:01:15.550318Z", + "start_time": "2023-09-06T15:01:15.546428Z" + }, + "collapsed": false + }, "outputs": [], "source": [ "# Cleaning up\n", "import os\n", "os.remove(\"/tmp/vss.db\")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2023-09-06T15:01:15.550318Z", - "start_time": "2023-09-06T15:01:15.546428Z" - } - } + ] }, { "cell_type": "code", "execution_count": null, - "outputs": [], - "source": [], "metadata": { "collapsed": false - } + }, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/docs/docs/integrations/vectorstores/supabase.ipynb b/docs/docs/integrations/vectorstores/supabase.ipynb index ad142aa9ff..24008d9a71 100644 --- a/docs/docs/integrations/vectorstores/supabase.ipynb +++ b/docs/docs/integrations/vectorstores/supabase.ipynb @@ -186,7 +186,7 @@ "from langchain.text_splitter import CharacterTextSplitter\n", "from langchain.document_loaders import TextLoader\n", "\n", - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)" diff --git a/docs/docs/integrations/vectorstores/tair.ipynb b/docs/docs/integrations/vectorstores/tair.ipynb index 8f0f6a22aa..5012d9e61a 100644 --- a/docs/docs/integrations/vectorstores/tair.ipynb +++ b/docs/docs/integrations/vectorstores/tair.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -27,30 +27,13 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "RuntimeError", - 
"evalue": "Error loading ../../../state_of_the_union.txt", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m/opt/homebrew/lib/python3.10/site-packages/langchain/document_loaders/text.py:40\u001b[0m, in \u001b[0;36mTextLoader.load\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m---> 40\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39;49m(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mfile_path, encoding\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mencoding) \u001b[39mas\u001b[39;00m f:\n\u001b[1;32m 41\u001b[0m text \u001b[39m=\u001b[39m f\u001b[39m.\u001b[39mread()\n", - "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '../../../state_of_the_union.txt'", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[30], line 4\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mlangchain\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mdocument_loaders\u001b[39;00m \u001b[39mimport\u001b[39;00m TextLoader\n\u001b[1;32m 3\u001b[0m loader \u001b[39m=\u001b[39m TextLoader(\u001b[39m\"\u001b[39m\u001b[39m../../../state_of_the_union.txt\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m----> 4\u001b[0m documents \u001b[39m=\u001b[39m loader\u001b[39m.\u001b[39;49mload()\n\u001b[1;32m 5\u001b[0m text_splitter \u001b[39m=\u001b[39m CharacterTextSplitter(chunk_size\u001b[39m=\u001b[39m\u001b[39m1000\u001b[39m, chunk_overlap\u001b[39m=\u001b[39m\u001b[39m0\u001b[39m)\n\u001b[1;32m 6\u001b[0m docs \u001b[39m=\u001b[39m text_splitter\u001b[39m.\u001b[39msplit_documents(documents)\n", - "File 
\u001b[0;32m/opt/homebrew/lib/python3.10/site-packages/langchain/document_loaders/text.py:56\u001b[0m, in \u001b[0;36mTextLoader.load\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mError loading \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfile_path\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m) \u001b[39mfrom\u001b[39;00m \u001b[39me\u001b[39;00m\n\u001b[1;32m 55\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[0;32m---> 56\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mRuntimeError\u001b[39;00m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mError loading \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfile_path\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m) \u001b[39mfrom\u001b[39;00m \u001b[39me\u001b[39;00m\n\u001b[1;32m 58\u001b[0m metadata \u001b[39m=\u001b[39m {\u001b[39m\"\u001b[39m\u001b[39msource\u001b[39m\u001b[39m\"\u001b[39m: \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mfile_path}\n\u001b[1;32m 59\u001b[0m \u001b[39mreturn\u001b[39;00m [Document(page_content\u001b[39m=\u001b[39mtext, metadata\u001b[39m=\u001b[39mmetadata)]\n", - "\u001b[0;31mRuntimeError\u001b[0m: Error loading ../../../state_of_the_union.txt" - ] - } - ], + "outputs": [], "source": [ "from langchain.document_loaders import TextLoader\n", "\n", - "loader = TextLoader(\"../../../extras/modules/state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)\n", @@ -74,21 +57,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'docs' is not 
defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[6], line 6\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[39m# drop first if index already exists\u001b[39;00m\n\u001b[1;32m 4\u001b[0m Tair\u001b[39m.\u001b[39mdrop_index(tair_url\u001b[39m=\u001b[39mtair_url)\n\u001b[0;32m----> 6\u001b[0m vector_store \u001b[39m=\u001b[39m Tair\u001b[39m.\u001b[39mfrom_documents(docs, embeddings, tair_url\u001b[39m=\u001b[39mtair_url)\n", - "\u001b[0;31mNameError\u001b[0m: name 'docs' is not defined" - ] - } - ], + "outputs": [], "source": [ "tair_url = \"redis://localhost:6379\"\n", "\n", @@ -107,20 +78,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Document(page_content='We’re going after the criminals who stole billions in relief money meant for small businesses and millions of Americans. \\n\\nAnd tonight, I’m announcing that the Justice Department will name a chief prosecutor for pandemic fraud. \\n\\nBy the end of this year, the deficit will be down to less than half what it was before I took office. \\n\\nThe only president ever to cut the deficit by more than one trillion dollars in a single year. \\n\\nLowering your costs also means demanding more competition. \\n\\nI’m a capitalist, but capitalism without competition isn’t capitalism. \\n\\nIt’s exploitation—and it drives up prices. \\n\\nWhen corporations don’t have to compete, their profits go up, your prices go up, and small businesses and family farmers and ranchers go under. \\n\\nWe see it happening with ocean carriers moving goods in and out of America. 
\\n\\nDuring the pandemic, these foreign-owned companies raised prices by as much as 1,000% and made record profits.', metadata={'source': '../../../state_of_the_union.txt'})" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "query = \"What did the president say about Ketanji Brown Jackson\"\n", "docs = vector_store.similarity_search(query)\n", diff --git a/docs/docs/integrations/vectorstores/tencentvectordb.ipynb b/docs/docs/integrations/vectorstores/tencentvectordb.ipynb index fb9bf232e3..66090f2de7 100644 --- a/docs/docs/integrations/vectorstores/tencentvectordb.ipynb +++ b/docs/docs/integrations/vectorstores/tencentvectordb.ipynb @@ -46,7 +46,7 @@ "metadata": {}, "outputs": [], "source": [ - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)\n", diff --git a/docs/docs/integrations/vectorstores/typesense.ipynb b/docs/docs/integrations/vectorstores/typesense.ipynb index 2daf5cf160..94655a629c 100644 --- a/docs/docs/integrations/vectorstores/typesense.ipynb +++ b/docs/docs/integrations/vectorstores/typesense.ipynb @@ -112,7 +112,7 @@ }, "outputs": [], "source": [ - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)\n", diff --git a/docs/docs/integrations/vectorstores/weaviate.ipynb b/docs/docs/integrations/vectorstores/weaviate.ipynb index 7b25d80d62..98f09e996d 100644 --- a/docs/docs/integrations/vectorstores/weaviate.ipynb +++ b/docs/docs/integrations/vectorstores/weaviate.ipynb @@ -114,7 +114,7 @@ "source": [ "from 
langchain.document_loaders import TextLoader\n", "\n", - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)\n", @@ -342,7 +342,7 @@ "metadata": {}, "outputs": [], "source": [ - "with open(\"../../../state_of_the_union.txt\") as f:\n", + "with open(\"../../modules/state_of_the_union.txt\") as f:\n", " state_of_the_union = f.read()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "texts = text_splitter.split_text(state_of_the_union)" diff --git a/docs/docs/integrations/vectorstores/xata.ipynb b/docs/docs/integrations/vectorstores/xata.ipynb index d04b64afd3..4648d5218d 100644 --- a/docs/docs/integrations/vectorstores/xata.ipynb +++ b/docs/docs/integrations/vectorstores/xata.ipynb @@ -132,7 +132,7 @@ }, "outputs": [], "source": [ - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)\n", diff --git a/docs/docs/integrations/vectorstores/zilliz.ipynb b/docs/docs/integrations/vectorstores/zilliz.ipynb index 1b436d0233..6f4fb2edd3 100644 --- a/docs/docs/integrations/vectorstores/zilliz.ipynb +++ b/docs/docs/integrations/vectorstores/zilliz.ipynb @@ -91,7 +91,7 @@ "source": [ "from langchain.document_loaders import TextLoader\n", "\n", - "loader = TextLoader(\"../../../state_of_the_union.txt\")\n", + "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "documents = loader.load()\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "docs = text_splitter.split_documents(documents)\n", diff --git 
def delete(
    self,
    ids: Optional[List[str]] = None,
    where_str: Optional[str] = None,
    **kwargs: Any,
) -> Optional[bool]:
    """Delete rows by vector ID and/or by an extra SQL condition.

    When both ``ids`` and ``where_str`` are given, a row is deleted only if
    its id is one of ``ids`` AND it satisfies ``where_str``.

    Args:
        ids: List of ids to delete. A row matches when its id equals any
            entry of the list.
        where_str: Raw SQL condition for the ``WHERE`` clause. It is inserted
            into the statement verbatim, so it must never be built from
            untrusted input.
        **kwargs: Other keyword arguments; accepted but not used here.

    Returns:
        Optional[bool]: True if the delete command ran without raising,
        False if it raised (the error is logged).

    Raises:
        AssertionError: If neither ``ids`` nor ``where_str`` selects anything.
    """
    conds = []
    if ids:
        # Match ANY of the ids. AND-ing one equality test per id
        # (``id = 'a' AND id = 'b'``) can never be true for a single row,
        # so multi-id deletes would silently remove nothing.
        # Backslash and single quote are escaped so an id cannot terminate
        # the string literal early.
        quoted_ids = ", ".join(
            "'" + doc_id.replace("\\", "\\\\").replace("'", "\\'") + "'"
            for doc_id in ids
        )
        conds.append(f"{self.config.column_map['id']} IN ({quoted_ids})")
    if where_str:
        # Parenthesize when combined with the id filter so an ``OR`` inside
        # ``where_str`` cannot bypass that filter through operator precedence.
        conds.append(f"({where_str})" if conds else where_str)
    if not conds:
        # Raised explicitly (rather than via ``assert``) so the guard is not
        # stripped under ``python -O``; the exception type is unchanged.
        raise AssertionError(
            "You need to specify where to be deleted! "
            "Either with `ids` or `where_str`"
        )
    qstr = (
        f"DELETE FROM {self.config.database}.{self.config.table} "
        f"WHERE {' AND '.join(conds)}"
    )
    try:
        self.client.command(qstr)
        return True
    except Exception as e:
        # Deliberate best-effort: report failure via the return value.
        logger.error(str(e))
        return False