text-splitters[minor], langchain[minor], community[patch], templates, docs: langchain-text-splitters 0.0.1 (#18346)

pull/18357/head
Bagatur 3 months ago committed by GitHub
parent 7891934173
commit 5efb5c099f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -5,6 +5,7 @@ from typing import Dict
LANGCHAIN_DIRS = [ LANGCHAIN_DIRS = [
"libs/core", "libs/core",
"libs/text-splitters",
"libs/community", "libs/community",
"libs/langchain", "libs/langchain",
"libs/experimental", "libs/experimental",

@ -4,7 +4,7 @@ import tomllib
from packaging.version import parse as parse_version from packaging.version import parse as parse_version
import re import re
MIN_VERSION_LIBS = ["langchain-core", "langchain-community", "langchain"] MIN_VERSION_LIBS = ["langchain-core", "langchain-community", "langchain", "langchain-text-splitters"]
def get_min_version(version: str) -> str: def get_min_version(version: str) -> str:

@ -116,7 +116,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n", "from langchain_text_splitters import CharacterTextSplitter\n",
"from unstructured.partition.pdf import partition_pdf\n", "from unstructured.partition.pdf import partition_pdf\n",
"\n", "\n",
"\n", "\n",

@ -68,7 +68,7 @@
"pdf_pages = loader.load()\n", "pdf_pages = loader.load()\n",
"\n", "\n",
"# Split\n", "# Split\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n", "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n", "\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n", "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",
"all_splits_pypdf = text_splitter.split_documents(pdf_pages)\n", "all_splits_pypdf = text_splitter.split_documents(pdf_pages)\n",

@ -28,9 +28,9 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.chains import RetrievalQA\n", "from langchain.chains import RetrievalQA\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.vectorstores import Chroma\n", "from langchain_community.vectorstores import Chroma\n",
"from langchain_openai import OpenAI, OpenAIEmbeddings\n", "from langchain_openai import OpenAI, OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"llm = OpenAI(temperature=0)" "llm = OpenAI(temperature=0)"
] ]

@ -227,8 +227,8 @@
" BaseCombineDocumentsChain,\n", " BaseCombineDocumentsChain,\n",
" load_qa_with_sources_chain,\n", " load_qa_with_sources_chain,\n",
")\n", ")\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain.tools import BaseTool, DuckDuckGoSearchRun\n", "from langchain.tools import BaseTool, DuckDuckGoSearchRun\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"from pydantic import Field\n", "from pydantic import Field\n",
"\n", "\n",
"\n", "\n",

@ -24,7 +24,7 @@
"source": [ "source": [
"1. Prepare data:\n", "1. Prepare data:\n",
" 1. Upload all python project files using the `langchain_community.document_loaders.TextLoader`. We will call these files the **documents**.\n", " 1. Upload all python project files using the `langchain_community.document_loaders.TextLoader`. We will call these files the **documents**.\n",
" 2. Split all documents to chunks using the `langchain.text_splitter.CharacterTextSplitter`.\n", " 2. Split all documents to chunks using the `langchain_text_splitters.CharacterTextSplitter`.\n",
" 3. Embed chunks and upload them into the DeepLake using `langchain.embeddings.openai.OpenAIEmbeddings` and `langchain_community.vectorstores.DeepLake`\n", " 3. Embed chunks and upload them into the DeepLake using `langchain.embeddings.openai.OpenAIEmbeddings` and `langchain_community.vectorstores.DeepLake`\n",
"2. Question-Answering:\n", "2. Question-Answering:\n",
" 1. Build a chain from `langchain.chat_models.ChatOpenAI` and `langchain.chains.ConversationalRetrievalChain`\n", " 1. Build a chain from `langchain.chat_models.ChatOpenAI` and `langchain.chains.ConversationalRetrievalChain`\n",
@ -621,7 +621,7 @@
} }
], ],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n", "from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"texts = text_splitter.split_documents(docs)\n", "texts = text_splitter.split_documents(docs)\n",

@ -52,12 +52,12 @@
"import os\n", "import os\n",
"\n", "\n",
"from langchain.chains import RetrievalQA\n", "from langchain.chains import RetrievalQA\n",
"from langchain.text_splitter import (\n", "from langchain_community.vectorstores import DeepLake\n",
"from langchain_openai import OpenAI, OpenAIEmbeddings\n",
"from langchain_text_splitters import (\n",
" CharacterTextSplitter,\n", " CharacterTextSplitter,\n",
" RecursiveCharacterTextSplitter,\n", " RecursiveCharacterTextSplitter,\n",
")\n", ")\n",
"from langchain_community.vectorstores import DeepLake\n",
"from langchain_openai import OpenAI, OpenAIEmbeddings\n",
"\n", "\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n", "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
"activeloop_token = getpass.getpass(\"Activeloop Token:\")\n", "activeloop_token = getpass.getpass(\"Activeloop Token:\")\n",

@ -132,7 +132,7 @@
"data = loader.load()\n", "data = loader.load()\n",
"\n", "\n",
"# Split\n", "# Split\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n", "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n", "\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)\n", "text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)\n",
"all_splits = text_splitter.split_documents(data)\n", "all_splits = text_splitter.split_documents(data)\n",

@ -170,8 +170,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.vectorstores import Chroma\n", "from langchain_community.vectorstores import Chroma\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"with open(\"../../state_of_the_union.txt\") as f:\n", "with open(\"../../state_of_the_union.txt\") as f:\n",
" state_of_the_union = f.read()\n", " state_of_the_union = f.read()\n",

@ -124,7 +124,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n", "from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"text_splitter = CharacterTextSplitter.from_tiktoken_encoder(\n", "text_splitter = CharacterTextSplitter.from_tiktoken_encoder(\n",
" chunk_size=7500, chunk_overlap=100\n", " chunk_size=7500, chunk_overlap=100\n",

@ -20,10 +20,10 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.chains import RetrievalQA\n", "from langchain.chains import RetrievalQA\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import Chroma\n", "from langchain_community.vectorstores import Chroma\n",
"from langchain_openai import OpenAIEmbeddings" "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -59,13 +59,13 @@
"from baidubce.auth.bce_credentials import BceCredentials\n", "from baidubce.auth.bce_credentials import BceCredentials\n",
"from baidubce.bce_client_configuration import BceClientConfiguration\n", "from baidubce.bce_client_configuration import BceClientConfiguration\n",
"from langchain.chains.retrieval_qa import RetrievalQA\n", "from langchain.chains.retrieval_qa import RetrievalQA\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain_community.document_loaders.baiducloud_bos_directory import (\n", "from langchain_community.document_loaders.baiducloud_bos_directory import (\n",
" BaiduBOSDirectoryLoader,\n", " BaiduBOSDirectoryLoader,\n",
")\n", ")\n",
"from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings\n", "from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings\n",
"from langchain_community.llms.baidu_qianfan_endpoint import QianfanLLMEndpoint\n", "from langchain_community.llms.baidu_qianfan_endpoint import QianfanLLMEndpoint\n",
"from langchain_community.vectorstores import BESVectorStore" "from langchain_community.vectorstores import BESVectorStore\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter"
] ]
}, },
{ {

@ -36,9 +36,6 @@
"from bs4 import BeautifulSoup as Soup\n", "from bs4 import BeautifulSoup as Soup\n",
"from langchain.retrievers.multi_vector import MultiVectorRetriever\n", "from langchain.retrievers.multi_vector import MultiVectorRetriever\n",
"from langchain.storage import InMemoryByteStore, LocalFileStore\n", "from langchain.storage import InMemoryByteStore, LocalFileStore\n",
"\n",
"# For our example, we'll load docs from the web\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter # noqa\n",
"from langchain_community.document_loaders.recursive_url_loader import (\n", "from langchain_community.document_loaders.recursive_url_loader import (\n",
" RecursiveUrlLoader,\n", " RecursiveUrlLoader,\n",
")\n", ")\n",
@ -46,6 +43,9 @@
"# noqa\n", "# noqa\n",
"from langchain_community.vectorstores import Chroma\n", "from langchain_community.vectorstores import Chroma\n",
"\n", "\n",
"# For our example, we'll load docs from the web\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter # noqa\n",
"\n",
"DOCSTORE_DIR = \".\"\n", "DOCSTORE_DIR = \".\"\n",
"DOCSTORE_ID_KEY = \"doc_id\"" "DOCSTORE_ID_KEY = \"doc_id\""
] ]

@ -51,11 +51,11 @@
"from langchain.chains.base import Chain\n", "from langchain.chains.base import Chain\n",
"from langchain.prompts import PromptTemplate\n", "from langchain.prompts import PromptTemplate\n",
"from langchain.prompts.base import StringPromptTemplate\n", "from langchain.prompts.base import StringPromptTemplate\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.llms import BaseLLM\n", "from langchain_community.llms import BaseLLM\n",
"from langchain_community.vectorstores import Chroma\n", "from langchain_community.vectorstores import Chroma\n",
"from langchain_core.agents import AgentAction, AgentFinish\n", "from langchain_core.agents import AgentAction, AgentFinish\n",
"from langchain_openai import ChatOpenAI, OpenAI, OpenAIEmbeddings\n", "from langchain_openai import ChatOpenAI, OpenAI, OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"from pydantic import BaseModel, Field" "from pydantic import BaseModel, Field"
] ]
}, },

@ -39,7 +39,7 @@
"data = loader.load()\n", "data = loader.load()\n",
"\n", "\n",
"# Split\n", "# Split\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n", "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n", "\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)\n", "text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=0)\n",
"all_splits = text_splitter.split_documents(data)\n", "all_splits = text_splitter.split_documents(data)\n",

@ -2610,7 +2610,7 @@
} }
], ],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n", "from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
"texts = text_splitter.split_documents(docs)" "texts = text_splitter.split_documents(docs)"

@ -281,7 +281,7 @@ Then we can build our index:
```python ```python
from langchain_community.vectorstores import FAISS from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_text_splitters import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter() text_splitter = RecursiveCharacterTextSplitter()
@ -531,7 +531,7 @@ from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import WebBaseLoader from langchain_community.document_loaders import WebBaseLoader
from langchain_openai import OpenAIEmbeddings from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.tools.retriever import create_retriever_tool from langchain.tools.retriever import create_retriever_tool
from langchain_community.tools.tavily_search import TavilySearchResults from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_openai import ChatOpenAI from langchain_openai import ChatOpenAI

@ -643,9 +643,9 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain_community.vectorstores import FAISS\n", "from langchain_community.vectorstores import FAISS\n",
"from langchain_openai import OpenAIEmbeddings\n", "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n", "\n",
"# 2. Load the data: In our case data's already loaded\n", "# 2. Load the data: In our case data's already loaded\n",
"# 3. Anonymize the data before indexing\n", "# 3. Anonymize the data before indexing\n",

@ -215,10 +215,10 @@
"source": [ "source": [
"import requests\n", "import requests\n",
"from langchain.chains import RetrievalQA\n", "from langchain.chains import RetrievalQA\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import Chroma\n", "from langchain_community.vectorstores import Chroma\n",
"from langchain_openai import OpenAI, OpenAIEmbeddings\n", "from langchain_openai import OpenAI, OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"text_file_url = \"https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt\"\n", "text_file_url = \"https://raw.githubusercontent.com/hwchase17/chat-your-data/master/state_of_the_union.txt\"\n",
"\n", "\n",

@ -78,9 +78,9 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.chains import RetrievalQAWithSourcesChain\n", "from langchain.chains import RetrievalQAWithSourcesChain\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.vectorstores import Chroma\n", "from langchain_community.vectorstores import Chroma\n",
"from langchain_openai import OpenAI, OpenAIEmbeddings" "from langchain_openai import OpenAI, OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -62,9 +62,9 @@
"warnings.filterwarnings(\"ignore\")\n", "warnings.filterwarnings(\"ignore\")\n",
"from pprint import pprint\n", "from pprint import pprint\n",
"\n", "\n",
"from langchain.text_splitter import Language\n",
"from langchain_community.document_loaders.generic import GenericLoader\n", "from langchain_community.document_loaders.generic import GenericLoader\n",
"from langchain_community.document_loaders.parsers import LanguageParser" "from langchain_community.document_loaders.parsers import LanguageParser\n",
"from langchain_text_splitters import Language"
] ]
}, },
{ {
@ -323,7 +323,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import (\n", "from langchain_text_splitters import (\n",
" Language,\n", " Language,\n",
" RecursiveCharacterTextSplitter,\n", " RecursiveCharacterTextSplitter,\n",
")" ")"
@ -426,6 +426,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "7fb27b941602401d91542211134fc71a",
"metadata": {}, "metadata": {},
"source": [ "source": [
"## Adding Languages using Tree-sitter Template\n", "## Adding Languages using Tree-sitter Template\n",

@ -168,9 +168,9 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.chains import RetrievalQA\n", "from langchain.chains import RetrievalQA\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain_community.vectorstores import FAISS\n", "from langchain_community.vectorstores import FAISS\n",
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings" "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter"
] ]
}, },
{ {

@ -1463,7 +1463,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n", "from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"text_splitter = CharacterTextSplitter()" "text_splitter = CharacterTextSplitter()"
] ]

@ -82,7 +82,7 @@
"# Map reduce example\n", "# Map reduce example\n",
"from langchain.chains.mapreduce import MapReduceChain\n", "from langchain.chains.mapreduce import MapReduceChain\n",
"from langchain.prompts import PromptTemplate\n", "from langchain.prompts import PromptTemplate\n",
"from langchain.text_splitter import CharacterTextSplitter\n", "from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"_prompt = \"\"\"Write a concise summary of the following:\n", "_prompt = \"\"\"Write a concise summary of the following:\n",
"\n", "\n",

@ -68,7 +68,7 @@ for OpenAI LLMs.
You can also use it to count tokens when splitting documents with You can also use it to count tokens when splitting documents with
```python ```python
from langchain.text_splitter import CharacterTextSplitter from langchain_text_splitters import CharacterTextSplitter
CharacterTextSplitter.from_tiktoken_encoder(...) CharacterTextSplitter.from_tiktoken_encoder(...)
``` ```
For a more detailed walkthrough of this, see [this notebook](/docs/modules/data_connection/document_transformers/split_by_token#tiktoken) For a more detailed walkthrough of this, see [this notebook](/docs/modules/data_connection/document_transformers/split_by_token#tiktoken)

@ -34,7 +34,7 @@ The vector store is a simple wrapper around Elasticsearch. It provides a simple
from langchain_elasticsearch import ElasticsearchStore from langchain_elasticsearch import ElasticsearchStore
from langchain_community.document_loaders import TextLoader from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter from langchain_text_splitters import CharacterTextSplitter
loader = TextLoader("./state_of_the_union.txt") loader = TextLoader("./state_of_the_union.txt")
documents = loader.load() documents = loader.load()

@ -87,9 +87,9 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"import requests\n", "import requests\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain_community.vectorstores import FAISS\n", "from langchain_community.vectorstores import FAISS\n",
"from langchain_openai import OpenAIEmbeddings\n", "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n", "\n",
"\n", "\n",
"def get_wikipedia_page(title: str):\n", "def get_wikipedia_page(title: str):\n",

@ -16,7 +16,7 @@ pip install spacy
See a [usage example](/docs/modules/data_connection/document_transformers/split_by_token#spacy). See a [usage example](/docs/modules/data_connection/document_transformers/split_by_token#spacy).
```python ```python
from langchain.text_splitter import SpacyTextSplitter from langchain_text_splitters import SpacyTextSplitter
``` ```
## Text Embedding Models ## Text Embedding Models

@ -192,7 +192,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n", "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n", "\n",
"chunk_size = 4096\n", "chunk_size = 4096\n",
"docs_new = []\n", "docs_new = []\n",

@ -301,10 +301,10 @@
} }
], ],
"source": [ "source": [
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.embeddings import CohereEmbeddings\n", "from langchain_community.embeddings import CohereEmbeddings\n",
"from langchain_community.vectorstores import FAISS\n", "from langchain_community.vectorstores import FAISS\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n", "\n",
"documents = TextLoader(\"../../modules/state_of_the_union.txt\").load()\n", "documents = TextLoader(\"../../modules/state_of_the_union.txt\").load()\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)\n", "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)\n",

@ -288,10 +288,10 @@
} }
], ],
"source": [ "source": [
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import FAISS\n", "from langchain_community.vectorstores import FAISS\n",
"from langchain_openai import OpenAIEmbeddings\n", "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n", "\n",
"documents = TextLoader(\n", "documents = TextLoader(\n",
" \"../../modules/state_of_the_union.txt\",\n", " \"../../modules/state_of_the_union.txt\",\n",

@ -52,10 +52,10 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores.jaguar import Jaguar\n", "from langchain_community.vectorstores.jaguar import Jaguar\n",
"from langchain_openai import OpenAIEmbeddings\n", "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"\"\"\" \n", "\"\"\" \n",
"Load a text file into a set of documents \n", "Load a text file into a set of documents \n",

@ -282,10 +282,10 @@
} }
], ],
"source": [ "source": [
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import FAISS\n", "from langchain_community.vectorstores import FAISS\n",
"from langchain_openai import OpenAIEmbeddings\n", "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n", "\n",
"documents = TextLoader(\n", "documents = TextLoader(\n",
" \"../../modules/state_of_the_union.txt\",\n", " \"../../modules/state_of_the_union.txt\",\n",

@ -28,10 +28,10 @@
"import logging\n", "import logging\n",
"\n", "\n",
"from langchain.retrievers import RePhraseQueryRetriever\n", "from langchain.retrievers import RePhraseQueryRetriever\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain_community.document_loaders import WebBaseLoader\n", "from langchain_community.document_loaders import WebBaseLoader\n",
"from langchain_community.vectorstores import Chroma\n", "from langchain_community.vectorstores import Chroma\n",
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings" "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter"
] ]
}, },
{ {

@ -89,12 +89,12 @@
"from langchain.chains import ConversationalRetrievalChain\n", "from langchain.chains import ConversationalRetrievalChain\n",
"from langchain.chains.query_constructor.base import AttributeInfo\n", "from langchain.chains.query_constructor.base import AttributeInfo\n",
"from langchain.retrievers.self_query.base import SelfQueryRetriever\n", "from langchain.retrievers.self_query.base import SelfQueryRetriever\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.embeddings import FakeEmbeddings\n", "from langchain_community.embeddings import FakeEmbeddings\n",
"from langchain_community.vectorstores import Vectara\n", "from langchain_community.vectorstores import Vectara\n",
"from langchain_core.documents import Document\n", "from langchain_core.documents import Document\n",
"from langchain_openai import OpenAI" "from langchain_openai import OpenAI\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -50,10 +50,10 @@
"# We want to use OpenAIEmbeddings so we have to get the OpenAI API Key.\n", "# We want to use OpenAIEmbeddings so we have to get the OpenAI API Key.\n",
"os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n", "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")\n",
"\n", "\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import SingleStoreDB\n", "from langchain_community.vectorstores import SingleStoreDB\n",
"from langchain_openai import OpenAIEmbeddings\n", "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",

@ -21,10 +21,10 @@
"source": [ "source": [
"from langchain.agents import Tool\n", "from langchain.agents import Tool\n",
"from langchain.chains import RetrievalQA\n", "from langchain.chains import RetrievalQA\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import PyPDFLoader\n", "from langchain_community.document_loaders import PyPDFLoader\n",
"from langchain_community.vectorstores import FAISS\n", "from langchain_community.vectorstores import FAISS\n",
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n", "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"from pydantic import BaseModel, Field" "from pydantic import BaseModel, Field"
] ]
}, },

@ -51,9 +51,9 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.vectorstores import DeepLake\n", "from langchain_community.vectorstores import DeepLake\n",
"from langchain_openai import OpenAIEmbeddings" "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -129,12 +129,12 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.vectorstores import (\n", "from langchain_community.vectorstores import (\n",
" AlibabaCloudOpenSearch,\n", " AlibabaCloudOpenSearch,\n",
" AlibabaCloudOpenSearchSettings,\n", " AlibabaCloudOpenSearchSettings,\n",
")\n", ")\n",
"from langchain_openai import OpenAIEmbeddings" "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -23,9 +23,9 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.vectorstores import AnalyticDB\n", "from langchain_community.vectorstores import AnalyticDB\n",
"from langchain_openai import OpenAIEmbeddings" "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -148,8 +148,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"loader = TextLoader(\"../../modules/state_of_the_union.txtn.txtn.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txtn.txtn.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",

@ -70,7 +70,6 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.chains import RetrievalQA\n", "from langchain.chains import RetrievalQA\n",
"from langchain.text_splitter import TokenTextSplitter\n",
"from langchain_community.document_loaders import (\n", "from langchain_community.document_loaders import (\n",
" DirectoryLoader,\n", " DirectoryLoader,\n",
" UnstructuredMarkdownLoader,\n", " UnstructuredMarkdownLoader,\n",
@ -80,6 +79,7 @@
" ApacheDorisSettings,\n", " ApacheDorisSettings,\n",
")\n", ")\n",
"from langchain_openai import OpenAI, OpenAIEmbeddings\n", "from langchain_openai import OpenAI, OpenAIEmbeddings\n",
"from langchain_text_splitters import TokenTextSplitter\n",
"\n", "\n",
"update_vectordb = False" "update_vectordb = False"
] ]

@ -91,13 +91,13 @@
"from datasets import (\n", "from datasets import (\n",
" load_dataset,\n", " load_dataset,\n",
")\n", ")\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain_community.document_loaders import PyPDFLoader\n", "from langchain_community.document_loaders import PyPDFLoader\n",
"from langchain_core.documents import Document\n", "from langchain_core.documents import Document\n",
"from langchain_core.output_parsers import StrOutputParser\n", "from langchain_core.output_parsers import StrOutputParser\n",
"from langchain_core.prompts import ChatPromptTemplate\n", "from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_core.runnables import RunnablePassthrough\n", "from langchain_core.runnables import RunnablePassthrough\n",
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings" "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter"
] ]
}, },
{ {

@ -71,9 +71,9 @@
"source": [ "source": [
"import time\n", "import time\n",
"\n", "\n",
"from langchain.text_splitter import SpacyTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import AtlasDB" "from langchain_community.vectorstores import AtlasDB\n",
"from langchain_text_splitters import SpacyTextSplitter"
] ]
}, },
{ {

@ -28,9 +28,9 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import AwaDB" "from langchain_community.vectorstores import AwaDB\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -130,13 +130,13 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores.azure_cosmos_db import (\n", "from langchain_community.vectorstores.azure_cosmos_db import (\n",
" AzureCosmosDBVectorSearch,\n", " AzureCosmosDBVectorSearch,\n",
" CosmosDBSimilarityType,\n", " CosmosDBSimilarityType,\n",
")\n", ")\n",
"from langchain_openai import OpenAIEmbeddings\n", "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"SOURCE_FILE_NAME = \"../../modules/state_of_the_union.txt\"\n", "SOURCE_FILE_NAME = \"../../modules/state_of_the_union.txt\"\n",
"\n", "\n",

@ -234,8 +234,8 @@
} }
], ],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"loader = TextLoader(\"../../modules/state_of_the_union.txt\", encoding=\"utf-8\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\", encoding=\"utf-8\")\n",
"\n", "\n",

@ -108,8 +108,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",

@ -77,8 +77,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n", "loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",

@ -74,13 +74,13 @@
"from datasets import (\n", "from datasets import (\n",
" load_dataset,\n", " load_dataset,\n",
")\n", ")\n",
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
"from langchain_community.document_loaders import PyPDFLoader\n", "from langchain_community.document_loaders import PyPDFLoader\n",
"from langchain_core.documents import Document\n", "from langchain_core.documents import Document\n",
"from langchain_core.output_parsers import StrOutputParser\n", "from langchain_core.output_parsers import StrOutputParser\n",
"from langchain_core.prompts import ChatPromptTemplate\n", "from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_core.runnables import RunnablePassthrough\n", "from langchain_core.runnables import RunnablePassthrough\n",
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings" "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
"from langchain_text_splitters import RecursiveCharacterTextSplitter"
] ]
}, },
{ {

@ -65,12 +65,12 @@
], ],
"source": [ "source": [
"# import\n", "# import\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.embeddings.sentence_transformer import (\n", "from langchain_community.embeddings.sentence_transformer import (\n",
" SentenceTransformerEmbeddings,\n", " SentenceTransformerEmbeddings,\n",
")\n", ")\n",
"from langchain_community.vectorstores import Chroma\n", "from langchain_community.vectorstores import Chroma\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"# load the document and split it into chunks\n", "# load the document and split it into chunks\n",
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",

@ -79,9 +79,9 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"# Import the required modules\n", "# Import the required modules\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import Clarifai" "from langchain_community.vectorstores import Clarifai\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -101,9 +101,9 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.vectorstores import Clickhouse, ClickhouseSettings\n", "from langchain_community.vectorstores import Clickhouse, ClickhouseSettings\n",
"from langchain_openai import OpenAIEmbeddings" "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -101,9 +101,9 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.embeddings.dashscope import DashScopeEmbeddings\n", "from langchain_community.embeddings.dashscope import DashScopeEmbeddings\n",
"from langchain_community.vectorstores import DashVector" "from langchain_community.vectorstores import DashVector\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -59,9 +59,9 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_openai import OpenAIEmbeddings\n", "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",

@ -68,10 +68,10 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import Dingo\n", "from langchain_community.vectorstores import Dingo\n",
"from langchain_openai import OpenAIEmbeddings" "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {
@ -130,10 +130,10 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import Dingo\n", "from langchain_community.vectorstores import Dingo\n",
"from langchain_openai import OpenAIEmbeddings" "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -73,10 +73,10 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import DocArrayHnswSearch\n", "from langchain_community.vectorstores import DocArrayHnswSearch\n",
"from langchain_openai import OpenAIEmbeddings" "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -70,10 +70,10 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import DocArrayInMemorySearch\n", "from langchain_community.vectorstores import DocArrayInMemorySearch\n",
"from langchain_openai import OpenAIEmbeddings" "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -216,8 +216,8 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",

@ -67,8 +67,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",

@ -88,10 +88,10 @@
"# Uncomment the following line if you need to initialize FAISS with no AVX2 optimization\n", "# Uncomment the following line if you need to initialize FAISS with no AVX2 optimization\n",
"# os.environ['FAISS_NO_AVX2'] = '1'\n", "# os.environ['FAISS_NO_AVX2'] = '1'\n",
"\n", "\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import FAISS\n", "from langchain_community.vectorstores import FAISS\n",
"from langchain_openai import OpenAIEmbeddings\n", "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",

@ -56,10 +56,10 @@
"# Uncomment the following line if you need to initialize FAISS with no AVX2 optimization\n", "# Uncomment the following line if you need to initialize FAISS with no AVX2 optimization\n",
"# os.environ['FAISS_NO_AVX2'] = '1'\n", "# os.environ['FAISS_NO_AVX2'] = '1'\n",
"\n", "\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import FAISS\n", "from langchain_community.vectorstores import FAISS\n",
"from langchain_openai import OpenAIEmbeddings\n", "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"loader = TextLoader(\"../../../extras/modules/state_of_the_union.txt\")\n", "loader = TextLoader(\"../../../extras/modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",

@ -184,8 +184,8 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"loader = TextLoader(\"./state_of_the_union.txt\")\n", "loader = TextLoader(\"./state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",

@ -97,10 +97,10 @@
"source": [ "source": [
"import os\n", "import os\n",
"\n", "\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores.hippo import Hippo\n", "from langchain_community.vectorstores.hippo import Hippo\n",
"from langchain_openai import ChatOpenAI, OpenAIEmbeddings" "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -33,9 +33,9 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.vectorstores import Hologres\n", "from langchain_community.vectorstores import Hologres\n",
"from langchain_openai import OpenAIEmbeddings" "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -56,13 +56,13 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.chains import RetrievalQAWithSourcesChain\n", "from langchain.chains import RetrievalQAWithSourcesChain\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores.jaguar import Jaguar\n", "from langchain_community.vectorstores.jaguar import Jaguar\n",
"from langchain_core.output_parsers import StrOutputParser\n", "from langchain_core.output_parsers import StrOutputParser\n",
"from langchain_core.prompts import ChatPromptTemplate\n", "from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_core.runnables import RunnablePassthrough\n", "from langchain_core.runnables import RunnablePassthrough\n",
"from langchain_openai import ChatOpenAI, OpenAI, OpenAIEmbeddings\n", "from langchain_openai import ChatOpenAI, OpenAI, OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"\"\"\" \n", "\"\"\" \n",
"Load a text file into a set of documents \n", "Load a text file into a set of documents \n",

@ -114,14 +114,14 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.docstore.document import Document\n", "from langchain.docstore.document import Document\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import (\n", "from langchain_community.vectorstores import (\n",
" DistanceStrategy,\n", " DistanceStrategy,\n",
" Kinetica,\n", " Kinetica,\n",
" KineticaSettings,\n", " KineticaSettings,\n",
")\n", ")\n",
"from langchain_openai import OpenAIEmbeddings" "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -104,7 +104,7 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.document_loaders import TextLoader\n", "from langchain.document_loaders import TextLoader\n",
"from langchain.text_splitter import CharacterTextSplitter\n", "from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",

@ -107,11 +107,11 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.embeddings import OpenAIEmbeddings\n", "from langchain_community.embeddings import OpenAIEmbeddings\n",
"from langchain_community.vectorstores import Lantern\n", "from langchain_community.vectorstores import Lantern\n",
"from langchain_core.documents import Document" "from langchain_core.documents import Document\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -38,9 +38,9 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import Marqo" "from langchain_community.vectorstores import Marqo\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -126,9 +126,9 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.vectorstores import Meilisearch\n", "from langchain_community.vectorstores import Meilisearch\n",
"from langchain_openai import OpenAIEmbeddings\n", "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"embeddings = OpenAIEmbeddings()" "embeddings = OpenAIEmbeddings()"
] ]

@ -66,10 +66,10 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import Milvus\n", "from langchain_community.vectorstores import Milvus\n",
"from langchain_openai import OpenAIEmbeddings" "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {
@ -204,6 +204,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "7fb27b941602401d91542211134fc71a",
"metadata": { "metadata": {
"collapsed": false, "collapsed": false,
"pycharm": { "pycharm": {
@ -221,6 +222,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 2,
"id": "acae54e37e7d407bbb7b55eff062a284",
"metadata": { "metadata": {
"collapsed": false, "collapsed": false,
"pycharm": { "pycharm": {
@ -246,6 +248,7 @@
}, },
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "9a63283cbaf04dbcab1f6479b197f3a8",
"metadata": { "metadata": {
"collapsed": false, "collapsed": false,
"pycharm": { "pycharm": {
@ -267,6 +270,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 3,
"id": "8dd0d8092fe74a7c96281538738b07e2",
"metadata": { "metadata": {
"collapsed": false, "collapsed": false,
"pycharm": { "pycharm": {
@ -295,6 +299,7 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 4,
"id": "72eea5119410473aa328ad9291626812",
"metadata": { "metadata": {
"collapsed": false, "collapsed": false,
"pycharm": { "pycharm": {

@ -143,10 +143,10 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import MomentoVectorIndex\n", "from langchain_community.vectorstores import MomentoVectorIndex\n",
"from langchain_openai import OpenAIEmbeddings" "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -171,7 +171,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import RecursiveCharacterTextSplitter\n", "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
"\n", "\n",
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)\n", "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)\n",
"docs = text_splitter.split_documents(data)" "docs = text_splitter.split_documents(data)"

@ -98,10 +98,10 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import MyScale\n", "from langchain_community.vectorstores import MyScale\n",
"from langchain_openai import OpenAIEmbeddings" "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -73,10 +73,10 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.docstore.document import Document\n", "from langchain.docstore.document import Document\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import Neo4jVector\n", "from langchain_community.vectorstores import Neo4jVector\n",
"from langchain_openai import OpenAIEmbeddings" "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -68,10 +68,10 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import OpenSearchVectorSearch\n", "from langchain_community.vectorstores import OpenSearchVectorSearch\n",
"from langchain_openai import OpenAIEmbeddings" "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -82,10 +82,10 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.docstore.document import Document\n", "from langchain.docstore.document import Document\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import PGEmbedding\n", "from langchain_community.vectorstores import PGEmbedding\n",
"from langchain_openai import OpenAIEmbeddings" "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -27,10 +27,10 @@
"from typing import List\n", "from typing import List\n",
"\n", "\n",
"from langchain.docstore.document import Document\n", "from langchain.docstore.document import Document\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.embeddings.fake import FakeEmbeddings\n", "from langchain_community.embeddings.fake import FakeEmbeddings\n",
"from langchain_community.vectorstores.pgvecto_rs import PGVecto_rs" "from langchain_community.vectorstores.pgvecto_rs import PGVecto_rs\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -101,10 +101,10 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.docstore.document import Document\n", "from langchain.docstore.document import Document\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores.pgvector import PGVector\n", "from langchain_community.vectorstores.pgvector import PGVector\n",
"from langchain_openai import OpenAIEmbeddings" "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -52,9 +52,9 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_openai import OpenAIEmbeddings\n", "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",

@ -78,10 +78,10 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import Qdrant\n", "from langchain_community.vectorstores import Qdrant\n",
"from langchain_openai import OpenAIEmbeddings" "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -108,10 +108,10 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import Rockset\n", "from langchain_community.vectorstores import Rockset\n",
"from langchain_openai import OpenAIEmbeddings\n", "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",

@ -110,10 +110,10 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.docstore.document import Document\n", "from langchain.docstore.document import Document\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores.hanavector import HanaDB\n", "from langchain_community.vectorstores.hanavector import HanaDB\n",
"from langchain_openai import OpenAIEmbeddings\n", "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"text_documents = TextLoader(\"../../modules/state_of_the_union.txt\").load()\n", "text_documents = TextLoader(\"../../modules/state_of_the_union.txt\").load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n",

@ -59,10 +59,10 @@
} }
], ],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.embeddings import HuggingFaceEmbeddings\n", "from langchain_community.embeddings import HuggingFaceEmbeddings\n",
"from langchain_community.vectorstores import ScaNN\n", "from langchain_community.vectorstores import ScaNN\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"loader = TextLoader(\"state_of_the_union.txt\")\n", "loader = TextLoader(\"state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",

@ -61,8 +61,8 @@
} }
], ],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",

@ -46,10 +46,10 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import SingleStoreDB\n", "from langchain_community.vectorstores import SingleStoreDB\n",
"from langchain_openai import OpenAIEmbeddings" "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -60,10 +60,10 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import SKLearnVectorStore\n", "from langchain_community.vectorstores import SKLearnVectorStore\n",
"from langchain_openai import OpenAIEmbeddings\n", "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",

@ -69,12 +69,12 @@
} }
], ],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.embeddings.sentence_transformer import (\n", "from langchain_community.embeddings.sentence_transformer import (\n",
" SentenceTransformerEmbeddings,\n", " SentenceTransformerEmbeddings,\n",
")\n", ")\n",
"from langchain_community.vectorstores import SQLiteVSS\n", "from langchain_community.vectorstores import SQLiteVSS\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"# load the document and split it into chunks\n", "# load the document and split it into chunks\n",
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
@ -146,12 +146,12 @@
} }
], ],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.embeddings.sentence_transformer import (\n", "from langchain_community.embeddings.sentence_transformer import (\n",
" SentenceTransformerEmbeddings,\n", " SentenceTransformerEmbeddings,\n",
")\n", ")\n",
"from langchain_community.vectorstores import SQLiteVSS\n", "from langchain_community.vectorstores import SQLiteVSS\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"# load the document and split it into chunks\n", "# load the document and split it into chunks\n",
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",

@ -58,7 +58,6 @@
], ],
"source": [ "source": [
"from langchain.chains import RetrievalQA\n", "from langchain.chains import RetrievalQA\n",
"from langchain.text_splitter import TokenTextSplitter\n",
"from langchain_community.document_loaders import (\n", "from langchain_community.document_loaders import (\n",
" DirectoryLoader,\n", " DirectoryLoader,\n",
" UnstructuredMarkdownLoader,\n", " UnstructuredMarkdownLoader,\n",
@ -66,6 +65,7 @@
"from langchain_community.vectorstores import StarRocks\n", "from langchain_community.vectorstores import StarRocks\n",
"from langchain_community.vectorstores.starrocks import StarRocksSettings\n", "from langchain_community.vectorstores.starrocks import StarRocksSettings\n",
"from langchain_openai import OpenAI, OpenAIEmbeddings\n", "from langchain_openai import OpenAI, OpenAIEmbeddings\n",
"from langchain_text_splitters import TokenTextSplitter\n",
"\n", "\n",
"update_vectordb = False" "update_vectordb = False"
] ]

@ -183,8 +183,8 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n", "loader = TextLoader(\"../../modules/state_of_the_union.txt\")\n",
"documents = loader.load()\n", "documents = loader.load()\n",

@ -73,10 +73,10 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.embeddings import HuggingFaceEmbeddings\n", "from langchain_community.embeddings import HuggingFaceEmbeddings\n",
"from langchain_community.vectorstores import SurrealDBStore" "from langchain_community.vectorstores import SurrealDBStore\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -20,9 +20,9 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.embeddings.fake import FakeEmbeddings\n", "from langchain_community.embeddings.fake import FakeEmbeddings\n",
"from langchain_community.vectorstores import Tair" "from langchain_community.vectorstores import Tair\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -33,11 +33,11 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.embeddings.fake import FakeEmbeddings\n", "from langchain_community.embeddings.fake import FakeEmbeddings\n",
"from langchain_community.vectorstores import TencentVectorDB\n", "from langchain_community.vectorstores import TencentVectorDB\n",
"from langchain_community.vectorstores.tencentvectordb import ConnectionParams" "from langchain_community.vectorstores.tencentvectordb import ConnectionParams\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -85,10 +85,10 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import Tigris\n", "from langchain_community.vectorstores import Tigris\n",
"from langchain_openai import OpenAIEmbeddings" "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -43,10 +43,10 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.embeddings import HuggingFaceEmbeddings\n", "from langchain_community.embeddings import HuggingFaceEmbeddings\n",
"from langchain_community.vectorstores import TileDB\n", "from langchain_community.vectorstores import TileDB\n",
"from langchain_text_splitters import CharacterTextSplitter\n",
"\n", "\n",
"raw_documents = TextLoader(\"../../modules/state_of_the_union.txt\").load()\n", "raw_documents = TextLoader(\"../../modules/state_of_the_union.txt\").load()\n",
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",

@ -123,11 +123,11 @@
"from datetime import datetime, timedelta\n", "from datetime import datetime, timedelta\n",
"\n", "\n",
"from langchain.docstore.document import Document\n", "from langchain.docstore.document import Document\n",
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.document_loaders.json_loader import JSONLoader\n", "from langchain_community.document_loaders.json_loader import JSONLoader\n",
"from langchain_community.vectorstores.timescalevector import TimescaleVector\n", "from langchain_community.vectorstores.timescalevector import TimescaleVector\n",
"from langchain_openai import OpenAIEmbeddings" "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -84,10 +84,10 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import Typesense\n", "from langchain_community.vectorstores import Typesense\n",
"from langchain_openai import OpenAIEmbeddings" "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

@ -55,10 +55,10 @@
}, },
"outputs": [], "outputs": [],
"source": [ "source": [
"from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain_community.document_loaders import TextLoader\n", "from langchain_community.document_loaders import TextLoader\n",
"from langchain_community.vectorstores import USearch\n", "from langchain_community.vectorstores import USearch\n",
"from langchain_openai import OpenAIEmbeddings" "from langchain_openai import OpenAIEmbeddings\n",
"from langchain_text_splitters import CharacterTextSplitter"
] ]
}, },
{ {

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save