Harrison/0083 (#996)

Co-authored-by: Harrison Chase <harrisonchase@Harrisons-MBP.attlocal.net>
makefile-update-1
Harrison Chase 1 year ago committed by GitHub
parent b7747017d7
commit e51fad1488

@@ -21,28 +21,83 @@
 "from langchain.vectorstores.faiss import FAISS\n",
 "from langchain.text_splitter import CharacterTextSplitter\n",
 "from langchain.llms import OpenAI\n",
-"from langchain.chains import ChatVectorDBChain"
+"from langchain.chains import ChatVectorDBChain\n",
+"from langchain.document_loaders import TextLoader"
+]
+},
+{
+"cell_type": "markdown",
+"id": "cdff94be",
+"metadata": {},
+"source": [
+"Load in documents. You can replace this with a loader for whatever type of data you want"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 2,
+"id": "01c46e92",
+"metadata": {},
+"outputs": [],
+"source": [
+"loader = TextLoader('../../state_of_the_union.txt')\n",
+"documents = loader.load()"
+]
+},
+{
+"cell_type": "markdown",
+"id": "e9be4779",
+"metadata": {},
+"source": [
+"If you had multiple loaders that you wanted to combine, you do something like:"
 ]
 },
 {
 "cell_type": "code",
-"execution_count": 4,
+"execution_count": 3,
+"id": "433363a5",
+"metadata": {},
+"outputs": [],
+"source": [
+"# loaders = [....]\n",
+"# docs = []\n",
+"# for loader in loaders:\n",
+"# docs.extend(loader.load())"
+]
+},
+{
+"cell_type": "markdown",
+"id": "239475d2",
+"metadata": {},
+"source": [
+"We now split the documents, create embeddings for them, and put them in a vectorstore. This allows us to do semantic search over them."
+]
+},
+{
+"cell_type": "code",
+"execution_count": 6,
 "id": "a8930cf7",
 "metadata": {},
 "outputs": [],
 "source": [
-"with open('../../state_of_the_union.txt') as f:\n",
-"    state_of_the_union = f.read()\n",
 "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
-"texts = text_splitter.split_text(state_of_the_union)\n",
+"documents = text_splitter.split_documents(documents)\n",
 "\n",
 "embeddings = OpenAIEmbeddings()\n",
-"vectorstore = FAISS.from_texts(texts, embeddings)"
+"vectorstore = FAISS.from_documents(documents, embeddings)"
+]
+},
+{
+"cell_type": "markdown",
+"id": "3c96b118",
+"metadata": {},
+"source": [
+"We now initialize the ChatVectorDBChain"
 ]
 },
 {
 "cell_type": "code",
-"execution_count": 5,
+"execution_count": 7,
 "id": "7b4110f3",
 "metadata": {},
 "outputs": [],
@@ -157,7 +212,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.10.9"
+"version": "3.9.1"
 }
 },
 "nbformat": 4,

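Taken together, the notebook changes above replace the raw with open(...) read with the new TextLoader and switch the splitter and FAISS calls to their Document-based variants. A minimal sketch of the resulting end-to-end flow, assuming the OpenAIEmbeddings import (which sits outside the hunk shown) and an OpenAI API key in the environment; the question string is only a placeholder:

from langchain.chains import ChatVectorDBChain
from langchain.document_loaders import TextLoader
from langchain.embeddings import OpenAIEmbeddings  # assumed import, not visible in the hunk above
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores.faiss import FAISS

# Load the file into Document objects instead of reading it with open()
documents = TextLoader('../../state_of_the_union.txt').load()

# Split into ~1000-character chunks and index the chunks in FAISS
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
documents = text_splitter.split_documents(documents)
vectorstore = FAISS.from_documents(documents, OpenAIEmbeddings())

# Ask a question against the indexed chunks, starting from an empty chat history
qa = ChatVectorDBChain.from_llm(OpenAI(temperature=0), vectorstore)
result = qa({"question": "What did the president say about the economy?", "chat_history": []})
print(result["answer"])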
@@ -23,6 +23,7 @@ from langchain.document_loaders.readthedocs import ReadTheDocsLoader
 from langchain.document_loaders.roam import RoamLoader
 from langchain.document_loaders.s3_directory import S3DirectoryLoader
 from langchain.document_loaders.s3_file import S3FileLoader
+from langchain.document_loaders.text import TextLoader
 from langchain.document_loaders.unstructured import UnstructuredFileLoader
 from langchain.document_loaders.url import UnstructuredURLLoader
 from langchain.document_loaders.web_base import WebBaseLoader
@@ -44,6 +45,7 @@ __all__ = [
     "RoamLoader",
     "YoutubeLoader",
     "S3FileLoader",
+    "TextLoader",
     "S3DirectoryLoader",
     "GCSFileLoader",
     "GCSDirectoryLoader",

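The one-line addition to the document_loaders __init__ above simply re-exports the new class, so the package-level import used in the notebook and the module-level import resolve to the same object:

# Package-level import enabled by the __init__ change above
from langchain.document_loaders import TextLoader
# Module-level import that works regardless of the re-export
from langchain.document_loaders.text import TextLoader as TextLoaderFromModule

assert TextLoader is TextLoaderFromModule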
@@ -0,0 +1,20 @@
+"""Load text files."""
+from typing import List
+
+from langchain.docstore.document import Document
+from langchain.document_loaders.base import BaseLoader
+
+
+class TextLoader(BaseLoader):
+    """Load text files."""
+
+    def __init__(self, file_path: str):
+        """Initialize with file path."""
+        self.file_path = file_path
+
+    def load(self) -> List[Document]:
+        """Load from file path."""
+        with open(self.file_path) as f:
+            text = f.read()
+        metadata = {"source": self.file_path}
+        return [Document(page_content=text, metadata=metadata)]

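The new loader above (its module path is inferred from the import line langchain.document_loaders.text) reads a single file eagerly and wraps it in one Document whose metadata records the source path. A short usage sketch; 'notes.txt' is purely an illustrative file name:

from langchain.document_loaders import TextLoader

docs = TextLoader('notes.txt').load()   # 'notes.txt' stands in for any UTF-8 text file
assert len(docs) == 1                   # one Document per file
print(docs[0].metadata)                 # {'source': 'notes.txt'}
print(docs[0].page_content[:80])        # start of the file's contents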
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "langchain"
-version = "0.0.82"
+version = "0.0.83"
 description = "Building applications with LLMs through composability"
 authors = []
 license = "MIT"
