{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Atlas\n",
"\n",
"\n",
">[Atlas](https://docs.nomic.ai/index.html) is a platform for interacting with both small and internet scale unstructured datasets by `Nomic`. \n",
"\n",
"This notebook shows you how to use functionality related to the `AtlasDB` vectorstore."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"!pip install spacy"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"is_executing": true
},
"scrolled": true,
"tags": []
},
"outputs": [],
"source": [
"!python3 -m spacy download en_core_web_sm"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"!pip install nomic"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"pycharm": {
"is_executing": true
},
"tags": []
},
"outputs": [],
"source": [
"import time\n",
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
"from langchain.text_splitter import SpacyTextSplitter\n",
"from langchain.vectorstores import AtlasDB\n",
"from langchain.document_loaders import TextLoader"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"ATLAS_TEST_API_KEY = \"7xDPkYXSYDc1_ErdTPIcoAR9RNd8YDlkS3nVNXcVoIMZ6\""
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
"documents = loader.load()\n",
"text_splitter = SpacyTextSplitter(separator=\"|\")\n",
"texts = []\n",
"for doc in text_splitter.split_documents(documents):\n",
" texts.extend(doc.page_content.split(\"|\"))\n",
"\n",
"texts = [e.strip() for e in texts]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"pycharm": {
"is_executing": true
},
"tags": []
},
"outputs": [],
"source": [
"db = AtlasDB.from_texts(\n",
" texts=texts,\n",
" name=\"test_index_\" + str(time.time()), # unique name for your vector store\n",
" description=\"test_index\", # a description for your vector store\n",
" api_key=ATLAS_TEST_API_KEY,\n",
" index_kwargs={\"build_topic_model\": True},\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"db.project.wait_for_project_lock()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" test_index_1677255228.136989\n",
"
\n",
" A description for your project 508 datums inserted.\n",
"
\n",
" 1 index built.\n",
"
Projections\n",
"