AtlasDB vector store documentation updates. (#1572)

- Updated errors in the AtlasDB vector store documentation
- Removed extraneous output logs in example notebook.
This commit is contained in:
Andriy Mulyar 2023-03-09 19:31:14 -05:00 committed by GitHub
parent 622578a022
commit c9189d354a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 31 additions and 93 deletions

View File

@ -13,7 +13,9 @@ It is broken into two parts: installation and setup, and then references to spec
There exists a wrapper around the Atlas neural database, allowing you to use it as a vectorstore.
This vectorstore also gives you full access to the underlying AtlasProject object, which will allow you to use the full range of Atlas map interactions, such as bulk tagging and automatic topic modeling.
Please see [the Nomic docs](https://docs.nomic.ai/atlas_api.html) for more detailed information.
Please see [the Atlas docs](https://docs.nomic.ai/atlas_api.html) for more detailed information.
@ -22,4 +24,4 @@ To import this vectorstore:
from langchain.vectorstores import AtlasDB
```
For a more detailed walkthrough of the Chroma wrapper, see [this notebook](../modules/indexes/examples/vectorstores.ipynb)
For a more detailed walkthrough of the AtlasDB wrapper, see [this notebook](../modules/indexes/vectorstore_examples/atlas.ipynb)

View File

@ -36,6 +36,8 @@ In the below guides, we cover different types of vectorstores and how to use the
`Chroma <./vectorstore_examples/chroma.html>`_: A walkthrough of how to use the Chroma vectorstore wrapper.
`AtlasDB <./vectorstore_examples/atlas.html>`_: A walkthrough of how to use the AtlasDB vectorstore and visualizer wrapper.
`DeepLake <./vectorstore_examples/deeplake.html>`_: A walkthrough of how to use the Deep Lake, data lake, wrapper.
`FAISS <./vectorstore_examples/faiss.html>`_: A walkthrough of how to use the FAISS vectorstore wrapper.

View File

@ -2,11 +2,7 @@
"cells": [
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"metadata": {},
"source": [
"# AtlasDB\n",
"\n",
@ -15,10 +11,10 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {
"pycharm": {
"name": "#%%\n"
"is_executing": true
}
},
"outputs": [],
@ -32,56 +28,14 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting en-core-web-sm==3.5.0\n",
" Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.5.0/en_core_web_sm-3.5.0-py3-none-any.whl (12.8 MB)\n",
"\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m12.8/12.8 MB\u001B[0m \u001B[31m90.8 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m00:01\u001B[0m00:01\u001B[0m\n",
"\u001B[?25hRequirement already satisfied: spacy<3.6.0,>=3.5.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from en-core-web-sm==3.5.0) (3.5.0)\n",
"Requirement already satisfied: packaging>=20.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (23.0)\n",
"Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (1.1.1)\n",
"Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (3.3.0)\n",
"Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (2.4.5)\n",
"Requirement already satisfied: pathy>=0.10.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (0.10.1)\n",
"Requirement already satisfied: setuptools in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (67.4.0)\n",
"Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (4.64.1)\n",
"Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (1.0.4)\n",
"Requirement already satisfied: smart-open<7.0.0,>=5.2.1 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (6.3.0)\n",
"Requirement already satisfied: thinc<8.2.0,>=8.1.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (8.1.7)\n",
"Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (2.0.7)\n",
"Requirement already satisfied: typer<0.8.0,>=0.3.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (0.7.0)\n",
"Requirement already satisfied: requests<3.0.0,>=2.13.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (2.28.2)\n",
"Requirement already satisfied: jinja2 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (3.1.2)\n",
"Requirement already satisfied: pydantic!=1.8,!=1.8.1,<1.11.0,>=1.7.4 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (1.10.5)\n",
"Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (2.0.8)\n",
"Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (3.0.12)\n",
"Requirement already satisfied: numpy>=1.15.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (1.24.2)\n",
"Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (1.0.9)\n",
"Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (3.0.8)\n",
"Requirement already satisfied: typing-extensions>=4.2.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from pydantic!=1.8,!=1.8.1,<1.11.0,>=1.7.4->spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (4.5.0)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (3.0.1)\n",
"Requirement already satisfied: idna<4,>=2.5 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (3.4)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (2022.12.7)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (1.26.14)\n",
"Requirement already satisfied: blis<0.8.0,>=0.7.8 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from thinc<8.2.0,>=8.1.0->spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (0.7.9)\n",
"Requirement already satisfied: confection<1.0.0,>=0.0.1 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from thinc<8.2.0,>=8.1.0->spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (0.0.4)\n",
"Requirement already satisfied: click<9.0.0,>=7.1.1 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from typer<0.8.0,>=0.3.0->spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (8.1.3)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /home/ubuntu/langchain/.venv/lib/python3.9/site-packages (from jinja2->spacy<3.6.0,>=3.5.0->en-core-web-sm==3.5.0) (2.1.2)\n",
"\n",
"\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip is available: \u001B[0m\u001B[31;49m23.0\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m23.0.1\u001B[0m\n",
"\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\n",
"\u001B[38;5;2m✔ Download and installation successful\u001B[0m\n",
"You can now load the package via spacy.load('en_core_web_sm')\n"
]
"scrolled": true,
"pycharm": {
"is_executing": true
}
],
},
"outputs": [],
"source": [
"!python -m spacy download en_core_web_sm"
]
@ -113,51 +67,31 @@
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-02-24 16:13:49.696 | INFO | nomic.project:_create_project:884 - Creating project `test_index_1677255228.136989` in organization `Atlas Demo`\n",
"2023-02-24 16:13:51.087 | INFO | nomic.project:wait_for_project_lock:993 - test_index_1677255228.136989: Project lock is released.\n",
"2023-02-24 16:13:51.225 | INFO | nomic.project:wait_for_project_lock:993 - test_index_1677255228.136989: Project lock is released.\n",
"2023-02-24 16:13:51.481 | INFO | nomic.project:add_text:1351 - Uploading text to Atlas.\n",
"1it [00:00, 1.20it/s]\n",
"2023-02-24 16:13:52.318 | INFO | nomic.project:add_text:1422 - Text upload succeeded.\n",
"2023-02-24 16:13:52.628 | INFO | nomic.project:wait_for_project_lock:993 - test_index_1677255228.136989: Project lock is released.\n",
"2023-02-24 16:13:53.380 | INFO | nomic.project:create_index:1192 - Created map `test_index_1677255228.136989_index` in project `test_index_1677255228.136989`: https://atlas.nomic.ai/map/ee2354a3-7f9a-4c6b-af43-b0cda09d7198/db996d77-8981-48a0-897a-ff2c22bbf541\n"
]
"execution_count": null,
"metadata": {
"pycharm": {
"is_executing": true
}
],
},
"outputs": [],
"source": [
"db = AtlasDB.from_texts(texts=texts,\n",
" name='test_index_'+str(time.time()),\n",
" description='test_index',\n",
" name='test_index_'+str(time.time()), # unique name for your vector store\n",
" description='test_index', #a description for your vector store\n",
" api_key=ATLAS_TEST_API_KEY,\n",
" index_kwargs={'build_topic_model': True})"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-02-24 16:14:09.106 | INFO | nomic.project:wait_for_project_lock:993 - test_index_1677255228.136989: Project lock is released.\n"
]
}
],
"execution_count": null,
"outputs": [],
"source": [
"with db.project.wait_for_project_lock():\n",
" time.sleep(1)"
]
"db.project.wait_for_project_lock()"
],
"metadata": {
"collapsed": false
}
},
{
"cell_type": "code",
@ -263,4 +197,4 @@
},
"nbformat": 4,
"nbformat_minor": 1
}
}