mirror of
https://github.com/hwchase17/langchain
synced 2024-10-29 17:07:25 +00:00
a8c916955f
Description: Updates for Nomic AI Atlas and GPT4All integrations documentation. --------- Co-authored-by: Bagatur <baskaryan@gmail.com>
194 lines
4.1 KiB
Plaintext
194 lines
4.1 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"attachments": {},
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Atlas\n",
|
|
"\n",
|
|
"\n",
|
|
">[Atlas](https://docs.nomic.ai/index.html) is a platform by Nomic made for interacting with both small and internet-scale unstructured datasets. It enables anyone to visualize, search, and share massive datasets in their browser.\n",
|
|
"\n",
|
|
"This notebook shows you how to use the `AtlasDB` vector store."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"!pip install spacy"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"pycharm": {
|
|
"is_executing": true
|
|
},
|
|
"scrolled": true,
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"!python3 -m spacy download en_core_web_sm"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"!pip install nomic"
|
|
]
|
|
},
|
|
{
|
|
"attachments": {},
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Load Packages"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {
|
|
"pycharm": {
|
|
"is_executing": true
|
|
},
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import time\n",
|
|
"from langchain.embeddings.openai import OpenAIEmbeddings\n",
|
|
"from langchain.text_splitter import SpacyTextSplitter\n",
|
|
"from langchain.vectorstores import AtlasDB\n",
|
|
"from langchain.document_loaders import TextLoader"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"ATLAS_TEST_API_KEY = \"7xDPkYXSYDc1_ErdTPIcoAR9RNd8YDlkS3nVNXcVoIMZ6\""
|
|
]
|
|
},
|
|
{
|
|
"attachments": {},
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Prepare the Data"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
|
|
"documents = loader.load()\n",
|
|
"text_splitter = SpacyTextSplitter(separator=\"|\")\n",
|
|
"texts = []\n",
|
|
"for doc in text_splitter.split_documents(documents):\n",
|
|
" texts.extend(doc.page_content.split(\"|\"))\n",
|
|
"\n",
|
|
"texts = [e.strip() for e in texts]"
|
|
]
|
|
},
|
|
{
|
|
"attachments": {},
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Map the Data using Nomic's Atlas"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"pycharm": {
|
|
"is_executing": true
|
|
},
|
|
"tags": []
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"db = AtlasDB.from_texts(\n",
|
|
" texts=texts,\n",
|
|
" name=\"test_index_\" + str(time.time()), # unique name for your vector store\n",
|
|
" description=\"test_index\", # a description for your vector store\n",
|
|
" api_key=ATLAS_TEST_API_KEY,\n",
|
|
" index_kwargs={\"build_topic_model\": True},\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"db.project.wait_for_project_lock()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"db.project"
|
|
]
|
|
},
|
|
{
|
|
"attachments": {},
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Here is a map showing the result of this code. It displays the texts of the State of the Union address.\n",
|
|
"https://atlas.nomic.ai/map/3e4de075-89ff-486a-845c-36c23f30bb67/d8ce2284-8edb-4050-8b9b-9bb543d7f647"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.6"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|