Added notes where users can consider adding a thread pool to speed up the upsert operation

2 years ago · ed17c4c1b9
parent 1912234327
commit ed17c4c1b9
1 changed files with 5 additions and 2 deletions
--- a/examples/vector_databases/Vector_db_introduction.ipynb
+++ b/examples/vector_databases/Vector_db_introduction.ipynb
@@ -476,6 +476,7 @@
   ],
   "source": [
    "# Upsert content vectors in content namespace\n",
+    "# NOTE: Using a thread pool here can accelerate this upsert operation\n",
    "print(\"Uploading vectors to content namespace..\")\n",
    "for batch_df in df_batcher(article_df):\n",
    "    index.upsert(vectors=zip(batch_df.vector_id, batch_df.content_vector), namespace='content')"
@@ -497,6 +498,7 @@
   ],
   "source": [
    "# Upsert title vectors in title namespace\n",
+    "# NOTE: Using a thread pool here can accelerate this upsert operation\n",
    "print(\"Uploading vectors to title namespace..\")\n",
    "for batch_df in df_batcher(article_df):\n",
    "    index.upsert(vectors=zip(batch_df.vector_id, batch_df.title_vector), namespace='title')"
@@ -841,13 +843,14 @@
    "    data_objects.append((v['title'],v['text'],v['title_vector'],v['vector_id']))\n",
    "\n",
    "# Upsert into article schema\n",
+    "# NOTE: Using a thread pool here can accelerate this upsert operation\n",
    "print(\"Uploading vectors to article schema..\")\n",
    "uuids = []\n",
    "for articles in data_objects:\n",
    "    uuid = client.data_object.create(\n",
    "                              {\n",
-    "                                  \"title\": articles[0],\n",
-    "                                  \"content\": articles[1]\n",
+    "                               \"title\": articles[0],\n",
+    "                               \"content\": articles[1]\n",
    "                              },\n",
    "                              \"Article\",\n",
    "                              vector=articles[2]\n",