new cassIO connect experience with the newest cassio.init (#745)

pull/746/head
Stefano Lottini 8 months ago committed by GitHub
parent 8d329cf9a3
commit 1ca286c180
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -96,7 +96,7 @@
},
"outputs": [],
"source": [
"!pip install cassio openai"
"!pip install \"cassio>=0.1.3\" openai"
]
},
{
@ -112,108 +112,59 @@
"id": "65a8edc1-4633-491b-9ed3-11163ec24e46",
"metadata": {},
"source": [
"A couple of secrets are required to create a `Session` object (a connection to your Astra DB instance).\n",
"In order to connect to your Astra DB, you need two things:\n",
"- An Astra Token, with role \"Database Administrator\" (it looks like `AstraCS:...`)\n",
"- the database ID (it looks like `3df2a5b6-...`)\n",
"\n",
"_(Note: some steps will be slightly different on Google Colab and on local Jupyter, that's why the notebook will detect the runtime type.)_"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "a7429ed4-b3fe-44b0-ad00-60883df32070",
"metadata": {},
"outputs": [],
"source": [
"from cassandra.cluster import Cluster\n",
"from cassandra.auth import PlainTextAuthProvider"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "e4f2eec1-b784-4cea-9006-03cfe7b31e25",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from getpass import getpass\n",
" Make sure you have both strings. Both are obtained in the [Astra UI](https://astra.datastax.com) once you sign in. For more information, see here: [database ID](https://awesome-astra.github.io/docs/pages/astra/faq/#where-should-i-find-a-database-identifier) and [Token](https://awesome-astra.github.io/docs/pages/astra/create-token/#c-procedure).\n",
"\n",
"try:\n",
" from google.colab import files\n",
" IS_COLAB = True\n",
"except ModuleNotFoundError:\n",
" IS_COLAB = False"
"If you want to _connect to a Cassandra cluster_ (which, however, must [support](https://cassio.org/more_info/#use-a-local-vector-capable-cassandra) Vectors), replace the `cassio.init(token=..., database_id=...)` call with `cassio.init(session=..., keyspace=...)`, passing a suitable Session and keyspace name for your cluster."
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "7615e522-574f-427e-9f7f-87fc721207a4",
"execution_count": 2,
"id": "ca5a2f5d-3ff2-43d6-91c0-4a52c0ecd06a",
"metadata": {},
"outputs": [
{
"name": "stdin",
"output_type": "stream",
"text": [
"Please provide the full path to your Secure Connect Bundle zipfile: /path/to/secure-connect-DATABASE.zip\n",
"Please provide your Database Token ('AstraCS:...' string): ········\n",
"Please provide the Keyspace name for your Database: my_keyspace\n"
"Please enter your Astra token ('AstraCS:...') ········\n",
"Please enter your database id ('3df2a5b6-...') 00000000-0000-0000-0000-000000000000\n"
]
}
],
"source": [
"# Your database's Secure Connect Bundle zip file is needed:\n",
"if IS_COLAB:\n",
" print('Please upload your Secure Connect Bundle zipfile: ')\n",
" uploaded = files.upload()\n",
" if uploaded:\n",
" astraBundleFileTitle = list(uploaded.keys())[0]\n",
" ASTRA_DB_SECURE_BUNDLE_PATH = os.path.join(os.getcwd(), astraBundleFileTitle)\n",
" else:\n",
" raise ValueError(\n",
" 'Cannot proceed without Secure Connect Bundle. Please re-run the cell.'\n",
" )\n",
"else:\n",
" # you are running a local-jupyter notebook:\n",
" ASTRA_DB_SECURE_BUNDLE_PATH = input(\"Please provide the full path to your Secure Connect Bundle zipfile: \")\n",
"from getpass import getpass\n",
"\n",
"ASTRA_DB_APPLICATION_TOKEN = getpass(\"Please provide your Database Token ('AstraCS:...' string): \")\n",
"ASTRA_DB_KEYSPACE = input(\"Please provide the Keyspace name for your Database: \")"
"astra_token = getpass(\"Please enter your Astra token ('AstraCS:...')\")\n",
"database_id = input(\"Please enter your database id ('3df2a5b6-...')\")"
]
},
{
"cell_type": "markdown",
"id": "f8c4e5ec-2ab2-4d41-b3ec-c946469fed8b",
"cell_type": "code",
"execution_count": 3,
"id": "0fe028b0-3a40-4f12-b07c-8fd8bbee29b0",
"metadata": {},
"outputs": [],
"source": [
"### Creation of the DB connection\n",
"import cassio\n",
"\n",
"This is how you create a connection to Astra DB:\n",
"\n",
"_(Incidentally, you could also use any Cassandra cluster (as long as it provides Vector capabilities), just by [changing the parameters](https://docs.datastax.com/en/developer/python-driver/latest/getting_started/#connecting-to-cassandra) to the following `Cluster` instantiation.)_"
"cassio.init(token=astra_token, database_id=database_id)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "949ab020-90c8-499b-a139-f69f07af50ed",
"cell_type": "markdown",
"id": "f8c4e5ec-2ab2-4d41-b3ec-c946469fed8b",
"metadata": {},
"outputs": [],
"source": [
"# Don't mind the \"Closing connection\" error after \"downgrading protocol...\" messages,\n",
"# it is really just a warning: the connection will work smoothly.\n",
"cluster = Cluster(\n",
" cloud={\n",
" \"secure_connect_bundle\": ASTRA_DB_SECURE_BUNDLE_PATH,\n",
" },\n",
" auth_provider=PlainTextAuthProvider(\n",
" \"token\",\n",
" ASTRA_DB_APPLICATION_TOKEN,\n",
" ),\n",
")\n",
"### Creation of the DB connection\n",
"\n",
"This is how you create a connection to Astra DB:\n",
"\n",
"session = cluster.connect()\n",
"keyspace = ASTRA_DB_KEYSPACE"
"_(Incidentally, you could also use any Cassandra cluster (as long as it provides Vector capabilities), just by [changing the parameters](https://docs.datastax.com/en/developer/python-driver/latest/getting_started/#connecting-to-cassandra) to the following `Cluster` instantiation.)_"
]
},
{
@ -228,7 +179,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 4,
"id": "8db837dc-cd49-41e2-8b5d-edb17ccc470e",
"metadata": {},
"outputs": [],
@ -239,17 +190,12 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 5,
"id": "691f1a07-cab4-42a1-baba-f17b561ddd3f",
"metadata": {},
"outputs": [],
"source": [
"v_table = MetadataVectorCassandraTable(\n",
" session,\n",
" keyspace,\n",
" \"philosophers_cassio\",\n",
" vector_dimension=1536,\n",
")"
"v_table = MetadataVectorCassandraTable(table=\"philosophers_cassio\", vector_dimension=1536)"
]
},
{
@ -270,7 +216,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 6,
"id": "37fe7653-dd64-4494-83e1-5702ec41725c",
"metadata": {},
"outputs": [
@ -288,7 +234,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 7,
"id": "8065a42a-0ece-4453-b771-1dbef6d8a620",
"metadata": {},
"outputs": [],
@ -310,7 +256,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 8,
"id": "6bf89454-9a55-4202-ab6b-ea15b2048f3d",
"metadata": {},
"outputs": [],
@ -328,7 +274,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 9,
"id": "50a8e6f0-0aa7-4ffc-94e9-702b68566815",
"metadata": {},
"outputs": [
@ -337,7 +283,7 @@
"output_type": "stream",
"text": [
"len(result.data) = 2\n",
"result.data[1].embedding = [-0.01075850147753954, 0.0013505702372640371, 0.0036223...\n",
"result.data[1].embedding = [-0.011011358350515366, 0.0033741754014045, 0.004608382...\n",
"len(result.data[1].embedding) = 1536\n"
]
}
@ -368,7 +314,22 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 10,
"id": "aa68f038-3240-4e22-b7c6-a5f214eda381",
"metadata": {},
"outputs": [],
"source": [
"# Don't mind this cell, just autodetecting if we're on a Colab or not\n",
"try:\n",
" from google.colab import files\n",
" IS_COLAB = True\n",
"except ModuleNotFoundError:\n",
" IS_COLAB = False"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "94ff33fb-4b52-4c15-ab74-4af4fe973cbf",
"metadata": {},
"outputs": [],
@ -395,7 +356,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 12,
"id": "6ab84ccb-3363-4bdc-9484-0d68c25a58ff",
"metadata": {},
"outputs": [
@ -457,7 +418,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 13,
"id": "68e80e81-886b-45a4-be61-c33b8028bcfb",
"metadata": {},
"outputs": [
@ -517,7 +478,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 14,
"id": "d6fcf182-3ab7-4d28-9472-dce35cc38182",
"metadata": {},
"outputs": [],
@ -563,7 +524,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 15,
"id": "6722c2c0-3e54-4738-80ce-4d1149e95414",
"metadata": {},
"outputs": [
@ -572,13 +533,13 @@
"text/plain": [
"[('Life to the great majority is only a constant struggle for mere existence, with the certainty of losing it at last.',\n",
" 'schopenhauer'),\n",
" ('We give up leisure in order that we may have leisure, just as we go to war in order that we may have peace.',\n",
" 'aristotle'),\n",
" ('Perhaps the gods are kind to us, by making life more disagreeable as we grow older. In the end death seems less intolerable than the manifold burdens we carry',\n",
" 'freud')]"
" ('The meager satisfaction that man can extract from reality leaves him starving.',\n",
" 'freud'),\n",
" ('To live is to suffer, to survive is to find some meaning in the suffering.',\n",
" 'nietzsche')]"
]
},
"execution_count": 16,
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@ -597,7 +558,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 16,
"id": "da9c705f-5c12-42b3-a038-202f89a3c6da",
"metadata": {},
"outputs": [
@ -610,7 +571,7 @@
" 'nietzsche')]"
]
},
"execution_count": 17,
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@ -629,7 +590,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 17,
"id": "abcfaec9-8f42-4789-a5ed-1073fa2932c2",
"metadata": {},
"outputs": [
@ -642,7 +603,7 @@
" 'nietzsche')]"
]
},
"execution_count": 18,
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
@ -670,7 +631,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 18,
"id": "b9b43721-a3b0-4ac4-b730-7a6aeec52e70",
"metadata": {},
"outputs": [
@ -679,14 +640,14 @@
"output_type": "stream",
"text": [
"8 quotes within the threshold:\n",
" 0. [distance=0.855] \"The assumption that animals are without rights, and the illusion that ...\"\n",
" 1. [distance=0.843] \"Animals are in possession of themselves; their soul is in possession o...\"\n",
" 2. [distance=0.841] \"At his best, man is the noblest of all animals; separated from law and...\"\n",
" 3. [distance=0.832] \"Man is the only animal that must be encouraged to live....\"\n",
" 4. [distance=0.831] \".... we are a part of nature as a whole, whose order we follow....\"\n",
" 5. [distance=0.824] \"Every human endeavor, however singular it seems, involves the whole hu...\"\n",
" 6. [distance=0.820] \"Because Christian morality leaves animals out of account, they are at ...\"\n",
" 7. [distance=0.819] \"A dog has the soul of a philosopher....\"\n"
" 0. [distance=0.858] \"The assumption that animals are without rights, and the illusion that ...\"\n",
" 1. [distance=0.849] \"Animals are in possession of themselves; their soul is in possession o...\"\n",
" 2. [distance=0.846] \"At his best, man is the noblest of all animals; separated from law and...\"\n",
" 3. [distance=0.840] \"Man is the only animal that must be encouraged to live....\"\n",
" 4. [distance=0.838] \".... we are a part of nature as a whole, whose order we follow....\"\n",
" 5. [distance=0.828] \"Because Christian morality leaves animals out of account, they are at ...\"\n",
" 6. [distance=0.827] \"Every human endeavor, however singular it seems, involves the whole hu...\"\n",
" 7. [distance=0.826] \"A dog has the soul of a philosopher....\"\n"
]
}
],
@ -734,7 +695,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 19,
"id": "a6dd366d-665a-45fd-917b-b6b5312b0865",
"metadata": {},
"outputs": [],
@ -762,7 +723,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 20,
"id": "397e6ebd-b30e-413b-be63-81a62947a7b8",
"metadata": {},
"outputs": [],
@ -810,7 +771,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 21,
"id": "806ba758-8988-410e-9eeb-b9c6799e6b25",
"metadata": {},
"outputs": [
@ -820,11 +781,11 @@
"text": [
"** quotes found:\n",
"** - Happiness is the reward of virtue. (aristotle)\n",
"** - It is better for a city to be governed by a good man than by good laws. (aristotle)\n",
"** - Enthusiasm is always connected with the senses, whatever be the object that excites it. The true strength of virtue is serenity of mind, combined with a deliberate and steadfast determination to execute her laws. That is the healthful condition of the moral life; on the other hand, enthusiasm, even when excited by representations of goodness, is a brilliant but feverish glow which leaves only exhaustion and languor behind. (kant)\n",
"** end of logging\n",
"\n",
"A new generated quote:\n",
"Politics without virtue is like a ship without a captain - destined to be guided by turbulent currents, lacking true direction.\n"
"Politics without virtue is like a ship without a compass - destined to drift aimlessly, guided only by self-interest and corruption.\n"
]
}
],
@ -844,7 +805,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 22,
"id": "7c2e2d4e-865f-4b2d-80cd-a695271415d9",
"metadata": {},
"outputs": [
@ -858,7 +819,7 @@
"** end of logging\n",
"\n",
"A new generated quote:\n",
"Neglecting the moral worth of animals reflects a crude and barbaric mindset. True morality lies in universal compassion.\n"
"By disregarding the worth of animals, we reveal our own moral ignorance. True morality lies in extending compassion to all living beings.\n"
]
}
],
@ -900,7 +861,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 23,
"id": "49cabc31-47e3-4326-8ef5-d95690317321",
"metadata": {},
"outputs": [],
@ -910,17 +871,12 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 24,
"id": "a614c333-4143-4ad6-abdf-7b3853fbf423",
"metadata": {},
"outputs": [],
"source": [
"v_table_partitioned = ClusteredMetadataVectorCassandraTable(\n",
" session,\n",
" keyspace,\n",
" \"philosophers_cassio_partitioned\",\n",
" vector_dimension=1536,\n",
")"
"v_table_partitioned = ClusteredMetadataVectorCassandraTable(table=\"philosophers_cassio_partitioned\", vector_dimension=1536)"
]
},
{
@ -939,7 +895,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 25,
"id": "424513a6-0a9d-4164-bf30-22d5b7e3bb25",
"metadata": {},
"outputs": [
@ -978,7 +934,7 @@
" ))\n",
" for future in futures:\n",
" future.result()\n",
" print(f\" Done ({len(quotes)} quotes inserted).\")\n",
" print(f\"Done ({len(quotes)} quotes inserted).\")\n",
"print(\"Finished inserting.\")"
]
},
@ -992,7 +948,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 26,
"id": "a3217a90-c682-4c72-b834-7717ed13a3af",
"metadata": {},
"outputs": [],
@ -1032,7 +988,7 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 27,
"id": "d7343a7a-5a06-47c5-ad96-8b60b6948352",
"metadata": {},
"outputs": [
@ -1041,13 +997,13 @@
"text/plain": [
"[('Life to the great majority is only a constant struggle for mere existence, with the certainty of losing it at last.',\n",
" 'schopenhauer'),\n",
" ('We give up leisure in order that we may have leisure, just as we go to war in order that we may have peace.',\n",
" 'aristotle'),\n",
" ('Perhaps the gods are kind to us, by making life more disagreeable as we grow older. In the end death seems less intolerable than the manifold burdens we carry',\n",
" 'freud')]"
" ('The meager satisfaction that man can extract from reality leaves him starving.',\n",
" 'freud'),\n",
" ('To live is to suffer, to survive is to find some meaning in the suffering.',\n",
" 'nietzsche')]"
]
},
"execution_count": 28,
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
@ -1066,7 +1022,7 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 28,
"id": "d1abb677-5a8b-48c2-82c5-dbca94ef56f1",
"metadata": {},
"outputs": [
@ -1079,7 +1035,7 @@
" 'nietzsche')]"
]
},
"execution_count": 29,
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
@ -1122,22 +1078,26 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 29,
"id": "1eb0fd16-7e15-4742-8fc5-94d9eeeda620",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<cassandra.cluster.ResultSet at 0x7f1705d44880>"
"<cassandra.cluster.ResultSet at 0x7fc7c4287940>"
]
},
"execution_count": 30,
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# we peek at CassIO's config to get a direct handle to the DB connection\n",
"session = cassio.config.resolve_session()\n",
"keyspace = cassio.config.resolve_keyspace()\n",
"\n",
"session.execute(f\"DROP TABLE IF EXISTS {keyspace}.philosophers_cassio;\")\n",
"session.execute(f\"DROP TABLE IF EXISTS {keyspace}.philosophers_cassio_partitioned;\")"
]

Loading…
Cancel
Save