From 645ec14ea2a7920634efe425ab2fae994f57b726 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kacper=20=C5=81ukawski?= Date: Thu, 29 Jun 2023 16:47:18 +0200 Subject: [PATCH] Refactor Qdrant notebooks (#556) * Upgrade Qdrant to 1.3.0 * Adapt the descriptions and run the missing cells --- .../Using_Qdrant_for_embeddings_search.ipynb | 82 +++++++++++++++---- .../qdrant/docker-compose.yaml | 2 +- 2 files changed, 69 insertions(+), 15 deletions(-) diff --git a/examples/vector_databases/qdrant/Using_Qdrant_for_embeddings_search.ipynb b/examples/vector_databases/qdrant/Using_Qdrant_for_embeddings_search.ipynb index 8193963e..814b2261 100644 --- a/examples/vector_databases/qdrant/Using_Qdrant_for_embeddings_search.ipynb +++ b/examples/vector_databases/qdrant/Using_Qdrant_for_embeddings_search.ipynb @@ -42,10 +42,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "8d8810f9", - "metadata": {}, - "outputs": [], + "metadata": { + "ExecuteTime": { + "end_time": "2023-06-29T12:59:21.344233180Z", + "start_time": "2023-06-29T12:59:00.815088712Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting qdrant-client\r\n", + " ...\r\n", + "Successfully installed certifi-2023.5.7 grpcio-1.56.0 grpcio-tools-1.56.0 h11-0.14.0 h2-4.1.0 hpack-4.0.0 httpcore-0.17.2 httpx-0.24.1 hyperframe-6.0.1 numpy-1.25.0 portalocker-2.7.0 protobuf-4.23.3 pydantic-1.10.9 qdrant-client-1.3.1 typing-extensions-4.5.0 urllib3-1.26.16\r\n", + "Collecting wget\r\n", + " Using cached wget-3.2.zip (10 kB)\r\n", + " Preparing metadata (setup.py) ... \u001B[?25ldone\r\n", + "\u001B[?25hBuilding wheels for collected packages: wget\r\n", + " Building wheel for wget (setup.py) ... 
\u001B[?25ldone\r\n", + "\u001B[?25h Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9657 sha256=eb5f15f12150fc304e7b14973424f696fa8d95225772bc0cbc0b318bf92e04b9\r\n", + " Stored in directory: /home/user/.cache/pip/wheels/04/5f/3e/46cc37c5d698415694d83f607f833f83f0149e49b3af9d0f38\r\n", + "Successfully built wget\r\n", + "Installing collected packages: wget\r\n", + "Successfully installed wget-3.2\r\n" + ] + } + ], "source": [ "# We'll need to install Qdrant client\n", "!pip install qdrant-client\n", @@ -58,7 +83,12 @@ "cell_type": "code", "execution_count": 4, "id": "5be94df6", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-06-29T13:00:32.715638041Z", + "start_time": "2023-06-29T13:00:31.654032435Z" + } + }, "outputs": [], "source": [ "import openai\n", @@ -95,10 +125,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "5dff8b55", - "metadata": {}, - "outputs": [], + "metadata": { + "ExecuteTime": { + "end_time": "2023-06-29T13:02:47.656128622Z", + "start_time": "2023-06-29T13:00:39.079229873Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": "'vector_database_wikipedia_articles_embedded.zip'" + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "embeddings_url = 'https://cdn.openai.com/API/examples/data/vector_database_wikipedia_articles_embedded.zip'\n", "\n", @@ -108,9 +152,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "21097972", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-06-29T13:03:08.268413005Z", + "start_time": "2023-06-29T13:02:47.626254476Z" + } + }, "outputs": [], "source": [ "import zipfile\n", @@ -120,9 +169,14 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "id": "70bbd8ba", - "metadata": {}, + "metadata": { + "ExecuteTime": { + "end_time": "2023-06-29T13:03:28.291797292Z", + "start_time": 
"2023-06-29T13:03:08.269033964Z" + } + }, "outputs": [], "source": [ "article_df = pd.read_csv('../data/vector_database_wikipedia_articles_embedded.csv')" @@ -311,7 +365,7 @@ "source": [ "## Qdrant\n", "\n", - "The last vector database we'll consider is **[Qdrant](https://qdrant.tech/)**. This is a high-performant vector search database written in Rust. It offers both on-premise and cloud version, but for the purposes of that example we're going to use the local deployment mode.\n", + "**[Qdrant](https://qdrant.tech/)** is a high-performance vector search database written in Rust. It offers both on-premise and cloud versions, but for the purposes of this example we're going to use the local deployment mode.\n", "\n", "Setting everything up will require:\n", "- Spinning up a local instance of Qdrant\n", @@ -646,9 +700,9 @@ ], "metadata": { "kernelspec": { - "display_name": "vector_db_split", + "name": "python3", "language": "python", - "name": "vector_db_split" + "display_name": "Python 3 (ipykernel)" }, "language_info": { "codemirror_mode": { diff --git a/examples/vector_databases/qdrant/docker-compose.yaml b/examples/vector_databases/qdrant/docker-compose.yaml index e970fdea..be26784b 100644 --- a/examples/vector_databases/qdrant/docker-compose.yaml +++ b/examples/vector_databases/qdrant/docker-compose.yaml @@ -1,7 +1,7 @@ version: '3.4' services: qdrant: - image: qdrant/qdrant:v1.0.1 + image: qdrant/qdrant:v1.3.0 restart: on-failure ports: - "6333:6333"