You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
openai-cookbook/examples/vector_databases/milvus/Getting_started_with_Milvus...

576 lines
64 KiB
Plaintext

{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Getting Started with Milvus and OpenAI\n",
"### Finding your next book\n",
"\n",
"In this notebook we will be going over generating embeddings of book descriptions with OpenAI and using those embeddings within Milvus to find relevant books. The dataset in this example is sourced from HuggingFace datasets, and contains a little over 1 million title-description pairs.\n",
"\n",
"Lets begin by first downloading the required libraries for this notebook:\n",
"- `openai` is used for communicating with the OpenAI embedding service\n",
"- `pymilvus` is used for communicating with the Milvus server\n",
"- `datasets` is used for downloading the dataset\n",
"- `tqdm` is used for the progress bars\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n",
"Requirement already satisfied: openai in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (0.27.2)\n",
"Requirement already satisfied: pymilvus in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (2.2.2)\n",
"Requirement already satisfied: datasets in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (2.10.1)\n",
"Requirement already satisfied: tqdm in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (4.64.1)\n",
"Requirement already satisfied: aiohttp in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from openai) (3.8.4)\n",
"Requirement already satisfied: requests>=2.20 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from openai) (2.28.2)\n",
"Requirement already satisfied: pandas>=1.2.4 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from pymilvus) (1.5.3)\n",
"Requirement already satisfied: ujson<=5.4.0,>=2.0.0 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from pymilvus) (5.1.0)\n",
"Requirement already satisfied: mmh3<=3.0.0,>=2.0 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from pymilvus) (3.0.0)\n",
"Requirement already satisfied: grpcio<=1.48.0,>=1.47.0 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from pymilvus) (1.47.2)\n",
"Requirement already satisfied: grpcio-tools<=1.48.0,>=1.47.0 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from pymilvus) (1.47.2)\n",
"Requirement already satisfied: huggingface-hub<1.0.0,>=0.2.0 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from datasets) (0.12.1)\n",
"Requirement already satisfied: dill<0.3.7,>=0.3.0 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from datasets) (0.3.6)\n",
"Requirement already satisfied: xxhash in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from datasets) (3.2.0)\n",
"Requirement already satisfied: pyyaml>=5.1 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from datasets) (5.4.1)\n",
"Requirement already satisfied: fsspec[http]>=2021.11.1 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from datasets) (2023.1.0)\n",
"Requirement already satisfied: packaging in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from datasets) (23.0)\n",
"Requirement already satisfied: numpy>=1.17 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from datasets) (1.23.5)\n",
"Requirement already satisfied: multiprocess in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from datasets) (0.70.14)\n",
"Requirement already satisfied: pyarrow>=6.0.0 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from datasets) (10.0.1)\n",
"Requirement already satisfied: responses<0.19 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from datasets) (0.18.0)\n",
"Requirement already satisfied: multidict<7.0,>=4.5 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from aiohttp->openai) (6.0.4)\n",
"Requirement already satisfied: frozenlist>=1.1.1 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from aiohttp->openai) (1.3.3)\n",
"Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from aiohttp->openai) (4.0.2)\n",
"Requirement already satisfied: yarl<2.0,>=1.0 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from aiohttp->openai) (1.8.2)\n",
"Requirement already satisfied: aiosignal>=1.1.2 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from aiohttp->openai) (1.3.1)\n",
"Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from aiohttp->openai) (3.0.1)\n",
"Requirement already satisfied: attrs>=17.3.0 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from aiohttp->openai) (22.2.0)\n",
"Requirement already satisfied: six>=1.5.2 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from grpcio<=1.48.0,>=1.47.0->pymilvus) (1.16.0)\n",
"Requirement already satisfied: protobuf<4.0dev,>=3.12.0 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from grpcio-tools<=1.48.0,>=1.47.0->pymilvus) (3.20.1)\n",
"Requirement already satisfied: setuptools in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from grpcio-tools<=1.48.0,>=1.47.0->pymilvus) (65.6.3)\n",
"Requirement already satisfied: filelock in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from huggingface-hub<1.0.0,>=0.2.0->datasets) (3.9.0)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from huggingface-hub<1.0.0,>=0.2.0->datasets) (4.5.0)\n",
"Requirement already satisfied: python-dateutil>=2.8.1 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from pandas>=1.2.4->pymilvus) (2.8.2)\n",
"Requirement already satisfied: pytz>=2020.1 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from pandas>=1.2.4->pymilvus) (2022.7.1)\n",
"Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from requests>=2.20->openai) (1.26.14)\n",
"Requirement already satisfied: idna<4,>=2.5 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from requests>=2.20->openai) (3.4)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages (from requests>=2.20->openai) (2022.12.7)\n"
]
}
],
"source": [
"! pip install openai pymilvus datasets tqdm"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"With the required packages installed we can get started. Lets begin by launching the Milvus service. The file being run is the `docker-compose.yaml` found in the folder of this file. This command launches a Milvus standalone instance which we will use for this test. "
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[1A\u001b[1B\u001b[0G\u001b[?25l[+] Running 0/0\n",
"\u001b[37m ⠋ Network milvus Creating 0.1s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[0G\u001b[?25l\u001b[34m[+] Running 1/1\u001b[0m\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[37m ⠋ Container milvus-minio Creating 0.1s\n",
"\u001b[0m\u001b[37m ⠋ Container milvus-etcd Creating 0.1s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[37m ⠙ Container milvus-minio Creating 0.2s\n",
"\u001b[0m\u001b[37m ⠙ Container milvus-etcd Creating 0.2s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 1/3\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[37m ⠹ Container milvus-minio Creating 0.3s\n",
"\u001b[0m\u001b[37m ⠹ Container milvus-etcd Creating 0.3s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l\u001b[34m[+] Running 3/3\u001b[0m\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-minio Created 0.3s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-etcd Created 0.3s\n",
"\u001b[0m\u001b[37m ⠋ Container milvus-standalone Creating 0.1s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 3/4\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-minio Created 0.3s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-etcd Created 0.3s\n",
"\u001b[0m\u001b[37m ⠙ Container milvus-standalone Creating 0.2s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l\u001b[34m[+] Running 4/4\u001b[0m\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-minio Created 0.3s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-etcd Created 0.3s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-standalone Created 0.3s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 2/4\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-minio Starting 0.7s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-etcd Starting 0.7s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-standalone Created 0.3s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 2/4\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-minio Starting 0.8s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-etcd Starting 0.8s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-standalone Created 0.3s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 2/4\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-minio Starting 0.9s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-etcd Starting 0.9s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-standalone Created 0.3s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 2/4\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-minio Starting 1.0s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-etcd Starting 1.0s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-standalone Created 0.3s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 2/4\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-minio Starting 1.1s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-etcd Starting 1.1s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-standalone Created 0.3s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 2/4\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-minio Starting 1.2s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-etcd Starting 1.2s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-standalone Created 0.3s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 2/4\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-minio Starting 1.3s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-etcd Starting 1.3s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-standalone Created 0.3s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 2/4\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-minio Starting 1.4s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-etcd Starting 1.4s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-standalone Created 0.3s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 2/4\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-minio Starting 1.5s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-etcd Starting 1.5s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-standalone Created 0.3s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 2/4\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-minio Starting 1.6s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-etcd Starting 1.6s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-standalone Created 0.3s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 2/4\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-minio Starting 1.7s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-etcd Starting 1.7s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-standalone Created 0.3s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 3/4\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-minio Starting 1.8s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-etcd Started 1.7s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-standalone Created 0.3s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 3/4\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-minio Started 1.8s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-etcd Started 1.7s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-standalone Starting 1.6s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 3/4\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-minio Started 1.8s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-etcd Started 1.7s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-standalone Starting 1.7s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 3/4\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-minio Started 1.8s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-etcd Started 1.7s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-standalone Starting 1.8s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 3/4\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-minio Started 1.8s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-etcd Started 1.7s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-standalone Starting 1.9s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 3/4\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-minio Started 1.8s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-etcd Started 1.7s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-standalone Starting 2.0s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 3/4\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-minio Started 1.8s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-etcd Started 1.7s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-standalone Starting 2.1s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 3/4\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-minio Started 1.8s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-etcd Started 1.7s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-standalone Starting 2.2s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 3/4\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-minio Started 1.8s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-etcd Started 1.7s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-standalone Starting 2.3s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 3/4\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-minio Started 1.8s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-etcd Started 1.7s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-standalone Starting 2.4s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 3/4\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-minio Started 1.8s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-etcd Started 1.7s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-standalone Starting 2.5s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l[+] Running 3/4\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-minio Started 1.8s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-etcd Started 1.7s\n",
"\u001b[0m\u001b[37m ⠿ Container milvus-standalone Starting 2.6s\n",
"\u001b[0m\u001b[?25h\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[1A\u001b[0G\u001b[?25l\u001b[34m[+] Running 4/4\u001b[0m\n",
"\u001b[34m ⠿ Network milvus Created 0.1s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-minio Started 1.8s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-etcd Started 1.7s\n",
"\u001b[0m\u001b[34m ⠿ Container milvus-standalone Started 2.6s\n",
"\u001b[0m\u001b[?25h"
]
}
],
"source": [
"! docker compose up -d"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"With Milvus running we can setup our global variables:\n",
"- HOST: The Milvus host address\n",
"- PORT: The Milvus port number\n",
"- COLLECTION_NAME: What to name the collection within Milvus\n",
"- DIMENSION: The dimension of the embeddings\n",
"- OPENAI_ENGINE: Which embedding model to use\n",
"- openai.api_key: Your OpenAI account key\n",
"- INDEX_PARAM: The index settings to use for the collection\n",
"- QUERY_PARAM: The search parameters to use\n",
"- BATCH_SIZE: How many texts to embed and insert at once"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"import openai\n",
"\n",
"HOST = 'localhost'\n",
"PORT = 19530\n",
"COLLECTION_NAME = 'book_search'\n",
"DIMENSION = 1536\n",
"OPENAI_ENGINE = 'text-embedding-3-small'\n",
"openai.api_key = 'sk-your_key'\n",
"\n",
"INDEX_PARAM = {\n",
" 'metric_type':'L2',\n",
" 'index_type':\"HNSW\",\n",
" 'params':{'M': 8, 'efConstruction': 64}\n",
"}\n",
"\n",
"QUERY_PARAM = {\n",
" \"metric_type\": \"L2\",\n",
" \"params\": {\"ef\": 64},\n",
"}\n",
"\n",
"BATCH_SIZE = 1000"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Milvus\n",
"This segment deals with Milvus and setting up the database for this use case. Within Milvus we need to setup a collection and index the collection. "
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from pymilvus import connections, utility, FieldSchema, Collection, CollectionSchema, DataType\n",
"\n",
"# Connect to Milvus Database\n",
"connections.connect(host=HOST, port=PORT)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Remove collection if it already exists\n",
"if utility.has_collection(COLLECTION_NAME):\n",
" utility.drop_collection(COLLECTION_NAME)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Create collection which includes the id, title, and embedding.\n",
"fields = [\n",
" FieldSchema(name='id', dtype=DataType.INT64, is_primary=True, auto_id=True),\n",
" FieldSchema(name='title', dtype=DataType.VARCHAR, max_length=64000),\n",
" FieldSchema(name='description', dtype=DataType.VARCHAR, max_length=64000),\n",
" FieldSchema(name='embedding', dtype=DataType.FLOAT_VECTOR, dim=DIMENSION)\n",
"]\n",
"schema = CollectionSchema(fields=fields)\n",
"collection = Collection(name=COLLECTION_NAME, schema=schema)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Create the index on the collection and load it.\n",
"collection.create_index(field_name=\"embedding\", index_params=INDEX_PARAM)\n",
"collection.load()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Dataset\n",
"With Milvus up and running we can begin grabbing our data. Hugging Face Datasets is a hub that holds many different user datasets, and for this example we are using Skelebor's book dataset. This dataset contains title-description pairs for over 1 million books. We are going to embed each description and store it within Milvus along with its title. "
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/filiphaltmayer/miniconda3/envs/haystack/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n",
"Found cached dataset parquet (/Users/filiphaltmayer/.cache/huggingface/datasets/Skelebor___parquet/Skelebor--book_titles_and_descriptions_en_clean-3596935b1d8a7747/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n"
]
}
],
"source": [
"import datasets\n",
"\n",
"# Download the dataset and only use the `train` portion (file is around 800Mb)\n",
"dataset = datasets.load_dataset('Skelebor/book_titles_and_descriptions_en_clean', split='train')"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Insert the Data\n",
"Now that we have our data on our machine we can begin embedding it and inserting it into Milvus. The embedding function takes in text and returns the embeddings in a list format. "
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# Simple function that converts the texts to embeddings\n",
"def embed(texts):\n",
" embeddings = openai.Embedding.create(\n",
" input=texts,\n",
" engine=OPENAI_ENGINE\n",
" )\n",
" return [x['embedding'] for x in embeddings['data']]\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"This next step does the actual inserting. Due to having so many datapoints, if you want to immidiately test it out you can stop the inserting cell block early and move along. Doing this will probably decrease the accuracy of the results due to less datapoints, but it should still be good enough. "
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
" 0%| | 1999/1032335 [00:06<57:22, 299.31it/s] \n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[18], line 13\u001b[0m\n\u001b[1;32m 11\u001b[0m data[\u001b[39m1\u001b[39m]\u001b[39m.\u001b[39mappend(dataset[i][\u001b[39m'\u001b[39m\u001b[39mdescription\u001b[39m\u001b[39m'\u001b[39m])\n\u001b[1;32m 12\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(data[\u001b[39m0\u001b[39m]) \u001b[39m%\u001b[39m BATCH_SIZE \u001b[39m==\u001b[39m \u001b[39m0\u001b[39m:\n\u001b[0;32m---> 13\u001b[0m data\u001b[39m.\u001b[39mappend(embed(data[\u001b[39m1\u001b[39;49m]))\n\u001b[1;32m 14\u001b[0m collection\u001b[39m.\u001b[39minsert(data)\n\u001b[1;32m 15\u001b[0m data \u001b[39m=\u001b[39m [[],[]]\n",
"Cell \u001b[0;32mIn[17], line 3\u001b[0m, in \u001b[0;36membed\u001b[0;34m(texts)\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39membed\u001b[39m(texts):\n\u001b[0;32m----> 3\u001b[0m embeddings \u001b[39m=\u001b[39m openai\u001b[39m.\u001b[39;49mEmbedding\u001b[39m.\u001b[39;49mcreate(\n\u001b[1;32m 4\u001b[0m \u001b[39minput\u001b[39;49m\u001b[39m=\u001b[39;49mtexts,\n\u001b[1;32m 5\u001b[0m engine\u001b[39m=\u001b[39;49mOPENAI_ENGINE\n\u001b[1;32m 6\u001b[0m )\n\u001b[1;32m 7\u001b[0m \u001b[39mreturn\u001b[39;00m [x[\u001b[39m'\u001b[39m\u001b[39membedding\u001b[39m\u001b[39m'\u001b[39m] \u001b[39mfor\u001b[39;00m x \u001b[39min\u001b[39;00m embeddings[\u001b[39m'\u001b[39m\u001b[39mdata\u001b[39m\u001b[39m'\u001b[39m]]\n",
"File \u001b[0;32m~/miniconda3/envs/haystack/lib/python3.9/site-packages/openai/api_resources/embedding.py:33\u001b[0m, in \u001b[0;36mEmbedding.create\u001b[0;34m(cls, *args, **kwargs)\u001b[0m\n\u001b[1;32m 31\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m 32\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m---> 33\u001b[0m response \u001b[39m=\u001b[39m \u001b[39msuper\u001b[39;49m()\u001b[39m.\u001b[39;49mcreate(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 35\u001b[0m \u001b[39m# If a user specifies base64, we'll just return the encoded string.\u001b[39;00m\n\u001b[1;32m 36\u001b[0m \u001b[39m# This is only for the default case.\u001b[39;00m\n\u001b[1;32m 37\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m user_provided_encoding_format:\n",
"File \u001b[0;32m~/miniconda3/envs/haystack/lib/python3.9/site-packages/openai/api_resources/abstract/engine_api_resource.py:153\u001b[0m, in \u001b[0;36mEngineAPIResource.create\u001b[0;34m(cls, api_key, api_base, api_type, request_id, api_version, organization, **params)\u001b[0m\n\u001b[1;32m 127\u001b[0m \u001b[39m@classmethod\u001b[39m\n\u001b[1;32m 128\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mcreate\u001b[39m(\n\u001b[1;32m 129\u001b[0m \u001b[39mcls\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 136\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mparams,\n\u001b[1;32m 137\u001b[0m ):\n\u001b[1;32m 138\u001b[0m (\n\u001b[1;32m 139\u001b[0m deployment_id,\n\u001b[1;32m 140\u001b[0m engine,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 150\u001b[0m api_key, api_base, api_type, api_version, organization, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mparams\n\u001b[1;32m 151\u001b[0m )\n\u001b[0;32m--> 153\u001b[0m response, _, api_key \u001b[39m=\u001b[39m requestor\u001b[39m.\u001b[39;49mrequest(\n\u001b[1;32m 154\u001b[0m \u001b[39m\"\u001b[39;49m\u001b[39mpost\u001b[39;49m\u001b[39m\"\u001b[39;49m,\n\u001b[1;32m 155\u001b[0m url,\n\u001b[1;32m 156\u001b[0m params\u001b[39m=\u001b[39;49mparams,\n\u001b[1;32m 157\u001b[0m headers\u001b[39m=\u001b[39;49mheaders,\n\u001b[1;32m 158\u001b[0m stream\u001b[39m=\u001b[39;49mstream,\n\u001b[1;32m 159\u001b[0m request_id\u001b[39m=\u001b[39;49mrequest_id,\n\u001b[1;32m 160\u001b[0m request_timeout\u001b[39m=\u001b[39;49mrequest_timeout,\n\u001b[1;32m 161\u001b[0m )\n\u001b[1;32m 163\u001b[0m \u001b[39mif\u001b[39;00m stream:\n\u001b[1;32m 164\u001b[0m \u001b[39m# must be an iterator\u001b[39;00m\n\u001b[1;32m 165\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39misinstance\u001b[39m(response, OpenAIResponse)\n",
"File \u001b[0;32m~/miniconda3/envs/haystack/lib/python3.9/site-packages/openai/api_requestor.py:216\u001b[0m, in \u001b[0;36mAPIRequestor.request\u001b[0;34m(self, method, url, params, headers, files, stream, request_id, request_timeout)\u001b[0m\n\u001b[1;32m 205\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mrequest\u001b[39m(\n\u001b[1;32m 206\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 207\u001b[0m method,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 214\u001b[0m request_timeout: Optional[Union[\u001b[39mfloat\u001b[39m, Tuple[\u001b[39mfloat\u001b[39m, \u001b[39mfloat\u001b[39m]]] \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m,\n\u001b[1;32m 215\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Tuple[Union[OpenAIResponse, Iterator[OpenAIResponse]], \u001b[39mbool\u001b[39m, \u001b[39mstr\u001b[39m]:\n\u001b[0;32m--> 216\u001b[0m result \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mrequest_raw(\n\u001b[1;32m 217\u001b[0m method\u001b[39m.\u001b[39;49mlower(),\n\u001b[1;32m 218\u001b[0m url,\n\u001b[1;32m 219\u001b[0m params\u001b[39m=\u001b[39;49mparams,\n\u001b[1;32m 220\u001b[0m supplied_headers\u001b[39m=\u001b[39;49mheaders,\n\u001b[1;32m 221\u001b[0m files\u001b[39m=\u001b[39;49mfiles,\n\u001b[1;32m 222\u001b[0m stream\u001b[39m=\u001b[39;49mstream,\n\u001b[1;32m 223\u001b[0m request_id\u001b[39m=\u001b[39;49mrequest_id,\n\u001b[1;32m 224\u001b[0m request_timeout\u001b[39m=\u001b[39;49mrequest_timeout,\n\u001b[1;32m 225\u001b[0m )\n\u001b[1;32m 226\u001b[0m resp, got_stream \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_interpret_response(result, stream)\n\u001b[1;32m 227\u001b[0m \u001b[39mreturn\u001b[39;00m resp, got_stream, \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mapi_key\n",
"File \u001b[0;32m~/miniconda3/envs/haystack/lib/python3.9/site-packages/openai/api_requestor.py:516\u001b[0m, in \u001b[0;36mAPIRequestor.request_raw\u001b[0;34m(self, method, url, params, supplied_headers, files, stream, request_id, request_timeout)\u001b[0m\n\u001b[1;32m 514\u001b[0m _thread_context\u001b[39m.\u001b[39msession \u001b[39m=\u001b[39m _make_session()\n\u001b[1;32m 515\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 516\u001b[0m result \u001b[39m=\u001b[39m _thread_context\u001b[39m.\u001b[39;49msession\u001b[39m.\u001b[39;49mrequest(\n\u001b[1;32m 517\u001b[0m method,\n\u001b[1;32m 518\u001b[0m abs_url,\n\u001b[1;32m 519\u001b[0m headers\u001b[39m=\u001b[39;49mheaders,\n\u001b[1;32m 520\u001b[0m data\u001b[39m=\u001b[39;49mdata,\n\u001b[1;32m 521\u001b[0m files\u001b[39m=\u001b[39;49mfiles,\n\u001b[1;32m 522\u001b[0m stream\u001b[39m=\u001b[39;49mstream,\n\u001b[1;32m 523\u001b[0m timeout\u001b[39m=\u001b[39;49mrequest_timeout \u001b[39mif\u001b[39;49;00m request_timeout \u001b[39melse\u001b[39;49;00m TIMEOUT_SECS,\n\u001b[1;32m 524\u001b[0m )\n\u001b[1;32m 525\u001b[0m \u001b[39mexcept\u001b[39;00m requests\u001b[39m.\u001b[39mexceptions\u001b[39m.\u001b[39mTimeout \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 526\u001b[0m \u001b[39mraise\u001b[39;00m error\u001b[39m.\u001b[39mTimeout(\u001b[39m\"\u001b[39m\u001b[39mRequest timed out: \u001b[39m\u001b[39m{}\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mformat(e)) \u001b[39mfrom\u001b[39;00m \u001b[39me\u001b[39;00m\n",
"File \u001b[0;32m~/miniconda3/envs/haystack/lib/python3.9/site-packages/requests/sessions.py:587\u001b[0m, in \u001b[0;36mSession.request\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 582\u001b[0m send_kwargs \u001b[39m=\u001b[39m {\n\u001b[1;32m 583\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mtimeout\u001b[39m\u001b[39m\"\u001b[39m: timeout,\n\u001b[1;32m 584\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mallow_redirects\u001b[39m\u001b[39m\"\u001b[39m: allow_redirects,\n\u001b[1;32m 585\u001b[0m }\n\u001b[1;32m 586\u001b[0m send_kwargs\u001b[39m.\u001b[39mupdate(settings)\n\u001b[0;32m--> 587\u001b[0m resp \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49msend(prep, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49msend_kwargs)\n\u001b[1;32m 589\u001b[0m \u001b[39mreturn\u001b[39;00m resp\n",
"File \u001b[0;32m~/miniconda3/envs/haystack/lib/python3.9/site-packages/requests/sessions.py:701\u001b[0m, in \u001b[0;36mSession.send\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 698\u001b[0m start \u001b[39m=\u001b[39m preferred_clock()\n\u001b[1;32m 700\u001b[0m \u001b[39m# Send the request\u001b[39;00m\n\u001b[0;32m--> 701\u001b[0m r \u001b[39m=\u001b[39m adapter\u001b[39m.\u001b[39;49msend(request, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 703\u001b[0m \u001b[39m# Total elapsed time of the request (approximately)\u001b[39;00m\n\u001b[1;32m 704\u001b[0m elapsed \u001b[39m=\u001b[39m preferred_clock() \u001b[39m-\u001b[39m start\n",
"File \u001b[0;32m~/miniconda3/envs/haystack/lib/python3.9/site-packages/requests/adapters.py:489\u001b[0m, in \u001b[0;36mHTTPAdapter.send\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 487\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 488\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m chunked:\n\u001b[0;32m--> 489\u001b[0m resp \u001b[39m=\u001b[39m conn\u001b[39m.\u001b[39;49murlopen(\n\u001b[1;32m 490\u001b[0m method\u001b[39m=\u001b[39;49mrequest\u001b[39m.\u001b[39;49mmethod,\n\u001b[1;32m 491\u001b[0m url\u001b[39m=\u001b[39;49murl,\n\u001b[1;32m 492\u001b[0m body\u001b[39m=\u001b[39;49mrequest\u001b[39m.\u001b[39;49mbody,\n\u001b[1;32m 493\u001b[0m headers\u001b[39m=\u001b[39;49mrequest\u001b[39m.\u001b[39;49mheaders,\n\u001b[1;32m 494\u001b[0m redirect\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 495\u001b[0m assert_same_host\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 496\u001b[0m preload_content\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 497\u001b[0m decode_content\u001b[39m=\u001b[39;49m\u001b[39mFalse\u001b[39;49;00m,\n\u001b[1;32m 498\u001b[0m retries\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mmax_retries,\n\u001b[1;32m 499\u001b[0m timeout\u001b[39m=\u001b[39;49mtimeout,\n\u001b[1;32m 500\u001b[0m )\n\u001b[1;32m 502\u001b[0m \u001b[39m# Send the request.\u001b[39;00m\n\u001b[1;32m 503\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 504\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mhasattr\u001b[39m(conn, \u001b[39m\"\u001b[39m\u001b[39mproxy_pool\u001b[39m\u001b[39m\"\u001b[39m):\n",
"File \u001b[0;32m~/miniconda3/envs/haystack/lib/python3.9/site-packages/urllib3/connectionpool.py:703\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m 700\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_prepare_proxy(conn)\n\u001b[1;32m 702\u001b[0m \u001b[39m# Make the request on the httplib connection object.\u001b[39;00m\n\u001b[0;32m--> 703\u001b[0m httplib_response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_make_request(\n\u001b[1;32m 704\u001b[0m conn,\n\u001b[1;32m 705\u001b[0m method,\n\u001b[1;32m 706\u001b[0m url,\n\u001b[1;32m 707\u001b[0m timeout\u001b[39m=\u001b[39;49mtimeout_obj,\n\u001b[1;32m 708\u001b[0m body\u001b[39m=\u001b[39;49mbody,\n\u001b[1;32m 709\u001b[0m headers\u001b[39m=\u001b[39;49mheaders,\n\u001b[1;32m 710\u001b[0m chunked\u001b[39m=\u001b[39;49mchunked,\n\u001b[1;32m 711\u001b[0m )\n\u001b[1;32m 713\u001b[0m \u001b[39m# If we're going to release the connection in ``finally:``, then\u001b[39;00m\n\u001b[1;32m 714\u001b[0m \u001b[39m# the response doesn't need to know about the connection. Otherwise\u001b[39;00m\n\u001b[1;32m 715\u001b[0m \u001b[39m# it will also try to release it and we'll have a double-release\u001b[39;00m\n\u001b[1;32m 716\u001b[0m \u001b[39m# mess.\u001b[39;00m\n\u001b[1;32m 717\u001b[0m response_conn \u001b[39m=\u001b[39m conn \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m release_conn \u001b[39melse\u001b[39;00m \u001b[39mNone\u001b[39;00m\n",
"File \u001b[0;32m~/miniconda3/envs/haystack/lib/python3.9/site-packages/urllib3/connectionpool.py:449\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m 444\u001b[0m httplib_response \u001b[39m=\u001b[39m conn\u001b[39m.\u001b[39mgetresponse()\n\u001b[1;32m 445\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mBaseException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 446\u001b[0m \u001b[39m# Remove the TypeError from the exception chain in\u001b[39;00m\n\u001b[1;32m 447\u001b[0m \u001b[39m# Python 3 (including for exceptions like SystemExit).\u001b[39;00m\n\u001b[1;32m 448\u001b[0m \u001b[39m# Otherwise it looks like a bug in the code.\u001b[39;00m\n\u001b[0;32m--> 449\u001b[0m six\u001b[39m.\u001b[39;49mraise_from(e, \u001b[39mNone\u001b[39;49;00m)\n\u001b[1;32m 450\u001b[0m \u001b[39mexcept\u001b[39;00m (SocketTimeout, BaseSSLError, SocketError) \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 451\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_raise_timeout(err\u001b[39m=\u001b[39me, url\u001b[39m=\u001b[39murl, timeout_value\u001b[39m=\u001b[39mread_timeout)\n",
"File \u001b[0;32m<string>:3\u001b[0m, in \u001b[0;36mraise_from\u001b[0;34m(value, from_value)\u001b[0m\n",
"File \u001b[0;32m~/miniconda3/envs/haystack/lib/python3.9/site-packages/urllib3/connectionpool.py:444\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m 441\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mTypeError\u001b[39;00m:\n\u001b[1;32m 442\u001b[0m \u001b[39m# Python 3\u001b[39;00m\n\u001b[1;32m 443\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 444\u001b[0m httplib_response \u001b[39m=\u001b[39m conn\u001b[39m.\u001b[39;49mgetresponse()\n\u001b[1;32m 445\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mBaseException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 446\u001b[0m \u001b[39m# Remove the TypeError from the exception chain in\u001b[39;00m\n\u001b[1;32m 447\u001b[0m \u001b[39m# Python 3 (including for exceptions like SystemExit).\u001b[39;00m\n\u001b[1;32m 448\u001b[0m \u001b[39m# Otherwise it looks like a bug in the code.\u001b[39;00m\n\u001b[1;32m 449\u001b[0m six\u001b[39m.\u001b[39mraise_from(e, \u001b[39mNone\u001b[39;00m)\n",
"File \u001b[0;32m~/miniconda3/envs/haystack/lib/python3.9/http/client.py:1377\u001b[0m, in \u001b[0;36mHTTPConnection.getresponse\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1375\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 1376\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m-> 1377\u001b[0m response\u001b[39m.\u001b[39;49mbegin()\n\u001b[1;32m 1378\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mConnectionError\u001b[39;00m:\n\u001b[1;32m 1379\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mclose()\n",
"File \u001b[0;32m~/miniconda3/envs/haystack/lib/python3.9/http/client.py:320\u001b[0m, in \u001b[0;36mHTTPResponse.begin\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 318\u001b[0m \u001b[39m# read until we get a non-100 response\u001b[39;00m\n\u001b[1;32m 319\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[0;32m--> 320\u001b[0m version, status, reason \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_read_status()\n\u001b[1;32m 321\u001b[0m \u001b[39mif\u001b[39;00m status \u001b[39m!=\u001b[39m CONTINUE:\n\u001b[1;32m 322\u001b[0m \u001b[39mbreak\u001b[39;00m\n",
"File \u001b[0;32m~/miniconda3/envs/haystack/lib/python3.9/http/client.py:281\u001b[0m, in \u001b[0;36mHTTPResponse._read_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 280\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_read_status\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[0;32m--> 281\u001b[0m line \u001b[39m=\u001b[39m \u001b[39mstr\u001b[39m(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mfp\u001b[39m.\u001b[39;49mreadline(_MAXLINE \u001b[39m+\u001b[39;49m \u001b[39m1\u001b[39;49m), \u001b[39m\"\u001b[39m\u001b[39miso-8859-1\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 282\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(line) \u001b[39m>\u001b[39m _MAXLINE:\n\u001b[1;32m 283\u001b[0m \u001b[39mraise\u001b[39;00m LineTooLong(\u001b[39m\"\u001b[39m\u001b[39mstatus line\u001b[39m\u001b[39m\"\u001b[39m)\n",
"File \u001b[0;32m~/miniconda3/envs/haystack/lib/python3.9/socket.py:704\u001b[0m, in \u001b[0;36mSocketIO.readinto\u001b[0;34m(self, b)\u001b[0m\n\u001b[1;32m 702\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m 703\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m--> 704\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sock\u001b[39m.\u001b[39;49mrecv_into(b)\n\u001b[1;32m 705\u001b[0m \u001b[39mexcept\u001b[39;00m timeout:\n\u001b[1;32m 706\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_timeout_occurred \u001b[39m=\u001b[39m \u001b[39mTrue\u001b[39;00m\n",
"File \u001b[0;32m~/miniconda3/envs/haystack/lib/python3.9/ssl.py:1242\u001b[0m, in \u001b[0;36mSSLSocket.recv_into\u001b[0;34m(self, buffer, nbytes, flags)\u001b[0m\n\u001b[1;32m 1238\u001b[0m \u001b[39mif\u001b[39;00m flags \u001b[39m!=\u001b[39m \u001b[39m0\u001b[39m:\n\u001b[1;32m 1239\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[1;32m 1240\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mnon-zero flags not allowed in calls to recv_into() on \u001b[39m\u001b[39m%s\u001b[39;00m\u001b[39m\"\u001b[39m \u001b[39m%\u001b[39m\n\u001b[1;32m 1241\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m\u001b[39m__class__\u001b[39m)\n\u001b[0;32m-> 1242\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mread(nbytes, buffer)\n\u001b[1;32m 1243\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 1244\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39mrecv_into(buffer, nbytes, flags)\n",
"File \u001b[0;32m~/miniconda3/envs/haystack/lib/python3.9/ssl.py:1100\u001b[0m, in \u001b[0;36mSSLSocket.read\u001b[0;34m(self, len, buffer)\u001b[0m\n\u001b[1;32m 1098\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 1099\u001b[0m \u001b[39mif\u001b[39;00m buffer \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m-> 1100\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_sslobj\u001b[39m.\u001b[39;49mread(\u001b[39mlen\u001b[39;49m, buffer)\n\u001b[1;32m 1101\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 1102\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_sslobj\u001b[39m.\u001b[39mread(\u001b[39mlen\u001b[39m)\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"from tqdm import tqdm\n",
"\n",
"data = [\n",
" [], # title\n",
" [], # description\n",
"]\n",
"\n",
"# Embed and insert in batches\n",
"for i in tqdm(range(0, len(dataset))):\n",
" data[0].append(dataset[i]['title'])\n",
" data[1].append(dataset[i]['description'])\n",
" if len(data[0]) % BATCH_SIZE == 0:\n",
" data.append(embed(data[1]))\n",
" collection.insert(data)\n",
" data = [[],[]]\n",
"\n",
"# Embed and insert the remainder \n",
"if len(data[0]) != 0:\n",
" data.append(embed(data[1]))\n",
" collection.insert(data)\n",
" data = [[],[]]\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Query the Database\n",
"With our data safely inserted in Milvus, we can now perform a query. The query takes in a string or a list of strings and searches them. The resuts print out your provided description and the results that include the result score, the result title, and the result book description. "
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"import textwrap\n",
"\n",
"def query(queries, top_k = 5):\n",
" if type(queries) != list:\n",
" queries = [queries]\n",
" res = collection.search(embed(queries), anns_field='embedding', param=QUERY_PARAM, limit = top_k, output_fields=['title', 'description'])\n",
" for i, hit in enumerate(res):\n",
" print('Description:', queries[i])\n",
" print('Results:')\n",
" for ii, hits in enumerate(hit):\n",
" print('\\t' + 'Rank:', ii + 1, 'Score:', hits.score, 'Title:', hits.entity.get('title'))\n",
" print(textwrap.fill(hits.entity.get('description'), 88))\n",
" print()"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"RPC error: [search], <MilvusException: (code=1, message=code: UnexpectedError, reason: code: CollectionNotExists, reason: can't find collection: book_search)>, <Time:{'RPC start': '2023-03-17 14:22:18.368461', 'RPC error': '2023-03-17 14:22:18.382086'}>\n"
]
},
{
"ename": "MilvusException",
"evalue": "<MilvusException: (code=1, message=code: UnexpectedError, reason: code: CollectionNotExists, reason: can't find collection: book_search)>",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mMilvusException\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[32], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m query(\u001b[39m'\u001b[39;49m\u001b[39mBook about a k-9 from europe\u001b[39;49m\u001b[39m'\u001b[39;49m)\n",
"Cell \u001b[0;32mIn[31], line 6\u001b[0m, in \u001b[0;36mquery\u001b[0;34m(queries, top_k)\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mtype\u001b[39m(queries) \u001b[39m!=\u001b[39m \u001b[39mlist\u001b[39m:\n\u001b[1;32m 5\u001b[0m queries \u001b[39m=\u001b[39m [queries]\n\u001b[0;32m----> 6\u001b[0m res \u001b[39m=\u001b[39m collection\u001b[39m.\u001b[39;49msearch(embed(queries), anns_field\u001b[39m=\u001b[39;49m\u001b[39m'\u001b[39;49m\u001b[39membedding\u001b[39;49m\u001b[39m'\u001b[39;49m, param\u001b[39m=\u001b[39;49mQUERY_PARAM, limit \u001b[39m=\u001b[39;49m top_k, output_fields\u001b[39m=\u001b[39;49m[\u001b[39m'\u001b[39;49m\u001b[39mtitle\u001b[39;49m\u001b[39m'\u001b[39;49m, \u001b[39m'\u001b[39;49m\u001b[39mdescription\u001b[39;49m\u001b[39m'\u001b[39;49m])\n\u001b[1;32m 7\u001b[0m \u001b[39mfor\u001b[39;00m i, hit \u001b[39min\u001b[39;00m \u001b[39menumerate\u001b[39m(res):\n\u001b[1;32m 8\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39m'\u001b[39m\u001b[39mDescription:\u001b[39m\u001b[39m'\u001b[39m, queries[i])\n",
"File \u001b[0;32m~/miniconda3/envs/haystack/lib/python3.9/site-packages/pymilvus/orm/collection.py:614\u001b[0m, in \u001b[0;36mCollection.search\u001b[0;34m(self, data, anns_field, param, limit, expr, partition_names, output_fields, timeout, round_decimal, **kwargs)\u001b[0m\n\u001b[1;32m 611\u001b[0m \u001b[39mraise\u001b[39;00m DataTypeNotMatchException(message\u001b[39m=\u001b[39mExceptionsMessage\u001b[39m.\u001b[39mExprType \u001b[39m%\u001b[39m \u001b[39mtype\u001b[39m(expr))\n\u001b[1;32m 613\u001b[0m conn \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_get_connection()\n\u001b[0;32m--> 614\u001b[0m res \u001b[39m=\u001b[39m conn\u001b[39m.\u001b[39;49msearch(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_name, data, anns_field, param, limit, expr,\n\u001b[1;32m 615\u001b[0m partition_names, output_fields, round_decimal, timeout\u001b[39m=\u001b[39;49mtimeout,\n\u001b[1;32m 616\u001b[0m schema\u001b[39m=\u001b[39;49m\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_schema_dict, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 617\u001b[0m \u001b[39mif\u001b[39;00m kwargs\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39m_async\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mFalse\u001b[39;00m):\n\u001b[1;32m 618\u001b[0m \u001b[39mreturn\u001b[39;00m SearchFuture(res)\n",
"File \u001b[0;32m~/miniconda3/envs/haystack/lib/python3.9/site-packages/pymilvus/decorators.py:109\u001b[0m, in \u001b[0;36merror_handler.<locals>.wrapper.<locals>.handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 107\u001b[0m record_dict[\u001b[39m\"\u001b[39m\u001b[39mRPC error\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m \u001b[39mstr\u001b[39m(datetime\u001b[39m.\u001b[39mdatetime\u001b[39m.\u001b[39mnow())\n\u001b[1;32m 108\u001b[0m LOGGER\u001b[39m.\u001b[39merror(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mRPC error: [\u001b[39m\u001b[39m{\u001b[39;00minner_name\u001b[39m}\u001b[39;00m\u001b[39m], \u001b[39m\u001b[39m{\u001b[39;00me\u001b[39m}\u001b[39;00m\u001b[39m, <Time:\u001b[39m\u001b[39m{\u001b[39;00mrecord_dict\u001b[39m}\u001b[39;00m\u001b[39m>\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m--> 109\u001b[0m \u001b[39mraise\u001b[39;00m e\n\u001b[1;32m 110\u001b[0m \u001b[39mexcept\u001b[39;00m grpc\u001b[39m.\u001b[39mFutureTimeoutError \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 111\u001b[0m record_dict[\u001b[39m\"\u001b[39m\u001b[39mgRPC timeout\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m \u001b[39mstr\u001b[39m(datetime\u001b[39m.\u001b[39mdatetime\u001b[39m.\u001b[39mnow())\n",
"File \u001b[0;32m~/miniconda3/envs/haystack/lib/python3.9/site-packages/pymilvus/decorators.py:105\u001b[0m, in \u001b[0;36merror_handler.<locals>.wrapper.<locals>.handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 103\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 104\u001b[0m record_dict[\u001b[39m\"\u001b[39m\u001b[39mRPC start\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m \u001b[39mstr\u001b[39m(datetime\u001b[39m.\u001b[39mdatetime\u001b[39m.\u001b[39mnow())\n\u001b[0;32m--> 105\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 106\u001b[0m \u001b[39mexcept\u001b[39;00m MilvusException \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 107\u001b[0m record_dict[\u001b[39m\"\u001b[39m\u001b[39mRPC error\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m \u001b[39mstr\u001b[39m(datetime\u001b[39m.\u001b[39mdatetime\u001b[39m.\u001b[39mnow())\n",
"File \u001b[0;32m~/miniconda3/envs/haystack/lib/python3.9/site-packages/pymilvus/decorators.py:136\u001b[0m, in \u001b[0;36mtracing_request.<locals>.wrapper.<locals>.handler\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 134\u001b[0m \u001b[39mif\u001b[39;00m req_id:\n\u001b[1;32m 135\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mset_onetime_request_id(req_id)\n\u001b[0;32m--> 136\u001b[0m ret \u001b[39m=\u001b[39m func(\u001b[39mself\u001b[39;49m, \u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 137\u001b[0m \u001b[39mreturn\u001b[39;00m ret\n",
"File \u001b[0;32m~/miniconda3/envs/haystack/lib/python3.9/site-packages/pymilvus/decorators.py:85\u001b[0m, in \u001b[0;36mretry_on_rpc_failure.<locals>.wrapper.<locals>.handler\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 83\u001b[0m back_off \u001b[39m=\u001b[39m \u001b[39mmin\u001b[39m(back_off \u001b[39m*\u001b[39m back_off_multiplier, max_back_off)\n\u001b[1;32m 84\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m---> 85\u001b[0m \u001b[39mraise\u001b[39;00m e\n\u001b[1;32m 86\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mException\u001b[39;00m \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 87\u001b[0m \u001b[39mraise\u001b[39;00m e\n",
"File \u001b[0;32m~/miniconda3/envs/haystack/lib/python3.9/site-packages/pymilvus/decorators.py:50\u001b[0m, in \u001b[0;36mretry_on_rpc_failure.<locals>.wrapper.<locals>.handler\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 48\u001b[0m \u001b[39mwhile\u001b[39;00m \u001b[39mTrue\u001b[39;00m:\n\u001b[1;32m 49\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m---> 50\u001b[0m \u001b[39mreturn\u001b[39;00m func(\u001b[39mself\u001b[39;49m, \u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 51\u001b[0m \u001b[39mexcept\u001b[39;00m grpc\u001b[39m.\u001b[39mRpcError \u001b[39mas\u001b[39;00m e:\n\u001b[1;32m 52\u001b[0m \u001b[39m# DEADLINE_EXCEEDED means that the task wat not completed\u001b[39;00m\n\u001b[1;32m 53\u001b[0m \u001b[39m# UNAVAILABLE means that the service is not reachable currently\u001b[39;00m\n\u001b[1;32m 54\u001b[0m \u001b[39m# Reference: https://grpc.github.io/grpc/python/grpc.html#grpc-status-code\u001b[39;00m\n\u001b[1;32m 55\u001b[0m \u001b[39mif\u001b[39;00m e\u001b[39m.\u001b[39mcode() \u001b[39m!=\u001b[39m grpc\u001b[39m.\u001b[39mStatusCode\u001b[39m.\u001b[39mDEADLINE_EXCEEDED \u001b[39mand\u001b[39;00m e\u001b[39m.\u001b[39mcode() \u001b[39m!=\u001b[39m grpc\u001b[39m.\u001b[39mStatusCode\u001b[39m.\u001b[39mUNAVAILABLE:\n",
"File \u001b[0;32m~/miniconda3/envs/haystack/lib/python3.9/site-packages/pymilvus/client/grpc_handler.py:472\u001b[0m, in \u001b[0;36mGrpcHandler.search\u001b[0;34m(self, collection_name, data, anns_field, param, limit, expression, partition_names, output_fields, round_decimal, timeout, schema, **kwargs)\u001b[0m\n\u001b[1;32m 467\u001b[0m requests \u001b[39m=\u001b[39m Prepare\u001b[39m.\u001b[39msearch_requests_with_expr(collection_name, data, anns_field, param, limit, schema,\n\u001b[1;32m 468\u001b[0m expression, partition_names, output_fields, round_decimal,\n\u001b[1;32m 469\u001b[0m \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[1;32m 471\u001b[0m auto_id \u001b[39m=\u001b[39m schema[\u001b[39m\"\u001b[39m\u001b[39mauto_id\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[0;32m--> 472\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_execute_search_requests(requests, timeout, round_decimal\u001b[39m=\u001b[39;49mround_decimal, auto_id\u001b[39m=\u001b[39;49mauto_id, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
"File \u001b[0;32m~/miniconda3/envs/haystack/lib/python3.9/site-packages/pymilvus/client/grpc_handler.py:441\u001b[0m, in \u001b[0;36mGrpcHandler._execute_search_requests\u001b[0;34m(self, requests, timeout, **kwargs)\u001b[0m\n\u001b[1;32m 439\u001b[0m \u001b[39mif\u001b[39;00m kwargs\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39m_async\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mFalse\u001b[39;00m):\n\u001b[1;32m 440\u001b[0m \u001b[39mreturn\u001b[39;00m SearchFuture(\u001b[39mNone\u001b[39;00m, \u001b[39mNone\u001b[39;00m, \u001b[39mTrue\u001b[39;00m, pre_err)\n\u001b[0;32m--> 441\u001b[0m \u001b[39mraise\u001b[39;00m pre_err\n",
"File \u001b[0;32m~/miniconda3/envs/haystack/lib/python3.9/site-packages/pymilvus/client/grpc_handler.py:432\u001b[0m, in \u001b[0;36mGrpcHandler._execute_search_requests\u001b[0;34m(self, requests, timeout, **kwargs)\u001b[0m\n\u001b[1;32m 429\u001b[0m response \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_stub\u001b[39m.\u001b[39mSearch(request, timeout\u001b[39m=\u001b[39mtimeout)\n\u001b[1;32m 431\u001b[0m \u001b[39mif\u001b[39;00m response\u001b[39m.\u001b[39mstatus\u001b[39m.\u001b[39merror_code \u001b[39m!=\u001b[39m \u001b[39m0\u001b[39m:\n\u001b[0;32m--> 432\u001b[0m \u001b[39mraise\u001b[39;00m MilvusException(response\u001b[39m.\u001b[39mstatus\u001b[39m.\u001b[39merror_code, response\u001b[39m.\u001b[39mstatus\u001b[39m.\u001b[39mreason)\n\u001b[1;32m 434\u001b[0m raws\u001b[39m.\u001b[39mappend(response)\n\u001b[1;32m 435\u001b[0m round_decimal \u001b[39m=\u001b[39m kwargs\u001b[39m.\u001b[39mget(\u001b[39m\"\u001b[39m\u001b[39mround_decimal\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m-\u001b[39m\u001b[39m1\u001b[39m)\n",
"\u001b[0;31mMilvusException\u001b[0m: <MilvusException: (code=1, message=code: UnexpectedError, reason: code: CollectionNotExists, reason: can't find collection: book_search)>"
]
}
],
"source": [
"query('Book about a k-9 from europe')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "haystack",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}