diff --git a/examples/vector_databases/chroma/Using_Chroma_for_embeddings_search.ipynb b/examples/vector_databases/chroma/Using_Chroma_for_embeddings_search.ipynb index 5eb2b424..09a45ad1 100644 --- a/examples/vector_databases/chroma/Using_Chroma_for_embeddings_search.ipynb +++ b/examples/vector_databases/chroma/Using_Chroma_for_embeddings_search.ipynb @@ -42,29 +42,221 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "8d8810f9", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting openai\n", + " Obtaining dependency information for openai from https://files.pythonhosted.org/packages/67/78/7588a047e458cb8075a4089d721d7af5e143ff85a2388d4a28c530be0494/openai-0.27.8-py3-none-any.whl.metadata\n", + " Downloading openai-0.27.8-py3-none-any.whl.metadata (13 kB)\n", + "Collecting requests>=2.20 (from openai)\n", + " Obtaining dependency information for requests>=2.20 from https://files.pythonhosted.org/packages/70/8e/0e2d847013cb52cd35b38c009bb167a1a26b2ce6cd6965bf26b47bc0bf44/requests-2.31.0-py3-none-any.whl.metadata\n", + " Using cached requests-2.31.0-py3-none-any.whl.metadata (4.6 kB)\n", + "Collecting tqdm (from openai)\n", + " Using cached tqdm-4.65.0-py3-none-any.whl (77 kB)\n", + "Collecting aiohttp (from openai)\n", + " Obtaining dependency information for aiohttp from https://files.pythonhosted.org/packages/fa/9e/49002fde2a97d7df0e162e919c31cf13aa9f184537739743d1239edd0e67/aiohttp-3.8.5-cp310-cp310-macosx_11_0_arm64.whl.metadata\n", + " Downloading aiohttp-3.8.5-cp310-cp310-macosx_11_0_arm64.whl.metadata (7.7 kB)\n", + "Collecting charset-normalizer<4,>=2 (from requests>=2.20->openai)\n", + " Obtaining dependency information for charset-normalizer<4,>=2 from https://files.pythonhosted.org/packages/ec/a7/96835706283d63fefbbbb4f119d52f195af00fc747e67cc54397c56312c8/charset_normalizer-3.2.0-cp310-cp310-macosx_11_0_arm64.whl.metadata\n", + " Using cached charset_normalizer-3.2.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (31 kB)\n", + "Collecting idna<4,>=2.5 (from requests>=2.20->openai)\n", + " Using cached idna-3.4-py3-none-any.whl (61 kB)\n", + "Collecting urllib3<3,>=1.21.1 (from requests>=2.20->openai)\n", + " Obtaining dependency information for urllib3<3,>=1.21.1 from https://files.pythonhosted.org/packages/9b/81/62fd61001fa4b9d0df6e31d47ff49cfa9de4af03adecf339c7bc30656b37/urllib3-2.0.4-py3-none-any.whl.metadata\n", + " Downloading urllib3-2.0.4-py3-none-any.whl.metadata (6.6 kB)\n", + "Collecting certifi>=2017.4.17 (from requests>=2.20->openai)\n", + " Using cached certifi-2023.5.7-py3-none-any.whl (156 kB)\n", + "Collecting attrs>=17.3.0 (from aiohttp->openai)\n", + " Using cached attrs-23.1.0-py3-none-any.whl (61 kB)\n", + "Collecting multidict<7.0,>=4.5 (from aiohttp->openai)\n", + " Using cached multidict-6.0.4-cp310-cp310-macosx_11_0_arm64.whl (29 kB)\n", + "Collecting async-timeout<5.0,>=4.0.0a3 (from aiohttp->openai)\n", + " Using cached async_timeout-4.0.2-py3-none-any.whl (5.8 kB)\n", + "Collecting yarl<2.0,>=1.0 (from aiohttp->openai)\n", + " Using cached yarl-1.9.2-cp310-cp310-macosx_11_0_arm64.whl (62 kB)\n", + "Collecting frozenlist>=1.1.1 (from aiohttp->openai)\n", + " Obtaining dependency information for frozenlist>=1.1.1 from https://files.pythonhosted.org/packages/67/6a/55a49da0fa373ac9aa49ccd5b6393ecc183e2a0904d9449ea3ee1163e0b1/frozenlist-1.4.0-cp310-cp310-macosx_11_0_arm64.whl.metadata\n", + " Downloading frozenlist-1.4.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (5.2 kB)\n", + "Collecting aiosignal>=1.1.2 (from aiohttp->openai)\n", + " Using cached aiosignal-1.3.1-py3-none-any.whl (7.6 kB)\n", + "Using cached openai-0.27.8-py3-none-any.whl (73 kB)\n", + "Using cached requests-2.31.0-py3-none-any.whl (62 kB)\n", + "Downloading aiohttp-3.8.5-cp310-cp310-macosx_11_0_arm64.whl (343 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m343.9/343.9 kB\u001b[0m \u001b[31m11.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hUsing cached charset_normalizer-3.2.0-cp310-cp310-macosx_11_0_arm64.whl (124 kB)\n", + "Downloading frozenlist-1.4.0-cp310-cp310-macosx_11_0_arm64.whl (46 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading urllib3-2.0.4-py3-none-any.whl (123 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m123.9/123.9 kB\u001b[0m \u001b[31m20.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: urllib3, tqdm, multidict, idna, frozenlist, charset-normalizer, certifi, attrs, async-timeout, yarl, requests, aiosignal, aiohttp, openai\n", + "Successfully installed aiohttp-3.8.5 aiosignal-1.3.1 async-timeout-4.0.2 attrs-23.1.0 certifi-2023.5.7 charset-normalizer-3.2.0 frozenlist-1.4.0 idna-3.4 multidict-6.0.4 openai-0.27.8 requests-2.31.0 tqdm-4.65.0 urllib3-2.0.4 yarl-1.9.2\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Collecting chromadb\n", + " Obtaining dependency information for chromadb from https://files.pythonhosted.org/packages/47/b7/41d975f02818c965cdb8a119cab5a38cfb08e0c1abb18efebe9a373ea97b/chromadb-0.4.2-py3-none-any.whl.metadata\n", + " Downloading chromadb-0.4.2-py3-none-any.whl.metadata (6.9 kB)\n", + "Collecting pandas>=1.3 (from chromadb)\n", + " Obtaining dependency information for pandas>=1.3 from https://files.pythonhosted.org/packages/4a/f6/f620ca62365d83e663a255a41b08d2fc2eaf304e0b8b21bb6d62a7390fe3/pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl.metadata\n", + " Using cached pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl.metadata (18 kB)\n", + "Requirement already satisfied: requests>=2.28 in /Users/antontroynikov/miniforge3/envs/chroma-openai-cookbook/lib/python3.10/site-packages (from chromadb) (2.31.0)\n", + "Collecting pydantic<2.0,>=1.9 (from chromadb)\n", + " Obtaining dependency information for pydantic<2.0,>=1.9 from https://files.pythonhosted.org/packages/79/3e/6b4d0fb2174beceac9a991ba8e67158b45c35faca9ea4545ae32d47096cd/pydantic-1.10.11-cp310-cp310-macosx_11_0_arm64.whl.metadata\n", + " Using cached pydantic-1.10.11-cp310-cp310-macosx_11_0_arm64.whl.metadata (148 kB)\n", + "Collecting chroma-hnswlib==0.7.1 (from chromadb)\n", + " Obtaining dependency information for chroma-hnswlib==0.7.1 from https://files.pythonhosted.org/packages/a5/d5/54947127f5cb2a1fcef40877fb3e6044495eec0a158ba0956babe4ab2a77/chroma_hnswlib-0.7.1-cp310-cp310-macosx_13_0_arm64.whl.metadata\n", + " Using cached chroma_hnswlib-0.7.1-cp310-cp310-macosx_13_0_arm64.whl.metadata (252 bytes)\n", + "Collecting fastapi<0.100.0,>=0.95.2 (from chromadb)\n", + " Obtaining dependency information for fastapi<0.100.0,>=0.95.2 from https://files.pythonhosted.org/packages/73/eb/03b691afa0b5ffa1e93ed34f97ec1e7855c758efbdcfb16c209af0b0506b/fastapi-0.99.1-py3-none-any.whl.metadata\n", + " Using cached fastapi-0.99.1-py3-none-any.whl.metadata (23 kB)\n", + "Collecting uvicorn[standard]>=0.18.3 (from chromadb)\n", + " Obtaining dependency information for uvicorn[standard]>=0.18.3 from https://files.pythonhosted.org/packages/5d/07/b9eac057f7efa56900640a233c1ed63db83568322c6bcbabe98f741d5289/uvicorn-0.23.1-py3-none-any.whl.metadata\n", + " Using cached uvicorn-0.23.1-py3-none-any.whl.metadata (6.2 kB)\n", + "Collecting numpy>=1.21.6 (from chromadb)\n", + " Obtaining dependency information for numpy>=1.21.6 from https://files.pythonhosted.org/packages/1b/cd/9e8313ffd849626c836fffd7881296a74f53a7739bd9ce7a6e22b1fc843b/numpy-1.25.1-cp310-cp310-macosx_11_0_arm64.whl.metadata\n", + " Using cached numpy-1.25.1-cp310-cp310-macosx_11_0_arm64.whl.metadata (5.6 kB)\n", + "Collecting posthog>=2.4.0 (from chromadb)\n", + " Using cached posthog-3.0.1-py2.py3-none-any.whl (37 kB)\n", + "Requirement already satisfied: typing-extensions>=4.5.0 in /Users/antontroynikov/miniforge3/envs/chroma-openai-cookbook/lib/python3.10/site-packages (from chromadb) (4.7.1)\n", + "Collecting pulsar-client>=3.1.0 (from chromadb)\n", + " Obtaining dependency information for pulsar-client>=3.1.0 from https://files.pythonhosted.org/packages/43/85/ab0455008ce3335a1c75a7c500fd8921ab166f34821fa67dc91ae9687a40/pulsar_client-3.2.0-cp310-cp310-macosx_10_15_universal2.whl.metadata\n", + " Using cached pulsar_client-3.2.0-cp310-cp310-macosx_10_15_universal2.whl.metadata (1.0 kB)\n", + "Collecting onnxruntime>=1.14.1 (from chromadb)\n", + " Obtaining dependency information for onnxruntime>=1.14.1 from https://files.pythonhosted.org/packages/cf/06/0c6e355b9ddbebc34d0e21bc5be1e4bd2c124ebd9030525838fa6e65eaa8/onnxruntime-1.15.1-cp310-cp310-macosx_11_0_arm64.whl.metadata\n", + " Using cached onnxruntime-1.15.1-cp310-cp310-macosx_11_0_arm64.whl.metadata (4.0 kB)\n", + "Collecting tokenizers>=0.13.2 (from chromadb)\n", + " Using cached tokenizers-0.13.3-cp310-cp310-macosx_12_0_arm64.whl (3.9 MB)\n", + "Collecting pypika>=0.48.9 (from chromadb)\n", + " Using cached PyPika-0.48.9-py2.py3-none-any.whl\n", + "Requirement already satisfied: tqdm>=4.65.0 in /Users/antontroynikov/miniforge3/envs/chroma-openai-cookbook/lib/python3.10/site-packages (from chromadb) (4.65.0)\n", + "Collecting overrides>=7.3.1 (from chromadb)\n", + " Using cached overrides-7.3.1-py3-none-any.whl (17 kB)\n", + "Collecting importlib-resources (from chromadb)\n", + " Obtaining dependency information for importlib-resources from https://files.pythonhosted.org/packages/29/d1/bed03eca30aa05aaf6e0873de091f9385c48705c4a607c2dfe3edbe543e8/importlib_resources-6.0.0-py3-none-any.whl.metadata\n", + " Using cached importlib_resources-6.0.0-py3-none-any.whl.metadata (4.2 kB)\n", + "Collecting starlette<0.28.0,>=0.27.0 (from fastapi<0.100.0,>=0.95.2->chromadb)\n", + " Obtaining dependency information for starlette<0.28.0,>=0.27.0 from https://files.pythonhosted.org/packages/58/f8/e2cca22387965584a409795913b774235752be4176d276714e15e1a58884/starlette-0.27.0-py3-none-any.whl.metadata\n", + " Using cached starlette-0.27.0-py3-none-any.whl.metadata (5.8 kB)\n", + "Collecting coloredlogs (from onnxruntime>=1.14.1->chromadb)\n", + " Using cached coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)\n", + "Collecting flatbuffers (from onnxruntime>=1.14.1->chromadb)\n", + " Obtaining dependency information for flatbuffers from https://files.pythonhosted.org/packages/6f/12/d5c79ee252793ffe845d58a913197bfa02ae9a0b5c9bc3dc4b58d477b9e7/flatbuffers-23.5.26-py2.py3-none-any.whl.metadata\n", + " Using cached flatbuffers-23.5.26-py2.py3-none-any.whl.metadata (850 bytes)\n", + "Requirement already satisfied: packaging in /Users/antontroynikov/miniforge3/envs/chroma-openai-cookbook/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (23.1)\n", + "Collecting protobuf (from onnxruntime>=1.14.1->chromadb)\n", + " Obtaining dependency information for protobuf from https://files.pythonhosted.org/packages/cb/d3/a164038605494d49acc4f9cda1c0bc200b96382c53edd561387263bb181d/protobuf-4.23.4-cp37-abi3-macosx_10_9_universal2.whl.metadata\n", + " Using cached protobuf-4.23.4-cp37-abi3-macosx_10_9_universal2.whl.metadata (540 bytes)\n", + "Collecting sympy (from onnxruntime>=1.14.1->chromadb)\n", + " Using cached sympy-1.12-py3-none-any.whl (5.7 MB)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/antontroynikov/miniforge3/envs/chroma-openai-cookbook/lib/python3.10/site-packages (from pandas>=1.3->chromadb) (2.8.2)\n", + "Collecting pytz>=2020.1 (from pandas>=1.3->chromadb)\n", + " Using cached pytz-2023.3-py2.py3-none-any.whl (502 kB)\n", + "Collecting tzdata>=2022.1 (from pandas>=1.3->chromadb)\n", + " Using cached tzdata-2023.3-py2.py3-none-any.whl (341 kB)\n", + "Requirement already satisfied: six>=1.5 in /Users/antontroynikov/miniforge3/envs/chroma-openai-cookbook/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (1.16.0)\n", + "Collecting monotonic>=1.5 (from posthog>=2.4.0->chromadb)\n", + " Using cached monotonic-1.6-py2.py3-none-any.whl (8.2 kB)\n", + "Collecting backoff>=1.10.0 (from posthog>=2.4.0->chromadb)\n", + " Using cached backoff-2.2.1-py3-none-any.whl (15 kB)\n", + "Requirement already satisfied: certifi in /Users/antontroynikov/miniforge3/envs/chroma-openai-cookbook/lib/python3.10/site-packages (from pulsar-client>=3.1.0->chromadb) (2023.5.7)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/antontroynikov/miniforge3/envs/chroma-openai-cookbook/lib/python3.10/site-packages (from requests>=2.28->chromadb) (3.2.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/antontroynikov/miniforge3/envs/chroma-openai-cookbook/lib/python3.10/site-packages (from requests>=2.28->chromadb) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/antontroynikov/miniforge3/envs/chroma-openai-cookbook/lib/python3.10/site-packages (from requests>=2.28->chromadb) (2.0.4)\n", + "Collecting click>=7.0 (from uvicorn[standard]>=0.18.3->chromadb)\n", + " Obtaining dependency information for click>=7.0 from https://files.pythonhosted.org/packages/1a/70/e63223f8116931d365993d4a6b7ef653a4d920b41d03de7c59499962821f/click-8.1.6-py3-none-any.whl.metadata\n", + " Using cached click-8.1.6-py3-none-any.whl.metadata (3.0 kB)\n", + "Collecting h11>=0.8 (from uvicorn[standard]>=0.18.3->chromadb)\n", + " Using cached h11-0.14.0-py3-none-any.whl (58 kB)\n", + "Collecting httptools>=0.5.0 (from uvicorn[standard]>=0.18.3->chromadb)\n", + " Obtaining dependency information for httptools>=0.5.0 from https://files.pythonhosted.org/packages/8f/71/d535e9f6967958d21b8fe1baeb7efb6304b86e8fcff44d0bda8690e0aec9/httptools-0.6.0-cp310-cp310-macosx_10_9_universal2.whl.metadata\n", + " Using cached httptools-0.6.0-cp310-cp310-macosx_10_9_universal2.whl.metadata (3.6 kB)\n", + "Collecting python-dotenv>=0.13 (from uvicorn[standard]>=0.18.3->chromadb)\n", + " Using cached python_dotenv-1.0.0-py3-none-any.whl (19 kB)\n", + "Collecting pyyaml>=5.1 (from uvicorn[standard]>=0.18.3->chromadb)\n", + " Obtaining dependency information for pyyaml>=5.1 from https://files.pythonhosted.org/packages/5b/07/10033a403b23405a8fc48975444463d3d10a5c2736b7eb2550b07b367429/PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl.metadata\n", + " Using cached PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl.metadata (2.1 kB)\n", + "Collecting uvloop!=0.15.0,!=0.15.1,>=0.14.0 (from uvicorn[standard]>=0.18.3->chromadb)\n", + " Using cached uvloop-0.17.0-cp310-cp310-macosx_10_9_universal2.whl (2.1 MB)\n", + "Collecting watchfiles>=0.13 (from uvicorn[standard]>=0.18.3->chromadb)\n", + " Using cached watchfiles-0.19.0-cp37-abi3-macosx_11_0_arm64.whl (388 kB)\n", + "Collecting websockets>=10.4 (from uvicorn[standard]>=0.18.3->chromadb)\n", + " Using cached websockets-11.0.3-cp310-cp310-macosx_11_0_arm64.whl (121 kB)\n", + "Collecting anyio<5,>=3.4.0 (from starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb)\n", + " Obtaining dependency information for anyio<5,>=3.4.0 from https://files.pythonhosted.org/packages/19/24/44299477fe7dcc9cb58d0a57d5a7588d6af2ff403fdd2d47a246c91a3246/anyio-3.7.1-py3-none-any.whl.metadata\n", + " Using cached anyio-3.7.1-py3-none-any.whl.metadata (4.7 kB)\n", + "Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime>=1.14.1->chromadb)\n", + " Using cached humanfriendly-10.0-py2.py3-none-any.whl (86 kB)\n", + "Collecting mpmath>=0.19 (from sympy->onnxruntime>=1.14.1->chromadb)\n", + " Using cached mpmath-1.3.0-py3-none-any.whl (536 kB)\n", + "Collecting sniffio>=1.1 (from anyio<5,>=3.4.0->starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb)\n", + " Using cached sniffio-1.3.0-py3-none-any.whl (10 kB)\n", + "Collecting exceptiongroup (from anyio<5,>=3.4.0->starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb)\n", + " Obtaining dependency information for exceptiongroup from https://files.pythonhosted.org/packages/fe/17/f43b7c9ccf399d72038042ee72785c305f6c6fdc6231942f8ab99d995742/exceptiongroup-1.1.2-py3-none-any.whl.metadata\n", + " Using cached exceptiongroup-1.1.2-py3-none-any.whl.metadata (6.1 kB)\n", + "Downloading chromadb-0.4.2-py3-none-any.whl (399 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m399.3/399.3 kB\u001b[0m \u001b[31m12.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hUsing cached chroma_hnswlib-0.7.1-cp310-cp310-macosx_13_0_arm64.whl (195 kB)\n", + "Using cached fastapi-0.99.1-py3-none-any.whl (58 kB)\n", + "Using cached numpy-1.25.1-cp310-cp310-macosx_11_0_arm64.whl (14.0 MB)\n", + "Using cached onnxruntime-1.15.1-cp310-cp310-macosx_11_0_arm64.whl (6.1 MB)\n", + "Using cached pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl (10.8 MB)\n", + "Using cached pulsar_client-3.2.0-cp310-cp310-macosx_10_15_universal2.whl (10.8 MB)\n", + "Using cached pydantic-1.10.11-cp310-cp310-macosx_11_0_arm64.whl (2.5 MB)\n", + "Using cached importlib_resources-6.0.0-py3-none-any.whl (31 kB)\n", + "Using cached click-8.1.6-py3-none-any.whl (97 kB)\n", + "Using cached httptools-0.6.0-cp310-cp310-macosx_10_9_universal2.whl (237 kB)\n", + "Using cached PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl (169 kB)\n", + "Using cached starlette-0.27.0-py3-none-any.whl (66 kB)\n", + "Using cached flatbuffers-23.5.26-py2.py3-none-any.whl (26 kB)\n", + "Using cached protobuf-4.23.4-cp37-abi3-macosx_10_9_universal2.whl (400 kB)\n", + "Using cached uvicorn-0.23.1-py3-none-any.whl (59 kB)\n", + "Using cached anyio-3.7.1-py3-none-any.whl (80 kB)\n", + "Using cached exceptiongroup-1.1.2-py3-none-any.whl (14 kB)\n", + "Installing collected packages: tokenizers, pytz, pypika, mpmath, monotonic, flatbuffers, websockets, uvloop, tzdata, sympy, sniffio, pyyaml, python-dotenv, pydantic, pulsar-client, protobuf, overrides, numpy, importlib-resources, humanfriendly, httptools, h11, exceptiongroup, click, backoff, uvicorn, posthog, pandas, coloredlogs, chroma-hnswlib, anyio, watchfiles, starlette, onnxruntime, fastapi, chromadb\n", + "Successfully installed anyio-3.7.1 backoff-2.2.1 chroma-hnswlib-0.7.1 chromadb-0.4.2 click-8.1.6 coloredlogs-15.0.1 exceptiongroup-1.1.2 fastapi-0.99.1 flatbuffers-23.5.26 h11-0.14.0 httptools-0.6.0 humanfriendly-10.0 importlib-resources-6.0.0 monotonic-1.6 mpmath-1.3.0 numpy-1.25.1 onnxruntime-1.15.1 overrides-7.3.1 pandas-2.0.3 posthog-3.0.1 protobuf-4.23.4 pulsar-client-3.2.0 pydantic-1.10.11 pypika-0.48.9 python-dotenv-1.0.0 pytz-2023.3 pyyaml-6.0.1 sniffio-1.3.0 starlette-0.27.0 sympy-1.12 tokenizers-0.13.3 tzdata-2023.3 uvicorn-0.23.1 uvloop-0.17.0 watchfiles-0.19.0 websockets-11.0.3\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Collecting wget\n", + " Using cached wget-3.2.zip (10 kB)\n", + " Preparing metadata (setup.py) ... \u001b[?25ldone\n", + "\u001b[?25hBuilding wheels for collected packages: wget\n", + " Building wheel for wget (setup.py) ... \u001b[?25ldone\n", + "\u001b[?25h Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9657 sha256=b2d83c5fcdeab398d0a4e9808a470bbf725fffea4a6130e731c6097b9561005b\n", + " Stored in directory: /Users/antontroynikov/Library/Caches/pip/wheels/8b/f1/7f/5c94f0a7a505ca1c81cd1d9208ae2064675d97582078e6c769\n", + "Successfully built wget\n", + "Installing collected packages: wget\n", + "Successfully installed wget-3.2\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Requirement already satisfied: numpy in /Users/antontroynikov/miniforge3/envs/chroma-openai-cookbook/lib/python3.10/site-packages (1.25.1)\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], "source": [ - "# We'll need to install the Chroma client\n", - "!pip install chromadb\n", + "# Make sure the OpenAI library is installed\n", + "%pip install openai\n", "\n", - "#Install wget to pull zip file\n", - "!pip install wget" + "# We'll need to install the Chroma client\n", + "%pip install chromadb\n", + "\n", + "# Install wget to pull zip file\n", + "%pip install wget\n", + "\n", + "# Install numpy for data manipulation\n", + "%pip install numpy" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "id": "5be94df6", "metadata": {}, "outputs": [], "source": [ "import openai\n", - "from typing import List, Iterator\n", "import pandas as pd\n", - "import numpy as np\n", "import os\n", "import wget\n", "from ast import literal_eval\n", @@ -94,24 +286,17 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "5dff8b55", "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "100% [......................................................................] 698933052 / 698933052" - ] - }, { "data": { "text/plain": [ "'vector_database_wikipedia_articles_embedded.zip'" ] }, - "execution_count": 2, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -125,7 +310,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "21097972", "metadata": {}, "outputs": [], @@ -137,7 +322,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 7, "id": "70bbd8ba", "metadata": {}, "outputs": [], @@ -147,7 +332,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 8, "id": "1721e45d", "metadata": {}, "outputs": [ @@ -266,7 +451,7 @@ "4 [0.021524671465158463, 0.018522677943110466, -... 4 " ] }, - "execution_count": 5, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -277,7 +462,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 9, "id": "960b82af", "metadata": {}, "outputs": [], @@ -292,7 +477,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 11, "id": "a334ab8b", "metadata": {}, "outputs": [ @@ -349,24 +534,14 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "id": "159d9646", "metadata": {}, "outputs": [], "source": [ - "\n", - "\n", - "chroma_client = chromadb.Client() # Ephemeral. Comment out for the persistent version.\n", - "\n", - "# Uncomment the following for the persistent version. \n", - "# import chromadb.config.Settings\n", - "# persist_directory = 'chroma_persistence' # Directory to store persisted Chroma data. \n", - "# client = chromadb.Client(\n", - "# Settings(\n", - "# persist_directory=persist_directory,\n", - "# chroma_db_impl=\"duckdb+parquet\",\n", - "# )\n", - "# )" + "chroma_client = chromadb.EphemeralClient() # Equivalent to chromadb.Client(), ephemeral.\n", + "# Uncomment for persistent client\n", + "# chroma_client = chromadb.PersistentClient()" ] }, { @@ -384,7 +559,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 6, "id": "ad2d1bce", "metadata": {}, "outputs": [ @@ -432,7 +607,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 15, "id": "84885fec", "metadata": {}, "outputs": [], @@ -462,7 +637,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 16, "id": "273b8b4c", "metadata": {}, "outputs": [], @@ -481,7 +656,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 17, "id": "e84cf47f", "metadata": {}, "outputs": [ @@ -514,61 +689,61 @@ " \n", " \n", " \n", - " 116\n", - " 12249\n", - " 0.265118\n", - " Europe\n", - " Europe is the western part of the continent of...\n", + " 2\n", + " 23266\n", + " 0.249646\n", + " Art\n", + " Art is a creative activity that expresses imag...\n", " \n", " \n", - " 1332\n", - " 12248\n", - " 0.290684\n", - " European\n", - " European may mean:\\nA person or attribute of t...\n", + " 11777\n", + " 15436\n", + " 0.271688\n", + " Hellenistic art\n", + " The art of the Hellenistic time (from 400 B.C....\n", " \n", " \n", - " 2885\n", - " 12225\n", - " 0.314833\n", - " Scandinavia\n", - " Scandinavia is a group of countries in norther...\n", + " 12178\n", + " 23265\n", + " 0.279306\n", + " Byzantine art\n", + " Byzantine art is a form of Christian Greek art...\n", " \n", " \n", - " 12212\n", - " 1332\n", - " 0.317179\n", - " Western civilization\n", - " Western civilization, western culture or the ...\n", + " 13215\n", + " 11777\n", + " 0.294415\n", + " Art film\n", + " Art films are a type of movie that is very dif...\n", " \n", " \n", - " 12216\n", - " 12216\n", - " 0.321235\n", - " Eastern Europe\n", - " Eastern Europe is the eastern region of Europe...\n", + " 15436\n", + " 22108\n", + " 0.305937\n", + " Renaissance art\n", + " Many of the most famous and best-loved works o...\n", " \n", " \n", "\n", "" ], "text/plain": [ - " id score title \\\n", - "116 12249 0.265118 Europe \n", - "1332 12248 0.290684 European \n", - "2885 12225 0.314833 Scandinavia \n", - "12212 1332 0.317179 Western civilization \n", - "12216 12216 0.321235 Eastern Europe \n", + " id score title \\\n", + "2 23266 0.249646 Art \n", + "11777 15436 0.271688 Hellenistic art \n", + "12178 23265 0.279306 Byzantine art \n", + "13215 11777 0.294415 Art film \n", + "15436 22108 0.305937 Renaissance art \n", "\n", " content \n", - "116 Europe is the western part of the continent of... \n", - "1332 European may mean:\\nA person or attribute of t... \n", - "2885 Scandinavia is a group of countries in norther... \n", - "12212 Western civilization, western culture or the ... \n", - "12216 Eastern Europe is the eastern region of Europe... " + "2 Art is a creative activity that expresses imag... \n", + "11777 The art of the Hellenistic time (from 400 B.C.... \n", + "12178 Byzantine art is a form of Christian Greek art... \n", + "13215 Art films are a type of movie that is very dif... \n", + "15436 Many of the most famous and best-loved works o... " ] }, - "execution_count": 12, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -585,7 +760,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 18, "id": "f4db910a", "metadata": {}, "outputs": [ @@ -672,7 +847,7 @@ "11702 William Wallace was a Scottish knight who foug... " ] }, - "execution_count": 13, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -694,21 +869,13 @@ "source": [ "Now that you've got a basic embeddings search running, you can [hop over to the Chroma docs](https://docs.trychroma.com/usage-guide#using-where-filters) to learn more about how to add filters to your query, update/delete data in your collections, and deploy Chroma." ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0119d87a", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "vector_db_split", + "display_name": "chroma_openai", "language": "python", - "name": "vector_db_split" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -720,12 +887,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" - }, - "vscode": { - "interpreter": { - "hash": "fd16a328ca3d68029457069b79cb0b38eb39a0f5ccc4fe4473d3047707df8207" - } + "version": "3.10.12" } }, "nbformat": 4, diff --git a/examples/vector_databases/chroma/hyde-with-chroma-and-openai.ipynb b/examples/vector_databases/chroma/hyde-with-chroma-and-openai.ipynb index 50466b41..72f6c215 100644 --- a/examples/vector_databases/chroma/hyde-with-chroma-and-openai.ipynb +++ b/examples/vector_databases/chroma/hyde-with-chroma-and-openai.ipynb @@ -34,9 +34,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], "source": [ "%pip install -qU openai chromadb pandas" ] @@ -52,7 +60,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -67,7 +75,7 @@ "import os\n", "\n", "# Uncomment the following line to set the environment variable in the notebook\n", - "# os.environ[\"OPENAI_API_KEY\"] = \"sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\" \n", + "# os.environ[\"OPENAI_API_KEY\"] = 'sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'\n", "\n", "if os.getenv(\"OPENAI_API_KEY\") is not None:\n", " print(\"OPENAI_API_KEY is ready\")\n", @@ -91,7 +99,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -177,7 +185,7 @@ "4 {} [5152028, 11705328] " ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -205,7 +213,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -556,7 +564,7 @@ "# We initialize an embedding function, and provide it to the collection.\n", "embedding_function = OpenAIEmbeddingFunction(api_key=os.getenv(\"OPENAI_API_KEY\"))\n", "\n", - "chroma_client = chromadb.Client()\n", + "chroma_client = chromadb.Client() # Ephemeral by default\n", "scifact_corpus_collection = chroma_client.create_collection(name='scifact_corpus', embedding_function=embedding_function)" ] }, @@ -975,7 +983,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.16" + "version": "3.10.12" }, "vscode": { "interpreter": {