Update notebooks (#598)

pull/612/head
Anton Troynikov 11 months ago committed by GitHub
parent 964dd85ed3
commit 3115683f14
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -42,29 +42,221 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "8d8810f9",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting openai\n",
" Obtaining dependency information for openai from https://files.pythonhosted.org/packages/67/78/7588a047e458cb8075a4089d721d7af5e143ff85a2388d4a28c530be0494/openai-0.27.8-py3-none-any.whl.metadata\n",
" Downloading openai-0.27.8-py3-none-any.whl.metadata (13 kB)\n",
"Collecting requests>=2.20 (from openai)\n",
" Obtaining dependency information for requests>=2.20 from https://files.pythonhosted.org/packages/70/8e/0e2d847013cb52cd35b38c009bb167a1a26b2ce6cd6965bf26b47bc0bf44/requests-2.31.0-py3-none-any.whl.metadata\n",
" Using cached requests-2.31.0-py3-none-any.whl.metadata (4.6 kB)\n",
"Collecting tqdm (from openai)\n",
" Using cached tqdm-4.65.0-py3-none-any.whl (77 kB)\n",
"Collecting aiohttp (from openai)\n",
" Obtaining dependency information for aiohttp from https://files.pythonhosted.org/packages/fa/9e/49002fde2a97d7df0e162e919c31cf13aa9f184537739743d1239edd0e67/aiohttp-3.8.5-cp310-cp310-macosx_11_0_arm64.whl.metadata\n",
" Downloading aiohttp-3.8.5-cp310-cp310-macosx_11_0_arm64.whl.metadata (7.7 kB)\n",
"Collecting charset-normalizer<4,>=2 (from requests>=2.20->openai)\n",
" Obtaining dependency information for charset-normalizer<4,>=2 from https://files.pythonhosted.org/packages/ec/a7/96835706283d63fefbbbb4f119d52f195af00fc747e67cc54397c56312c8/charset_normalizer-3.2.0-cp310-cp310-macosx_11_0_arm64.whl.metadata\n",
" Using cached charset_normalizer-3.2.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (31 kB)\n",
"Collecting idna<4,>=2.5 (from requests>=2.20->openai)\n",
" Using cached idna-3.4-py3-none-any.whl (61 kB)\n",
"Collecting urllib3<3,>=1.21.1 (from requests>=2.20->openai)\n",
" Obtaining dependency information for urllib3<3,>=1.21.1 from https://files.pythonhosted.org/packages/9b/81/62fd61001fa4b9d0df6e31d47ff49cfa9de4af03adecf339c7bc30656b37/urllib3-2.0.4-py3-none-any.whl.metadata\n",
" Downloading urllib3-2.0.4-py3-none-any.whl.metadata (6.6 kB)\n",
"Collecting certifi>=2017.4.17 (from requests>=2.20->openai)\n",
" Using cached certifi-2023.5.7-py3-none-any.whl (156 kB)\n",
"Collecting attrs>=17.3.0 (from aiohttp->openai)\n",
" Using cached attrs-23.1.0-py3-none-any.whl (61 kB)\n",
"Collecting multidict<7.0,>=4.5 (from aiohttp->openai)\n",
" Using cached multidict-6.0.4-cp310-cp310-macosx_11_0_arm64.whl (29 kB)\n",
"Collecting async-timeout<5.0,>=4.0.0a3 (from aiohttp->openai)\n",
" Using cached async_timeout-4.0.2-py3-none-any.whl (5.8 kB)\n",
"Collecting yarl<2.0,>=1.0 (from aiohttp->openai)\n",
" Using cached yarl-1.9.2-cp310-cp310-macosx_11_0_arm64.whl (62 kB)\n",
"Collecting frozenlist>=1.1.1 (from aiohttp->openai)\n",
" Obtaining dependency information for frozenlist>=1.1.1 from https://files.pythonhosted.org/packages/67/6a/55a49da0fa373ac9aa49ccd5b6393ecc183e2a0904d9449ea3ee1163e0b1/frozenlist-1.4.0-cp310-cp310-macosx_11_0_arm64.whl.metadata\n",
" Downloading frozenlist-1.4.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (5.2 kB)\n",
"Collecting aiosignal>=1.1.2 (from aiohttp->openai)\n",
" Using cached aiosignal-1.3.1-py3-none-any.whl (7.6 kB)\n",
"Using cached openai-0.27.8-py3-none-any.whl (73 kB)\n",
"Using cached requests-2.31.0-py3-none-any.whl (62 kB)\n",
"Downloading aiohttp-3.8.5-cp310-cp310-macosx_11_0_arm64.whl (343 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m343.9/343.9 kB\u001b[0m \u001b[31m11.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hUsing cached charset_normalizer-3.2.0-cp310-cp310-macosx_11_0_arm64.whl (124 kB)\n",
"Downloading frozenlist-1.4.0-cp310-cp310-macosx_11_0_arm64.whl (46 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.0/46.0 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hDownloading urllib3-2.0.4-py3-none-any.whl (123 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m123.9/123.9 kB\u001b[0m \u001b[31m20.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hInstalling collected packages: urllib3, tqdm, multidict, idna, frozenlist, charset-normalizer, certifi, attrs, async-timeout, yarl, requests, aiosignal, aiohttp, openai\n",
"Successfully installed aiohttp-3.8.5 aiosignal-1.3.1 async-timeout-4.0.2 attrs-23.1.0 certifi-2023.5.7 charset-normalizer-3.2.0 frozenlist-1.4.0 idna-3.4 multidict-6.0.4 openai-0.27.8 requests-2.31.0 tqdm-4.65.0 urllib3-2.0.4 yarl-1.9.2\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Collecting chromadb\n",
" Obtaining dependency information for chromadb from https://files.pythonhosted.org/packages/47/b7/41d975f02818c965cdb8a119cab5a38cfb08e0c1abb18efebe9a373ea97b/chromadb-0.4.2-py3-none-any.whl.metadata\n",
" Downloading chromadb-0.4.2-py3-none-any.whl.metadata (6.9 kB)\n",
"Collecting pandas>=1.3 (from chromadb)\n",
" Obtaining dependency information for pandas>=1.3 from https://files.pythonhosted.org/packages/4a/f6/f620ca62365d83e663a255a41b08d2fc2eaf304e0b8b21bb6d62a7390fe3/pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl.metadata\n",
" Using cached pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl.metadata (18 kB)\n",
"Requirement already satisfied: requests>=2.28 in /Users/antontroynikov/miniforge3/envs/chroma-openai-cookbook/lib/python3.10/site-packages (from chromadb) (2.31.0)\n",
"Collecting pydantic<2.0,>=1.9 (from chromadb)\n",
" Obtaining dependency information for pydantic<2.0,>=1.9 from https://files.pythonhosted.org/packages/79/3e/6b4d0fb2174beceac9a991ba8e67158b45c35faca9ea4545ae32d47096cd/pydantic-1.10.11-cp310-cp310-macosx_11_0_arm64.whl.metadata\n",
" Using cached pydantic-1.10.11-cp310-cp310-macosx_11_0_arm64.whl.metadata (148 kB)\n",
"Collecting chroma-hnswlib==0.7.1 (from chromadb)\n",
" Obtaining dependency information for chroma-hnswlib==0.7.1 from https://files.pythonhosted.org/packages/a5/d5/54947127f5cb2a1fcef40877fb3e6044495eec0a158ba0956babe4ab2a77/chroma_hnswlib-0.7.1-cp310-cp310-macosx_13_0_arm64.whl.metadata\n",
" Using cached chroma_hnswlib-0.7.1-cp310-cp310-macosx_13_0_arm64.whl.metadata (252 bytes)\n",
"Collecting fastapi<0.100.0,>=0.95.2 (from chromadb)\n",
" Obtaining dependency information for fastapi<0.100.0,>=0.95.2 from https://files.pythonhosted.org/packages/73/eb/03b691afa0b5ffa1e93ed34f97ec1e7855c758efbdcfb16c209af0b0506b/fastapi-0.99.1-py3-none-any.whl.metadata\n",
" Using cached fastapi-0.99.1-py3-none-any.whl.metadata (23 kB)\n",
"Collecting uvicorn[standard]>=0.18.3 (from chromadb)\n",
" Obtaining dependency information for uvicorn[standard]>=0.18.3 from https://files.pythonhosted.org/packages/5d/07/b9eac057f7efa56900640a233c1ed63db83568322c6bcbabe98f741d5289/uvicorn-0.23.1-py3-none-any.whl.metadata\n",
" Using cached uvicorn-0.23.1-py3-none-any.whl.metadata (6.2 kB)\n",
"Collecting numpy>=1.21.6 (from chromadb)\n",
" Obtaining dependency information for numpy>=1.21.6 from https://files.pythonhosted.org/packages/1b/cd/9e8313ffd849626c836fffd7881296a74f53a7739bd9ce7a6e22b1fc843b/numpy-1.25.1-cp310-cp310-macosx_11_0_arm64.whl.metadata\n",
" Using cached numpy-1.25.1-cp310-cp310-macosx_11_0_arm64.whl.metadata (5.6 kB)\n",
"Collecting posthog>=2.4.0 (from chromadb)\n",
" Using cached posthog-3.0.1-py2.py3-none-any.whl (37 kB)\n",
"Requirement already satisfied: typing-extensions>=4.5.0 in /Users/antontroynikov/miniforge3/envs/chroma-openai-cookbook/lib/python3.10/site-packages (from chromadb) (4.7.1)\n",
"Collecting pulsar-client>=3.1.0 (from chromadb)\n",
" Obtaining dependency information for pulsar-client>=3.1.0 from https://files.pythonhosted.org/packages/43/85/ab0455008ce3335a1c75a7c500fd8921ab166f34821fa67dc91ae9687a40/pulsar_client-3.2.0-cp310-cp310-macosx_10_15_universal2.whl.metadata\n",
" Using cached pulsar_client-3.2.0-cp310-cp310-macosx_10_15_universal2.whl.metadata (1.0 kB)\n",
"Collecting onnxruntime>=1.14.1 (from chromadb)\n",
" Obtaining dependency information for onnxruntime>=1.14.1 from https://files.pythonhosted.org/packages/cf/06/0c6e355b9ddbebc34d0e21bc5be1e4bd2c124ebd9030525838fa6e65eaa8/onnxruntime-1.15.1-cp310-cp310-macosx_11_0_arm64.whl.metadata\n",
" Using cached onnxruntime-1.15.1-cp310-cp310-macosx_11_0_arm64.whl.metadata (4.0 kB)\n",
"Collecting tokenizers>=0.13.2 (from chromadb)\n",
" Using cached tokenizers-0.13.3-cp310-cp310-macosx_12_0_arm64.whl (3.9 MB)\n",
"Collecting pypika>=0.48.9 (from chromadb)\n",
" Using cached PyPika-0.48.9-py2.py3-none-any.whl\n",
"Requirement already satisfied: tqdm>=4.65.0 in /Users/antontroynikov/miniforge3/envs/chroma-openai-cookbook/lib/python3.10/site-packages (from chromadb) (4.65.0)\n",
"Collecting overrides>=7.3.1 (from chromadb)\n",
" Using cached overrides-7.3.1-py3-none-any.whl (17 kB)\n",
"Collecting importlib-resources (from chromadb)\n",
" Obtaining dependency information for importlib-resources from https://files.pythonhosted.org/packages/29/d1/bed03eca30aa05aaf6e0873de091f9385c48705c4a607c2dfe3edbe543e8/importlib_resources-6.0.0-py3-none-any.whl.metadata\n",
" Using cached importlib_resources-6.0.0-py3-none-any.whl.metadata (4.2 kB)\n",
"Collecting starlette<0.28.0,>=0.27.0 (from fastapi<0.100.0,>=0.95.2->chromadb)\n",
" Obtaining dependency information for starlette<0.28.0,>=0.27.0 from https://files.pythonhosted.org/packages/58/f8/e2cca22387965584a409795913b774235752be4176d276714e15e1a58884/starlette-0.27.0-py3-none-any.whl.metadata\n",
" Using cached starlette-0.27.0-py3-none-any.whl.metadata (5.8 kB)\n",
"Collecting coloredlogs (from onnxruntime>=1.14.1->chromadb)\n",
" Using cached coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)\n",
"Collecting flatbuffers (from onnxruntime>=1.14.1->chromadb)\n",
" Obtaining dependency information for flatbuffers from https://files.pythonhosted.org/packages/6f/12/d5c79ee252793ffe845d58a913197bfa02ae9a0b5c9bc3dc4b58d477b9e7/flatbuffers-23.5.26-py2.py3-none-any.whl.metadata\n",
" Using cached flatbuffers-23.5.26-py2.py3-none-any.whl.metadata (850 bytes)\n",
"Requirement already satisfied: packaging in /Users/antontroynikov/miniforge3/envs/chroma-openai-cookbook/lib/python3.10/site-packages (from onnxruntime>=1.14.1->chromadb) (23.1)\n",
"Collecting protobuf (from onnxruntime>=1.14.1->chromadb)\n",
" Obtaining dependency information for protobuf from https://files.pythonhosted.org/packages/cb/d3/a164038605494d49acc4f9cda1c0bc200b96382c53edd561387263bb181d/protobuf-4.23.4-cp37-abi3-macosx_10_9_universal2.whl.metadata\n",
" Using cached protobuf-4.23.4-cp37-abi3-macosx_10_9_universal2.whl.metadata (540 bytes)\n",
"Collecting sympy (from onnxruntime>=1.14.1->chromadb)\n",
" Using cached sympy-1.12-py3-none-any.whl (5.7 MB)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in /Users/antontroynikov/miniforge3/envs/chroma-openai-cookbook/lib/python3.10/site-packages (from pandas>=1.3->chromadb) (2.8.2)\n",
"Collecting pytz>=2020.1 (from pandas>=1.3->chromadb)\n",
" Using cached pytz-2023.3-py2.py3-none-any.whl (502 kB)\n",
"Collecting tzdata>=2022.1 (from pandas>=1.3->chromadb)\n",
" Using cached tzdata-2023.3-py2.py3-none-any.whl (341 kB)\n",
"Requirement already satisfied: six>=1.5 in /Users/antontroynikov/miniforge3/envs/chroma-openai-cookbook/lib/python3.10/site-packages (from posthog>=2.4.0->chromadb) (1.16.0)\n",
"Collecting monotonic>=1.5 (from posthog>=2.4.0->chromadb)\n",
" Using cached monotonic-1.6-py2.py3-none-any.whl (8.2 kB)\n",
"Collecting backoff>=1.10.0 (from posthog>=2.4.0->chromadb)\n",
" Using cached backoff-2.2.1-py3-none-any.whl (15 kB)\n",
"Requirement already satisfied: certifi in /Users/antontroynikov/miniforge3/envs/chroma-openai-cookbook/lib/python3.10/site-packages (from pulsar-client>=3.1.0->chromadb) (2023.5.7)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /Users/antontroynikov/miniforge3/envs/chroma-openai-cookbook/lib/python3.10/site-packages (from requests>=2.28->chromadb) (3.2.0)\n",
"Requirement already satisfied: idna<4,>=2.5 in /Users/antontroynikov/miniforge3/envs/chroma-openai-cookbook/lib/python3.10/site-packages (from requests>=2.28->chromadb) (3.4)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/antontroynikov/miniforge3/envs/chroma-openai-cookbook/lib/python3.10/site-packages (from requests>=2.28->chromadb) (2.0.4)\n",
"Collecting click>=7.0 (from uvicorn[standard]>=0.18.3->chromadb)\n",
" Obtaining dependency information for click>=7.0 from https://files.pythonhosted.org/packages/1a/70/e63223f8116931d365993d4a6b7ef653a4d920b41d03de7c59499962821f/click-8.1.6-py3-none-any.whl.metadata\n",
" Using cached click-8.1.6-py3-none-any.whl.metadata (3.0 kB)\n",
"Collecting h11>=0.8 (from uvicorn[standard]>=0.18.3->chromadb)\n",
" Using cached h11-0.14.0-py3-none-any.whl (58 kB)\n",
"Collecting httptools>=0.5.0 (from uvicorn[standard]>=0.18.3->chromadb)\n",
" Obtaining dependency information for httptools>=0.5.0 from https://files.pythonhosted.org/packages/8f/71/d535e9f6967958d21b8fe1baeb7efb6304b86e8fcff44d0bda8690e0aec9/httptools-0.6.0-cp310-cp310-macosx_10_9_universal2.whl.metadata\n",
" Using cached httptools-0.6.0-cp310-cp310-macosx_10_9_universal2.whl.metadata (3.6 kB)\n",
"Collecting python-dotenv>=0.13 (from uvicorn[standard]>=0.18.3->chromadb)\n",
" Using cached python_dotenv-1.0.0-py3-none-any.whl (19 kB)\n",
"Collecting pyyaml>=5.1 (from uvicorn[standard]>=0.18.3->chromadb)\n",
" Obtaining dependency information for pyyaml>=5.1 from https://files.pythonhosted.org/packages/5b/07/10033a403b23405a8fc48975444463d3d10a5c2736b7eb2550b07b367429/PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl.metadata\n",
" Using cached PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl.metadata (2.1 kB)\n",
"Collecting uvloop!=0.15.0,!=0.15.1,>=0.14.0 (from uvicorn[standard]>=0.18.3->chromadb)\n",
" Using cached uvloop-0.17.0-cp310-cp310-macosx_10_9_universal2.whl (2.1 MB)\n",
"Collecting watchfiles>=0.13 (from uvicorn[standard]>=0.18.3->chromadb)\n",
" Using cached watchfiles-0.19.0-cp37-abi3-macosx_11_0_arm64.whl (388 kB)\n",
"Collecting websockets>=10.4 (from uvicorn[standard]>=0.18.3->chromadb)\n",
" Using cached websockets-11.0.3-cp310-cp310-macosx_11_0_arm64.whl (121 kB)\n",
"Collecting anyio<5,>=3.4.0 (from starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb)\n",
" Obtaining dependency information for anyio<5,>=3.4.0 from https://files.pythonhosted.org/packages/19/24/44299477fe7dcc9cb58d0a57d5a7588d6af2ff403fdd2d47a246c91a3246/anyio-3.7.1-py3-none-any.whl.metadata\n",
" Using cached anyio-3.7.1-py3-none-any.whl.metadata (4.7 kB)\n",
"Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime>=1.14.1->chromadb)\n",
" Using cached humanfriendly-10.0-py2.py3-none-any.whl (86 kB)\n",
"Collecting mpmath>=0.19 (from sympy->onnxruntime>=1.14.1->chromadb)\n",
" Using cached mpmath-1.3.0-py3-none-any.whl (536 kB)\n",
"Collecting sniffio>=1.1 (from anyio<5,>=3.4.0->starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb)\n",
" Using cached sniffio-1.3.0-py3-none-any.whl (10 kB)\n",
"Collecting exceptiongroup (from anyio<5,>=3.4.0->starlette<0.28.0,>=0.27.0->fastapi<0.100.0,>=0.95.2->chromadb)\n",
" Obtaining dependency information for exceptiongroup from https://files.pythonhosted.org/packages/fe/17/f43b7c9ccf399d72038042ee72785c305f6c6fdc6231942f8ab99d995742/exceptiongroup-1.1.2-py3-none-any.whl.metadata\n",
" Using cached exceptiongroup-1.1.2-py3-none-any.whl.metadata (6.1 kB)\n",
"Downloading chromadb-0.4.2-py3-none-any.whl (399 kB)\n",
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m399.3/399.3 kB\u001b[0m \u001b[31m12.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
"\u001b[?25hUsing cached chroma_hnswlib-0.7.1-cp310-cp310-macosx_13_0_arm64.whl (195 kB)\n",
"Using cached fastapi-0.99.1-py3-none-any.whl (58 kB)\n",
"Using cached numpy-1.25.1-cp310-cp310-macosx_11_0_arm64.whl (14.0 MB)\n",
"Using cached onnxruntime-1.15.1-cp310-cp310-macosx_11_0_arm64.whl (6.1 MB)\n",
"Using cached pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl (10.8 MB)\n",
"Using cached pulsar_client-3.2.0-cp310-cp310-macosx_10_15_universal2.whl (10.8 MB)\n",
"Using cached pydantic-1.10.11-cp310-cp310-macosx_11_0_arm64.whl (2.5 MB)\n",
"Using cached importlib_resources-6.0.0-py3-none-any.whl (31 kB)\n",
"Using cached click-8.1.6-py3-none-any.whl (97 kB)\n",
"Using cached httptools-0.6.0-cp310-cp310-macosx_10_9_universal2.whl (237 kB)\n",
"Using cached PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl (169 kB)\n",
"Using cached starlette-0.27.0-py3-none-any.whl (66 kB)\n",
"Using cached flatbuffers-23.5.26-py2.py3-none-any.whl (26 kB)\n",
"Using cached protobuf-4.23.4-cp37-abi3-macosx_10_9_universal2.whl (400 kB)\n",
"Using cached uvicorn-0.23.1-py3-none-any.whl (59 kB)\n",
"Using cached anyio-3.7.1-py3-none-any.whl (80 kB)\n",
"Using cached exceptiongroup-1.1.2-py3-none-any.whl (14 kB)\n",
"Installing collected packages: tokenizers, pytz, pypika, mpmath, monotonic, flatbuffers, websockets, uvloop, tzdata, sympy, sniffio, pyyaml, python-dotenv, pydantic, pulsar-client, protobuf, overrides, numpy, importlib-resources, humanfriendly, httptools, h11, exceptiongroup, click, backoff, uvicorn, posthog, pandas, coloredlogs, chroma-hnswlib, anyio, watchfiles, starlette, onnxruntime, fastapi, chromadb\n",
"Successfully installed anyio-3.7.1 backoff-2.2.1 chroma-hnswlib-0.7.1 chromadb-0.4.2 click-8.1.6 coloredlogs-15.0.1 exceptiongroup-1.1.2 fastapi-0.99.1 flatbuffers-23.5.26 h11-0.14.0 httptools-0.6.0 humanfriendly-10.0 importlib-resources-6.0.0 monotonic-1.6 mpmath-1.3.0 numpy-1.25.1 onnxruntime-1.15.1 overrides-7.3.1 pandas-2.0.3 posthog-3.0.1 protobuf-4.23.4 pulsar-client-3.2.0 pydantic-1.10.11 pypika-0.48.9 python-dotenv-1.0.0 pytz-2023.3 pyyaml-6.0.1 sniffio-1.3.0 starlette-0.27.0 sympy-1.12 tokenizers-0.13.3 tzdata-2023.3 uvicorn-0.23.1 uvloop-0.17.0 watchfiles-0.19.0 websockets-11.0.3\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Collecting wget\n",
" Using cached wget-3.2.zip (10 kB)\n",
" Preparing metadata (setup.py) ... \u001b[?25ldone\n",
"\u001b[?25hBuilding wheels for collected packages: wget\n",
" Building wheel for wget (setup.py) ... \u001b[?25ldone\n",
"\u001b[?25h Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9657 sha256=b2d83c5fcdeab398d0a4e9808a470bbf725fffea4a6130e731c6097b9561005b\n",
" Stored in directory: /Users/antontroynikov/Library/Caches/pip/wheels/8b/f1/7f/5c94f0a7a505ca1c81cd1d9208ae2064675d97582078e6c769\n",
"Successfully built wget\n",
"Installing collected packages: wget\n",
"Successfully installed wget-3.2\n",
"Note: you may need to restart the kernel to use updated packages.\n",
"Requirement already satisfied: numpy in /Users/antontroynikov/miniforge3/envs/chroma-openai-cookbook/lib/python3.10/site-packages (1.25.1)\n",
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"# Make sure the OpenAI library is installed\n",
"%pip install openai\n",
"\n",
"# We'll need to install the Chroma client\n",
"!pip install chromadb\n",
"%pip install chromadb\n",
"\n",
"# Install wget to pull zip file\n",
"%pip install wget\n",
"\n",
"#Install wget to pull zip file\n",
"!pip install wget"
"# Install numpy for data manipulation\n",
"%pip install numpy"
]
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 3,
"id": "5be94df6",
"metadata": {},
"outputs": [],
"source": [
"import openai\n",
"from typing import List, Iterator\n",
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"import wget\n",
"from ast import literal_eval\n",
@ -94,24 +286,17 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"id": "5dff8b55",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"100% [......................................................................] 698933052 / 698933052"
]
},
{
"data": {
"text/plain": [
"'vector_database_wikipedia_articles_embedded.zip'"
]
},
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@ -125,7 +310,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"id": "21097972",
"metadata": {},
"outputs": [],
@ -137,7 +322,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 7,
"id": "70bbd8ba",
"metadata": {},
"outputs": [],
@ -147,7 +332,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 8,
"id": "1721e45d",
"metadata": {},
"outputs": [
@ -266,7 +451,7 @@
"4 [0.021524671465158463, 0.018522677943110466, -... 4 "
]
},
"execution_count": 5,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@ -277,7 +462,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 9,
"id": "960b82af",
"metadata": {},
"outputs": [],
@ -292,7 +477,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 11,
"id": "a334ab8b",
"metadata": {},
"outputs": [
@ -349,24 +534,14 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 5,
"id": "159d9646",
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n",
"chroma_client = chromadb.Client() # Ephemeral. Comment out for the persistent version.\n",
"\n",
"# Uncomment the following for the persistent version. \n",
"# import chromadb.config.Settings\n",
"# persist_directory = 'chroma_persistence' # Directory to store persisted Chroma data. \n",
"# client = chromadb.Client(\n",
"# Settings(\n",
"# persist_directory=persist_directory,\n",
"# chroma_db_impl=\"duckdb+parquet\",\n",
"# )\n",
"# )"
"chroma_client = chromadb.EphemeralClient() # Equivalent to chromadb.Client(), ephemeral.\n",
"# Uncomment for persistent client\n",
"# chroma_client = chromadb.PersistentClient()"
]
},
{
@ -384,7 +559,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 6,
"id": "ad2d1bce",
"metadata": {},
"outputs": [
@ -432,7 +607,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 15,
"id": "84885fec",
"metadata": {},
"outputs": [],
@ -462,7 +637,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 16,
"id": "273b8b4c",
"metadata": {},
"outputs": [],
@ -481,7 +656,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 17,
"id": "e84cf47f",
"metadata": {},
"outputs": [
@ -514,61 +689,61 @@
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>116</th>\n",
" <td>12249</td>\n",
" <td>0.265118</td>\n",
" <td>Europe</td>\n",
" <td>Europe is the western part of the continent of...</td>\n",
" <th>2</th>\n",
" <td>23266</td>\n",
" <td>0.249646</td>\n",
" <td>Art</td>\n",
" <td>Art is a creative activity that expresses imag...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1332</th>\n",
" <td>12248</td>\n",
" <td>0.290684</td>\n",
" <td>European</td>\n",
" <td>European may mean:\\nA person or attribute of t...</td>\n",
" <th>11777</th>\n",
" <td>15436</td>\n",
" <td>0.271688</td>\n",
" <td>Hellenistic art</td>\n",
" <td>The art of the Hellenistic time (from 400 B.C....</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2885</th>\n",
" <td>12225</td>\n",
" <td>0.314833</td>\n",
" <td>Scandinavia</td>\n",
" <td>Scandinavia is a group of countries in norther...</td>\n",
" <th>12178</th>\n",
" <td>23265</td>\n",
" <td>0.279306</td>\n",
" <td>Byzantine art</td>\n",
" <td>Byzantine art is a form of Christian Greek art...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12212</th>\n",
" <td>1332</td>\n",
" <td>0.317179</td>\n",
" <td>Western civilization</td>\n",
" <td>Western civilization, western culture or the ...</td>\n",
" <th>13215</th>\n",
" <td>11777</td>\n",
" <td>0.294415</td>\n",
" <td>Art film</td>\n",
" <td>Art films are a type of movie that is very dif...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12216</th>\n",
" <td>12216</td>\n",
" <td>0.321235</td>\n",
" <td>Eastern Europe</td>\n",
" <td>Eastern Europe is the eastern region of Europe...</td>\n",
" <th>15436</th>\n",
" <td>22108</td>\n",
" <td>0.305937</td>\n",
" <td>Renaissance art</td>\n",
" <td>Many of the most famous and best-loved works o...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id score title \\\n",
"116 12249 0.265118 Europe \n",
"1332 12248 0.290684 European \n",
"2885 12225 0.314833 Scandinavia \n",
"12212 1332 0.317179 Western civilization \n",
"12216 12216 0.321235 Eastern Europe \n",
" id score title \\\n",
"2 23266 0.249646 Art \n",
"11777 15436 0.271688 Hellenistic art \n",
"12178 23265 0.279306 Byzantine art \n",
"13215 11777 0.294415 Art film \n",
"15436 22108 0.305937 Renaissance art \n",
"\n",
" content \n",
"116 Europe is the western part of the continent of... \n",
"1332 European may mean:\\nA person or attribute of t... \n",
"2885 Scandinavia is a group of countries in norther... \n",
"12212 Western civilization, western culture or the ... \n",
"12216 Eastern Europe is the eastern region of Europe... "
"2 Art is a creative activity that expresses imag... \n",
"11777 The art of the Hellenistic time (from 400 B.C.... \n",
"12178 Byzantine art is a form of Christian Greek art... \n",
"13215 Art films are a type of movie that is very dif... \n",
"15436 Many of the most famous and best-loved works o... "
]
},
"execution_count": 12,
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
@ -585,7 +760,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 18,
"id": "f4db910a",
"metadata": {},
"outputs": [
@ -672,7 +847,7 @@
"11702 William Wallace was a Scottish knight who foug... "
]
},
"execution_count": 13,
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
@ -694,21 +869,13 @@
"source": [
"Now that you've got a basic embeddings search running, you can [hop over to the Chroma docs](https://docs.trychroma.com/usage-guide#using-where-filters) to learn more about how to add filters to your query, update/delete data in your collections, and deploy Chroma."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0119d87a",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "vector_db_split",
"display_name": "chroma_openai",
"language": "python",
"name": "vector_db_split"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
@ -720,12 +887,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
},
"vscode": {
"interpreter": {
"hash": "fd16a328ca3d68029457069b79cb0b38eb39a0f5ccc4fe4473d3047707df8207"
}
"version": "3.10.12"
}
},
"nbformat": 4,

@ -34,9 +34,17 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install -qU openai chromadb pandas"
]
@ -52,7 +60,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"outputs": [
{
@ -67,7 +75,7 @@
"import os\n",
"\n",
"# Uncomment the following line to set the environment variable in the notebook\n",
"# os.environ[\"OPENAI_API_KEY\"] = \"sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\" \n",
"# os.environ[\"OPENAI_API_KEY\"] = 'sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'\n",
"\n",
"if os.getenv(\"OPENAI_API_KEY\") is not None:\n",
" print(\"OPENAI_API_KEY is ready\")\n",
@ -91,7 +99,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"outputs": [
{
@ -177,7 +185,7 @@
"4 {} [5152028, 11705328] "
]
},
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@ -205,7 +213,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@ -556,7 +564,7 @@
"# We initialize an embedding function, and provide it to the collection.\n",
"embedding_function = OpenAIEmbeddingFunction(api_key=os.getenv(\"OPENAI_API_KEY\"))\n",
"\n",
"chroma_client = chromadb.Client()\n",
"chroma_client = chromadb.Client() # Ephemeral by default\n",
"scifact_corpus_collection = chroma_client.create_collection(name='scifact_corpus', embedding_function=embedding_function)"
]
},
@ -975,7 +983,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
"version": "3.10.12"
},
"vscode": {
"interpreter": {

Loading…
Cancel
Save