upgrade chroma to 0.4.0 (#7749)

**This should land Monday the 17th**

Chroma is upgrading from `0.3.29` to `0.4.0`. `0.4.0` is easier to
build, more durable, faster, smaller, and more extensible. This comes
with a few changes:

1. A simplified and improved client setup. Instead of having to remember
weird settings, users can just do `EphemeralClient`, `PersistentClient`
or `HttpClient` (the underlying direct `Client` implementation is also
still accessible)

2. We migrated data stores away from `duckdb` and `clickhouse`. This
changes the api for the `PersistentClient` that used to reference
`chroma_db_impl="duckdb+parquet"`. Now we simply set
`is_persistent=true`. `is_persistent` is set for you to `true` if you
use `PersistentClient`.

3. Because we migrated away from `duckdb` and `clickhouse` - this also
means that users need to migrate their data into the new layout and
schema. Chroma is committed to providing extensive notification and
tooling around any schema and data migrations (for example - this PR!).

After upgrading to `0.4.0` - if users try to access their data that was
stored in the previous regime, the system will throw an `Exception` and
instruct them how to use the migration assistant to migrate their data.
The migration assistant is a pip-installable CLI: `pip install
chroma_migrate`. It is run by calling `chroma_migrate`.

-- TODO ADD here is a short video demonstrating how it works. 

Please reference the readme at
[chroma-core/chroma-migrate](https://github.com/chroma-core/chroma-migrate)
to see a full write-up of our philosophy on migrations as well as more
details about this particular migration.

Please direct any users facing issues upgrading to our Discord channel
called
[#get-help](https://discord.com/channels/1073293645303795742/1129200523111841883).
We have also created an [email
listserv](https://airtable.com/shrHaErIs1j9F97BE) to notify developers
directly in the future about breaking changes.

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
pull/7832/head
Jeff Huber 11 months ago committed by GitHub
parent 10246375a5
commit 2139d0197e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -43,7 +43,7 @@
"\n",
"# Instantiate 2 diff cromadb indexs, each one with a diff embedding.\n",
"client_settings = chromadb.config.Settings(\n",
" chroma_db_impl=\"duckdb+parquet\",\n",
" is_persistent=True,\n",
" persist_directory=DB_DIR,\n",
" anonymized_telemetry=False,\n",
")\n",

46
poetry.lock generated

@ -1610,31 +1610,46 @@ files = [
{file = "charset_normalizer-3.1.0-py3-none-any.whl", hash = "sha256:3d9098b479e78c85080c98e1e35ff40b4a31d8953102bb0fd7d1b6f8a2111a3d"},
]
[[package]]
name = "chroma-hnswlib"
version = "0.7.1"
description = "Chromas fork of hnswlib"
category = "dev"
optional = false
python-versions = "*"
files = [
{file = "chroma-hnswlib-0.7.1.tar.gz", hash = "sha256:f72592dc7d0522c25cc1f8864db7a3781f179ba989f209cc3ea01694c0d76493"},
{file = "chroma_hnswlib-0.7.1-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:38f51585d81a5072db70b17207afd1f57670c209836d0fbbf2a1aa7e8bece6b7"},
]
[package.dependencies]
numpy = "*"
[[package]]
name = "chromadb"
version = "0.3.26"
version = "0.4.1"
description = "Chroma."
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
{file = "chromadb-0.3.26-py3-none-any.whl", hash = "sha256:45a7848ee3ed8b694ca5789e5fd723406b76a13fa46f9a9a769f93317f29894c"},
{file = "chromadb-0.3.26.tar.gz", hash = "sha256:a9b596d507f081993f2e32a7dcacabbbec2f6aebc2b6defe524442b07e265296"},
{file = "chromadb-0.4.1-py3-none-any.whl", hash = "sha256:980e776bfbb76a2689418b03a254e7edb888961f57b7615f815c8d95f048b396"},
{file = "chromadb-0.4.1.tar.gz", hash = "sha256:9b1a76d615dd2280e7b30ff82101ed31c26782a4d832070046309fde82515385"},
]
[package.dependencies]
clickhouse-connect = ">=0.5.7"
duckdb = ">=0.7.1"
fastapi = ">=0.85.1"
chroma-hnswlib = "0.7.1"
fastapi = ">=0.95.2,<0.100.0"
graphlib-backport = {version = ">=1.0.3", markers = "python_version < \"3.9\""}
hnswlib = ">=0.7"
importlib-resources = "*"
numpy = ">=1.21.6"
onnxruntime = ">=1.14.1"
overrides = ">=7.3.1"
pandas = ">=1.3"
posthog = ">=2.4.0"
pulsar-client = ">=3.1.0"
pydantic = ">=1.9"
pydantic = ">=1.9,<2.0"
pypika = ">=0.48.9"
requests = ">=2.28"
tokenizers = ">=0.13.2"
tqdm = ">=4.65.0"
@ -3604,7 +3619,7 @@ name = "hnswlib"
version = "0.7.0"
description = "hnswlib"
category = "main"
optional = false
optional = true
python-versions = "*"
files = [
{file = "hnswlib-0.7.0.tar.gz", hash = "sha256:bc459668e7e44bb7454b256b90c98c5af750653919d9a91698dafcf416cf64c4"},
@ -8563,6 +8578,17 @@ files = [
doc = ["sphinx", "sphinx_rtd_theme"]
test = ["flake8", "isort", "pytest"]
[[package]]
name = "pypika"
version = "0.48.9"
description = "A SQL query builder API for Python"
category = "dev"
optional = false
python-versions = "*"
files = [
{file = "PyPika-0.48.9.tar.gz", hash = "sha256:838836a61747e7c8380cd1b7ff638694b7a7335345d0f559b04b2cd832ad5378"},
]
[[package]]
name = "pyproject-hooks"
version = "1.0.0"
@ -12857,4 +12883,4 @@ text-helpers = ["chardet"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0"
content-hash = "f322b36103013bd59c34dddadf84209292ea61ed73bd26fbfa355d372011238b"
content-hash = "aee2f0c85636738d08d512c53fd551ab43a2e94c1ebf14c6178c9534da75dcaa"

@ -193,7 +193,7 @@ deeplake = "^3.6.8"
libdeeplake = "^0.0.60"
weaviate-client = "^3.15.5"
torch = "^1.0.0"
chromadb = "^0.3.21"
chromadb = "^0.4.0"
tiktoken = "^0.3.3"
python-dotenv = "^1.0.0"
sentence-transformers = "^2"

Loading…
Cancel
Save