milvus: fixed bug when using partition key and dynamic fields together (#25028)

**Description:**

This PR fixes a bug where if `enable_dynamic_field` and
`partition_key_field` are enabled at the same time, a pymilvus error
occurs.

Milvus requires the partition key field to be explicitly defined in the
collection schema rather than stored as a dynamic field, so it throws the
error "the specified partition key field {field} not exist" when creating
the collection.

When `enable_dynamic_field` is set to `True`, all schema field creation
based on `metadatas` is skipped. This code now checks whether
`partition_key_field` is set and, if so, explicitly adds that field to
the schema.

Integration test added.

**Twitter handle:** StuartMarshUK

---------

Co-authored-by: Stuart Marsh <stuart.marsh@qumata.com>
Co-authored-by: Erick Friis <erick@langchain.dev>
This commit is contained in:
Stuart Marsh 2024-08-05 17:01:55 +01:00 committed by GitHub
parent 6890daa90c
commit 16bd0697dc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 37 additions and 1 deletions

View File

@ -433,7 +433,14 @@ class Milvus(VectorStore):
# ...
# ```
if self.enable_dynamic_field:
pass
# If both dynamic fields and partition key field are enabled
if self._partition_key_field is not None:
# create the partition field
fields.append(
FieldSchema(
self._partition_key_field, DataType.VARCHAR, max_length=65_535
)
)
elif self._metadata_field is not None:
fields.append(FieldSchema(self._metadata_field, DataType.JSON))
else:

View File

@ -1,4 +1,5 @@
"""Test Milvus functionality."""
from typing import Any, List, Optional
import pytest
@ -274,6 +275,34 @@ def test_milvus_metadata_field() -> None:
}
def test_milvus_enable_dynamic_field_with_partition_key() -> None:
    """Exercise dynamic fields together with a partition key field.

    Builds a store from three texts whose metadata carries a distinct
    ``namespace`` value each, with ``enable_dynamic_field=True`` and
    ``partition_key_field="namespace"``. It then checks that filtering on
    one namespace yields one hit, that an unfiltered search yields three
    hits, and that the store's fields are exactly the primary, text,
    vector and partition key fields.
    """
    corpus = ["foo", "bar", "baz"]
    metadata_rows = [
        {"id": i, "namespace": f"name_{i}"} for i, _ in enumerate(corpus)
    ]
    store = _milvus_from_texts(
        metadatas=metadata_rows,
        enable_dynamic_field=True,
        partition_key_field="namespace",
    )

    # Restrict the search to a single namespace value.
    filtered_hits = store.similarity_search(
        "foo", k=10, expr="namespace == 'name_2'"
    )
    assert len(filtered_hits) == 1

    # Same query without the namespace filter.
    unfiltered_hits = store.similarity_search("foo", k=10)
    assert len(unfiltered_hits) == 3

    # Only these four fields are expected to be reported by the store.
    reported_fields = set(store.fields)
    assert reported_fields == {
        store._primary_field,
        store._text_field,
        store._vector_field,
        store._partition_key_field,
    }
# if __name__ == "__main__":
# test_milvus()
# test_milvus_vector_search()