community[patch]: Added a function `from_existing_collection` in `Qdrant` vector database. (#20779)

Issue: #20514 
The current implementation of `construct_instance` expects a `texts:
List[str]` argument, which triggers a call to the embedding function. This
might not be needed when we already have a client with a collection and a
`path`, and you don't want to add any text.

This PR adds a class method that returns a qdrant instance with an
existing client.

Here, every time
cb6e5e56c2/libs/community/langchain_community/vectorstores/qdrant.py (L1592)
`construct_instance` is called, this line sends some text for embedding
generation.

---------

Co-authored-by: Anush <anushshetty90@gmail.com>
pull/20254/head
Mayank Solanki 3 weeks ago committed by GitHub
parent 893a924b90
commit 8c085fc697
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -1367,6 +1367,51 @@ class Qdrant(VectorStore):
qdrant.add_texts(texts, metadatas, ids, batch_size)
return qdrant
@classmethod
def from_existing_collection(
    cls: Type[Qdrant],
    embedding: Embeddings,
    path: Optional[str] = None,
    collection_name: Optional[str] = None,
    location: Optional[str] = None,
    url: Optional[str] = None,
    port: Optional[int] = 6333,
    grpc_port: int = 6334,
    prefer_grpc: bool = False,
    https: Optional[bool] = None,
    api_key: Optional[str] = None,
    prefix: Optional[str] = None,
    timeout: Optional[float] = None,
    host: Optional[str] = None,
    **kwargs: Any,
) -> Qdrant:
    """Get an instance of the store bound to an existing Qdrant collection.

    The clients are built from the given connection settings and wrapped in a
    new store object. This method itself never adds texts and never calls the
    embedding function, so no new embeddings are inserted.

    Args:
        embedding: Embeddings object handed to the returned store.
        path: Forwarded to ``_generate_clients``. Optional, so that the
            collection can instead be addressed through ``location``,
            ``url`` or ``host``.
        collection_name: Name of the existing collection. It is required;
            the ``None`` default only exists so that ``path`` can be
            optional while keeping the original parameter order.
        location: Forwarded unchanged to ``_generate_clients``.
        url: Forwarded unchanged to ``_generate_clients``.
        port: Forwarded unchanged to ``_generate_clients``.
        grpc_port: Forwarded unchanged to ``_generate_clients``.
        prefer_grpc: Forwarded unchanged to ``_generate_clients``.
        https: Forwarded unchanged to ``_generate_clients``.
        api_key: Forwarded unchanged to ``_generate_clients``.
        prefix: Forwarded unchanged to ``_generate_clients``.
        timeout: Forwarded unchanged to ``_generate_clients``.
        host: Forwarded unchanged to ``_generate_clients``.
        **kwargs: Extra options passed both to ``_generate_clients`` and to
            the store constructor (for example ``vector_name``).

    Returns:
        A store instance wired to the generated sync and async clients.

    Raises:
        ValueError: If ``collection_name`` is not provided.
    """
    # Fail early with a clear message instead of building clients for an
    # unnamed collection.
    if collection_name is None:
        raise ValueError("Must specify collection_name. Received None.")

    client, async_client = cls._generate_clients(
        location=location,
        url=url,
        port=port,
        grpc_port=grpc_port,
        prefer_grpc=prefer_grpc,
        https=https,
        api_key=api_key,
        prefix=prefix,
        timeout=timeout,
        host=host,
        path=path,
        **kwargs,
    )
    return cls(
        client=client,
        async_client=async_client,
        collection_name=collection_name,
        embeddings=embedding,
        **kwargs,
    )
@classmethod
@sync_call_fallback
async def afrom_texts(

@ -0,0 +1,39 @@
import tempfile
import uuid
import pytest
from langchain_community.vectorstores import Qdrant
from tests.integration_tests.vectorstores.fake_embeddings import (
ConsistentFakeEmbeddings,
)
@pytest.mark.parametrize("vector_name", ["custom-vector"])
def test_qdrant_from_existing_collection_uses_same_collection(vector_name: str) -> None:
    """Check that ``Qdrant.from_existing_collection`` attaches to stored data.

    One text is written through ``Qdrant.from_texts``. The store is then
    reopened with ``from_existing_collection`` and two more texts are added.
    A raw client must finally count three points in the collection.
    """
    from qdrant_client import QdrantClient

    name = uuid.uuid4().hex
    with tempfile.TemporaryDirectory() as storage_dir:
        storage_path = str(storage_dir)

        # Seed the on-disk collection with a single document.
        seeded_store = Qdrant.from_texts(
            ["foo"],
            embedding=ConsistentFakeEmbeddings(),
            path=storage_path,
            collection_name=name,
            vector_name=vector_name,
        )
        # Drop the reference before reopening the same path; presumably this
        # releases the local storage held by the first store.
        del seeded_store

        # Reattach to the collection and append two more documents.
        reopened_store = Qdrant.from_existing_collection(
            embedding=ConsistentFakeEmbeddings(),
            path=storage_path,
            collection_name=name,
            vector_name=vector_name,
        )
        reopened_store.add_texts(["baz", "bar"])
        del reopened_store

        # Inspect the collection directly, bypassing the vector store wrapper.
        raw_client = QdrantClient(path=storage_path)
        stored_points = client_count = raw_client.count(name).count
        assert stored_points == 3
Loading…
Cancel
Save