From c1521ddbdb33ab6091f76ddcdca96678cb93a7c2 Mon Sep 17 00:00:00 2001 From: Ankush Gola <9536492+agola11@users.noreply.github.com> Date: Tue, 11 Apr 2023 18:49:08 -0700 Subject: [PATCH] Add workaround for not having async vector store methods (#2733) This allows us to use the async API for the Retrieval chains, though it is not guaranteed to be thread safe. --- langchain/vectorstores/base.py | 23 ++++++++++++++++--- .../vectorstores/test_chroma.py | 13 +++++++++++ 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/langchain/vectorstores/base.py b/langchain/vectorstores/base.py index 92f3a601..b1c9bf64 100644 --- a/langchain/vectorstores/base.py +++ b/langchain/vectorstores/base.py @@ -1,7 +1,9 @@ """Interface for vector stores.""" from __future__ import annotations +import asyncio from abc import ABC, abstractmethod +from functools import partial from typing import Any, Dict, Iterable, List, Optional, Type, TypeVar from pydantic import BaseModel, Field, root_validator @@ -83,7 +85,12 @@ class VectorStore(ABC): self, query: str, k: int = 4, **kwargs: Any ) -> List[Document]: """Return docs most similar to query.""" - raise NotImplementedError + + # This is a temporary workaround to make the similarity search + # asynchronous. The proper solution is to make the similarity search + # asynchronous in the vector store implementations. + func = partial(self.similarity_search, query, k, **kwargs) + return await asyncio.get_event_loop().run_in_executor(None, func) def similarity_search_by_vector( self, embedding: List[float], k: int = 4, **kwargs: Any @@ -103,7 +110,12 @@ class VectorStore(ABC): self, embedding: List[float], k: int = 4, **kwargs: Any ) -> List[Document]: """Return docs most similar to embedding vector.""" - raise NotImplementedError + + # This is a temporary workaround to make the similarity search + # asynchronous. The proper solution is to make the similarity search + # asynchronous in the vector store implementations. 
+ func = partial(self.similarity_search_by_vector, embedding, k, **kwargs) + return await asyncio.get_event_loop().run_in_executor(None, func) def max_marginal_relevance_search( self, query: str, k: int = 4, fetch_k: int = 20 @@ -127,7 +139,12 @@ class VectorStore(ABC): self, query: str, k: int = 4, fetch_k: int = 20 ) -> List[Document]: """Return docs selected using the maximal marginal relevance.""" - raise NotImplementedError + + # This is a temporary workaround to make the MMR search + # asynchronous. The proper solution is to make the MMR search + # asynchronous in the vector store implementations. + func = partial(self.max_marginal_relevance_search, query, k, fetch_k) + return await asyncio.get_event_loop().run_in_executor(None, func) def max_marginal_relevance_search_by_vector( self, embedding: List[float], k: int = 4, fetch_k: int = 20 diff --git a/tests/integration_tests/vectorstores/test_chroma.py b/tests/integration_tests/vectorstores/test_chroma.py index 9cb07599..0075a163 100644 --- a/tests/integration_tests/vectorstores/test_chroma.py +++ b/tests/integration_tests/vectorstores/test_chroma.py @@ -1,4 +1,6 @@ """Test Chroma functionality.""" +import pytest + from langchain.docstore.document import Document from langchain.vectorstores import Chroma from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings @@ -14,6 +16,17 @@ def test_chroma() -> None: assert output == [Document(page_content="foo")] +@pytest.mark.asyncio +async def test_chroma_async() -> None: + """Test end to end construction and async search.""" + texts = ["foo", "bar", "baz"] + docsearch = Chroma.from_texts( + collection_name="test_collection", texts=texts, embedding=FakeEmbeddings() + ) + output = await docsearch.asimilarity_search("foo", k=1) + assert output == [Document(page_content="foo")] + + def test_chroma_with_metadatas() -> None: """Test end to end construction and search.""" texts = ["foo", "bar", "baz"]