Add workaround for not having async vector store methods (#2733)

This allows us to use the async API for the Retrieval chains. The async methods simply run the synchronous implementations in an executor, so they are not guaranteed to be thread safe.
fix_agent_callbacks
Ankush Gola 1 year ago committed by GitHub
parent 0806951c07
commit c1521ddbdb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,7 +1,9 @@
"""Interface for vector stores."""
from __future__ import annotations
import asyncio
from abc import ABC, abstractmethod
from functools import partial
from typing import Any, Dict, Iterable, List, Optional, Type, TypeVar
from pydantic import BaseModel, Field, root_validator
@ -83,7 +85,12 @@ class VectorStore(ABC):
self, query: str, k: int = 4, **kwargs: Any
) -> List[Document]:
"""Return docs most similar to query."""
raise NotImplementedError
# This is a temporary workaround to make the similarity search
# asynchronous. The proper solution is to make the similarity search
# asynchronous in the vector store implementations.
func = partial(self.similarity_search, query, k, **kwargs)
return await asyncio.get_event_loop().run_in_executor(None, func)
def similarity_search_by_vector(
self, embedding: List[float], k: int = 4, **kwargs: Any
@ -103,7 +110,12 @@ class VectorStore(ABC):
self, embedding: List[float], k: int = 4, **kwargs: Any
) -> List[Document]:
"""Return docs most similar to embedding vector."""
raise NotImplementedError
# This is a temporary workaround to make the similarity search
# asynchronous. The proper solution is to make the similarity search
# asynchronous in the vector store implementations.
func = partial(self.similarity_search_by_vector, embedding, k, **kwargs)
return await asyncio.get_event_loop().run_in_executor(None, func)
def max_marginal_relevance_search(
self, query: str, k: int = 4, fetch_k: int = 20
@ -127,7 +139,12 @@ class VectorStore(ABC):
self, query: str, k: int = 4, fetch_k: int = 20
) -> List[Document]:
"""Return docs selected using the maximal marginal relevance."""
raise NotImplementedError
# This is a temporary workaround to make the maximal marginal relevance
# search asynchronous. The proper solution is to make it asynchronous in
# the vector store implementations.
func = partial(self.max_marginal_relevance_search, query, k, fetch_k)
return await asyncio.get_event_loop().run_in_executor(None, func)
def max_marginal_relevance_search_by_vector(
self, embedding: List[float], k: int = 4, fetch_k: int = 20

@ -1,4 +1,6 @@
"""Test Chroma functionality."""
import pytest
from langchain.docstore.document import Document
from langchain.vectorstores import Chroma
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
@ -14,6 +16,17 @@ def test_chroma() -> None:
assert output == [Document(page_content="foo")]
@pytest.mark.asyncio
async def test_chroma_async() -> None:
    """Check async similarity search end to end on a freshly built store."""
    # Build a small collection from three texts using the fake embeddings.
    store = Chroma.from_texts(
        collection_name="test_collection",
        texts=["foo", "bar", "baz"],
        embedding=FakeEmbeddings(),
    )
    # Ask for the single closest match to "foo" through the async API.
    results = await store.asimilarity_search("foo", k=1)
    assert results == [Document(page_content="foo")]
def test_chroma_with_metadatas() -> None:
"""Test end to end construction and search."""
texts = ["foo", "bar", "baz"]

Loading…
Cancel
Save