Add workaround for not having async vector store methods (#2733)

This lets the Retrieval chains use the async API by running the synchronous vector store methods in an executor, though this workaround is not guaranteed to be thread-safe.
fix_agent_callbacks
Ankush Gola 1 year ago committed by GitHub
parent 0806951c07
commit c1521ddbdb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,7 +1,9 @@
"""Interface for vector stores.""" """Interface for vector stores."""
from __future__ import annotations from __future__ import annotations
import asyncio
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from functools import partial
from typing import Any, Dict, Iterable, List, Optional, Type, TypeVar from typing import Any, Dict, Iterable, List, Optional, Type, TypeVar
from pydantic import BaseModel, Field, root_validator from pydantic import BaseModel, Field, root_validator
@ -83,7 +85,12 @@ class VectorStore(ABC):
self, query: str, k: int = 4, **kwargs: Any self, query: str, k: int = 4, **kwargs: Any
) -> List[Document]: ) -> List[Document]:
"""Return docs most similar to query.""" """Return docs most similar to query."""
raise NotImplementedError
# This is a temporary workaround to make the similarity search
# asynchronous. The proper solution is to make the similarity search
# asynchronous in the vector store implementations.
func = partial(self.similarity_search, query, k, **kwargs)
return await asyncio.get_event_loop().run_in_executor(None, func)
def similarity_search_by_vector( def similarity_search_by_vector(
self, embedding: List[float], k: int = 4, **kwargs: Any self, embedding: List[float], k: int = 4, **kwargs: Any
@ -103,7 +110,12 @@ class VectorStore(ABC):
self, embedding: List[float], k: int = 4, **kwargs: Any self, embedding: List[float], k: int = 4, **kwargs: Any
) -> List[Document]: ) -> List[Document]:
"""Return docs most similar to embedding vector.""" """Return docs most similar to embedding vector."""
raise NotImplementedError
# This is a temporary workaround to make the similarity search
# asynchronous. The proper solution is to make the similarity search
# asynchronous in the vector store implementations.
func = partial(self.similarity_search_by_vector, embedding, k, **kwargs)
return await asyncio.get_event_loop().run_in_executor(None, func)
def max_marginal_relevance_search( def max_marginal_relevance_search(
self, query: str, k: int = 4, fetch_k: int = 20 self, query: str, k: int = 4, fetch_k: int = 20
@ -127,7 +139,12 @@ class VectorStore(ABC):
self, query: str, k: int = 4, fetch_k: int = 20 self, query: str, k: int = 4, fetch_k: int = 20
) -> List[Document]: ) -> List[Document]:
"""Return docs selected using the maximal marginal relevance.""" """Return docs selected using the maximal marginal relevance."""
raise NotImplementedError
# This is a temporary workaround to make the maximal marginal relevance
# search asynchronous. The proper solution is to make the search
# asynchronous in the vector store implementations.
func = partial(self.max_marginal_relevance_search, query, k, fetch_k)
return await asyncio.get_event_loop().run_in_executor(None, func)
def max_marginal_relevance_search_by_vector( def max_marginal_relevance_search_by_vector(
self, embedding: List[float], k: int = 4, fetch_k: int = 20 self, embedding: List[float], k: int = 4, fetch_k: int = 20

@ -1,4 +1,6 @@
"""Test Chroma functionality.""" """Test Chroma functionality."""
import pytest
from langchain.docstore.document import Document from langchain.docstore.document import Document
from langchain.vectorstores import Chroma from langchain.vectorstores import Chroma
from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
@ -14,6 +16,17 @@ def test_chroma() -> None:
assert output == [Document(page_content="foo")] assert output == [Document(page_content="foo")]
@pytest.mark.asyncio
async def test_chroma_async() -> None:
    """Build a Chroma store from texts and query it via the async search API."""
    store = Chroma.from_texts(
        collection_name="test_collection",
        texts=["foo", "bar", "baz"],
        embedding=FakeEmbeddings(),
    )
    # Ask for a single hit for the query "foo" through the awaitable method.
    hits = await store.asimilarity_search("foo", k=1)
    expected = [Document(page_content="foo")]
    assert hits == expected
def test_chroma_with_metadatas() -> None: def test_chroma_with_metadatas() -> None:
"""Test end to end construction and search.""" """Test end to end construction and search."""
texts = ["foo", "bar", "baz"] texts = ["foo", "bar", "baz"]

Loading…
Cancel
Save