Harrison/move vectorstore base (#11030)

Harrison Chase committed by GitHub
parent 3eb79580c2
commit 5f13668fa0
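This commit moves the `VectorStore` and `VectorStoreRetriever` base classes from `langchain.vectorstores.base` to `langchain.schema.vectorstore` and updates every in-tree import. The old module is kept as a re-export shim (see the `@@ -1,608 +1,3 @@` hunk below), so existing imports keep working. A minimal before/after sketch of the import change, using only paths that appear in this diff:

# Old path -- still resolves after this commit via the re-export shim:
from langchain.vectorstores.base import VectorStore, VectorStoreRetriever

# New canonical path introduced by this commit:
from langchain.schema.vectorstore import VectorStore, VectorStoreRetriever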

@@ -36,7 +36,7 @@
"from langchain.chains import LLMChain\nfrom langchain.llms import OpenAI\nfrom langchain.prompts import PromptTemplate\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
"from langchain.llms import BaseLLM\n",
"from langchain.vectorstores.base import VectorStore\n",
"from langchain.schema.vectorstore import VectorStore\n",
"from pydantic import BaseModel, Field\n",
"from langchain.chains.base import Chain\n",
"from langchain_experimental.autonomous_agents import BabyAGI"

@@ -32,7 +32,7 @@
"from langchain.chains import LLMChain\nfrom langchain.llms import OpenAI\nfrom langchain.prompts import PromptTemplate\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
"from langchain.llms import BaseLLM\n",
"from langchain.vectorstores.base import VectorStore\n",
"from langchain.schema.vectorstore import VectorStore\n",
"from pydantic import BaseModel, Field\n",
"from langchain.chains.base import Chain\n",
"from langchain_experimental.autonomous_agents import BabyAGI"

@@ -10,9 +10,9 @@ from langchain.schema import (
Document,
)
from langchain.schema.messages import AIMessage, HumanMessage, SystemMessage
+from langchain.schema.vectorstore import VectorStoreRetriever
from langchain.tools.base import BaseTool
from langchain.tools.human.tool import HumanInputRun
-from langchain.vectorstores.base import VectorStoreRetriever
from langchain_experimental.autonomous_agents.autogpt.output_parser import (
AutoGPTOutputParser,

@@ -1,7 +1,7 @@
from typing import Any, Dict, List
from langchain.memory.chat_memory import BaseChatMemory, get_prompt_input_key
-from langchain.vectorstores.base import VectorStoreRetriever
+from langchain.schema.vectorstore import VectorStoreRetriever
from langchain_experimental.pydantic_v1 import Field

@@ -5,8 +5,8 @@ from langchain.prompts.chat import (
BaseChatPromptTemplate,
)
from langchain.schema.messages import BaseMessage, HumanMessage, SystemMessage
+from langchain.schema.vectorstore import VectorStoreRetriever
from langchain.tools.base import BaseTool
-from langchain.vectorstores.base import VectorStoreRetriever
from langchain_experimental.autonomous_agents.autogpt.prompt_generator import get_prompt
from langchain_experimental.pydantic_v1 import BaseModel

@@ -5,7 +5,7 @@ from typing import Any, Dict, List, Optional
from langchain.callbacks.manager import CallbackManagerForChainRun
from langchain.chains.base import Chain
from langchain.schema.language_model import BaseLanguageModel
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
from langchain_experimental.autonomous_agents.baby_agi.task_creation import (
TaskCreationChain,

@@ -5,12 +5,12 @@ from langchain.agents.agent_toolkits.base import BaseToolkit
from langchain.llms.openai import OpenAI
from langchain.pydantic_v1 import BaseModel, Field
from langchain.schema.language_model import BaseLanguageModel
+from langchain.schema.vectorstore import VectorStore
from langchain.tools import BaseTool
from langchain.tools.vectorstore.tool import (
VectorStoreQATool,
VectorStoreQAWithSourcesTool,
)
-from langchain.vectorstores.base import VectorStore
class VectorStoreInfo(BaseModel):

@@ -22,7 +22,7 @@ from langchain.pydantic_v1 import Extra, Field, root_validator
from langchain.schema import BasePromptTemplate, BaseRetriever, Document
from langchain.schema.language_model import BaseLanguageModel
from langchain.schema.messages import BaseMessage
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
# Depending on the memory type and configuration, the chat history format may differ.
# This needs to be consolidated.

@@ -11,7 +11,7 @@ from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains.qa_with_sources.base import BaseQAWithSourcesChain
from langchain.docstore.document import Document
from langchain.pydantic_v1 import Field, root_validator
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
class VectorDBQAWithSourcesChain(BaseQAWithSourcesChain):

@@ -21,7 +21,7 @@ from langchain.prompts import PromptTemplate
from langchain.pydantic_v1 import Extra, Field, root_validator
from langchain.schema import BaseRetriever, Document
from langchain.schema.language_model import BaseLanguageModel
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
class BaseRetrievalQA(Chain):
@@ -198,7 +198,7 @@ class RetrievalQA(BaseRetrievalQA):
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.faiss import FAISS
-from langchain.vectorstores.base import VectorStoreRetriever
+from langchain.schema.vectorstore import VectorStoreRetriever
retriever = VectorStoreRetriever(vectorstore=FAISS(...))
retrievalQA = RetrievalQA.from_llm(llm=OpenAI(), retriever=retriever)

@@ -7,7 +7,7 @@ from langchain.chains.router.base import RouterChain
from langchain.docstore.document import Document
from langchain.pydantic_v1 import Extra
from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
class EmbeddingRouterChain(RouterChain):

@@ -25,7 +25,7 @@ from langchain.document_loaders.base import BaseLoader
from langchain.indexes.base import NAMESPACE_UUID, RecordManager
from langchain.pydantic_v1 import root_validator
from langchain.schema import Document
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
T = TypeVar("T")

@@ -9,8 +9,8 @@ from langchain.pydantic_v1 import BaseModel, Extra, Field
from langchain.schema import Document
from langchain.schema.embeddings import Embeddings
from langchain.schema.language_model import BaseLanguageModel
+from langchain.schema.vectorstore import VectorStore
from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter
-from langchain.vectorstores.base import VectorStore
from langchain.vectorstores.chroma import Chroma

@@ -6,7 +6,7 @@ from langchain.memory.chat_memory import BaseMemory
from langchain.memory.utils import get_prompt_input_key
from langchain.pydantic_v1 import Field
from langchain.schema import Document
-from langchain.vectorstores.base import VectorStoreRetriever
+from langchain.schema.vectorstore import VectorStoreRetriever
class VectorStoreRetrieverMemory(BaseMemory):

@@ -6,7 +6,7 @@ from typing import Any, Dict, List, Optional, Type
from langchain.prompts.example_selector.base import BaseExampleSelector
from langchain.pydantic_v1 import BaseModel, Extra
from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
def sorted_values(values: Dict[str, str]) -> List[Any]:

@@ -5,7 +5,7 @@ from typing import Any, Dict, List, Optional, Tuple
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
from langchain.pydantic_v1 import Field
from langchain.schema import BaseRetriever, Document
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
def _get_hours_passed(time: datetime.datetime, ref_time: datetime.datetime) -> float:

@@ -16,9 +16,9 @@ from langchain.output_parsers.pydantic import PydanticOutputParser
from langchain.prompts import BasePromptTemplate, PromptTemplate
from langchain.pydantic_v1 import BaseModel, Field
from langchain.schema import BaseRetriever, Document
+from langchain.schema.vectorstore import VectorStore
from langchain.text_splitter import RecursiveCharacterTextSplitter, TextSplitter
from langchain.utilities import GoogleSearchAPIWrapper
-from langchain.vectorstores.base import VectorStore
logger = logging.getLogger(__name__)

@@ -0,0 +1,611 @@
from __future__ import annotations
import asyncio
import logging
import math
import warnings
from abc import ABC, abstractmethod
from functools import partial
from typing import (
TYPE_CHECKING,
Any,
Callable,
ClassVar,
Collection,
Dict,
Iterable,
List,
Optional,
Tuple,
Type,
TypeVar,
)
from langchain.pydantic_v1 import Field, root_validator
from langchain.schema import BaseRetriever
from langchain.schema.document import Document
from langchain.schema.embeddings import Embeddings
if TYPE_CHECKING:
from langchain.callbacks.manager import (
AsyncCallbackManagerForRetrieverRun,
CallbackManagerForRetrieverRun,
)
logger = logging.getLogger(__name__)
VST = TypeVar("VST", bound="VectorStore")
class VectorStore(ABC):
"""Interface for vector store."""
@abstractmethod
def add_texts(
self,
texts: Iterable[str],
metadatas: Optional[List[dict]] = None,
**kwargs: Any,
) -> List[str]:
"""Run more texts through the embeddings and add to the vectorstore.
Args:
texts: Iterable of strings to add to the vectorstore.
metadatas: Optional list of metadatas associated with the texts.
kwargs: vectorstore specific parameters
Returns:
List of ids from adding the texts into the vectorstore.
"""
@property
def embeddings(self) -> Optional[Embeddings]:
"""Access the query embedding object if available."""
logger.debug(
f"{Embeddings.__name__} is not implemented for {self.__class__.__name__}"
)
return None
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
"""Delete by vector ID or other criteria.
Args:
ids: List of ids to delete.
**kwargs: Other keyword arguments that subclasses might use.
Returns:
Optional[bool]: True if deletion is successful,
False otherwise, None if not implemented.
"""
raise NotImplementedError("delete method must be implemented by subclass.")
async def aadd_texts(
self,
texts: Iterable[str],
metadatas: Optional[List[dict]] = None,
**kwargs: Any,
) -> List[str]:
"""Run more texts through the embeddings and add to the vectorstore."""
raise NotImplementedError
def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]:
"""Run more documents through the embeddings and add to the vectorstore.
Args:
documents (List[Document]): Documents to add to the vectorstore.
Returns:
List[str]: List of IDs of the added texts.
"""
# TODO: Handle the case where the user doesn't provide ids on the Collection
texts = [doc.page_content for doc in documents]
metadatas = [doc.metadata for doc in documents]
return self.add_texts(texts, metadatas, **kwargs)
async def aadd_documents(
self, documents: List[Document], **kwargs: Any
) -> List[str]:
"""Run more documents through the embeddings and add to the vectorstore.
Args:
documents (List[Document]): Documents to add to the vectorstore.
Returns:
List[str]: List of IDs of the added texts.
"""
texts = [doc.page_content for doc in documents]
metadatas = [doc.metadata for doc in documents]
return await self.aadd_texts(texts, metadatas, **kwargs)
def search(self, query: str, search_type: str, **kwargs: Any) -> List[Document]:
"""Return docs most similar to query using specified search type."""
if search_type == "similarity":
return self.similarity_search(query, **kwargs)
elif search_type == "mmr":
return self.max_marginal_relevance_search(query, **kwargs)
else:
raise ValueError(
f"search_type of {search_type} not allowed. Expected "
"search_type to be 'similarity' or 'mmr'."
)
async def asearch(
self, query: str, search_type: str, **kwargs: Any
) -> List[Document]:
"""Return docs most similar to query using specified search type."""
if search_type == "similarity":
return await self.asimilarity_search(query, **kwargs)
elif search_type == "mmr":
return await self.amax_marginal_relevance_search(query, **kwargs)
else:
raise ValueError(
f"search_type of {search_type} not allowed. Expected "
"search_type to be 'similarity' or 'mmr'."
)
@abstractmethod
def similarity_search(
self, query: str, k: int = 4, **kwargs: Any
) -> List[Document]:
"""Return docs most similar to query."""
@staticmethod
def _euclidean_relevance_score_fn(distance: float) -> float:
"""Return a similarity score on a scale [0, 1]."""
# The 'correct' relevance function
# may differ depending on a few things, including:
# - the distance / similarity metric used by the VectorStore
# - the scale of your embeddings (OpenAI's are unit normed. Many
# others are not!)
# - embedding dimensionality
# - etc.
# This function converts the euclidean norm of normalized embeddings
# (0 is most similar, sqrt(2) most dissimilar)
# to a similarity function (0 to 1)
return 1.0 - distance / math.sqrt(2)
@staticmethod
def _cosine_relevance_score_fn(distance: float) -> float:
"""Normalize the distance to a score on a scale [0, 1]."""
return 1.0 - distance
@staticmethod
def _max_inner_product_relevance_score_fn(distance: float) -> float:
"""Normalize the distance to a score on a scale [0, 1]."""
if distance > 0:
return 1.0 - distance
return -1.0 * distance
def _select_relevance_score_fn(self) -> Callable[[float], float]:
"""
The 'correct' relevance function
may differ depending on a few things, including:
- the distance / similarity metric used by the VectorStore
- the scale of your embeddings (OpenAI's are unit normed. Many others are not!)
- embedding dimensionality
- etc.
Vectorstores should define their own selection based method of relevance.
"""
raise NotImplementedError
def similarity_search_with_score(
self, *args: Any, **kwargs: Any
) -> List[Tuple[Document, float]]:
"""Run similarity search with distance."""
raise NotImplementedError
def _similarity_search_with_relevance_scores(
self,
query: str,
k: int = 4,
**kwargs: Any,
) -> List[Tuple[Document, float]]:
"""
Default similarity search with relevance scores. Modify if necessary
in subclass.
Return docs and relevance scores in the range [0, 1].
0 is dissimilar, 1 is most similar.
Args:
query: input text
k: Number of Documents to return. Defaults to 4.
**kwargs: kwargs to be passed to similarity search. Should include:
score_threshold: Optional, a floating point value between 0 to 1 to
filter the resulting set of retrieved docs
Returns:
List of Tuples of (doc, similarity_score)
"""
relevance_score_fn = self._select_relevance_score_fn()
docs_and_scores = self.similarity_search_with_score(query, k, **kwargs)
return [(doc, relevance_score_fn(score)) for doc, score in docs_and_scores]
def similarity_search_with_relevance_scores(
self,
query: str,
k: int = 4,
**kwargs: Any,
) -> List[Tuple[Document, float]]:
"""Return docs and relevance scores in the range [0, 1].
0 is dissimilar, 1 is most similar.
Args:
query: input text
k: Number of Documents to return. Defaults to 4.
**kwargs: kwargs to be passed to similarity search. Should include:
score_threshold: Optional, a floating point value between 0 to 1 to
filter the resulting set of retrieved docs
Returns:
List of Tuples of (doc, similarity_score)
"""
score_threshold = kwargs.pop("score_threshold", None)
docs_and_similarities = self._similarity_search_with_relevance_scores(
query, k=k, **kwargs
)
if any(
similarity < 0.0 or similarity > 1.0
for _, similarity in docs_and_similarities
):
warnings.warn(
"Relevance scores must be between"
f" 0 and 1, got {docs_and_similarities}"
)
if score_threshold is not None:
docs_and_similarities = [
(doc, similarity)
for doc, similarity in docs_and_similarities
if similarity >= score_threshold
]
if len(docs_and_similarities) == 0:
warnings.warn(
"No relevant docs were retrieved using the relevance score"
f" threshold {score_threshold}"
)
return docs_and_similarities
async def asimilarity_search_with_relevance_scores(
self, query: str, k: int = 4, **kwargs: Any
) -> List[Tuple[Document, float]]:
"""Return docs most similar to query."""
# This is a temporary workaround to make the similarity search
# asynchronous. The proper solution is to make the similarity search
# asynchronous in the vector store implementations.
func = partial(
self.similarity_search_with_relevance_scores, query, k=k, **kwargs
)
return await asyncio.get_event_loop().run_in_executor(None, func)
async def asimilarity_search(
self, query: str, k: int = 4, **kwargs: Any
) -> List[Document]:
"""Return docs most similar to query."""
# This is a temporary workaround to make the similarity search
# asynchronous. The proper solution is to make the similarity search
# asynchronous in the vector store implementations.
func = partial(self.similarity_search, query, k=k, **kwargs)
return await asyncio.get_event_loop().run_in_executor(None, func)
def similarity_search_by_vector(
self, embedding: List[float], k: int = 4, **kwargs: Any
) -> List[Document]:
"""Return docs most similar to embedding vector.
Args:
embedding: Embedding to look up documents similar to.
k: Number of Documents to return. Defaults to 4.
Returns:
List of Documents most similar to the query vector.
"""
raise NotImplementedError
async def asimilarity_search_by_vector(
self, embedding: List[float], k: int = 4, **kwargs: Any
) -> List[Document]:
"""Return docs most similar to embedding vector."""
# This is a temporary workaround to make the similarity search
# asynchronous. The proper solution is to make the similarity search
# asynchronous in the vector store implementations.
func = partial(self.similarity_search_by_vector, embedding, k=k, **kwargs)
return await asyncio.get_event_loop().run_in_executor(None, func)
def max_marginal_relevance_search(
self,
query: str,
k: int = 4,
fetch_k: int = 20,
lambda_mult: float = 0.5,
**kwargs: Any,
) -> List[Document]:
"""Return docs selected using the maximal marginal relevance.
Maximal marginal relevance optimizes for similarity to query AND diversity
among selected documents.
Args:
query: Text to look up documents similar to.
k: Number of Documents to return. Defaults to 4.
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
lambda_mult: Number between 0 and 1 that determines the degree
of diversity among the results with 0 corresponding
to maximum diversity and 1 to minimum diversity.
Defaults to 0.5.
Returns:
List of Documents selected by maximal marginal relevance.
"""
raise NotImplementedError
async def amax_marginal_relevance_search(
self,
query: str,
k: int = 4,
fetch_k: int = 20,
lambda_mult: float = 0.5,
**kwargs: Any,
) -> List[Document]:
"""Return docs selected using the maximal marginal relevance."""
# This is a temporary workaround to make the similarity search
# asynchronous. The proper solution is to make the similarity search
# asynchronous in the vector store implementations.
func = partial(
self.max_marginal_relevance_search,
query,
k=k,
fetch_k=fetch_k,
lambda_mult=lambda_mult,
**kwargs,
)
return await asyncio.get_event_loop().run_in_executor(None, func)
def max_marginal_relevance_search_by_vector(
self,
embedding: List[float],
k: int = 4,
fetch_k: int = 20,
lambda_mult: float = 0.5,
**kwargs: Any,
) -> List[Document]:
"""Return docs selected using the maximal marginal relevance.
Maximal marginal relevance optimizes for similarity to query AND diversity
among selected documents.
Args:
embedding: Embedding to look up documents similar to.
k: Number of Documents to return. Defaults to 4.
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
lambda_mult: Number between 0 and 1 that determines the degree
of diversity among the results with 0 corresponding
to maximum diversity and 1 to minimum diversity.
Defaults to 0.5.
Returns:
List of Documents selected by maximal marginal relevance.
"""
raise NotImplementedError
async def amax_marginal_relevance_search_by_vector(
self,
embedding: List[float],
k: int = 4,
fetch_k: int = 20,
lambda_mult: float = 0.5,
**kwargs: Any,
) -> List[Document]:
"""Return docs selected using the maximal marginal relevance."""
raise NotImplementedError
@classmethod
def from_documents(
cls: Type[VST],
documents: List[Document],
embedding: Embeddings,
**kwargs: Any,
) -> VST:
"""Return VectorStore initialized from documents and embeddings."""
texts = [d.page_content for d in documents]
metadatas = [d.metadata for d in documents]
return cls.from_texts(texts, embedding, metadatas=metadatas, **kwargs)
@classmethod
async def afrom_documents(
cls: Type[VST],
documents: List[Document],
embedding: Embeddings,
**kwargs: Any,
) -> VST:
"""Return VectorStore initialized from documents and embeddings."""
texts = [d.page_content for d in documents]
metadatas = [d.metadata for d in documents]
return await cls.afrom_texts(texts, embedding, metadatas=metadatas, **kwargs)
@classmethod
@abstractmethod
def from_texts(
cls: Type[VST],
texts: List[str],
embedding: Embeddings,
metadatas: Optional[List[dict]] = None,
**kwargs: Any,
) -> VST:
"""Return VectorStore initialized from texts and embeddings."""
@classmethod
async def afrom_texts(
cls: Type[VST],
texts: List[str],
embedding: Embeddings,
metadatas: Optional[List[dict]] = None,
**kwargs: Any,
) -> VST:
"""Return VectorStore initialized from texts and embeddings."""
raise NotImplementedError
def _get_retriever_tags(self) -> List[str]:
"""Get tags for retriever."""
tags = [self.__class__.__name__]
if self.embeddings:
tags.append(self.embeddings.__class__.__name__)
return tags
def as_retriever(self, **kwargs: Any) -> VectorStoreRetriever:
"""Return VectorStoreRetriever initialized from this VectorStore.
Args:
search_type (Optional[str]): Defines the type of search that
the Retriever should perform.
Can be "similarity" (default), "mmr", or
"similarity_score_threshold".
search_kwargs (Optional[Dict]): Keyword arguments to pass to the
search function. Can include things like:
k: Amount of documents to return (Default: 4)
score_threshold: Minimum relevance threshold
for similarity_score_threshold
fetch_k: Amount of documents to pass to MMR algorithm (Default: 20)
lambda_mult: Diversity of results returned by MMR;
1 for minimum diversity and 0 for maximum. (Default: 0.5)
filter: Filter by document metadata
Returns:
VectorStoreRetriever: Retriever class for VectorStore.
Examples:
.. code-block:: python
# Retrieve more documents with higher diversity
# Useful if your dataset has many similar documents
docsearch.as_retriever(
search_type="mmr",
search_kwargs={'k': 6, 'lambda_mult': 0.25}
)
# Fetch more documents for the MMR algorithm to consider
# But only return the top 5
docsearch.as_retriever(
search_type="mmr",
search_kwargs={'k': 5, 'fetch_k': 50}
)
# Only retrieve documents that have a relevance score
# Above a certain threshold
docsearch.as_retriever(
search_type="similarity_score_threshold",
search_kwargs={'score_threshold': 0.8}
)
# Only get the single most similar document from the dataset
docsearch.as_retriever(search_kwargs={'k': 1})
# Use a filter to only retrieve documents from a specific paper
docsearch.as_retriever(
search_kwargs={'filter': {'paper_title':'GPT-4 Technical Report'}}
)
"""
tags = kwargs.pop("tags", None) or []
tags.extend(self._get_retriever_tags())
return VectorStoreRetriever(vectorstore=self, **kwargs, tags=tags)
class VectorStoreRetriever(BaseRetriever):
"""Base Retriever class for VectorStore."""
vectorstore: VectorStore
"""VectorStore to use for retrieval."""
search_type: str = "similarity"
"""Type of search to perform. Defaults to "similarity"."""
search_kwargs: dict = Field(default_factory=dict)
"""Keyword arguments to pass to the search function."""
allowed_search_types: ClassVar[Collection[str]] = (
"similarity",
"similarity_score_threshold",
"mmr",
)
class Config:
"""Configuration for this pydantic object."""
arbitrary_types_allowed = True
@root_validator()
def validate_search_type(cls, values: Dict) -> Dict:
"""Validate search type."""
search_type = values["search_type"]
if search_type not in cls.allowed_search_types:
raise ValueError(
f"search_type of {search_type} not allowed. Valid values are: "
f"{cls.allowed_search_types}"
)
if search_type == "similarity_score_threshold":
score_threshold = values["search_kwargs"].get("score_threshold")
if (score_threshold is None) or (not isinstance(score_threshold, float)):
raise ValueError(
"`score_threshold` is not specified with a float value(0~1) "
"in `search_kwargs`."
)
return values
def _get_relevant_documents(
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
) -> List[Document]:
if self.search_type == "similarity":
docs = self.vectorstore.similarity_search(query, **self.search_kwargs)
elif self.search_type == "similarity_score_threshold":
docs_and_similarities = (
self.vectorstore.similarity_search_with_relevance_scores(
query, **self.search_kwargs
)
)
docs = [doc for doc, _ in docs_and_similarities]
elif self.search_type == "mmr":
docs = self.vectorstore.max_marginal_relevance_search(
query, **self.search_kwargs
)
else:
raise ValueError(f"search_type of {self.search_type} not allowed.")
return docs
async def _aget_relevant_documents(
self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun
) -> List[Document]:
if self.search_type == "similarity":
docs = await self.vectorstore.asimilarity_search(
query, **self.search_kwargs
)
elif self.search_type == "similarity_score_threshold":
docs_and_similarities = (
await self.vectorstore.asimilarity_search_with_relevance_scores(
query, **self.search_kwargs
)
)
docs = [doc for doc, _ in docs_and_similarities]
elif self.search_type == "mmr":
docs = await self.vectorstore.amax_marginal_relevance_search(
query, **self.search_kwargs
)
else:
raise ValueError(f"search_type of {self.search_type} not allowed.")
return docs
def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]:
"""Add documents to vectorstore."""
return self.vectorstore.add_documents(documents, **kwargs)
async def aadd_documents(
self, documents: List[Document], **kwargs: Any
) -> List[str]:
"""Add documents to vectorstore."""
return await self.vectorstore.aadd_documents(documents, **kwargs)
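The new langchain/schema/vectorstore.py module shown above defines VectorStore as an ABC whose only abstract members are add_texts, similarity_search, and the from_texts classmethod; everything else either has a default implementation or raises NotImplementedError. A minimal sketch of a conforming subclass, assuming dot-product scoring over whatever an Embeddings object returns (the InMemoryVectorStore name and its internals are illustrative, not part of this commit):

from typing import Any, Iterable, List, Optional

from langchain.schema.document import Document
from langchain.schema.embeddings import Embeddings
from langchain.schema.vectorstore import VectorStore

class InMemoryVectorStore(VectorStore):
    """Toy vector store backed by plain Python lists (illustration only)."""

    def __init__(self, embedding: Embeddings) -> None:
        self._embedding = embedding
        self._vectors: List[List[float]] = []
        self._docs: List[Document] = []

    @property
    def embeddings(self) -> Optional[Embeddings]:
        # Expose the embedding object so _get_retriever_tags() can pick it up.
        return self._embedding

    def add_texts(
        self,
        texts: Iterable[str],
        metadatas: Optional[List[dict]] = None,
        **kwargs: Any,
    ) -> List[str]:
        ids = []
        for i, text in enumerate(texts):
            self._vectors.append(self._embedding.embed_query(text))
            self._docs.append(
                Document(page_content=text, metadata=metadatas[i] if metadatas else {})
            )
            ids.append(str(len(self._docs) - 1))
        return ids

    def similarity_search(self, query: str, k: int = 4, **kwargs: Any) -> List[Document]:
        # Rank by dot product; assumes roughly unit-normed embedding vectors.
        q = self._embedding.embed_query(query)
        scored = sorted(
            zip(self._vectors, self._docs),
            key=lambda pair: sum(a * b for a, b in zip(pair[0], q)),
            reverse=True,
        )
        return [doc for _, doc in scored[:k]]

    @classmethod
    def from_texts(
        cls,
        texts: List[str],
        embedding: Embeddings,
        metadatas: Optional[List[dict]] = None,
        **kwargs: Any,
    ) -> "InMemoryVectorStore":
        store = cls(embedding)
        store.add_texts(texts, metadatas)
        return store

With just those three methods implemented, the inherited machinery works as-is, e.g. InMemoryVectorStore.from_texts(texts, emb).as_retriever(search_kwargs={"k": 1}).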

@@ -8,8 +8,8 @@ from langchain.chains import RetrievalQA, RetrievalQAWithSourcesChain
from langchain.llms.openai import OpenAI
from langchain.pydantic_v1 import BaseModel, Field
from langchain.schema.language_model import BaseLanguageModel
+from langchain.schema.vectorstore import VectorStore
from langchain.tools.base import BaseTool
-from langchain.vectorstores.base import VectorStore
class BaseVectorStoreTool(BaseModel):

@@ -18,6 +18,7 @@ and retrieve the data that are 'most similar' to the embedded query.
Embeddings, Document
""" # noqa: E501
+from langchain.schema.vectorstore import VectorStore
from langchain.vectorstores.alibabacloud_opensearch import (
AlibabaCloudOpenSearch,
AlibabaCloudOpenSearchSettings,
@@ -28,7 +29,6 @@ from langchain.vectorstores.atlas import AtlasDB
from langchain.vectorstores.awadb import AwaDB
from langchain.vectorstores.azuresearch import AzureSearch
from langchain.vectorstores.bageldb import Bagel
-from langchain.vectorstores.base import VectorStore
from langchain.vectorstores.cassandra import Cassandra
from langchain.vectorstores.chroma import Chroma
from langchain.vectorstores.clarifai import Clarifai

@@ -6,7 +6,7 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple
from langchain.schema import Document
from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
logger = logging.getLogger()

@@ -14,8 +14,8 @@ except ImportError:
from langchain.docstore.document import Document
from langchain.schema.embeddings import Embeddings
+from langchain.schema.vectorstore import VectorStore
from langchain.utils import get_from_dict_or_env
-from langchain.vectorstores.base import VectorStore
_LANGCHAIN_DEFAULT_EMBEDDING_DIM = 1536
_LANGCHAIN_DEFAULT_COLLECTION_NAME = "langchain_document"

@@ -13,7 +13,7 @@ from langchain.docstore.base import Docstore
from langchain.docstore.document import Document
from langchain.docstore.in_memory import InMemoryDocstore
from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
from langchain.vectorstores.utils import maximal_marginal_relevance
INDEX_METRICS = frozenset(["angular", "euclidean", "manhattan", "hamming", "dot"])

@@ -8,7 +8,7 @@ import numpy as np
from langchain.docstore.document import Document
from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
logger = logging.getLogger(__name__)

@@ -8,7 +8,7 @@ import numpy as np
from langchain.docstore.document import Document
from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
from langchain.vectorstores.utils import maximal_marginal_relevance
if TYPE_CHECKING:

@@ -26,8 +26,8 @@ from langchain.docstore.document import Document
from langchain.pydantic_v1 import root_validator
from langchain.schema import BaseRetriever
from langchain.schema.embeddings import Embeddings
+from langchain.schema.vectorstore import VectorStore
from langchain.utils import get_from_env
-from langchain.vectorstores.base import VectorStore
logger = logging.getLogger()

@@ -20,8 +20,8 @@ if TYPE_CHECKING:
from langchain.docstore.document import Document
from langchain.schema.embeddings import Embeddings
+from langchain.schema.vectorstore import VectorStore
from langchain.utils import xor_args
-from langchain.vectorstores.base import VectorStore
DEFAULT_K = 5

@@ -1,608 +1,3 @@
from __future__ import annotations
+from langchain.schema.vectorstore import VectorStore, VectorStoreRetriever
import asyncio
import logging
import math
import warnings
from abc import ABC, abstractmethod
from functools import partial
from typing import (
Any,
Callable,
ClassVar,
Collection,
Dict,
Iterable,
List,
Optional,
Tuple,
Type,
TypeVar,
)
from langchain.callbacks.manager import (
AsyncCallbackManagerForRetrieverRun,
CallbackManagerForRetrieverRun,
)
from langchain.docstore.document import Document
from langchain.pydantic_v1 import Field, root_validator
from langchain.schema import BaseRetriever
from langchain.schema.embeddings import Embeddings
logger = logging.getLogger(__name__)
VST = TypeVar("VST", bound="VectorStore")
class VectorStore(ABC):
"""Interface for vector store."""
@abstractmethod
def add_texts(
self,
texts: Iterable[str],
metadatas: Optional[List[dict]] = None,
**kwargs: Any,
) -> List[str]:
"""Run more texts through the embeddings and add to the vectorstore.
Args:
texts: Iterable of strings to add to the vectorstore.
metadatas: Optional list of metadatas associated with the texts.
kwargs: vectorstore specific parameters
Returns:
List of ids from adding the texts into the vectorstore.
"""
@property
def embeddings(self) -> Optional[Embeddings]:
"""Access the query embedding object if available."""
logger.debug(
f"{Embeddings.__name__} is not implemented for {self.__class__.__name__}"
)
return None
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
"""Delete by vector ID or other criteria.
Args:
ids: List of ids to delete.
**kwargs: Other keyword arguments that subclasses might use.
Returns:
Optional[bool]: True if deletion is successful,
False otherwise, None if not implemented.
"""
raise NotImplementedError("delete method must be implemented by subclass.")
async def aadd_texts(
self,
texts: Iterable[str],
metadatas: Optional[List[dict]] = None,
**kwargs: Any,
) -> List[str]:
"""Run more texts through the embeddings and add to the vectorstore."""
raise NotImplementedError
def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]:
"""Run more documents through the embeddings and add to the vectorstore.
Args:
documents (List[Document]): Documents to add to the vectorstore.
Returns:
List[str]: List of IDs of the added texts.
"""
# TODO: Handle the case where the user doesn't provide ids on the Collection
texts = [doc.page_content for doc in documents]
metadatas = [doc.metadata for doc in documents]
return self.add_texts(texts, metadatas, **kwargs)
async def aadd_documents(
self, documents: List[Document], **kwargs: Any
) -> List[str]:
"""Run more documents through the embeddings and add to the vectorstore.
Args:
documents (List[Document]): Documents to add to the vectorstore.
Returns:
List[str]: List of IDs of the added texts.
"""
texts = [doc.page_content for doc in documents]
metadatas = [doc.metadata for doc in documents]
return await self.aadd_texts(texts, metadatas, **kwargs)
def search(self, query: str, search_type: str, **kwargs: Any) -> List[Document]:
"""Return docs most similar to query using specified search type."""
if search_type == "similarity":
return self.similarity_search(query, **kwargs)
elif search_type == "mmr":
return self.max_marginal_relevance_search(query, **kwargs)
else:
raise ValueError(
f"search_type of {search_type} not allowed. Expected "
"search_type to be 'similarity' or 'mmr'."
)
async def asearch(
self, query: str, search_type: str, **kwargs: Any
) -> List[Document]:
"""Return docs most similar to query using specified search type."""
if search_type == "similarity":
return await self.asimilarity_search(query, **kwargs)
elif search_type == "mmr":
return await self.amax_marginal_relevance_search(query, **kwargs)
else:
raise ValueError(
f"search_type of {search_type} not allowed. Expected "
"search_type to be 'similarity' or 'mmr'."
)
@abstractmethod
def similarity_search(
self, query: str, k: int = 4, **kwargs: Any
) -> List[Document]:
"""Return docs most similar to query."""
@staticmethod
def _euclidean_relevance_score_fn(distance: float) -> float:
"""Return a similarity score on a scale [0, 1]."""
# The 'correct' relevance function
# may differ depending on a few things, including:
# - the distance / similarity metric used by the VectorStore
# - the scale of your embeddings (OpenAI's are unit normed. Many
# others are not!)
# - embedding dimensionality
# - etc.
# This function converts the euclidean norm of normalized embeddings
# (0 is most similar, sqrt(2) most dissimilar)
# to a similarity function (0 to 1)
return 1.0 - distance / math.sqrt(2)
@staticmethod
def _cosine_relevance_score_fn(distance: float) -> float:
"""Normalize the distance to a score on a scale [0, 1]."""
return 1.0 - distance
@staticmethod
def _max_inner_product_relevance_score_fn(distance: float) -> float:
"""Normalize the distance to a score on a scale [0, 1]."""
if distance > 0:
return 1.0 - distance
return -1.0 * distance
def _select_relevance_score_fn(self) -> Callable[[float], float]:
"""
The 'correct' relevance function
may differ depending on a few things, including:
- the distance / similarity metric used by the VectorStore
- the scale of your embeddings (OpenAI's are unit normed. Many others are not!)
- embedding dimensionality
- etc.
Vectorstores should define their own selection based method of relevance.
"""
raise NotImplementedError
def similarity_search_with_score(
self, *args: Any, **kwargs: Any
) -> List[Tuple[Document, float]]:
"""Run similarity search with distance."""
raise NotImplementedError
def _similarity_search_with_relevance_scores(
self,
query: str,
k: int = 4,
**kwargs: Any,
) -> List[Tuple[Document, float]]:
"""
Default similarity search with relevance scores. Modify if necessary
in subclass.
Return docs and relevance scores in the range [0, 1].
0 is dissimilar, 1 is most similar.
Args:
query: input text
k: Number of Documents to return. Defaults to 4.
**kwargs: kwargs to be passed to similarity search. Should include:
score_threshold: Optional, a floating point value between 0 to 1 to
filter the resulting set of retrieved docs
Returns:
List of Tuples of (doc, similarity_score)
"""
relevance_score_fn = self._select_relevance_score_fn()
docs_and_scores = self.similarity_search_with_score(query, k, **kwargs)
return [(doc, relevance_score_fn(score)) for doc, score in docs_and_scores]
def similarity_search_with_relevance_scores(
self,
query: str,
k: int = 4,
**kwargs: Any,
) -> List[Tuple[Document, float]]:
"""Return docs and relevance scores in the range [0, 1].
0 is dissimilar, 1 is most similar.
Args:
query: input text
k: Number of Documents to return. Defaults to 4.
**kwargs: kwargs to be passed to similarity search. Should include:
score_threshold: Optional, a floating point value between 0 to 1 to
filter the resulting set of retrieved docs
Returns:
List of Tuples of (doc, similarity_score)
"""
score_threshold = kwargs.pop("score_threshold", None)
docs_and_similarities = self._similarity_search_with_relevance_scores(
query, k=k, **kwargs
)
if any(
similarity < 0.0 or similarity > 1.0
for _, similarity in docs_and_similarities
):
warnings.warn(
"Relevance scores must be between"
f" 0 and 1, got {docs_and_similarities}"
)
if score_threshold is not None:
docs_and_similarities = [
(doc, similarity)
for doc, similarity in docs_and_similarities
if similarity >= score_threshold
]
if len(docs_and_similarities) == 0:
warnings.warn(
"No relevant docs were retrieved using the relevance score"
f" threshold {score_threshold}"
)
return docs_and_similarities
async def asimilarity_search_with_relevance_scores(
self, query: str, k: int = 4, **kwargs: Any
) -> List[Tuple[Document, float]]:
"""Return docs most similar to query."""
# This is a temporary workaround to make the similarity search
# asynchronous. The proper solution is to make the similarity search
# asynchronous in the vector store implementations.
func = partial(
self.similarity_search_with_relevance_scores, query, k=k, **kwargs
)
return await asyncio.get_event_loop().run_in_executor(None, func)
async def asimilarity_search(
self, query: str, k: int = 4, **kwargs: Any
) -> List[Document]:
"""Return docs most similar to query."""
# This is a temporary workaround to make the similarity search
# asynchronous. The proper solution is to make the similarity search
# asynchronous in the vector store implementations.
func = partial(self.similarity_search, query, k=k, **kwargs)
return await asyncio.get_event_loop().run_in_executor(None, func)
def similarity_search_by_vector(
self, embedding: List[float], k: int = 4, **kwargs: Any
) -> List[Document]:
"""Return docs most similar to embedding vector.
Args:
embedding: Embedding to look up documents similar to.
k: Number of Documents to return. Defaults to 4.
Returns:
List of Documents most similar to the query vector.
"""
raise NotImplementedError
async def asimilarity_search_by_vector(
self, embedding: List[float], k: int = 4, **kwargs: Any
) -> List[Document]:
"""Return docs most similar to embedding vector."""
# This is a temporary workaround to make the similarity search
# asynchronous. The proper solution is to make the similarity search
# asynchronous in the vector store implementations.
func = partial(self.similarity_search_by_vector, embedding, k=k, **kwargs)
return await asyncio.get_event_loop().run_in_executor(None, func)
def max_marginal_relevance_search(
self,
query: str,
k: int = 4,
fetch_k: int = 20,
lambda_mult: float = 0.5,
**kwargs: Any,
) -> List[Document]:
"""Return docs selected using the maximal marginal relevance.
Maximal marginal relevance optimizes for similarity to query AND diversity
among selected documents.
Args:
query: Text to look up documents similar to.
k: Number of Documents to return. Defaults to 4.
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
lambda_mult: Number between 0 and 1 that determines the degree
of diversity among the results with 0 corresponding
to maximum diversity and 1 to minimum diversity.
Defaults to 0.5.
Returns:
List of Documents selected by maximal marginal relevance.
"""
raise NotImplementedError
async def amax_marginal_relevance_search(
self,
query: str,
k: int = 4,
fetch_k: int = 20,
lambda_mult: float = 0.5,
**kwargs: Any,
) -> List[Document]:
"""Return docs selected using the maximal marginal relevance."""
# This is a temporary workaround to make the similarity search
# asynchronous. The proper solution is to make the similarity search
# asynchronous in the vector store implementations.
func = partial(
self.max_marginal_relevance_search,
query,
k=k,
fetch_k=fetch_k,
lambda_mult=lambda_mult,
**kwargs,
)
return await asyncio.get_event_loop().run_in_executor(None, func)
def max_marginal_relevance_search_by_vector(
self,
embedding: List[float],
k: int = 4,
fetch_k: int = 20,
lambda_mult: float = 0.5,
**kwargs: Any,
) -> List[Document]:
"""Return docs selected using the maximal marginal relevance.
Maximal marginal relevance optimizes for similarity to query AND diversity
among selected documents.
Args:
embedding: Embedding to look up documents similar to.
k: Number of Documents to return. Defaults to 4.
fetch_k: Number of Documents to fetch to pass to MMR algorithm.
lambda_mult: Number between 0 and 1 that determines the degree
of diversity among the results with 0 corresponding
to maximum diversity and 1 to minimum diversity.
Defaults to 0.5.
Returns:
List of Documents selected by maximal marginal relevance.
"""
raise NotImplementedError
async def amax_marginal_relevance_search_by_vector(
self,
embedding: List[float],
k: int = 4,
fetch_k: int = 20,
lambda_mult: float = 0.5,
**kwargs: Any,
) -> List[Document]:
"""Return docs selected using the maximal marginal relevance."""
raise NotImplementedError
@classmethod
def from_documents(
cls: Type[VST],
documents: List[Document],
embedding: Embeddings,
**kwargs: Any,
) -> VST:
"""Return VectorStore initialized from documents and embeddings."""
texts = [d.page_content for d in documents]
metadatas = [d.metadata for d in documents]
return cls.from_texts(texts, embedding, metadatas=metadatas, **kwargs)
@classmethod
async def afrom_documents(
cls: Type[VST],
documents: List[Document],
embedding: Embeddings,
**kwargs: Any,
) -> VST:
"""Return VectorStore initialized from documents and embeddings."""
texts = [d.page_content for d in documents]
metadatas = [d.metadata for d in documents]
return await cls.afrom_texts(texts, embedding, metadatas=metadatas, **kwargs)
@classmethod
@abstractmethod
def from_texts(
cls: Type[VST],
texts: List[str],
embedding: Embeddings,
metadatas: Optional[List[dict]] = None,
**kwargs: Any,
) -> VST:
"""Return VectorStore initialized from texts and embeddings."""
@classmethod
async def afrom_texts(
cls: Type[VST],
texts: List[str],
embedding: Embeddings,
metadatas: Optional[List[dict]] = None,
**kwargs: Any,
) -> VST:
"""Return VectorStore initialized from texts and embeddings."""
raise NotImplementedError
def _get_retriever_tags(self) -> List[str]:
"""Get tags for retriever."""
tags = [self.__class__.__name__]
if self.embeddings:
tags.append(self.embeddings.__class__.__name__)
return tags
def as_retriever(self, **kwargs: Any) -> VectorStoreRetriever:
"""Return VectorStoreRetriever initialized from this VectorStore.
Args:
search_type (Optional[str]): Defines the type of search that
the Retriever should perform.
Can be "similarity" (default), "mmr", or
"similarity_score_threshold".
search_kwargs (Optional[Dict]): Keyword arguments to pass to the
search function. Can include things like:
k: Amount of documents to return (Default: 4)
score_threshold: Minimum relevance threshold
for similarity_score_threshold
fetch_k: Amount of documents to pass to MMR algorithm (Default: 20)
lambda_mult: Diversity of results returned by MMR;
1 for minimum diversity and 0 for maximum. (Default: 0.5)
filter: Filter by document metadata
Returns:
VectorStoreRetriever: Retriever class for VectorStore.
Examples:
.. code-block:: python
# Retrieve more documents with higher diversity
# Useful if your dataset has many similar documents
docsearch.as_retriever(
search_type="mmr",
search_kwargs={'k': 6, 'lambda_mult': 0.25}
)
# Fetch more documents for the MMR algorithm to consider
# But only return the top 5
docsearch.as_retriever(
search_type="mmr",
search_kwargs={'k': 5, 'fetch_k': 50}
)
# Only retrieve documents that have a relevance score
# Above a certain threshold
docsearch.as_retriever(
search_type="similarity_score_threshold",
search_kwargs={'score_threshold': 0.8}
)
# Only get the single most similar document from the dataset
docsearch.as_retriever(search_kwargs={'k': 1})
# Use a filter to only retrieve documents from a specific paper
docsearch.as_retriever(
search_kwargs={'filter': {'paper_title':'GPT-4 Technical Report'}}
)
"""
tags = kwargs.pop("tags", None) or []
tags.extend(self._get_retriever_tags())
return VectorStoreRetriever(vectorstore=self, **kwargs, tags=tags)
class VectorStoreRetriever(BaseRetriever):
"""Base Retriever class for VectorStore."""
vectorstore: VectorStore
"""VectorStore to use for retrieval."""
search_type: str = "similarity"
"""Type of search to perform. Defaults to "similarity"."""
search_kwargs: dict = Field(default_factory=dict)
"""Keyword arguments to pass to the search function."""
allowed_search_types: ClassVar[Collection[str]] = (
"similarity",
"similarity_score_threshold",
"mmr",
)
class Config:
"""Configuration for this pydantic object."""
arbitrary_types_allowed = True
@root_validator()
def validate_search_type(cls, values: Dict) -> Dict:
"""Validate search type."""
search_type = values["search_type"]
if search_type not in cls.allowed_search_types:
raise ValueError(
f"search_type of {search_type} not allowed. Valid values are: "
f"{cls.allowed_search_types}"
)
if search_type == "similarity_score_threshold":
score_threshold = values["search_kwargs"].get("score_threshold")
if (score_threshold is None) or (not isinstance(score_threshold, float)):
raise ValueError(
"`score_threshold` is not specified with a float value(0~1) "
"in `search_kwargs`."
)
return values
def _get_relevant_documents(
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
) -> List[Document]:
if self.search_type == "similarity":
docs = self.vectorstore.similarity_search(query, **self.search_kwargs)
elif self.search_type == "similarity_score_threshold":
docs_and_similarities = (
self.vectorstore.similarity_search_with_relevance_scores(
query, **self.search_kwargs
)
)
docs = [doc for doc, _ in docs_and_similarities]
elif self.search_type == "mmr":
docs = self.vectorstore.max_marginal_relevance_search(
query, **self.search_kwargs
)
else:
raise ValueError(f"search_type of {self.search_type} not allowed.")
return docs
async def _aget_relevant_documents(
self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun
) -> List[Document]:
if self.search_type == "similarity":
docs = await self.vectorstore.asimilarity_search(
query, **self.search_kwargs
)
elif self.search_type == "similarity_score_threshold":
docs_and_similarities = (
await self.vectorstore.asimilarity_search_with_relevance_scores(
query, **self.search_kwargs
)
)
docs = [doc for doc, _ in docs_and_similarities]
elif self.search_type == "mmr":
docs = await self.vectorstore.amax_marginal_relevance_search(
query, **self.search_kwargs
)
else:
raise ValueError(f"search_type of {self.search_type} not allowed.")
return docs
def add_documents(self, documents: List[Document], **kwargs: Any) -> List[str]:
"""Add documents to vectorstore."""
return self.vectorstore.add_documents(documents, **kwargs)
async def aadd_documents(
self, documents: List[Document], **kwargs: Any
) -> List[str]:
"""Add documents to vectorstore."""
return await self.vectorstore.aadd_documents(documents, **kwargs)
+__all__ = ["VectorStore", "VectorStoreRetriever"]
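After this hunk, langchain/vectorstores/base.py is reduced to a shim that re-exports the moved classes, which is what preserves backward compatibility. A quick sanity check one could run against an installed tree at this commit (illustrative, not part of the diff):

from langchain.schema.vectorstore import VectorStore as new_cls
from langchain.vectorstores.base import VectorStore as old_cls

# The shim re-exports rather than redefines, so identity (not mere equality) holds.
assert old_cls is new_cls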

@@ -22,7 +22,7 @@ if typing.TYPE_CHECKING:
from langchain.docstore.document import Document
from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
from langchain.vectorstores.utils import maximal_marginal_relevance
CVST = TypeVar("CVST", bound="Cassandra")

@@ -18,8 +18,8 @@ import numpy as np
from langchain.docstore.document import Document
from langchain.schema.embeddings import Embeddings
+from langchain.schema.vectorstore import VectorStore
from langchain.utils import xor_args
-from langchain.vectorstores.base import VectorStore
from langchain.vectorstores.utils import maximal_marginal_relevance
if TYPE_CHECKING:

@@ -10,7 +10,7 @@ import requests
from langchain.docstore.document import Document
from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
logger = logging.getLogger(__name__)

@@ -9,7 +9,7 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
from langchain.docstore.document import Document
from langchain.pydantic_v1 import BaseSettings
from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
logger = logging.getLogger()

@@ -14,8 +14,8 @@ import numpy as np
from langchain.docstore.document import Document
from langchain.schema.embeddings import Embeddings
+from langchain.schema.vectorstore import VectorStore
from langchain.utils import get_from_env
-from langchain.vectorstores.base import VectorStore
from langchain.vectorstores.utils import maximal_marginal_relevance
logger = logging.getLogger(__name__)

@@ -16,7 +16,7 @@ except ImportError:
from langchain.docstore.document import Document
from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
from langchain.vectorstores.utils import maximal_marginal_relevance
logger = logging.getLogger(__name__)

@@ -8,7 +8,7 @@ import numpy as np
from langchain.docstore.document import Document
from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
from langchain.vectorstores.utils import maximal_marginal_relevance
logger = logging.getLogger(__name__)

@@ -18,8 +18,8 @@ from typing import (
from langchain._api import deprecated
from langchain.docstore.document import Document
from langchain.schema.embeddings import Embeddings
+from langchain.schema.vectorstore import VectorStore
from langchain.utils import get_from_dict_or_env
-from langchain.vectorstores.base import VectorStore
if TYPE_CHECKING:
from elasticsearch import Elasticsearch

@@ -16,7 +16,7 @@ from typing import (
from langchain.docstore.document import Document
from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
from langchain.vectorstores.utils import DistanceStrategy
if TYPE_CHECKING:

@@ -7,7 +7,7 @@ from typing import TYPE_CHECKING, Any, Iterable, List, Optional, Type
from langchain.docstore.document import Document
from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
if TYPE_CHECKING:
from pyepsilla import vectordb

@@ -23,7 +23,7 @@ from langchain.docstore.base import AddableMixin, Docstore
from langchain.docstore.document import Document
from langchain.docstore.in_memory import InMemoryDocstore
from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
from langchain.vectorstores.utils import DistanceStrategy, maximal_marginal_relevance

@@ -7,8 +7,8 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple, Type
from langchain.docstore.document import Document
from langchain.schema.embeddings import Embeddings
+from langchain.schema.vectorstore import VectorStore
from langchain.utils import get_from_dict_or_env
-from langchain.vectorstores.base import VectorStore
ADA_TOKEN_COUNT = 1536
_LANGCHAIN_DEFAULT_TABLE_NAME = "langchain_pg_embedding"

@@ -5,7 +5,7 @@ from typing import Any, Iterable, List, Optional
from langchain.docstore.document import Document
from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
class LanceDB(VectorStore):

@@ -13,7 +13,7 @@ import requests
from langchain.pydantic_v1 import Field
from langchain.schema import Document
from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore, VectorStoreRetriever
+from langchain.schema.vectorstore import VectorStore, VectorStoreRetriever
class ModelChoices(str, Enum):

@@ -17,7 +17,7 @@ from typing import (
from langchain.docstore.document import Document
from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
if TYPE_CHECKING:
import marqo

@@ -9,7 +9,7 @@ from typing import TYPE_CHECKING, Any, Iterable, List, Optional, Type
from langchain.docstore.document import Document
from langchain.embeddings import TensorflowHubEmbeddings
from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
if TYPE_CHECKING:
from google.cloud import storage

@@ -5,8 +5,8 @@ from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Ty
from langchain.docstore.document import Document
from langchain.schema.embeddings import Embeddings
+from langchain.schema.vectorstore import VectorStore
from langchain.utils import get_from_env
-from langchain.vectorstores.base import VectorStore
if TYPE_CHECKING:
from meilisearch import Client

@@ -8,7 +8,7 @@ import numpy as np
from langchain.docstore.document import Document
from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
from langchain.vectorstores.utils import maximal_marginal_relevance
logger = logging.getLogger(__name__)

@@ -18,7 +18,7 @@ import numpy as np
from langchain.docstore.document import Document
from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
from langchain.vectorstores.utils import maximal_marginal_relevance
if TYPE_CHECKING:

@@ -9,7 +9,7 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple
from langchain.docstore.document import Document
from langchain.pydantic_v1 import BaseSettings
from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
logger = logging.getLogger()

@@ -16,8 +16,8 @@ from typing import (
from langchain.docstore.document import Document
from langchain.schema.embeddings import Embeddings
+from langchain.schema.vectorstore import VectorStore
from langchain.utils import get_from_env
-from langchain.vectorstores.base import VectorStore
from langchain.vectorstores.utils import DistanceStrategy
DEFAULT_DISTANCE_STRATEGY = DistanceStrategy.COSINE

@@ -3,7 +3,7 @@ from typing import Any, Dict, Iterable, List, Optional, Type
from langchain.schema.document import Document
from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VST, VectorStore
+from langchain.schema.vectorstore import VST, VectorStore
FIELD_TYPES = {
"f": "files",

@@ -8,8 +8,8 @@ import numpy as np
from langchain.schema import Document
from langchain.schema.embeddings import Embeddings
+from langchain.schema.vectorstore import VectorStore
from langchain.utils import get_from_dict_or_env
-from langchain.vectorstores.base import VectorStore
from langchain.vectorstores.utils import maximal_marginal_relevance
IMPORT_OPENSEARCH_PY_ERROR = (

@@ -11,8 +11,8 @@ from sqlalchemy.orm import Session, declarative_base, relationship
from langchain.docstore.document import Document
from langchain.schema.embeddings import Embeddings
+from langchain.schema.vectorstore import VectorStore
from langchain.utils import get_from_dict_or_env
-from langchain.vectorstores.base import VectorStore
Base = declarative_base() # type: Any

@@ -27,8 +27,8 @@ from sqlalchemy.orm import Session, declarative_base
from langchain.docstore.document import Document
from langchain.schema.embeddings import Embeddings
+from langchain.schema.vectorstore import VectorStore
from langchain.utils import get_from_dict_or_env
-from langchain.vectorstores.base import VectorStore
from langchain.vectorstores.utils import maximal_marginal_relevance
if TYPE_CHECKING:

@@ -9,8 +9,8 @@ import numpy as np
from langchain.docstore.document import Document
from langchain.schema.embeddings import Embeddings
+from langchain.schema.vectorstore import VectorStore
from langchain.utils.iter import batch_iterate
-from langchain.vectorstores.base import VectorStore
from langchain.vectorstores.utils import DistanceStrategy, maximal_marginal_relevance
if TYPE_CHECKING:

@@ -30,6 +30,7 @@ from langchain.callbacks.manager import (
)
from langchain.docstore.document import Document
from langchain.schema.embeddings import Embeddings
+from langchain.schema.vectorstore import VectorStore, VectorStoreRetriever
from langchain.utilities.redis import (
_array_to_buffer,
_buffer_to_array,
@ -37,7 +38,6 @@ from langchain.utilities.redis import (
get_client,
)
from langchain.utils import get_from_dict_or_env
from langchain.vectorstores.base import VectorStore, VectorStoreRetriever
from langchain.vectorstores.redis.constants import (
REDIS_REQUIRED_MODULES,
REDIS_TAG_SEPARATOR,

@@ -6,7 +6,7 @@ from typing import Any, Iterable, List, Optional, Tuple
 from langchain.docstore.document import Document
 from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
 logger = logging.getLogger(__name__)

@@ -12,7 +12,7 @@ from langchain.docstore.base import AddableMixin, Docstore
 from langchain.docstore.document import Document
 from langchain.docstore.in_memory import InMemoryDocstore
 from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
 from langchain.vectorstores.utils import DistanceStrategy

@@ -21,7 +21,7 @@ from langchain.callbacks.manager import (
 )
 from langchain.docstore.document import Document
 from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore, VectorStoreRetriever
+from langchain.schema.vectorstore import VectorStore, VectorStoreRetriever
 from langchain.vectorstores.utils import DistanceStrategy
 DEFAULT_DISTANCE_STRATEGY = DistanceStrategy.DOT_PRODUCT

@@ -12,8 +12,8 @@ from uuid import uuid4
 from langchain.docstore.document import Document
 from langchain.schema.embeddings import Embeddings
+from langchain.schema.vectorstore import VectorStore
 from langchain.utils import guard_import
-from langchain.vectorstores.base import VectorStore
 from langchain.vectorstores.utils import maximal_marginal_relevance
 DEFAULT_K = 4  # Number of Documents to return.

@@ -15,7 +15,7 @@ from typing import (
 from langchain.docstore.document import Document
 from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
 if TYPE_CHECKING:
     import sqlite3

@@ -9,7 +9,7 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple
 from langchain.docstore.document import Document
 from langchain.pydantic_v1 import BaseSettings
 from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
 logger = logging.getLogger()
 DEBUG = False

@@ -18,7 +18,7 @@ import numpy as np
 from langchain.docstore.document import Document
 from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
 from langchain.vectorstores.utils import maximal_marginal_relevance
 if TYPE_CHECKING:

@@ -7,8 +7,8 @@ from typing import Any, Iterable, List, Optional, Type
 from langchain.docstore.document import Document
 from langchain.schema.embeddings import Embeddings
+from langchain.schema.vectorstore import VectorStore
 from langchain.utils import get_from_dict_or_env
-from langchain.vectorstores.base import VectorStore
 logger = logging.getLogger(__name__)

@@ -10,8 +10,8 @@ import numpy as np
 from langchain.docstore.document import Document
 from langchain.schema.embeddings import Embeddings
+from langchain.schema.vectorstore import VectorStore
 from langchain.utils import guard_import
-from langchain.vectorstores.base import VectorStore
 from langchain.vectorstores.utils import maximal_marginal_relevance
 logger = logging.getLogger(__name__)

@@ -20,8 +20,8 @@ from typing import (
 from langchain.docstore.document import Document
 from langchain.embeddings.base import Embeddings
+from langchain.schema.vectorstore import VectorStore
 from langchain.utils import get_from_dict_or_env
-from langchain.vectorstores.base import VectorStore
 from langchain.vectorstores.utils import DistanceStrategy
 if TYPE_CHECKING:

@@ -5,8 +5,8 @@ from typing import TYPE_CHECKING, Any, Iterable, List, Optional, Tuple, Union
 from langchain.docstore.document import Document
 from langchain.schema.embeddings import Embeddings
+from langchain.schema.vectorstore import VectorStore
 from langchain.utils import get_from_env
-from langchain.vectorstores.base import VectorStore
 if TYPE_CHECKING:
     from typesense.client import Client

@@ -8,7 +8,7 @@ from langchain.docstore.base import AddableMixin, Docstore
 from langchain.docstore.document import Document
 from langchain.docstore.in_memory import InMemoryDocstore
 from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
 def dependable_usearch_import() -> Any:

@@ -7,7 +7,7 @@ import numpy as np
 from langchain.docstore.document import Document
 from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
 from langchain.vectorstores.utils import maximal_marginal_relevance

@@ -9,7 +9,7 @@ import numpy as np
 from langchain.docstore.document import Document
 from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
 if TYPE_CHECKING:
     import vearch

@@ -11,7 +11,7 @@ import requests
 from langchain.pydantic_v1 import Field
 from langchain.schema import Document
 from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore, VectorStoreRetriever
+from langchain.schema.vectorstore import VectorStore, VectorStoreRetriever
 logger = logging.getLogger(__name__)

@@ -18,7 +18,7 @@ import numpy as np
 from langchain.docstore.document import Document
 from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
 from langchain.vectorstores.utils import maximal_marginal_relevance
 if TYPE_CHECKING:

@@ -6,7 +6,7 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple, Type
 from langchain.docstore.document import Document
 from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
 class XataVectorStore(VectorStore):

@@ -9,7 +9,7 @@ import numpy as np
 from langchain.docstore.document import Document
 from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
 from langchain.vectorstores.utils import maximal_marginal_relevance
 if TYPE_CHECKING:

@@ -9,7 +9,7 @@ from langchain.indexes import index
 from langchain.indexes._sql_record_manager import SQLRecordManager
 from langchain.schema import Document
 from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VST, VectorStore
+from langchain.schema.vectorstore import VST, VectorStore
 class ToyLoader(BaseLoader):

@@ -11,7 +11,7 @@ from langchain.retrievers.time_weighted_retriever import (
 )
 from langchain.schema import Document
 from langchain.schema.embeddings import Embeddings
-from langchain.vectorstores.base import VectorStore
+from langchain.schema.vectorstore import VectorStore
 def _get_example_memories(k: int = 4) -> List[Document]:
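
Note for downstream code: the net effect of this commit is that VectorStore, VectorStoreRetriever, and VST are now imported from langchain.schema.vectorstore rather than langchain.vectorstores.base. A minimal sketch of a version-tolerant import follows; the fallback branch is an assumption for langchain releases that predate this move, and this diff does not show whether the old path remains as a re-export.

# Sketch: prefer the new canonical path introduced by this commit.
try:
    from langchain.schema.vectorstore import VectorStore, VectorStoreRetriever
except ImportError:
    # Assumed fallback for older langchain releases that predate this refactor.
    from langchain.vectorstores.base import VectorStore, VectorStoreRetriever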
