From 61f12229dfb8d90522026d106dc42d8b6df23a6a Mon Sep 17 00:00:00 2001 From: Samantha Whitmore Date: Tue, 8 Nov 2022 18:19:39 -0800 Subject: [PATCH] Create VectorStore interface (#92) --- examples/embeddings.ipynb | 10 +++++----- langchain/__init__.py | 3 +-- langchain/vectorstores/__init__.py | 6 ++++++ langchain/vectorstores/base.py | 13 +++++++++++++ .../{ => vectorstores}/elastic_vector_search.py | 3 ++- langchain/{ => vectorstores}/faiss.py | 3 ++- 6 files changed, 29 insertions(+), 9 deletions(-) create mode 100644 langchain/vectorstores/__init__.py create mode 100644 langchain/vectorstores/base.py rename langchain/{ => vectorstores}/elastic_vector_search.py (98%) rename langchain/{ => vectorstores}/faiss.py (97%) diff --git a/examples/embeddings.ipynb b/examples/embeddings.ipynb index 46ae1051ed..fe90ee6178 100644 --- a/examples/embeddings.ipynb +++ b/examples/embeddings.ipynb @@ -8,9 +8,9 @@ "outputs": [], "source": [ "from langchain.embeddings.openai import OpenAIEmbeddings\n", - "from langchain.elastic_vector_search import ElasticVectorSearch\n", - "from langchain.faiss import FAISS\n", - "from langchain.text_splitter import CharacterTextSplitter" + "from langchain.text_splitter import CharacterTextSplitter\n", + "from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch\n", + "from langchain.vectorstores.faiss import FAISS" ] }, { @@ -69,7 +69,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "id": "4906b8a3", "metadata": {}, "outputs": [], @@ -82,7 +82,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "id": "95f9eee9", "metadata": {}, "outputs": [ diff --git a/langchain/__init__.py b/langchain/__init__.py index 0fd4d7f953..50113af259 100644 --- a/langchain/__init__.py +++ b/langchain/__init__.py @@ -16,11 +16,10 @@ from langchain.chains import ( SQLDatabaseChain, ) from langchain.docstore import Wikipedia -from langchain.elastic_vector_search import ElasticVectorSearch -from langchain.faiss import FAISS from langchain.llms import Cohere, HuggingFaceHub, OpenAI from langchain.prompts import BasePrompt, DynamicPrompt, Prompt from langchain.sql_database import SQLDatabase +from langchain.vectorstores import FAISS, ElasticVectorSearch __all__ = [ "LLMChain", diff --git a/langchain/vectorstores/__init__.py b/langchain/vectorstores/__init__.py new file mode 100644 index 0000000000..dc6dfe92e2 --- /dev/null +++ b/langchain/vectorstores/__init__.py @@ -0,0 +1,6 @@ +"""Wrappers on top of vector stores.""" +from langchain.vectorstores.base import VectorStore +from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch +from langchain.vectorstores.faiss import FAISS + +__all__ = ["ElasticVectorSearch", "FAISS", "VectorStore"] diff --git a/langchain/vectorstores/base.py b/langchain/vectorstores/base.py new file mode 100644 index 0000000000..d6da56f7e4 --- /dev/null +++ b/langchain/vectorstores/base.py @@ -0,0 +1,13 @@ +"""Interface for vector stores.""" +from abc import ABC, abstractmethod +from typing import List + +from langchain.docstore.document import Document + + +class VectorStore(ABC): + """Interface for vector stores.""" + + @abstractmethod + def similarity_search(self, query: str, k: int = 4) -> List[Document]: + """Return docs most similar to query.""" diff --git a/langchain/elastic_vector_search.py b/langchain/vectorstores/elastic_vector_search.py similarity index 98% rename from langchain/elastic_vector_search.py rename to langchain/vectorstores/elastic_vector_search.py index f5571eec60..deeee97278 100644 --- a/langchain/elastic_vector_search.py +++ b/langchain/vectorstores/elastic_vector_search.py @@ -4,6 +4,7 @@ from typing import Callable, Dict, List from langchain.docstore.document import Document from langchain.embeddings.base import Embeddings +from langchain.vectorstores.base import VectorStore def _default_text_mapping(dim: int) -> Dict: @@ -27,7 +28,7 @@ def _default_script_query(query_vector: List[int]) -> Dict: } -class ElasticVectorSearch: +class ElasticVectorSearch(VectorStore): """Wrapper around Elasticsearch as a vector database. Example: diff --git a/langchain/faiss.py b/langchain/vectorstores/faiss.py similarity index 97% rename from langchain/faiss.py rename to langchain/vectorstores/faiss.py index 5474ad43f2..81446ad200 100644 --- a/langchain/faiss.py +++ b/langchain/vectorstores/faiss.py @@ -7,9 +7,10 @@ from langchain.docstore.base import Docstore from langchain.docstore.document import Document from langchain.docstore.in_memory import InMemoryDocstore from langchain.embeddings.base import Embeddings +from langchain.vectorstores.base import VectorStore -class FAISS: +class FAISS(VectorStore): """Wrapper around FAISS vector database. To use, you should have the ``faiss`` python package installed.