mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
Feature faiss delete (#8135)
<!-- Thank you for contributing to LangChain! Replace this comment with: - Description: docstore had two main methods, add and search; however, dealing with a docstore sometimes requires deleting an entry from it. So I have added a simple delete method that deletes items from docstore. Additionally, I have added the delete method to faiss vectorstore for the very same reason. - Issue: NA - Dependencies: NA - Tag maintainer: @rlancemartin, @eyurtsev - Twitter handle: we announce bigger features on Twitter. If your PR gets announced and you'd like a mention, we'll gladly shout you out! Please make sure your PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` to check this locally. If you're adding a new integration, please include: 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. Maintainer responsibilities: - General / Misc / if you don't know who to tag: @baskaryan - DataLoaders / VectorStores / Retrievers: @rlancemartin, @eyurtsev - Models / Prompts: @hwchase17, @baskaryan - Memory: @hwchase17 - Agents / Tools / Toolkits: @hinthornw - Tracing / Callbacks: @agola11 - Async: @agola11 If no one reviews your PR within a few days, feel free to @-mention the same people again. See contribution guidelines for more information on how to write/run tests, lint, etc: https://github.com/hwchase17/langchain/blob/master/.github/CONTRIBUTING.md --> --------- Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
parent
b57fa1a39c
commit
485d716c21
@ -80,7 +80,7 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from langchain.document_loaders import TextLoader\n",
|
"from langchain.document_loaders import TextLoader\n",
|
||||||
"\n",
|
"\n",
|
||||||
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
|
"loader = TextLoader(\"../../../extras/modules/state_of_the_union.txt\")\n",
|
||||||
"documents = loader.load()\n",
|
"documents = loader.load()\n",
|
||||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||||
"docs = text_splitter.split_documents(documents)\n",
|
"docs = text_splitter.split_documents(documents)\n",
|
||||||
@ -90,7 +90,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 11,
|
"execution_count": 3,
|
||||||
"id": "5eabdb75",
|
"id": "5eabdb75",
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"tags": []
|
"tags": []
|
||||||
@ -517,6 +517,67 @@
|
|||||||
"for doc in results:\n",
|
"for doc in results:\n",
|
||||||
" print(f\"Content: {doc.page_content}, Metadata: {doc.metadata}\")"
|
" print(f\"Content: {doc.page_content}, Metadata: {doc.metadata}\")"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "1becca53",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"## Delete\n",
|
||||||
|
"\n",
|
||||||
|
"You can also delete ids. Note that the ids to delete should be the ids in the docstore."
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
|
"id": "1408b870",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"True"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 4,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"db.delete([db.index_to_docstore_id[0]])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 6,
|
||||||
|
"id": "d13daf33",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"False"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Is now missing\n",
|
||||||
|
"0 in db.index_to_docstore_id"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "30ace43e",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
"""Interface to access to place that stores documents."""
|
"""Interface to access to place that stores documents."""
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import Dict, Union
|
from typing import Dict, List, Union
|
||||||
|
|
||||||
from langchain.docstore.document import Document
|
from langchain.docstore.document import Document
|
||||||
|
|
||||||
@ -16,6 +16,10 @@ class Docstore(ABC):
|
|||||||
If page does not exist, return similar entries.
|
If page does not exist, return similar entries.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
def delete(self, ids: List) -> None:
    """Remove the entries identified by ``ids`` from the docstore.

    This base implementation performs no deletion and always raises;
    a docstore that supports deletion overrides this method.

    Args:
        ids: Identifiers of the entries to remove.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError()
|
||||||
|
|
||||||
|
|
||||||
class AddableMixin(ABC):
|
class AddableMixin(ABC):
|
||||||
"""Mixin class that supports adding texts."""
|
"""Mixin class that supports adding texts."""
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
"""Simple in memory docstore in the form of a dict."""
|
"""Simple in memory docstore in the form of a dict."""
|
||||||
from typing import Dict, Optional, Union
|
from typing import Dict, List, Optional, Union
|
||||||
|
|
||||||
from langchain.docstore.base import AddableMixin, Docstore
|
from langchain.docstore.base import AddableMixin, Docstore
|
||||||
from langchain.docstore.document import Document
|
from langchain.docstore.document import Document
|
||||||
@ -26,6 +26,14 @@ class InMemoryDocstore(Docstore, AddableMixin):
|
|||||||
raise ValueError(f"Tried to add ids that already exist: {overlapping}")
|
raise ValueError(f"Tried to add ids that already exist: {overlapping}")
|
||||||
self._dict = {**self._dict, **texts}
|
self._dict = {**self._dict, **texts}
|
||||||
|
|
||||||
|
def delete(self, ids: List) -> None:
    """Delete the given ids from the in-memory dictionary.

    The whole request is validated before anything is removed, so a failed
    call leaves the store unchanged. Repeated ids are deleted once.

    Args:
        ids: Keys of ``self._dict`` to remove.

    Raises:
        ValueError: If ``ids`` is empty or any requested id is not present.
    """
    # De-duplicate while keeping the caller's order; popping the same key
    # twice would otherwise fail after the first removal succeeded.
    unique_ids = list(dict.fromkeys(ids))
    missing = [_id for _id in unique_ids if _id not in self._dict]
    # An empty request keeps raising, as before, because nothing matched.
    if missing or not unique_ids:
        raise ValueError(f"Tried to delete ids that does not exist: {missing}")
    for _id in unique_ids:
        del self._dict[_id]
|
||||||
|
|
||||||
def search(self, search: str) -> Union[str, Document]:
|
def search(self, search: str) -> Union[str, Document]:
|
||||||
"""Search via direct lookup.
|
"""Search via direct lookup.
|
||||||
|
|
||||||
|
@ -468,6 +468,36 @@ class FAISS(VectorStore):
|
|||||||
)
|
)
|
||||||
return docs
|
return docs
|
||||||
|
|
||||||
|
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
    """Delete entries by docstore id.

    The ids are the values of ``self.index_to_docstore_id``. Every id is
    validated before anything is changed, so a failed call leaves the index,
    the id mapping and the docstore untouched. Repeated ids are deleted once.

    Args:
        ids: List of docstore ids to delete.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        Optional[bool]: True when the deletion was carried out. Failures are
        reported by raising rather than by returning False.

    Raises:
        ValueError: If ``ids`` is None, is empty, or contains an id that is
            not present in ``self.index_to_docstore_id``.
    """
    if ids is None:
        raise ValueError("No ids provided to delete.")

    # Map docstore id -> position in the index (inverse of the stored mapping).
    docstore_id_to_position = {
        docstore_id: position
        for position, docstore_id in self.index_to_docstore_id.items()
    }

    # De-duplicate while keeping order; a repeated id would otherwise fail on
    # the second removal, after the index had already been modified.
    unique_ids = list(dict.fromkeys(ids))
    missing = [_id for _id in unique_ids if _id not in docstore_id_to_position]
    # An empty request keeps raising, as before, because nothing matched.
    if missing or not unique_ids:
        raise ValueError("ids do not exist in the current object")

    positions_to_delete = [docstore_id_to_position[_id] for _id in unique_ids]

    # Removing ids from index.
    # NOTE(review): some FAISS index types renumber the remaining vectors
    # after remove_ids; if self.index is one of them, the keys kept in
    # index_to_docstore_id may no longer match index positions -- confirm.
    self.index.remove_ids(np.array(positions_to_delete, dtype=np.int64))
    for position in positions_to_delete:
        del self.index_to_docstore_id[position]

    # Remove items from docstore.
    self.docstore.delete(unique_ids)
    return True
|
||||||
|
|
||||||
def merge_from(self, target: FAISS) -> None:
|
def merge_from(self, target: FAISS) -> None:
|
||||||
"""Merge another FAISS object with the current one.
|
"""Merge another FAISS object with the current one.
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user