mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
Feature faiss delete (#8135)
<!-- Thank you for contributing to LangChain! Replace this comment with: - Description: docstore had two main method: add and search, however, dealing with docstore sometimes requires deleting an entry from docstore. So I have added a simple delete method that deletes items from docstore. Additionally, I have added the delete method to faiss vectorstore for the very same reason. - Issue: NA - Dependencies: NA - Tag maintainer: @rlancemartin, @eyurtsev - Twitter handle: we announce bigger features on Twitter. If your PR gets announced and you'd like a mention, we'll gladly shout you out! Please make sure you're PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` to check this locally. If you're adding a new integration, please include: 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. Maintainer responsibilities: - General / Misc / if you don't know who to tag: @baskaryan - DataLoaders / VectorStores / Retrievers: @rlancemartin, @eyurtsev - Models / Prompts: @hwchase17, @baskaryan - Memory: @hwchase17 - Agents / Tools / Toolkits: @hinthornw - Tracing / Callbacks: @agola11 - Async: @agola11 If no one reviews your PR within a few days, feel free to @-mention the same people again. See contribution guidelines for more information on how to write/run tests, lint, etc: https://github.com/hwchase17/langchain/blob/master/.github/CONTRIBUTING.md --> --------- Co-authored-by: Harrison Chase <hw.chase.17@gmail.com>
This commit is contained in:
parent
b57fa1a39c
commit
485d716c21
@ -80,7 +80,7 @@
|
||||
"source": [
|
||||
"from langchain.document_loaders import TextLoader\n",
|
||||
"\n",
|
||||
"loader = TextLoader(\"../../../state_of_the_union.txt\")\n",
|
||||
"loader = TextLoader(\"../../../extras/modules/state_of_the_union.txt\")\n",
|
||||
"documents = loader.load()\n",
|
||||
"text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
||||
"docs = text_splitter.split_documents(documents)\n",
|
||||
@ -90,7 +90,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 3,
|
||||
"id": "5eabdb75",
|
||||
"metadata": {
|
||||
"tags": []
|
||||
@ -517,6 +517,67 @@
|
||||
"for doc in results:\n",
|
||||
" print(f\"Content: {doc.page_content}, Metadata: {doc.metadata}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1becca53",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Delete\n",
|
||||
"\n",
|
||||
"You can also delete ids. Note that the ids to delete should be the ids in the docstore."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "1408b870",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db.delete([db.index_to_docstore_id[0]])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "d13daf33",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"False"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Is now missing\n",
|
||||
"0 in db.index_to_docstore_id"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "30ace43e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
@ -1,6 +1,6 @@
|
||||
"""Interface to access to place that stores documents."""
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, Union
|
||||
from typing import Dict, List, Union
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
|
||||
@ -16,6 +16,10 @@ class Docstore(ABC):
|
||||
If page does not exist, return similar entries.
|
||||
"""
|
||||
|
||||
def delete(self, ids: List) -> None:
|
||||
"""Deleting IDs from in memory dictionary."""
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class AddableMixin(ABC):
|
||||
"""Mixin class that supports adding texts."""
|
||||
|
@ -1,5 +1,5 @@
|
||||
"""Simple in memory docstore in the form of a dict."""
|
||||
from typing import Dict, Optional, Union
|
||||
from typing import Dict, List, Optional, Union
|
||||
|
||||
from langchain.docstore.base import AddableMixin, Docstore
|
||||
from langchain.docstore.document import Document
|
||||
@ -26,6 +26,14 @@ class InMemoryDocstore(Docstore, AddableMixin):
|
||||
raise ValueError(f"Tried to add ids that already exist: {overlapping}")
|
||||
self._dict = {**self._dict, **texts}
|
||||
|
||||
def delete(self, ids: List) -> None:
|
||||
"""Deleting IDs from in memory dictionary."""
|
||||
overlapping = set(ids).intersection(self._dict)
|
||||
if not overlapping:
|
||||
raise ValueError(f"Tried to delete ids that does not exist: {ids}")
|
||||
for _id in ids:
|
||||
self._dict.pop(_id)
|
||||
|
||||
def search(self, search: str) -> Union[str, Document]:
|
||||
"""Search via direct lookup.
|
||||
|
||||
|
@ -468,6 +468,36 @@ class FAISS(VectorStore):
|
||||
)
|
||||
return docs
|
||||
|
||||
def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
|
||||
"""Delete by ID. These are the IDs in the vectorstore.
|
||||
|
||||
Args:
|
||||
ids: List of ids to delete.
|
||||
|
||||
Returns:
|
||||
Optional[bool]: True if deletion is successful,
|
||||
False otherwise, None if not implemented.
|
||||
"""
|
||||
if ids is None:
|
||||
raise ValueError("No ids provided to delete.")
|
||||
|
||||
overlapping = set(ids).intersection(self.index_to_docstore_id.values())
|
||||
if not overlapping:
|
||||
raise ValueError("ids do not exist in the current object")
|
||||
|
||||
_reversed_index = {v: k for k, v in self.index_to_docstore_id.items()}
|
||||
|
||||
index_to_delete = [_reversed_index[i] for i in ids]
|
||||
|
||||
# Removing ids from index.
|
||||
self.index.remove_ids(np.array(index_to_delete, dtype=np.int64))
|
||||
for _id in index_to_delete:
|
||||
del self.index_to_docstore_id[_id]
|
||||
|
||||
# Remove items from docstore.
|
||||
self.docstore.delete(ids)
|
||||
return True
|
||||
|
||||
def merge_from(self, target: FAISS) -> None:
|
||||
"""Merge another FAISS object with the current one.
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user