mirror of
https://github.com/hwchase17/langchain
synced 2024-11-08 07:10:35 +00:00
Fixes incorrect docstore creation in faiss.py (#7026)
- **Description**: The current implementation assumes that `texts` and `ids` are the same length. If the number of `ids` passed differs from the number of `texts`, the current code `dict(zip(index_to_id.values(), documents))` neither fails nor gives any warning; it silently creates docstore entries only for the `ids` that were passed. For example, if `ids = ['A']` and `texts=["I love Open Source","I love langchain"]`, only one docstore entry is created. Instead, either two docstore entries should be created (assuming the same id value for all the elements of `texts`), or an error should be raised. - **Issue**: My change fixes this by using a dictionary comprehension instead of `zip`. This way, if the lengths of `ids` and `texts` do not match, an explicit `IndexError` will be raised. @rlancemartin, @eyurtsev
This commit is contained in:
parent
3f7213586e
commit
603a0bea29
@ -521,6 +521,13 @@ class FAISS(VectorStore):
|
|||||||
metadata = metadatas[i] if metadatas else {}
|
metadata = metadatas[i] if metadatas else {}
|
||||||
documents.append(Document(page_content=text, metadata=metadata))
|
documents.append(Document(page_content=text, metadata=metadata))
|
||||||
index_to_id = dict(enumerate(ids))
|
index_to_id = dict(enumerate(ids))
|
||||||
|
|
||||||
|
if len(index_to_id) != len(documents):
|
||||||
|
raise Exception(
|
||||||
|
f"{len(index_to_id)} ids provided for {len(documents)} documents."
|
||||||
|
" Each document should have an id."
|
||||||
|
)
|
||||||
|
|
||||||
docstore = InMemoryDocstore(dict(zip(index_to_id.values(), documents)))
|
docstore = InMemoryDocstore(dict(zip(index_to_id.values(), documents)))
|
||||||
return cls(
|
return cls(
|
||||||
embedding.embed_query,
|
embedding.embed_query,
|
||||||
|
Loading…
Reference in New Issue
Block a user