mirror of
https://github.com/hwchase17/langchain
synced 2024-11-11 19:11:02 +00:00
afa2d85405
- Description: Added the missing `from_documents` method to `KNNRetriever`. This makes it possible to supply metadata through LangChain `Document`s and gives `KNNRetriever` parity with the other retrievers, which already have `from_documents`. - Issue: None - Dependencies: None - Twitter handle: None Co-authored-by: Victor Adan <vadan@netroadshow.com> Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com>
30 lines
1.1 KiB
Python
30 lines
1.1 KiB
Python
from langchain_core.documents import Document
|
|
|
|
from langchain_community.embeddings import FakeEmbeddings
|
|
from langchain_community.retrievers.knn import KNNRetriever
|
|
|
|
|
|
class TestKNNRetriever:
    """Construction tests for ``KNNRetriever`` backed by ``FakeEmbeddings``."""

    def test_from_texts(self) -> None:
        """Build a retriever from raw strings and check how many it stores."""
        embedder = FakeEmbeddings(size=100)
        sentences = ["I have a pen.", "Do you have a pen?", "I have a bag."]

        retriever = KNNRetriever.from_texts(texts=sentences, embeddings=embedder)

        # One stored text is expected per input sentence.
        assert len(retriever.texts) == 3

    def test_from_documents(self) -> None:
        """Build a retriever from ``Document`` objects and check its contents.

        Both the page contents and the per-document metadata must come back
        in the same order in which the documents were supplied.
        """
        expected_texts = ["I have a pen.", "Do you have a pen?", "I have a bag."]
        expected_metadatas = [{"page": 1}, {"page": 2}, {"page": 3}]

        # Pages are numbered from 1, matching ``expected_metadatas`` above.
        docs = [
            Document(page_content=content, metadata={"page": page_number})
            for page_number, content in enumerate(expected_texts, start=1)
        ]
        embedder = FakeEmbeddings(size=100)

        retriever = KNNRetriever.from_documents(documents=docs, embeddings=embedder)

        assert retriever.texts == expected_texts
        assert retriever.metadatas == expected_metadatas
|