mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
df234fb171
- **Description:** The current embedchain release appears to structure document metadata differently from what LangChain's existing retriever implementation expects, which causes a KeyError to be thrown. I would love for someone else to test this... --------- Co-authored-by: KKUGLER <kai.kugler@mercedes-benz.com> Co-authored-by: Harrison Chase <hw.chase.17@gmail.com> Co-authored-by: Deshraj Yadav <deshraj@gatech.edu>
47 lines
1.2 KiB
Python
47 lines
1.2 KiB
Python
"""Integration test for Embedchain."""
|
|
|
|
import os
|
|
from typing import Any
|
|
from unittest.mock import patch
|
|
|
|
import pytest
|
|
from langchain_core.documents import Document
|
|
|
|
from langchain_community.retrievers.embedchain import EmbedchainRetriever
|
|
|
|
try:
|
|
from embedchain import Pipeline
|
|
except ImportError:
|
|
pytest.skip("Requires embedchain", allow_module_level=True)
|
|
|
|
# Placeholder credential, presumably so the pipeline can be constructed
# without real OpenAI access. Use setdefault rather than plain assignment:
# this module is imported during pytest collection, so an unconditional
# write would overwrite a real OPENAI_API_KEY for every other test that
# runs in the same session.
os.environ.setdefault("OPENAI_API_KEY", "sk-xxxx")

# Canned result handed back by the patched ``Pipeline.search`` in the test
# below: one hit with its text under "context" and a non-empty "metadata"
# mapping.
context_value = [
    {
        "context": "this document is about John",
        "metadata": {
            "source": "source#1",
            "doc_id": 123,
        },
    },
]
|
|
|
|
|
|
@pytest.mark.requires("embedchain")
@patch.object(Pipeline, "search", return_value=context_value)
@patch.object(Pipeline, "add", return_value=123)
def test_embedchain_retriever(mock_add: Any, mock_search: Any) -> None:
    """Check that EmbedchainRetriever returns search hits as Documents.

    ``Pipeline.add`` and ``Pipeline.search`` are patched, so the outcome does
    not depend on real indexing or search results; ``search`` always returns
    the module-level ``context_value`` fixture.

    Args:
        mock_add: Mock standing in for ``Pipeline.add`` (the innermost patch
            decorator, hence the first injected argument).
        mock_search: Mock standing in for ``Pipeline.search``.
    """
    retriever = EmbedchainRetriever.create()
    texts = [
        "This document is about John",
    ]
    # add_texts expects an iterable of texts. Passing one bare string at a
    # time would make it iterate over that string's individual characters.
    retriever.add_texts(texts)

    docs = retriever.get_relevant_documents("doc about john")

    # The patched search yields exactly one hit.
    assert len(docs) == 1
    for doc in docs:
        assert isinstance(doc, Document)
        assert doc.page_content
        # Truthiness already guarantees the metadata mapping is non-empty.
        assert doc.metadata
|