langchain/libs/community/tests/integration_tests/document_loaders/test_tidb.py

77 lines
2.3 KiB
Python
Raw Normal View History

import os
import pytest
from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine
from langchain_community.document_loaders import TiDBLoader
try:
CONNECTION_STRING = os.getenv("TEST_TiDB_CONNECTION_URL", "")
if CONNECTION_STRING == "":
raise OSError("TEST_TiDB_URL environment variable is not set")
tidb_available = True
except (OSError, ImportError):
tidb_available = False
@pytest.mark.skipif(not tidb_available, reason="tidb is not available")
def test_load_documents() -> None:
"""Test loading documents from TiDB."""
# Connect to the database
engine = create_engine(CONNECTION_STRING)
metadata = MetaData()
table_name = "tidb_loader_intergration_test"
# Create a test table
test_table = Table(
table_name,
metadata,
Column("id", Integer, primary_key=True),
Column("name", String(255)),
Column("description", String(255)),
)
metadata.create_all(engine)
with engine.connect() as connection:
transaction = connection.begin()
try:
connection.execute(
test_table.insert(),
[
{"name": "Item 1", "description": "Description of Item 1"},
{"name": "Item 2", "description": "Description of Item 2"},
{"name": "Item 3", "description": "Description of Item 3"},
],
)
transaction.commit()
except:
transaction.rollback()
raise
loader = TiDBLoader(
connection_string=CONNECTION_STRING,
query=f"SELECT * FROM {table_name};",
page_content_columns=["name", "description"],
metadata_columns=["id"],
)
documents = loader.load()
test_table.drop(bind=engine)
# check
assert len(documents) == 3
assert (
documents[0].page_content == "name: Item 1\ndescription: Description of Item 1"
)
assert documents[0].metadata == {"id": 1}
assert (
documents[1].page_content == "name: Item 2\ndescription: Description of Item 2"
)
assert documents[1].metadata == {"id": 2}
assert (
documents[2].page_content == "name: Item 3\ndescription: Description of Item 3"
)
assert documents[2].metadata == {"id": 3}