2023-08-10 17:39:29 +00:00
|
|
|
from __future__ import annotations
|
|
|
|
|
2023-07-19 19:14:41 +00:00
|
|
|
from typing import TYPE_CHECKING
|
|
|
|
|
|
|
|
import pytest
|
2023-11-21 16:35:29 +00:00
|
|
|
from langchain_core.documents import Document
|
2023-07-19 19:14:41 +00:00
|
|
|
|
2023-12-11 21:53:30 +00:00
|
|
|
from langchain_community.document_loaders import GeoDataFrameLoader
|
2023-07-19 19:14:41 +00:00
|
|
|
|
|
|
|
# geopandas is only imported for static type checking; at runtime the name is
# bound to a plain string so this module can be imported without geopandas.
if not TYPE_CHECKING:
    GeoDataFrame = "geopandas.GeoDataFrame"
else:
    from geopandas import GeoDataFrame
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def sample_gdf() -> GeoDataFrame:
    """Provide a two-row GeoDataFrame read from geopandas' "nybb" dataset.

    Two columns are added so tests can compare them against document metadata:
    ``area`` (copied from ``gdf.area``) and ``crs`` (the frame's CRS rendered
    with ``to_string()``).

    This is registered with ``@pytest.fixture`` so that test functions which
    declare a ``sample_gdf`` parameter receive the returned frame; a
    ``pytest.mark`` decorator on its own does not register a fixture.

    Returns:
        The first two rows of the augmented frame (``gdf.head(2)``).
    """
    # Imported inside the function so that merely importing this module does
    # not require geopandas; the tests using this fixture carry the
    # ``requires("geopandas")`` mark.
    import geopandas

    # TODO: geopandas.datasets will be deprecated in 1.0
    path_to_data = geopandas.datasets.get_path("nybb")
    gdf = geopandas.read_file(path_to_data)
    gdf["area"] = gdf.area
    gdf["crs"] = gdf.crs.to_string()
    return gdf.head(2)
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.requires("geopandas")
def test_load_returns_list_of_documents(sample_gdf: GeoDataFrame) -> None:
    """Check that ``load()`` returns a list of two ``Document`` objects."""
    documents = GeoDataFrameLoader(sample_gdf).load()

    assert isinstance(documents, list)
    assert all(isinstance(item, Document) for item in documents)
    assert len(documents) == 2
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.requires("geopandas")
def test_load_converts_dataframe_columns_to_document_metadata(
    sample_gdf: GeoDataFrame,
) -> None:
    """Check that the ``area`` and ``crs`` columns appear in document metadata.

    Each loaded document is compared, by position, against the row of
    ``sample_gdf`` with the same label.
    """
    loader = GeoDataFrameLoader(sample_gdf)
    docs = loader.load()

    # Without this check the loop below would run zero times, and the test
    # would pass vacuously, if the loader returned no documents.
    assert len(docs) == len(sample_gdf)

    for i, doc in enumerate(docs):
        # NOTE(review): ``.loc[i, ...]`` looks rows up by label, so this
        # assumes the frame's index labels are 0..n-1 — confirm if the
        # fixture's data source ever changes.
        assert doc.metadata["area"] == sample_gdf.loc[i, "area"]
        assert doc.metadata["crs"] == sample_gdf.loc[i, "crs"]
|