mirror of https://github.com/hwchase17/langchain
Extend Cube Semantic Loader functionality (#8186)
**PR Description:** This pull request introduces several enhancements and new features to the `CubeSemanticLoader`. The changes include the following: 1. Added imports for the `json` and `time` modules. 2. Added new constructor parameters: `load_dimension_values`, `dimension_values_limit`, `dimension_values_max_retries`, and `dimension_values_retry_delay`. 3. Updated the class documentation with descriptions for the new constructor parameters. 4. Added a new private method `_get_dimension_values()` to retrieve dimension values from Cube's REST API. 5. Modified the `load()` method to load dimension values for string dimensions if `load_dimension_values` is set to `True`. 6. Updated the API endpoint in the `load()` method from the base URL to the metadata endpoint. 7. Refactored the code to retrieve metadata from the response JSON. 8. Added the `column_member_type` field to the metadata dictionary to indicate whether a column is a measure or a dimension. 9. Added the `column_values` field to the metadata dictionary to store the dimension values retrieved from Cube's API. 10. Modified the `page_content` construction to include only the column title and description, instead of the table name, column name, data type, title, and description. These changes improve the functionality and flexibility of the `CubeSemanticLoader` class by allowing the loading of dimension values and providing more detailed metadata for each document. --------- Co-authored-by: Bagatur <baskaryan@gmail.com> pull/8202/head
parent
82b8d8596c
commit
d983046f90
@ -1,86 +1,61 @@
|
|||||||
from typing import List
|
import unittest
|
||||||
from unittest import TestCase
|
from unittest.mock import MagicMock, Mock, patch
|
||||||
from unittest.mock import MagicMock, patch
|
|
||||||
|
|
||||||
import requests
|
|
||||||
|
|
||||||
from langchain.docstore.document import Document
|
|
||||||
from langchain.document_loaders import CubeSemanticLoader
|
from langchain.document_loaders import CubeSemanticLoader
|
||||||
|
|
||||||
|
MODULE_PATH = "langchain.document_loaders.cube_semantic.CubeSemanticLoader"
|
||||||
|
|
||||||
|
|
||||||
class TestCubeSemanticLoader(TestCase):
|
class TestCubeSemanticLoader(unittest.TestCase):
|
||||||
@patch.object(requests, "get")
|
def setUp(self) -> None:
|
||||||
def test_load_success(self, mock_get: MagicMock) -> None:
|
self.loader = CubeSemanticLoader(
|
||||||
# Arrange
|
cube_api_url="http://example.com", cube_api_token="test_token"
|
||||||
cube_api_url: str = "https://example.com/cube_api"
|
)
|
||||||
cube_api_token: str = "abc123"
|
|
||||||
mock_response: MagicMock = MagicMock()
|
@patch("requests.request")
|
||||||
|
def test_get_dimension_values(self, mock_request: MagicMock) -> None:
|
||||||
|
mock_response = Mock()
|
||||||
mock_response.status_code = 200
|
mock_response.status_code = 200
|
||||||
mock_response_json: dict = {
|
mock_response.json.return_value = {"data": [{"test_dimension": "value1"}]}
|
||||||
|
mock_request.return_value = mock_response
|
||||||
|
|
||||||
|
values = self.loader._get_dimension_values("test_dimension")
|
||||||
|
self.assertEqual(values, ["value1"])
|
||||||
|
|
||||||
|
@patch("requests.get")
|
||||||
|
@patch(f"{MODULE_PATH}._get_dimension_values")
|
||||||
|
def test_load(
|
||||||
|
self, mock_get_dimension_values: MagicMock, mock_get: MagicMock
|
||||||
|
) -> None:
|
||||||
|
# Mocking the response
|
||||||
|
mock_response = Mock()
|
||||||
|
mock_response.raise_for_status.return_value = None
|
||||||
|
mock_response.json.return_value = {
|
||||||
"cubes": [
|
"cubes": [
|
||||||
{
|
{
|
||||||
|
"name": "test_cube",
|
||||||
"type": "view",
|
"type": "view",
|
||||||
"name": "cube1",
|
"measures": [],
|
||||||
"measures": [{"type": "sum", "name": "sales", "title": "Sales"}],
|
|
||||||
"dimensions": [
|
"dimensions": [
|
||||||
{
|
{
|
||||||
|
"name": "test_dimension",
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"name": "product_name",
|
"title": "Test Title",
|
||||||
"title": "Product Name",
|
"description": "Test Description",
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
mock_response.json.return_value = mock_response_json
|
|
||||||
mock_get.return_value = mock_response
|
mock_get.return_value = mock_response
|
||||||
|
|
||||||
expected_docs: List[Document] = [
|
mock_get_dimension_values.return_value = ["value1", "value2"]
|
||||||
Document(
|
|
||||||
page_content=(
|
|
||||||
"table name: cube1, "
|
|
||||||
"column name: sales, "
|
|
||||||
"column data type: sum, "
|
|
||||||
"column title: Sales, "
|
|
||||||
"column description: None"
|
|
||||||
),
|
|
||||||
metadata={
|
|
||||||
"table_name": "cube1",
|
|
||||||
"column_name": "sales",
|
|
||||||
"column_data_type": "sum",
|
|
||||||
"column_title": "Sales",
|
|
||||||
"column_description": "None",
|
|
||||||
},
|
|
||||||
),
|
|
||||||
Document(
|
|
||||||
page_content=(
|
|
||||||
"table name: cube1, "
|
|
||||||
"column name: product_name, "
|
|
||||||
"column data type: string, "
|
|
||||||
"column title: Product Name, "
|
|
||||||
"column description: None"
|
|
||||||
),
|
|
||||||
metadata={
|
|
||||||
"table_name": "cube1",
|
|
||||||
"column_name": "product_name",
|
|
||||||
"column_data_type": "string",
|
|
||||||
"column_title": "Product Name",
|
|
||||||
"column_description": "None",
|
|
||||||
},
|
|
||||||
),
|
|
||||||
]
|
|
||||||
|
|
||||||
loader: CubeSemanticLoader = CubeSemanticLoader(cube_api_url, cube_api_token)
|
documents = self.loader.load()
|
||||||
|
self.assertEqual(len(documents), 1)
|
||||||
|
self.assertEqual(documents[0].page_content, "Test Title, Test Description")
|
||||||
|
self.assertEqual(documents[0].metadata["column_values"], ["value1", "value2"])
|
||||||
|
|
||||||
# Act
|
|
||||||
result: List[Document] = loader.load()
|
|
||||||
|
|
||||||
# Assert
|
if __name__ == "__main__":
|
||||||
self.assertEqual(result, expected_docs)
|
unittest.main()
|
||||||
mock_get.assert_called_once_with(
|
|
||||||
cube_api_url,
|
|
||||||
headers={
|
|
||||||
"Content-Type": "application/json",
|
|
||||||
"Authorization": cube_api_token,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
Loading…
Reference in New Issue