diff --git a/docs/extras/integrations/document_loaders/cube_semantic.ipynb b/docs/extras/integrations/document_loaders/cube_semantic.ipynb index 5868d58c0f..68b1702d93 100644 --- a/docs/extras/integrations/document_loaders/cube_semantic.ipynb +++ b/docs/extras/integrations/document_loaders/cube_semantic.ipynb @@ -106,15 +106,39 @@ " - `column_data_type`\n", " - `column_title`\n", " - `column_description`\n", - " - `column_values`" + " - `column_values`\n", + " - `cube_data_obj_type`" ] }, { - "attachments": {}, - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "> page_content='Users View City, None' metadata={'table_name': 'users_view', 'column_name': 'users_view.city', 'column_data_type': 'string', 'column_title': 'Users View City', 'column_description': 'None', 'column_member_type': 'dimension', 'column_values': ['Austin', 'Chicago', 'Los Angeles', 'Mountain View', 'New York', 'Palo Alto', 'San Francisco', 'Seattle']}" + "# Given string containing page content\n", + "page_content = 'Users View City, None'\n", + "\n", + "# Given dictionary containing metadata\n", + "metadata = {\n", + " 'table_name': 'users_view',\n", + " 'column_name': 'users_view.city',\n", + " 'column_data_type': 'string',\n", + " 'column_title': 'Users View City',\n", + " 'column_description': 'None',\n", + " 'column_member_type': 'dimension',\n", + " 'column_values': [\n", + " 'Austin',\n", + " 'Chicago',\n", + " 'Los Angeles',\n", + " 'Mountain View',\n", + " 'New York',\n", + " 'Palo Alto',\n", + " 'San Francisco',\n", + " 'Seattle'\n", + " ],\n", + " 'cube_data_obj_type': 'view'\n", + "}" ] } ], diff --git a/libs/langchain/langchain/document_loaders/cube_semantic.py b/libs/langchain/langchain/document_loaders/cube_semantic.py index a29a41f02f..91364a9282 100644 --- a/libs/langchain/langchain/document_loaders/cube_semantic.py +++ b/libs/langchain/langchain/document_loaders/cube_semantic.py @@ -113,27 +113,39 @@ class CubeSemanticLoader(BaseLoader): - column_title - column_description - column_values + - cube_data_obj_type """ headers = { "Content-Type": "application/json", "Authorization": self.cube_api_token, } + logger.info(f"Loading metadata from {self.cube_api_url}...") response = requests.get(f"{self.cube_api_url}/meta", headers=headers) response.raise_for_status() raw_meta_json = response.json() - cubes = raw_meta_json.get("cubes", []) + cube_data_objects = raw_meta_json.get("cubes", []) + + logger.info(f"Found {len(cube_data_objects)} cube data objects in metadata.") + + if not cube_data_objects: + raise ValueError("No cubes found in metadata.") + docs = [] - for cube in cubes: - if cube.get("type") != "view": + for cube_data_obj in cube_data_objects: + cube_data_obj_name = cube_data_obj.get("name") + cube_data_obj_type = cube_data_obj.get("type") + cube_data_obj_is_public = cube_data_obj.get("public") + measures = cube_data_obj.get("measures", []) + dimensions = cube_data_obj.get("dimensions", []) + + logger.info(f"Processing {cube_data_obj_name}...") + + if not cube_data_obj_is_public: + logger.info(f"Skipping {cube_data_obj_name} because it is not public.") continue - cube_name = cube.get("name") - - measures = cube.get("measures", []) - dimensions = cube.get("dimensions", []) - for item in measures + dimensions: column_member_type = "measure" if item in measures else "dimension" dimension_values = [] @@ -148,13 +160,14 @@ class CubeSemanticLoader(BaseLoader): dimension_values = self._get_dimension_values(item_name) metadata = dict( - table_name=str(cube_name), + table_name=str(cube_data_obj_name), column_name=item_name, column_data_type=item_type, column_title=str(item.get("title")), column_description=str(item.get("description")), column_member_type=column_member_type, column_values=dimension_values, + cube_data_obj_type=cube_data_obj_type, ) page_content = f"{str(item.get('title'))}, " diff --git a/libs/langchain/tests/unit_tests/document_loaders/test_cube_semantic.py b/libs/langchain/tests/unit_tests/document_loaders/test_cube_semantic.py index c309f17ffe..70a71736aa 100644 --- a/libs/langchain/tests/unit_tests/document_loaders/test_cube_semantic.py +++ b/libs/langchain/tests/unit_tests/document_loaders/test_cube_semantic.py @@ -35,6 +35,7 @@ class TestCubeSemanticLoader(unittest.TestCase): { "name": "test_cube", "type": "view", + "public": True, "measures": [], "dimensions": [ {