Cube semantic loader: allow cubes processing (#9927)

We've started to receive feedback (after launch) that loading only views
is confusing.
We still consider views a good practice, since a view serves as a
"facade" for your data; however, we decided to let users make that
choice themselves, so the loader now processes cubes as well.

Addresses the questions raised in:
- https://github.com/cube-js/cube/issues/7028
- https://github.com/langchain-ai/langchain/pull/9690
This commit is contained in:
Mike Nitsenko 2023-08-29 20:21:01 +06:00 committed by GitHub
parent e80834d783
commit c80e406e95
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 51 additions and 13 deletions

View File

@ -106,15 +106,39 @@
" - `column_data_type`\n",
" - `column_title`\n",
" - `column_description`\n",
" - `column_values`"
" - `column_values`\n",
" - `cube_data_obj_type`"
]
},
{
"attachments": {},
"cell_type": "markdown",
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"> page_content='Users View City, None' metadata={'table_name': 'users_view', 'column_name': 'users_view.city', 'column_data_type': 'string', 'column_title': 'Users View City', 'column_description': 'None', 'column_member_type': 'dimension', 'column_values': ['Austin', 'Chicago', 'Los Angeles', 'Mountain View', 'New York', 'Palo Alto', 'San Francisco', 'Seattle']}"
"# Given string containing page content\n",
"page_content = 'Users View City, None'\n",
"\n",
"# Given dictionary containing metadata\n",
"metadata = {\n",
" 'table_name': 'users_view',\n",
" 'column_name': 'users_view.city',\n",
" 'column_data_type': 'string',\n",
" 'column_title': 'Users View City',\n",
" 'column_description': 'None',\n",
" 'column_member_type': 'dimension',\n",
" 'column_values': [\n",
" 'Austin',\n",
" 'Chicago',\n",
" 'Los Angeles',\n",
" 'Mountain View',\n",
" 'New York',\n",
" 'Palo Alto',\n",
" 'San Francisco',\n",
" 'Seattle'\n",
" ],\n",
" 'cube_data_obj_type': 'view'\n",
"}"
]
}
],

View File

@ -113,27 +113,39 @@ class CubeSemanticLoader(BaseLoader):
- column_title
- column_description
- column_values
- cube_data_obj_type
"""
headers = {
"Content-Type": "application/json",
"Authorization": self.cube_api_token,
}
logger.info(f"Loading metadata from {self.cube_api_url}...")
response = requests.get(f"{self.cube_api_url}/meta", headers=headers)
response.raise_for_status()
raw_meta_json = response.json()
cubes = raw_meta_json.get("cubes", [])
cube_data_objects = raw_meta_json.get("cubes", [])
logger.info(f"Found {len(cube_data_objects)} cube data objects in metadata.")
if not cube_data_objects:
raise ValueError("No cubes found in metadata.")
docs = []
for cube in cubes:
if cube.get("type") != "view":
for cube_data_obj in cube_data_objects:
cube_data_obj_name = cube_data_obj.get("name")
cube_data_obj_type = cube_data_obj.get("type")
cube_data_obj_is_public = cube_data_obj.get("public")
measures = cube_data_obj.get("measures", [])
dimensions = cube_data_obj.get("dimensions", [])
logger.info(f"Processing {cube_data_obj_name}...")
if not cube_data_obj_is_public:
logger.info(f"Skipping {cube_data_obj_name} because it is not public.")
continue
cube_name = cube.get("name")
measures = cube.get("measures", [])
dimensions = cube.get("dimensions", [])
for item in measures + dimensions:
column_member_type = "measure" if item in measures else "dimension"
dimension_values = []
@ -148,13 +160,14 @@ class CubeSemanticLoader(BaseLoader):
dimension_values = self._get_dimension_values(item_name)
metadata = dict(
table_name=str(cube_name),
table_name=str(cube_data_obj_name),
column_name=item_name,
column_data_type=item_type,
column_title=str(item.get("title")),
column_description=str(item.get("description")),
column_member_type=column_member_type,
column_values=dimension_values,
cube_data_obj_type=cube_data_obj_type,
)
page_content = f"{str(item.get('title'))}, "

View File

@ -35,6 +35,7 @@ class TestCubeSemanticLoader(unittest.TestCase):
{
"name": "test_cube",
"type": "view",
"public": True,
"measures": [],
"dimensions": [
{