mirror of
https://github.com/hwchase17/langchain
synced 2024-11-04 06:00:26 +00:00
Cube semantic loader: allow cubes processing (#9927)
After launch, we started receiving feedback that using only views is confusing. We still consider this good practice, since a view serves as a "facade" for your data; however, we decided to let users make this choice themselves. Addresses the questions raised in: - https://github.com/cube-js/cube/issues/7028 - https://github.com/langchain-ai/langchain/pull/9690
This commit is contained in:
parent
e80834d783
commit
c80e406e95
@ -106,15 +106,39 @@
|
||||
" - `column_data_type`\n",
|
||||
" - `column_title`\n",
|
||||
" - `column_description`\n",
|
||||
" - `column_values`"
|
||||
" - `column_values`\n",
|
||||
" - `cube_data_obj_type`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"> page_content='Users View City, None' metadata={'table_name': 'users_view', 'column_name': 'users_view.city', 'column_data_type': 'string', 'column_title': 'Users View City', 'column_description': 'None', 'column_member_type': 'dimension', 'column_values': ['Austin', 'Chicago', 'Los Angeles', 'Mountain View', 'New York', 'Palo Alto', 'San Francisco', 'Seattle']}"
|
||||
"# Given string containing page content\n",
|
||||
"page_content = 'Users View City, None'\n",
|
||||
"\n",
|
||||
"# Given dictionary containing metadata\n",
|
||||
"metadata = {\n",
|
||||
" 'table_name': 'users_view',\n",
|
||||
" 'column_name': 'users_view.city',\n",
|
||||
" 'column_data_type': 'string',\n",
|
||||
" 'column_title': 'Users View City',\n",
|
||||
" 'column_description': 'None',\n",
|
||||
" 'column_member_type': 'dimension',\n",
|
||||
" 'column_values': [\n",
|
||||
" 'Austin',\n",
|
||||
" 'Chicago',\n",
|
||||
" 'Los Angeles',\n",
|
||||
" 'Mountain View',\n",
|
||||
" 'New York',\n",
|
||||
" 'Palo Alto',\n",
|
||||
" 'San Francisco',\n",
|
||||
" 'Seattle'\n",
|
||||
" ],\n",
|
||||
" 'cube_data_obj_type': 'view'\n",
|
||||
"}"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
@ -113,27 +113,39 @@ class CubeSemanticLoader(BaseLoader):
|
||||
- column_title
|
||||
- column_description
|
||||
- column_values
|
||||
- cube_data_obj_type
|
||||
"""
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": self.cube_api_token,
|
||||
}
|
||||
|
||||
logger.info(f"Loading metadata from {self.cube_api_url}...")
|
||||
response = requests.get(f"{self.cube_api_url}/meta", headers=headers)
|
||||
response.raise_for_status()
|
||||
raw_meta_json = response.json()
|
||||
cubes = raw_meta_json.get("cubes", [])
|
||||
cube_data_objects = raw_meta_json.get("cubes", [])
|
||||
|
||||
logger.info(f"Found {len(cube_data_objects)} cube data objects in metadata.")
|
||||
|
||||
if not cube_data_objects:
|
||||
raise ValueError("No cubes found in metadata.")
|
||||
|
||||
docs = []
|
||||
|
||||
for cube in cubes:
|
||||
if cube.get("type") != "view":
|
||||
for cube_data_obj in cube_data_objects:
|
||||
cube_data_obj_name = cube_data_obj.get("name")
|
||||
cube_data_obj_type = cube_data_obj.get("type")
|
||||
cube_data_obj_is_public = cube_data_obj.get("public")
|
||||
measures = cube_data_obj.get("measures", [])
|
||||
dimensions = cube_data_obj.get("dimensions", [])
|
||||
|
||||
logger.info(f"Processing {cube_data_obj_name}...")
|
||||
|
||||
if not cube_data_obj_is_public:
|
||||
logger.info(f"Skipping {cube_data_obj_name} because it is not public.")
|
||||
continue
|
||||
|
||||
cube_name = cube.get("name")
|
||||
|
||||
measures = cube.get("measures", [])
|
||||
dimensions = cube.get("dimensions", [])
|
||||
|
||||
for item in measures + dimensions:
|
||||
column_member_type = "measure" if item in measures else "dimension"
|
||||
dimension_values = []
|
||||
@ -148,13 +160,14 @@ class CubeSemanticLoader(BaseLoader):
|
||||
dimension_values = self._get_dimension_values(item_name)
|
||||
|
||||
metadata = dict(
|
||||
table_name=str(cube_name),
|
||||
table_name=str(cube_data_obj_name),
|
||||
column_name=item_name,
|
||||
column_data_type=item_type,
|
||||
column_title=str(item.get("title")),
|
||||
column_description=str(item.get("description")),
|
||||
column_member_type=column_member_type,
|
||||
column_values=dimension_values,
|
||||
cube_data_obj_type=cube_data_obj_type,
|
||||
)
|
||||
|
||||
page_content = f"{str(item.get('title'))}, "
|
||||
|
@ -35,6 +35,7 @@ class TestCubeSemanticLoader(unittest.TestCase):
|
||||
{
|
||||
"name": "test_cube",
|
||||
"type": "view",
|
||||
"public": True,
|
||||
"measures": [],
|
||||
"dimensions": [
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user