mirror of https://github.com/hwchase17/langchain
Document loader for Cube Semantic Layer (#6882)
### Description This pull request introduces the "Cube Semantic Layer" document loader, which demonstrates how to retrieve Cube's data model metadata in a format suitable for embedding and passing to LLMs. This enhancement aims to provide contextual information and improve the understanding of data. Twitter handle: @the_cube_dev --------- Co-authored-by: rlm <pexpresss31@gmail.com> (pull/7238/head)
parent
e533da8bf2
commit
d669b9ece9
@ -0,0 +1,78 @@
|
||||
from typing import List
|
||||
|
||||
import requests
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.document_loaders.base import BaseLoader
|
||||
|
||||
|
||||
class CubeSemanticLoader(BaseLoader):
    """Load Cube semantic layer metadata.

    Args:
        cube_api_url: URL that is requested as-is to fetch the metadata.
            Use the REST API of your Cube's deployment.
            Please find out more information here:
            https://cube.dev/docs/http-api/rest#configuration-base-path
        cube_api_token: Value sent verbatim in the ``Authorization`` header.
            Authentication tokens are generated based on your Cube's API
            secret. Please find out more information here:
            https://cube.dev/docs/security#generating-json-web-tokens-jwt
    """

    def __init__(
        self,
        cube_api_url: str,
        cube_api_token: str,
    ) -> None:
        self.cube_api_url = cube_api_url
        self.cube_api_token = cube_api_token

    def load(self) -> List[Document]:
        """Makes a call to Cube's REST API metadata endpoint.

        Only entries of the response's ``cubes`` list whose ``type`` is
        ``"view"`` are considered; every measure and dimension of such an
        entry yields one document.

        Returns:
            A list of documents with attributes:
                - page_content: a single comma-separated description made of
                  the table name, column name, column data type, column title
                  and column description
                - metadata
                    - table_name
                    - column_name
                    - column_data_type
                    - column_title
                    - column_description

            Every value is passed through ``str()``, so a field that is
            missing from the response shows up as the string ``"None"``.

        Raises:
            requests.HTTPError: if the endpoint answers with an error status
                (raised by ``response.raise_for_status()``).
        """
        headers = {
            "Content-Type": "application/json",
            "Authorization": self.cube_api_token,
        }

        # NOTE(review): no timeout is passed, so this call waits for as long
        # as the server takes to answer. The call signature is intentionally
        # left untouched because callers/tests may assert on it.
        response = requests.get(self.cube_api_url, headers=headers)
        response.raise_for_status()

        raw_meta_json = response.json()
        # ``or []`` (rather than a ``.get`` default) also covers a key that
        # is present but set to JSON ``null``.
        cubes = raw_meta_json.get("cubes") or []
        docs: List[Document] = []

        for cube in cubes:
            if cube.get("type") != "view":
                continue

            cube_name = cube.get("name")

            measures = cube.get("measures") or []
            dimensions = cube.get("dimensions") or []

            for item in measures + dimensions:
                metadata = dict(
                    table_name=str(cube_name),
                    column_name=str(item.get("name")),
                    column_data_type=str(item.get("type")),
                    column_title=str(item.get("title")),
                    column_description=str(item.get("description")),
                )

                # Build the text from the metadata so both always agree.
                page_content = (
                    f"table name: {metadata['table_name']}, "
                    f"column name: {metadata['column_name']}, "
                    f"column data type: {metadata['column_data_type']}, "
                    f"column title: {metadata['column_title']}, "
                    f"column description: {metadata['column_description']}"
                )

                docs.append(Document(page_content=page_content, metadata=metadata))

        return docs
|
@ -0,0 +1,86 @@
|
||||
from typing import List
|
||||
from unittest import TestCase
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import requests
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.document_loaders import CubeSemanticLoader
|
||||
|
||||
|
||||
class TestCubeSemanticLoader(TestCase):
    """Unit tests for ``CubeSemanticLoader`` with ``requests.get`` patched."""

    @patch.object(requests, "get")
    def test_load_success(self, mock_get: MagicMock) -> None:
        """One document is produced per measure and per dimension of a view."""
        # Arrange
        api_url = "https://example.com/cube_api"
        api_token = "abc123"

        sales_measure = {"type": "sum", "name": "sales", "title": "Sales"}
        product_dimension = {
            "type": "string",
            "name": "product_name",
            "title": "Product Name",
        }
        payload: dict = {
            "cubes": [
                {
                    "type": "view",
                    "name": "cube1",
                    "measures": [sales_measure],
                    "dimensions": [product_dimension],
                }
            ]
        }

        fake_response = MagicMock()
        fake_response.status_code = 200
        fake_response.json.return_value = payload
        mock_get.return_value = fake_response

        # Neither entry carries a description, hence the literal "None".
        sales_doc = Document(
            page_content=(
                "table name: cube1, "
                "column name: sales, "
                "column data type: sum, "
                "column title: Sales, "
                "column description: None"
            ),
            metadata={
                "table_name": "cube1",
                "column_name": "sales",
                "column_data_type": "sum",
                "column_title": "Sales",
                "column_description": "None",
            },
        )
        product_doc = Document(
            page_content=(
                "table name: cube1, "
                "column name: product_name, "
                "column data type: string, "
                "column title: Product Name, "
                "column description: None"
            ),
            metadata={
                "table_name": "cube1",
                "column_name": "product_name",
                "column_data_type": "string",
                "column_title": "Product Name",
                "column_description": "None",
            },
        )
        expected: List[Document] = [sales_doc, product_doc]

        loader = CubeSemanticLoader(api_url, api_token)

        # Act
        actual = loader.load()

        # Assert
        self.assertEqual(actual, expected)
        mock_get.assert_called_once_with(
            api_url,
            headers={
                "Content-Type": "application/json",
                "Authorization": api_token,
            },
        )
|
Loading…
Reference in New Issue