From 06e524416c18543d5fd4dcbebb9cdf4b56c47db4 Mon Sep 17 00:00:00 2001 From: Eugene Yurtsev Date: Fri, 19 May 2023 11:25:52 -0400 Subject: [PATCH] power bi api wrapper integration tests & bug fix (#4983) # Powerbi API wrapper bug fix + integration tests - Bug fix by removing `TYPE_CHECKING` in utilities/powerbi.py - Added integration test for power bi api in utilities/test_powerbi_api.py - Added integration test for power bi agent in agent/test_powerbi_agent.py - Edited .env.example to help set up power bi related environment variables - Updated demo notebook with working code in docs/.../examples/powerbi.ipynb - AzureOpenAI -> ChatOpenAI Notes: Chat models (gpt3.5, gpt4) are much more capable than davinci at writing DAX queries, so that is important to getting the agent to work properly. Interestingly, gpt3.5-turbo needed the examples=DEFAULT_FEWSHOT_EXAMPLES to write consistent DAX queries, so gpt4 seems necessary as the smart llm. Fixes #4325 ## Before submitting Azure-core and Azure-identity are necessary dependencies. Check integration tests with the following: `pytest tests/integration_tests/utilities/test_powerbi_api.py` `pytest tests/integration_tests/agent/test_powerbi_agent.py` You will need a power bi account with a dataset id + table name in order to test. See .env.example for details. ## Who can review? 
@hwchase17 @vowelparrot --------- Co-authored-by: aditya-pethe --- .../agents/toolkits/examples/powerbi.ipynb | 138 ++++++++---------- langchain/utilities/powerbi.py | 14 +- tests/integration_tests/.env.example | 10 +- .../agent/test_powerbi_agent.py | 47 ++++++ .../utilities/test_powerbi_api.py | 36 +++++ tests/unit_tests/tools/powerbi/__init__.py | 0 .../unit_tests/tools/powerbi/test_powerbi.py | 10 ++ 7 files changed, 174 insertions(+), 81 deletions(-) create mode 100644 tests/integration_tests/agent/test_powerbi_agent.py create mode 100644 tests/integration_tests/utilities/test_powerbi_api.py create mode 100644 tests/unit_tests/tools/powerbi/__init__.py create mode 100644 tests/unit_tests/tools/powerbi/test_powerbi.py diff --git a/docs/modules/agents/toolkits/examples/powerbi.ipynb b/docs/modules/agents/toolkits/examples/powerbi.ipynb index f1bfc110..8b4e1d97 100644 --- a/docs/modules/agents/toolkits/examples/powerbi.ipynb +++ b/docs/modules/agents/toolkits/examples/powerbi.ipynb @@ -1,10 +1,7 @@ { "cells": [ { - "attachments": {}, "cell_type": "markdown", - "id": "0e499e90-7a6d-4fab-8aab-31a4df417601", - "metadata": {}, "source": [ "# PowerBI Dataset Agent\n", "\n", @@ -17,46 +14,41 @@ "- You can also supply a username to impersonate for use with datasets that have RLS enabled. \n", "- The toolkit uses a LLM to create the query from the question, the agent uses the LLM for the overall execution.\n", "- Testing was done mostly with a `text-davinci-003` model, codex models did not seem to perform ver well." 
- ] + ], + "metadata": {}, + "attachments": {} }, { "cell_type": "markdown", - "id": "ec927ac6-9b2a-4e8a-9a6e-3e429191875c", - "metadata": { - "tags": [] - }, "source": [ "## Initialization" - ] + ], + "metadata": { + "tags": [] + } }, { "cell_type": "code", "execution_count": null, - "id": "53422913-967b-4f2a-8022-00269c1be1b1", - "metadata": { - "tags": [] - }, - "outputs": [], "source": [ "from langchain.agents.agent_toolkits import create_pbi_agent\n", "from langchain.agents.agent_toolkits import PowerBIToolkit\n", "from langchain.utilities.powerbi import PowerBIDataset\n", - "from langchain.llms.openai import AzureOpenAI\n", + "from langchain.chat_models import ChatOpenAI\n", "from langchain.agents import AgentExecutor\n", "from azure.identity import DefaultAzureCredential" - ] + ], + "outputs": [], + "metadata": { + "tags": [] + } }, { "cell_type": "code", "execution_count": null, - "id": "090f3699-79c6-4ce1-ab96-a94f0121fd64", - "metadata": { - "tags": [] - }, - "outputs": [], "source": [ - "fast_llm = AzureOpenAI(temperature=0.5, max_tokens=1000, deployment_name=\"gpt-35-turbo\", verbose=True)\n", - "smart_llm = AzureOpenAI(temperature=0, max_tokens=100, deployment_name=\"gpt-4\", verbose=True)\n", + "fast_llm = ChatOpenAI(temperature=0.5, max_tokens=1000, model_name=\"gpt-3.5-turbo\", verbose=True)\n", + "smart_llm = ChatOpenAI(temperature=0, max_tokens=100, model_name=\"gpt-4\", verbose=True)\n", "\n", "toolkit = PowerBIToolkit(\n", " powerbi=PowerBIDataset(dataset_id=\"\", table_names=['table1', 'table2'], credential=DefaultAzureCredential()), \n", @@ -68,97 +60,90 @@ " toolkit=toolkit,\n", " verbose=True,\n", ")" - ] + ], + "outputs": [], + "metadata": { + "tags": [] + } }, { "cell_type": "markdown", - "id": "36ae48c7-cb08-4fef-977e-c7d4b96a464b", - "metadata": {}, "source": [ "## Example: describing a table" - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": null, - "id": "ff70e83d-5ad0-4fc7-bb96-27d82ac166d7", - "metadata": { - 
"tags": [] - }, - "outputs": [], "source": [ "agent_executor.run(\"Describe table1\")" - ] + ], + "outputs": [], + "metadata": { + "tags": [] + } }, { - "attachments": {}, "cell_type": "markdown", - "id": "9abcfe8e-1868-42a4-8345-ad2d9b44c681", - "metadata": {}, "source": [ "## Example: simple query on a table\n", "In this example, the agent actually figures out the correct query to get a row count of the table." - ] + ], + "metadata": {}, + "attachments": {} }, { "cell_type": "code", "execution_count": null, - "id": "bea76658-a65b-47e2-b294-6d52c5556246", - "metadata": { - "tags": [] - }, - "outputs": [], "source": [ "agent_executor.run(\"How many records are in table1?\")" - ] + ], + "outputs": [], + "metadata": { + "tags": [] + } }, { "cell_type": "markdown", - "id": "6fbc26af-97e4-4a21-82aa-48bdc992da26", - "metadata": {}, "source": [ "## Example: running queries" - ] + ], + "metadata": {} }, { "cell_type": "code", "execution_count": null, - "id": "17bea710-4a23-4de0-b48e-21d57be48293", - "metadata": { - "tags": [] - }, - "outputs": [], "source": [ "agent_executor.run(\"How many records are there by dimension1 in table2?\")" - ] + ], + "outputs": [], + "metadata": { + "tags": [] + } }, { "cell_type": "code", "execution_count": null, - "id": "474dddda-c067-4eeb-98b1-e763ee78b18c", - "metadata": { - "tags": [] - }, - "outputs": [], "source": [ "agent_executor.run(\"What unique values are there for dimensions2 in table2\")" - ] + ], + "outputs": [], + "metadata": { + "tags": [] + } }, { - "attachments": {}, "cell_type": "markdown", - "id": "6fd950e4", - "metadata": {}, "source": [ "## Example: add your own few-shot prompts" - ] + ], + "metadata": {}, + "attachments": {} }, { "cell_type": "code", "execution_count": null, - "id": "87d677f9", - "metadata": {}, - "outputs": [], "source": [ "#fictional example\n", "few_shots = \"\"\"\n", @@ -182,24 +167,24 @@ " toolkit=toolkit,\n", " verbose=True,\n", ")" - ] + ], + "outputs": [], + "metadata": {} }, { "cell_type": 
"code", "execution_count": null, - "id": "33f4bb43", - "metadata": {}, - "outputs": [], "source": [ "agent_executor.run(\"What was the maximum of value in revenue in dollars in 2022?\")" - ] + ], + "outputs": [], + "metadata": {} } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" + "name": "python3", + "display_name": "Python 3.9.16 64-bit" }, "language_info": { "codemirror_mode": { @@ -211,9 +196,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.5" + "version": "3.9.16" + }, + "interpreter": { + "hash": "397704579725e15f5c7cb49fe5f0341eb7531c82d19f2c29d197e8b64ab5776b" } }, "nbformat": 4, "nbformat_minor": 5 -} +} \ No newline at end of file diff --git a/langchain/utilities/powerbi.py b/langchain/utilities/powerbi.py index 3105a90b..9af8cb66 100644 --- a/langchain/utilities/powerbi.py +++ b/langchain/utilities/powerbi.py @@ -1,11 +1,10 @@ """Wrapper around a Power BI endpoint.""" - from __future__ import annotations import logging import os from copy import deepcopy -from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Union +from typing import Any, Dict, Iterable, List, Optional, Union import aiohttp import requests @@ -17,8 +16,13 @@ _LOGGER = logging.getLogger(__name__) BASE_URL = os.getenv("POWERBI_BASE_URL", "https://api.powerbi.com/v1.0/myorg") -if TYPE_CHECKING: +try: from azure.core.credentials import TokenCredential +except ImportError: + _LOGGER.log( + logging.WARNING, + "Could not import azure.core python package.", + ) class PowerBIDataset(BaseModel): @@ -67,8 +71,8 @@ class PowerBIDataset(BaseModel): "Content-Type": "application/json", "Authorization": "Bearer " + self.token, } - from azure.core.exceptions import ( # pylint: disable=import-outside-toplevel - ClientAuthenticationError, + from azure.core.exceptions import ( + ClientAuthenticationError, # pylint: disable=import-outside-toplevel ) if 
self.credential: diff --git a/tests/integration_tests/.env.example b/tests/integration_tests/.env.example index cf9cab60..64153788 100644 --- a/tests/integration_tests/.env.example +++ b/tests/integration_tests/.env.example @@ -14,4 +14,12 @@ PINECONE_ENVIRONMENT=us-west4-gcp # more details here: https://confluence.atlassian.com/enterprise/using-personal-access-tokens-1026032365.html # JIRA_API_TOKEN=your_jira_api_token_here # JIRA_USERNAME=your_jira_username_here -# JIRA_INSTANCE_URL=your_jira_instance_url_here \ No newline at end of file +# JIRA_INSTANCE_URL=your_jira_instance_url_here + + +# power bi +# sign in to azure in order to authenticate with DefaultAzureCredentials +# details here https://learn.microsoft.com/en-us/dotnet/api/azure.identity.defaultazurecredential?view=azure-dotnet +POWERBI_DATASET_ID=_powerbi_dataset_id_here +POWERBI_TABLE_NAME=_test_table_name_here +POWERBI_NUMROWS=_num_rows_in_your_test_table \ No newline at end of file diff --git a/tests/integration_tests/agent/test_powerbi_agent.py b/tests/integration_tests/agent/test_powerbi_agent.py new file mode 100644 index 00000000..f59d4273 --- /dev/null +++ b/tests/integration_tests/agent/test_powerbi_agent.py @@ -0,0 +1,47 @@ +import pytest + +from langchain.agents.agent_toolkits import PowerBIToolkit, create_pbi_agent +from langchain.chat_models import ChatOpenAI +from langchain.utilities.powerbi import PowerBIDataset +from langchain.utils import get_from_env + + +def azure_installed() -> bool: + try: + from azure.core.credentials import TokenCredential # noqa: F401 + from azure.identity import DefaultAzureCredential # noqa: F401 + + return True + except Exception as e: + print(f"azure not installed, skipping test {e}") + return False + + +@pytest.mark.skipif(not azure_installed(), reason="requires azure package") +def test_daxquery() -> None: + from azure.identity import DefaultAzureCredential + + DATASET_ID = get_from_env("", "POWERBI_DATASET_ID") + TABLE_NAME = get_from_env("", 
"POWERBI_TABLE_NAME") + NUM_ROWS = get_from_env("", "POWERBI_NUMROWS") + + fast_llm = ChatOpenAI( + temperature=0.5, max_tokens=1000, model_name="gpt-3.5-turbo", verbose=True + ) + smart_llm = ChatOpenAI( + temperature=0, max_tokens=100, model_name="gpt-4", verbose=True + ) + + toolkit = PowerBIToolkit( + powerbi=PowerBIDataset( + dataset_id=DATASET_ID, + table_names=[TABLE_NAME], + credential=DefaultAzureCredential(), + ), + llm=smart_llm, + ) + + agent_executor = create_pbi_agent(llm=fast_llm, toolkit=toolkit, verbose=True) + + output = agent_executor.run(f"How many rows are in the table, {TABLE_NAME}") + assert NUM_ROWS in output diff --git a/tests/integration_tests/utilities/test_powerbi_api.py b/tests/integration_tests/utilities/test_powerbi_api.py new file mode 100644 index 00000000..8d7a6114 --- /dev/null +++ b/tests/integration_tests/utilities/test_powerbi_api.py @@ -0,0 +1,36 @@ +"""Integration test for POWERBI API Wrapper.""" +import pytest + +from langchain.utilities.powerbi import PowerBIDataset +from langchain.utils import get_from_env + + +def azure_installed() -> bool: + try: + from azure.core.credentials import TokenCredential # noqa: F401 + from azure.identity import DefaultAzureCredential # noqa: F401 + + return True + except Exception as e: + print(f"azure not installed, skipping test {e}") + return False + + +@pytest.mark.skipif(not azure_installed(), reason="requires azure package") +def test_daxquery() -> None: + from azure.identity import DefaultAzureCredential + + DATASET_ID = get_from_env("", "POWERBI_DATASET_ID") + TABLE_NAME = get_from_env("", "POWERBI_TABLE_NAME") + NUM_ROWS = get_from_env("", "POWERBI_NUMROWS") + + powerbi = PowerBIDataset( + dataset_id=DATASET_ID, + table_names=[TABLE_NAME], + credential=DefaultAzureCredential(), + ) + + output = powerbi.run(f'EVALUATE ROW("RowCount", COUNTROWS({TABLE_NAME}))') + numrows = str(output["results"][0]["tables"][0]["rows"][0]["[RowCount]"]) + + assert NUM_ROWS == numrows diff --git 
a/tests/unit_tests/tools/powerbi/__init__.py b/tests/unit_tests/tools/powerbi/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit_tests/tools/powerbi/test_powerbi.py b/tests/unit_tests/tools/powerbi/test_powerbi.py new file mode 100644 index 00000000..fcb6e7b6 --- /dev/null +++ b/tests/unit_tests/tools/powerbi/test_powerbi.py @@ -0,0 +1,10 @@ +def test_power_bi_can_be_imported() -> None: + """Test that powerbi tools can be imported. + + The goal of this test is to verify that langchain users will not get import errors + when loading powerbi related code if they don't have optional dependencies + installed. + """ + from langchain.tools.powerbi.tool import QueryPowerBITool # noqa + from langchain.agents.agent_toolkits import PowerBIToolkit, create_pbi_agent # noqa + from langchain.utilities.powerbi import PowerBIDataset # noqa