From 06e524416c18543d5fd4dcbebb9cdf4b56c47db4 Mon Sep 17 00:00:00 2001
From: Eugene Yurtsev <eyurtsev@gmail.com>
Date: Fri, 19 May 2023 11:25:52 -0400
Subject: [PATCH] power bi api wrapper integration tests & bug fix (#4983)

# Powerbi API wrapper bug fix + integration tests

- Bug fix by removing the `TYPE_CHECKING` import guard in utilities/powerbi.py
- Added integration test for power bi api in
utilities/test_powerbi_api.py
- Added integration test for power bi agent in
agent/test_powerbi_agent.py
- Edited .env.example to help set up Power BI related environment
variables
- Updated demo notebook with working code in
docs/modules/agents/toolkits/examples/powerbi.ipynb (AzureOpenAI -> ChatOpenAI)

Notes:

Chat models (gpt3.5, gpt4) are much more capable than davinci at writing
DAX queries, so using a chat model is important for getting the agent to work properly.
Interestingly, gpt3.5-turbo needed the examples=DEFAULT_FEWSHOT_EXAMPLES
to write consistent DAX queries, so gpt4 seems necessary as the smart
llm.

Fixes #4325

## Before submitting

The azure-core and azure-identity packages are necessary dependencies

check integration tests with the following:
`pytest tests/integration_tests/utilities/test_powerbi_api.py`
`pytest tests/integration_tests/agent/test_powerbi_agent.py`

You will need a Power BI account with a dataset id, a table name, and that
table's row count in order to test. See tests/integration_tests/.env.example for details.

## Who can review?
@hwchase17
@vowelparrot

---------

Co-authored-by: aditya-pethe <adityapethe1@gmail.com>
---
 .../agents/toolkits/examples/powerbi.ipynb    | 154 ++++++++----------
 langchain/utilities/powerbi.py                |  14 +-
 tests/integration_tests/.env.example          |  10 +-
 .../agent/test_powerbi_agent.py               |  47 ++++++
 .../utilities/test_powerbi_api.py             |  36 ++++
 tests/unit_tests/tools/powerbi/__init__.py    |   0
 .../unit_tests/tools/powerbi/test_powerbi.py  |  10 ++
 7 files changed, 182 insertions(+), 89 deletions(-)
 create mode 100644 tests/integration_tests/agent/test_powerbi_agent.py
 create mode 100644 tests/integration_tests/utilities/test_powerbi_api.py
 create mode 100644 tests/unit_tests/tools/powerbi/__init__.py
 create mode 100644 tests/unit_tests/tools/powerbi/test_powerbi.py

diff --git a/docs/modules/agents/toolkits/examples/powerbi.ipynb b/docs/modules/agents/toolkits/examples/powerbi.ipynb
index f1bfc110..8b4e1d97 100644
--- a/docs/modules/agents/toolkits/examples/powerbi.ipynb
+++ b/docs/modules/agents/toolkits/examples/powerbi.ipynb
@@ -1,10 +1,7 @@
 {
  "cells": [
   {
-   "attachments": {},
    "cell_type": "markdown",
-   "id": "0e499e90-7a6d-4fab-8aab-31a4df417601",
-   "metadata": {},
    "source": [
     "# PowerBI Dataset Agent\n",
     "\n",
@@ -17,46 +14,41 @@
     "- You can also supply a username to impersonate for use with datasets that have RLS enabled. \n",
     "- The toolkit uses a LLM to create the query from the question, the agent uses the LLM for the overall execution.\n",
     "- Testing was done mostly with a `text-davinci-003` model, codex models did not seem to perform ver well."
-   ]
+   ],
+   "metadata": {},
+   "attachments": {}
   },
   {
    "cell_type": "markdown",
-   "id": "ec927ac6-9b2a-4e8a-9a6e-3e429191875c",
-   "metadata": {
-    "tags": []
-   },
    "source": [
     "## Initialization"
-   ]
+   ],
+   "metadata": {
+    "tags": []
+   }
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "53422913-967b-4f2a-8022-00269c1be1b1",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
    "source": [
     "from langchain.agents.agent_toolkits import create_pbi_agent\n",
     "from langchain.agents.agent_toolkits import PowerBIToolkit\n",
     "from langchain.utilities.powerbi import PowerBIDataset\n",
-    "from langchain.llms.openai import AzureOpenAI\n",
+    "from langchain.chat_models import ChatOpenAI\n",
     "from langchain.agents import AgentExecutor\n",
     "from azure.identity import DefaultAzureCredential"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {
+    "tags": []
+   }
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "090f3699-79c6-4ce1-ab96-a94f0121fd64",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
    "source": [
-    "fast_llm = AzureOpenAI(temperature=0.5, max_tokens=1000, deployment_name=\"gpt-35-turbo\", verbose=True)\n",
-    "smart_llm = AzureOpenAI(temperature=0, max_tokens=100, deployment_name=\"gpt-4\", verbose=True)\n",
+    "fast_llm = ChatOpenAI(temperature=0.5, max_tokens=1000, model_name=\"gpt-3.5-turbo\", verbose=True)\n",
+    "smart_llm = ChatOpenAI(temperature=0, max_tokens=100, model_name=\"gpt-4\", verbose=True)\n",
     "\n",
     "toolkit = PowerBIToolkit(\n",
     "    powerbi=PowerBIDataset(dataset_id=\"<dataset_id>\", table_names=['table1', 'table2'], credential=DefaultAzureCredential()), \n",
@@ -68,97 +60,90 @@
     "    toolkit=toolkit,\n",
     "    verbose=True,\n",
     ")"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {
+    "tags": []
+   }
   },
   {
    "cell_type": "markdown",
-   "id": "36ae48c7-cb08-4fef-977e-c7d4b96a464b",
-   "metadata": {},
    "source": [
     "## Example: describing a table"
-   ]
+   ],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "ff70e83d-5ad0-4fc7-bb96-27d82ac166d7",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
    "source": [
     "agent_executor.run(\"Describe table1\")"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {
+    "tags": []
+   }
   },
   {
-   "attachments": {},
    "cell_type": "markdown",
-   "id": "9abcfe8e-1868-42a4-8345-ad2d9b44c681",
-   "metadata": {},
    "source": [
     "## Example: simple query on a table\n",
     "In this example, the agent actually figures out the correct query to get a row count of the table."
-   ]
+   ],
+   "metadata": {},
+   "attachments": {}
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "bea76658-a65b-47e2-b294-6d52c5556246",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
    "source": [
     "agent_executor.run(\"How many records are in table1?\")"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {
+    "tags": []
+   }
   },
   {
    "cell_type": "markdown",
-   "id": "6fbc26af-97e4-4a21-82aa-48bdc992da26",
-   "metadata": {},
    "source": [
     "## Example: running queries"
-   ]
+   ],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "17bea710-4a23-4de0-b48e-21d57be48293",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
    "source": [
     "agent_executor.run(\"How many records are there by dimension1 in table2?\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "474dddda-c067-4eeb-98b1-e763ee78b18c",
+   ],
+   "outputs": [],
    "metadata": {
     "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "agent_executor.run(\"What unique values are there for dimensions2 in table2\")"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "id": "6fd950e4",
-   "metadata": {},
-   "source": [
-    "## Example: add your own few-shot prompts"
-   ]
+   }
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "87d677f9",
-   "metadata": {},
+   "source": [
+    "agent_executor.run(\"What unique values are there for dimensions2 in table2\")"
+   ],
    "outputs": [],
+   "metadata": {
+    "tags": []
+   }
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "## Example: add your own few-shot prompts"
+   ],
+   "metadata": {},
+   "attachments": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
    "source": [
     "#fictional example\n",
     "few_shots = \"\"\"\n",
@@ -182,24 +167,24 @@
     "    toolkit=toolkit,\n",
     "    verbose=True,\n",
     ")"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {}
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "33f4bb43",
-   "metadata": {},
-   "outputs": [],
    "source": [
     "agent_executor.run(\"What was the maximum of value in revenue in dollars in 2022?\")"
-   ]
+   ],
+   "outputs": [],
+   "metadata": {}
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
+   "name": "python3",
+   "display_name": "Python 3.9.16 64-bit"
   },
   "language_info": {
    "codemirror_mode": {
@@ -211,9 +196,12 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.5"
+   "version": "3.9.16"
+  },
+  "interpreter": {
+   "hash": "397704579725e15f5c7cb49fe5f0341eb7531c82d19f2c29d197e8b64ab5776b"
   }
  },
  "nbformat": 4,
  "nbformat_minor": 5
-}
+}
\ No newline at end of file
diff --git a/langchain/utilities/powerbi.py b/langchain/utilities/powerbi.py
index 3105a90b..9af8cb66 100644
--- a/langchain/utilities/powerbi.py
+++ b/langchain/utilities/powerbi.py
@@ -1,11 +1,10 @@
 """Wrapper around a Power BI endpoint."""
-
 from __future__ import annotations
 
 import logging
 import os
 from copy import deepcopy
-from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Union
+from typing import Any, Dict, Iterable, List, Optional, Union
 
 import aiohttp
 import requests
@@ -17,8 +16,13 @@ _LOGGER = logging.getLogger(__name__)
 
 BASE_URL = os.getenv("POWERBI_BASE_URL", "https://api.powerbi.com/v1.0/myorg")
 
-if TYPE_CHECKING:
+try:
     from azure.core.credentials import TokenCredential
+except ImportError:
+    _LOGGER.log(
+        logging.WARNING,
+        "Could not import azure.core python package.",
+    )
 
 
 class PowerBIDataset(BaseModel):
@@ -67,8 +71,8 @@ class PowerBIDataset(BaseModel):
                 "Content-Type": "application/json",
                 "Authorization": "Bearer " + self.token,
             }
-        from azure.core.exceptions import (  # pylint: disable=import-outside-toplevel
-            ClientAuthenticationError,
+        from azure.core.exceptions import (
+            ClientAuthenticationError,  # pylint: disable=import-outside-toplevel
         )
 
         if self.credential:
diff --git a/tests/integration_tests/.env.example b/tests/integration_tests/.env.example
index cf9cab60..64153788 100644
--- a/tests/integration_tests/.env.example
+++ b/tests/integration_tests/.env.example
@@ -14,4 +14,12 @@ PINECONE_ENVIRONMENT=us-west4-gcp
 # more details here: https://confluence.atlassian.com/enterprise/using-personal-access-tokens-1026032365.html
 # JIRA_API_TOKEN=your_jira_api_token_here
 # JIRA_USERNAME=your_jira_username_here
-# JIRA_INSTANCE_URL=your_jira_instance_url_here
\ No newline at end of file
+# JIRA_INSTANCE_URL=your_jira_instance_url_here
+
+
+# power bi
+# sign in to azure in order to authenticate with DefaultAzureCredential
+# details here https://learn.microsoft.com/en-us/dotnet/api/azure.identity.defaultazurecredential?view=azure-dotnet
+POWERBI_DATASET_ID=_powerbi_dataset_id_here
+POWERBI_TABLE_NAME=_test_table_name_here
+POWERBI_NUMROWS=_num_rows_in_your_test_table
\ No newline at end of file
diff --git a/tests/integration_tests/agent/test_powerbi_agent.py b/tests/integration_tests/agent/test_powerbi_agent.py
new file mode 100644
index 00000000..f59d4273
--- /dev/null
+++ b/tests/integration_tests/agent/test_powerbi_agent.py
@@ -0,0 +1,47 @@
+import pytest
+
+from langchain.agents.agent_toolkits import PowerBIToolkit, create_pbi_agent
+from langchain.chat_models import ChatOpenAI
+from langchain.utilities.powerbi import PowerBIDataset
+from langchain.utils import get_from_env
+
+
+def azure_installed() -> bool:
+    try:
+        from azure.core.credentials import TokenCredential  # noqa: F401
+        from azure.identity import DefaultAzureCredential  # noqa: F401
+
+        return True
+    except Exception as e:
+        print(f"azure not installed, skipping test {e}")
+        return False
+
+
+@pytest.mark.skipif(not azure_installed(), reason="requires azure package")
+def test_daxquery() -> None:
+    from azure.identity import DefaultAzureCredential
+
+    DATASET_ID = get_from_env("", "POWERBI_DATASET_ID")
+    TABLE_NAME = get_from_env("", "POWERBI_TABLE_NAME")
+    NUM_ROWS = get_from_env("", "POWERBI_NUMROWS")
+
+    fast_llm = ChatOpenAI(
+        temperature=0.5, max_tokens=1000, model_name="gpt-3.5-turbo", verbose=True
+    )
+    smart_llm = ChatOpenAI(
+        temperature=0, max_tokens=100, model_name="gpt-4", verbose=True
+    )
+
+    toolkit = PowerBIToolkit(
+        powerbi=PowerBIDataset(
+            dataset_id=DATASET_ID,
+            table_names=[TABLE_NAME],
+            credential=DefaultAzureCredential(),
+        ),
+        llm=smart_llm,
+    )
+
+    agent_executor = create_pbi_agent(llm=fast_llm, toolkit=toolkit, verbose=True)
+
+    output = agent_executor.run(f"How many rows are in the table, {TABLE_NAME}")
+    assert NUM_ROWS in output
diff --git a/tests/integration_tests/utilities/test_powerbi_api.py b/tests/integration_tests/utilities/test_powerbi_api.py
new file mode 100644
index 00000000..8d7a6114
--- /dev/null
+++ b/tests/integration_tests/utilities/test_powerbi_api.py
@@ -0,0 +1,36 @@
+"""Integration test for POWERBI API Wrapper."""
+import pytest
+
+from langchain.utilities.powerbi import PowerBIDataset
+from langchain.utils import get_from_env
+
+
+def azure_installed() -> bool:
+    try:
+        from azure.core.credentials import TokenCredential  # noqa: F401
+        from azure.identity import DefaultAzureCredential  # noqa: F401
+
+        return True
+    except Exception as e:
+        print(f"azure not installed, skipping test {e}")
+        return False
+
+
+@pytest.mark.skipif(not azure_installed(), reason="requires azure package")
+def test_daxquery() -> None:
+    from azure.identity import DefaultAzureCredential
+
+    DATASET_ID = get_from_env("", "POWERBI_DATASET_ID")
+    TABLE_NAME = get_from_env("", "POWERBI_TABLE_NAME")
+    NUM_ROWS = get_from_env("", "POWERBI_NUMROWS")
+
+    powerbi = PowerBIDataset(
+        dataset_id=DATASET_ID,
+        table_names=[TABLE_NAME],
+        credential=DefaultAzureCredential(),
+    )
+
+    output = powerbi.run(f'EVALUATE ROW("RowCount", COUNTROWS({TABLE_NAME}))')
+    numrows = str(output["results"][0]["tables"][0]["rows"][0]["[RowCount]"])
+
+    assert NUM_ROWS == numrows
diff --git a/tests/unit_tests/tools/powerbi/__init__.py b/tests/unit_tests/tools/powerbi/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/unit_tests/tools/powerbi/test_powerbi.py b/tests/unit_tests/tools/powerbi/test_powerbi.py
new file mode 100644
index 00000000..fcb6e7b6
--- /dev/null
+++ b/tests/unit_tests/tools/powerbi/test_powerbi.py
@@ -0,0 +1,10 @@
+def test_power_bi_can_be_imported() -> None:
+    """Test that powerbi tools can be imported.
+
+    The goal of this test is to verify that langchain users will not get import errors
+    when loading powerbi related code if they don't have optional dependencies
+    installed.
+    """
+    from langchain.tools.powerbi.tool import QueryPowerBITool  # noqa
+    from langchain.agents.agent_toolkits import PowerBIToolkit, create_pbi_agent  # noqa
+    from langchain.utilities.powerbi import PowerBIDataset  # noqa