Harrison/power bi (#3205)

Co-authored-by: Eduard van Valkenburg <eavanvalkenburg@users.noreply.github.com>
fix_agent_callbacks
Harrison Chase 1 year ago committed by GitHub
parent 61e09229c8
commit cc6fe18152

@@ -0,0 +1,167 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "0e499e90-7a6d-4fab-8aab-31a4df417601",
"metadata": {},
"source": [
"# PowerBI Dataset Agent\n",
"\n",
"This notebook showcases an agent designed to interact with a Power BI Dataset. The agent is designed to answer more general questions about a dataset, as well as recover from errors.\n",
"\n",
"Note that, as this agent is in active development, all answers might not be correct. It runs against the [executequery endpoint](https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/execute-queries), which does not allow deletes.\n",
"\n",
"### Some notes\n",
"- It relies on authentication with the azure.identity package, which can be installed with `pip install azure-identity`. Alternatively you can create the powerbi dataset with a token as a string without supplying the credentials.\n",
"- You can also supply a username to impersonate for use with datasets that have RLS enabled. \n",
"- The toolkit uses a LLM to create the query from the question, the agent uses the LLM for the overall execution.\n",
"- Testing was done mostly with a `text-davinci-003` model, codex models did not seem to perform ver well."
]
},
{
"cell_type": "markdown",
"id": "ec927ac6-9b2a-4e8a-9a6e-3e429191875c",
"metadata": {
"tags": []
},
"source": [
"## Initialization"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "53422913-967b-4f2a-8022-00269c1be1b1",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"from langchain.agents.agent_toolkits import create_pbi_agent\n",
"from langchain.agents.agent_toolkits import PowerBIToolkit\n",
"from langchain.utilities.powerbi import PowerBIDataset\n",
"from langchain.llms.openai import AzureOpenAI\n",
"from langchain.agents import AgentExecutor\n",
"from azure.identity import DefaultAzureCredential"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "090f3699-79c6-4ce1-ab96-a94f0121fd64",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"llm = AzureOpenAI(temperature=0, deployment_name=\"text-davinci-003\", verbose=True)\n",
"toolkit = PowerBIToolkit(\n",
" powerbi=PowerBIDataset(None, \"<dataset_id>\", ['table1', 'table2'], DefaultAzureCredential()), \n",
" llm=llm\n",
")\n",
"\n",
"agent_executor = create_pbi_agent(\n",
" llm=llm,\n",
" toolkit=toolkit,\n",
" verbose=True,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "36ae48c7-cb08-4fef-977e-c7d4b96a464b",
"metadata": {},
"source": [
"## Example: describing a table"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ff70e83d-5ad0-4fc7-bb96-27d82ac166d7",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"agent_executor.run(\"Describe table1\")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "9abcfe8e-1868-42a4-8345-ad2d9b44c681",
"metadata": {},
"source": [
"## Example: simple query on a table\n",
"In this example, the agent actually figures out the correct query to get a row count of the table."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bea76658-a65b-47e2-b294-6d52c5556246",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"agent_executor.run(\"How many records are in table1?\")"
]
},
{
"cell_type": "markdown",
"id": "6fbc26af-97e4-4a21-82aa-48bdc992da26",
"metadata": {},
"source": [
"## Example: running queries"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "17bea710-4a23-4de0-b48e-21d57be48293",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"agent_executor.run(\"How many records are there by dimension1 in table2?\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "474dddda-c067-4eeb-98b1-e763ee78b18c",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"agent_executor.run(\"What unique values are there for dimensions2 in table2\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
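The notes above mention token-based authentication as an alternative to azure-identity; a minimal sketch, assuming a bearer token for the Power BI API has already been acquired (the token value, dataset id, and table names are placeholders):

from langchain.utilities.powerbi import PowerBIDataset

# Sketch: authenticate with a pre-acquired bearer token instead of a credential.
# "<access_token>" and the dataset details are placeholders.
powerbi = PowerBIDataset(
    dataset_id="<dataset_id>",
    table_names=["table1", "table2"],
    token="<access_token>",
)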

@@ -50,6 +50,7 @@ from langchain.sql_database import SQLDatabase
from langchain.utilities import ArxivAPIWrapper
from langchain.utilities.google_search import GoogleSearchAPIWrapper
from langchain.utilities.google_serper import GoogleSerperAPIWrapper
from langchain.utilities.powerbi import PowerBIDataset
from langchain.utilities.searx_search import SearxSearchWrapper
from langchain.utilities.serpapi import SerpAPIWrapper
from langchain.utilities.wikipedia import WikipediaAPIWrapper
@@ -106,6 +107,7 @@ __all__ = [
"HuggingFacePipeline",
"SQLDatabase",
"SQLDatabaseChain",
"PowerBIDataset",
"FAISS",
"MRKLChain",
"VectorDBQA",

@@ -12,6 +12,8 @@ from langchain.agents.agent_toolkits import (
create_json_agent,
create_openapi_agent,
create_pandas_dataframe_agent,
create_pbi_agent,
create_pbi_chat_agent,
create_sql_agent,
create_vectorstore_agent,
create_vectorstore_router_agent,
@@ -44,6 +46,8 @@ __all__ = [
"ConversationalChatAgent",
"load_agent",
"create_sql_agent",
"create_pbi_agent",
"create_pbi_chat_agent",
"create_json_agent",
"create_openapi_agent",
"create_vectorstore_router_agent",

@@ -8,6 +8,9 @@ from langchain.agents.agent_toolkits.nla.toolkit import NLAToolkit
from langchain.agents.agent_toolkits.openapi.base import create_openapi_agent
from langchain.agents.agent_toolkits.openapi.toolkit import OpenAPIToolkit
from langchain.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent
from langchain.agents.agent_toolkits.powerbi.base import create_pbi_agent
from langchain.agents.agent_toolkits.powerbi.chat_base import create_pbi_chat_agent
from langchain.agents.agent_toolkits.powerbi.toolkit import PowerBIToolkit
from langchain.agents.agent_toolkits.python.base import create_python_agent
from langchain.agents.agent_toolkits.sql.base import create_sql_agent
from langchain.agents.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
@@ -26,11 +29,14 @@ __all__ = [
"create_json_agent",
"create_sql_agent",
"create_openapi_agent",
"create_pbi_agent",
"create_pbi_chat_agent",
"create_python_agent",
"create_vectorstore_agent",
"JsonToolkit",
"SQLDatabaseToolkit",
"NLAToolkit",
"PowerBIToolkit",
"OpenAPIToolkit",
"VectorStoreToolkit",
"create_vectorstore_router_agent",

@@ -0,0 +1,62 @@
"""Power BI agent."""
from typing import Any, Dict, List, Optional
from langchain.agents import AgentExecutor
from langchain.agents.agent_toolkits.powerbi.prompt import (
POWERBI_PREFIX,
POWERBI_SUFFIX,
)
from langchain.agents.agent_toolkits.powerbi.toolkit import PowerBIToolkit
from langchain.agents.mrkl.base import ZeroShotAgent
from langchain.agents.mrkl.prompt import FORMAT_INSTRUCTIONS
from langchain.callbacks.base import BaseCallbackManager
from langchain.chains.llm import LLMChain
from langchain.llms.base import BaseLLM
from langchain.utilities.powerbi import PowerBIDataset
def create_pbi_agent(
llm: BaseLLM,
toolkit: Optional[PowerBIToolkit],
powerbi: Optional[PowerBIDataset] = None,
callback_manager: Optional[BaseCallbackManager] = None,
prefix: str = POWERBI_PREFIX,
suffix: str = POWERBI_SUFFIX,
format_instructions: str = FORMAT_INSTRUCTIONS,
examples: Optional[str] = None,
input_variables: Optional[List[str]] = None,
top_k: int = 10,
verbose: bool = False,
agent_kwargs: Optional[Dict[str, Any]] = None,
**kwargs: Dict[str, Any],
) -> AgentExecutor:
"""Construct a pbi agent from an LLM and tools."""
if toolkit is None:
if powerbi is None:
raise ValueError("Must provide either a toolkit or powerbi dataset")
toolkit = PowerBIToolkit(powerbi=powerbi, llm=llm, examples=examples)
tools = toolkit.get_tools()
agent = ZeroShotAgent(
llm_chain=LLMChain(
llm=llm,
prompt=ZeroShotAgent.create_prompt(
tools,
prefix=prefix.format(top_k=top_k),
suffix=suffix,
format_instructions=format_instructions,
input_variables=input_variables,
),
callback_manager=callback_manager, # type: ignore
verbose=verbose,
),
allowed_tools=[tool.name for tool in tools],
**(agent_kwargs or {}),
)
return AgentExecutor.from_agent_and_tools(
agent=agent,
tools=tools,
callback_manager=callback_manager,
verbose=verbose,
**kwargs,
)
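For example, one can pass the dataset directly and let the function build the toolkit itself; a minimal usage sketch (dataset details are placeholders):

from azure.identity import DefaultAzureCredential
from langchain.agents.agent_toolkits import create_pbi_agent
from langchain.llms.openai import AzureOpenAI
from langchain.utilities.powerbi import PowerBIDataset

# Sketch: with toolkit=None, create_pbi_agent builds a PowerBIToolkit internally
# from the supplied dataset and LLM.
agent_executor = create_pbi_agent(
    llm=AzureOpenAI(temperature=0, deployment_name="text-davinci-003"),
    toolkit=None,
    powerbi=PowerBIDataset(
        group_id=None,
        dataset_id="<dataset_id>",
        table_names=["table1", "table2"],
        credential=DefaultAzureCredential(),
    ),
    verbose=True,
)
agent_executor.run("How many records are in table1?")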

@@ -0,0 +1,60 @@
"""Power BI agent."""
from typing import Any, Dict, List, Optional
from langchain.agents import AgentExecutor
from langchain.agents.agent_toolkits.powerbi.prompt import (
POWERBI_CHAT_PREFIX,
POWERBI_CHAT_SUFFIX,
)
from langchain.agents.agent_toolkits.powerbi.toolkit import PowerBIToolkit
from langchain.agents.conversational_chat.base import ConversationalChatAgent
from langchain.callbacks.base import BaseCallbackManager
from langchain.chat_models.base import BaseChatModel
from langchain.memory import ConversationBufferMemory
from langchain.memory.chat_memory import BaseChatMemory
from langchain.utilities.powerbi import PowerBIDataset
def create_pbi_chat_agent(
llm: BaseChatModel,
toolkit: Optional[PowerBIToolkit],
powerbi: Optional[PowerBIDataset] = None,
callback_manager: Optional[BaseCallbackManager] = None,
prefix: str = POWERBI_CHAT_PREFIX,
suffix: str = POWERBI_CHAT_SUFFIX,
examples: Optional[str] = None,
input_variables: Optional[List[str]] = None,
memory: Optional[BaseChatMemory] = None,
top_k: int = 10,
verbose: bool = False,
agent_kwargs: Optional[Dict[str, Any]] = None,
**kwargs: Dict[str, Any],
) -> AgentExecutor:
"""Construct a pbi agent from an Chat LLM and tools.
If you supply only a toolkit and no powerbi dataset, the same LLM is used for both.
"""
if toolkit is None:
if powerbi is None:
raise ValueError("Must provide either a toolkit or powerbi dataset")
toolkit = PowerBIToolkit(powerbi=powerbi, llm=llm, examples=examples)
tools = toolkit.get_tools()
agent = ConversationalChatAgent.from_llm_and_tools(
llm=llm,
tools=tools,
system_message=prefix.format(top_k=top_k),
user_message=suffix,
input_variables=input_variables,
callback_manager=callback_manager,
verbose=verbose,
**(agent_kwargs or {}),
)
return AgentExecutor.from_agent_and_tools(
agent=agent,
tools=tools,
callback_manager=callback_manager,
memory=memory
or ConversationBufferMemory(memory_key="chat_history", return_messages=True),
verbose=verbose,
**kwargs,
)
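A usage sketch for the chat variant, assuming an OpenAI chat model is available (dataset details are placeholders):

from azure.identity import DefaultAzureCredential
from langchain.agents.agent_toolkits import create_pbi_chat_agent
from langchain.chat_models import ChatOpenAI
from langchain.utilities.powerbi import PowerBIDataset

# Sketch: chat history is kept in a ConversationBufferMemory by default,
# so follow-up questions can reference earlier answers.
agent_executor = create_pbi_chat_agent(
    llm=ChatOpenAI(temperature=0),
    toolkit=None,
    powerbi=PowerBIDataset(
        group_id=None,
        dataset_id="<dataset_id>",
        table_names=["table1", "table2"],
        credential=DefaultAzureCredential(),
    ),
    verbose=True,
)
agent_executor.run("How many records are in table1?")
agent_executor.run("And how many are in table2?")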

@@ -0,0 +1,48 @@
# flake8: noqa
"""Prompts for PowerBI agent."""
POWERBI_PREFIX = """You are an agent designed to interact with a Power BI Dataset.
Given an input question, create a syntactically correct DAX query to run, then look at the results of the query and return the answer.
Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most {top_k} results.
You can order the results by a relevant column to return the most interesting examples in the database.
Never query for all the columns from a specific table; only ask for the few relevant columns given the question.
You have access to tools for interacting with the Power BI Dataset. Only use the tools below, and only use the information they return to construct your final answer. Usually I should first ask which tables I have, then how each table is defined, then ask the question-to-query tool to create a query for me, then ask the query tool to execute it, and finally create a nice sentence that answers the question. If you receive an error back that mentions that the query was wrong, try to phrase the question differently and get a new query from the question-to-query tool.
If the question does not seem related to the dataset, just return "I don't know" as the answer.
"""
POWERBI_SUFFIX = """Begin!
Question: {input}
Thought: I should first ask which tables I have, then how each table is defined, then ask the question-to-query tool to create a query for me, then ask the query tool to execute it, and finally create a nice sentence that answers the question.
{agent_scratchpad}"""
POWERBI_CHAT_PREFIX = """Assistant is a large language model trained by OpenAI built to help users interact with a PowerBI Dataset.
Assistant is designed to be able to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions on a wide range of topics. As a language model, Assistant is able to generate human-like text based on the input it receives, allowing it to engage in natural-sounding conversations and provide responses that are coherent and relevant to the topic at hand.
Assistant is constantly learning and improving, and its capabilities are constantly evolving. It is able to process and understand large amounts of text, and can use this knowledge to provide accurate and informative responses to a wide range of questions. Additionally, Assistant is able to generate its own text based on the input it receives, allowing it to engage in discussions and provide explanations and descriptions on a wide range of topics.
Given an input question, create a syntactically correct DAX query to run, then look at the results of the query and return the answer. Unless the user specifies a specific number of examples they wish to obtain, always limit your query to at most {top_k} results. You can order the results by a relevant column to return the most interesting examples in the database.
Overall, Assistant is a powerful system that can help with a wide range of tasks and provide valuable insights and information on a wide range of topics. Whether you need help with a specific question or just want to have a conversation about a particular topic, Assistant is here to assist.
Usually I should first ask which tables I have, then how each table is defined, then ask the question-to-query tool to create a query for me, then ask the query tool to execute it, and finally create a complete sentence that answers the question. If you receive an error back that mentions that the query was wrong, try to phrase the question differently and get a new query from the question-to-query tool.
"""
POWERBI_CHAT_SUFFIX = """TOOLS
------
Assistant can ask the user to use tools to look up information that may be helpful in answering the user's original question. The tools the human can use are:
{{tools}}
{format_instructions}
USER'S INPUT
--------------------
Here is the user's input (remember to respond with a markdown code snippet of a json blob with a single action, and NOTHING else):
{{{{input}}}}
"""

@@ -0,0 +1,67 @@
"""Toolkit for interacting with a Power BI dataset."""
from typing import List, Optional
from pydantic import Field
from langchain.agents.agent_toolkits.base import BaseToolkit
from langchain.callbacks.base import BaseCallbackManager
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.schema import BaseLanguageModel
from langchain.tools import BaseTool
from langchain.tools.powerbi.prompt import QUESTION_TO_QUERY
from langchain.tools.powerbi.tool import (
InfoPowerBITool,
InputToQueryTool,
ListPowerBITool,
QueryPowerBITool,
)
from langchain.utilities.powerbi import PowerBIDataset
class PowerBIToolkit(BaseToolkit):
"""Toolkit for interacting with PowerBI dataset."""
powerbi: PowerBIDataset = Field(exclude=True)
llm: BaseLanguageModel = Field(exclude=True)
examples: Optional[str] = None
callback_manager: Optional[BaseCallbackManager] = None
class Config:
"""Configuration for this pydantic object."""
arbitrary_types_allowed = True
def get_tools(self) -> List[BaseTool]:
"""Get the tools in the toolkit."""
# Build the question-to-query chain, with the callback manager when supplied.
if self.callback_manager:
chain = LLMChain(
llm=self.llm,
callback_manager=self.callback_manager,
prompt=PromptTemplate(
template=QUESTION_TO_QUERY,
input_variables=["tool_input", "tables", "schemas", "examples"],
),
)
else:
chain = LLMChain(
llm=self.llm,
prompt=PromptTemplate(
template=QUESTION_TO_QUERY,
input_variables=["tool_input", "tables", "schemas", "examples"],
),
)
return [
QueryPowerBITool(powerbi=self.powerbi),
InfoPowerBITool(powerbi=self.powerbi),
ListPowerBITool(powerbi=self.powerbi),
InputToQueryTool(
powerbi=self.powerbi,
llm_chain=chain,
examples=self.examples,
),
]
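To see what the agent gets to work with, a quick sketch that inspects the toolkit, assuming `llm` and `powerbi` are configured as in the notebook above:

from langchain.agents.agent_toolkits import PowerBIToolkit

# Sketch: the toolkit exposes four tools, matching the tool classes defined below.
tools = PowerBIToolkit(powerbi=powerbi, llm=llm).get_tools()
print([tool.name for tool in tools])
# ['query_powerbi', 'schema_powerbi', 'list_tables_powerbi', 'question_to_query_powerbi']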

@@ -0,0 +1 @@
"""Tools for interacting with a PowerBI dataset."""

@@ -0,0 +1,62 @@
# flake8: noqa
QUESTION_TO_QUERY = """
Answer the question below with a DAX query that can be sent to Power BI. DAX queries have a simple syntax composed of just one required keyword, EVALUATE, and several optional keywords: ORDER BY, START AT, DEFINE, MEASURE, VAR, TABLE, and COLUMN. Each keyword defines a statement used for the duration of the query. Any time < or > are used in the text below, it means that those values need to be replaced by a table, column, or other value.
Some DAX functions return a table instead of a scalar, and must be wrapped in a function that evaluates the table and returns a scalar; unless the table is a single-column, single-row table, in which case it is treated as a scalar value. Most DAX functions require one or more arguments, which can include tables, columns, expressions, and values. However, some functions, such as PI, do not require any arguments, but always require parentheses to indicate the null argument. For example, you must always type PI(), not PI. You can also nest functions within other functions.
Some commonly used functions are:
EVALUATE <table> - At the most basic level, a DAX query is an EVALUATE statement containing a table expression. At least one EVALUATE statement is required, however, a query can contain any number of EVALUATE statements.
EVALUATE <table> ORDER BY <expression> ASC or DESC - The optional ORDER BY keyword defines one or more expressions used to sort query results. Any expression that can be evaluated for each row of the result is valid.
EVALUATE <table> ORDER BY <expression> ASC or DESC START AT <value> or <parameter> - The optional START AT keyword is used inside an ORDER BY clause. It defines the value at which the query results begin.
DEFINE MEASURE | VAR; EVALUATE <table> - The optional DEFINE keyword introduces one or more calculated entity definitions that exist only for the duration of the query. Definitions precede the EVALUATE statement and are valid for all EVALUATE statements in the query. Definitions can be variables, measures, tables, and columns. Definitions can reference other definitions that appear before or after the current definition. At least one definition is required if the DEFINE keyword is included in a query.
MEASURE <table name>[<measure name>] = <scalar expression> - Introduces a measure definition in a DEFINE statement of a DAX query.
VAR <name> = <expression> - Stores the result of an expression as a named variable, which can then be passed as an argument to other measure expressions. Once resultant values have been calculated for a variable expression, those values do not change, even if the variable is referenced in another expression.
FILTER(<table>,<filter>) - Returns a table that represents a subset of another table or expression, where <filter> is a Boolean expression that is to be evaluated for each row of the table. For example, [Amount] > 0 or [Region] = "France"
ROW(<name>, <expression>) - Returns a table with a single row containing values that result from the expressions given to each column.
DISTINCT(<column>) - Returns a one-column table that contains the distinct values from the specified column. In other words, duplicate values are removed and only unique values are returned. This function cannot be used to return values into a cell or column on a worksheet; rather, you nest the DISTINCT function within a formula to get a list of distinct values that can be passed to another function and then counted, summed, or used for other operations.
DISTINCT(<table>) - Returns a table by removing duplicate rows from another table or expression.
Aggregation functions with an A in their name handle booleans and empty strings in appropriate ways, while the same function without the A only uses the numeric values in a column. Functions with an X in their name can take an expression as an argument; this is evaluated for each row in the table and the result is used in the regular function calculation. These are the functions:
COUNT(<column>), COUNTA(<column>), COUNTX(<table>,<expression>), COUNTAX(<table>,<expression>), COUNTROWS([<table>]), COUNTBLANK(<column>), DISTINCTCOUNT(<column>), DISTINCTCOUNTNOBLANK(<column>) - these are all variations of count functions.
AVERAGE(<column>), AVERAGEA(<column>), AVERAGEX(<table>,<expression>) - these are all variations of average functions.
MAX(<column>), MAXA(<column>), MAXX(<table>,<expression>) - these are all variations of max functions.
MIN(<column>), MINA(<column>), MINX(<table>,<expression>) - these are all variations of min functions.
PRODUCT(<column>), PRODUCTX(<table>,<expression>) - these are all variations of product functions.
SUM(<column>), SUMX(<table>,<expression>) - these are all variations of sum functions.
Date and time functions:
DATE(year, month, day) - Returns a date value that represents the specified year, month, and day.
DATEDIFF(date1, date2, <interval>) - Returns the difference between two date values, in the specified interval, which can be SECOND, MINUTE, HOUR, DAY, WEEK, MONTH, QUARTER, or YEAR.
DATEVALUE(<date_text>) - Returns a date value that represents the specified date.
YEAR(<date>), QUARTER(<date>), MONTH(<date>), DAY(<date>), HOUR(<date>), MINUTE(<date>), SECOND(<date>) - Returns the part of the date for the specified date.
The following tables exist: {tables}
and the schemas for some are given here:
{schemas}
Examples:
{examples}
Question: {tool_input}
DAX:
"""
DEFAULT_FEWSHOT_EXAMPLES = """
Question: How many rows are in the table <table>?
DAX: EVALUATE ROW("Number of rows", COUNTROWS(<table>))
----
Question: How many rows are in the table <table> where <column> is not empty?
DAX: EVALUATE ROW("Number of rows", COUNTROWS(FILTER(<table>, <table>[<column>] <> "")))
----
Question: What was the average of <column> in <table>?
DAX: EVALUATE ROW("Average", AVERAGE(<table>[<column>]))
----
"""
BAD_REQUEST_RESPONSE = (
"Bad request. Please ask the question_to_query_powerbi tool to provide the query."
)
BAD_REQUEST_RESPONSE_ESCALATED = "You already tried this, please try a different query."
UNAUTHORIZED_RESPONSE = "Unauthorized. Try changing your authentication, do not retry."
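To illustrate how the toolkit's LLMChain renders this template, a minimal sketch of the formatting step (the table and schema strings are placeholders):

from langchain.prompts import PromptTemplate
from langchain.tools.powerbi.prompt import DEFAULT_FEWSHOT_EXAMPLES, QUESTION_TO_QUERY

# Sketch: render the question-to-query prompt the same way the toolkit does.
prompt = PromptTemplate(
    template=QUESTION_TO_QUERY,
    input_variables=["tool_input", "tables", "schemas", "examples"],
)
print(
    prompt.format(
        tool_input="How many records are in table1?",
        tables="table1, table2",
        schemas="table1: | id | name |",
        examples=DEFAULT_FEWSHOT_EXAMPLES,
    )
)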

@@ -0,0 +1,189 @@
"""Tools for interacting with a Power BI dataset."""
from typing import Any, Dict, Optional
from pydantic import Field, validator
from langchain.chains.llm import LLMChain
from langchain.tools.base import BaseTool
from langchain.tools.powerbi.prompt import (
BAD_REQUEST_RESPONSE,
BAD_REQUEST_RESPONSE_ESCALATED,
DEFAULT_FEWSHOT_EXAMPLES,
QUESTION_TO_QUERY,
)
from langchain.utilities.powerbi import PowerBIDataset, json_to_md
class QueryPowerBITool(BaseTool):
"""Tool for querying a Power BI Dataset."""
name = "query_powerbi"
description = """
Input to this tool is a detailed and correct DAX query, output is a result from the dataset.
If the query is not correct, an error message will be returned.
If an error is returned with Bad request in it, rewrite the query and try again.
If an error is returned with Unauthorized in it, do not try again, but tell the user to change their authentication.
Example Input: "EVALUATE ROW("count", COUNTROWS(table1))"
""" # noqa: E501
powerbi: PowerBIDataset = Field(exclude=True)
session_cache: Dict[str, Any] = Field(default_factory=dict, exclude=True)
class Config:
"""Configuration for this pydantic object."""
arbitrary_types_allowed = True
def _check_cache(self, tool_input: str) -> Optional[str]:
"""Check if the input is present in the cache.
If the value is a bad request, overwrite with the escalated version,
if not present return None."""
if tool_input not in self.session_cache:
return None
if self.session_cache[tool_input] == BAD_REQUEST_RESPONSE:
self.session_cache[tool_input] = BAD_REQUEST_RESPONSE_ESCALATED
return self.session_cache[tool_input]
def _run(self, tool_input: str) -> str:
"""Execute the query, return the results or an error message."""
if cache := self._check_cache(tool_input):
return cache
try:
self.session_cache[tool_input] = self.powerbi.run(command=tool_input)
except Exception as exc: # pylint: disable=broad-except
if "bad request" in str(exc).lower():
self.session_cache[tool_input] = BAD_REQUEST_RESPONSE
elif "unauthorized" in str(exc).lower():
self.session_cache[
tool_input
] = "Unauthorized. Try changing your authentication, do not retry."
else:
self.session_cache[tool_input] = str(exc)
return self.session_cache[tool_input]
if "results" in self.session_cache[tool_input]:
self.session_cache[tool_input] = json_to_md(
self.session_cache[tool_input]["results"][0]["tables"][0]["rows"]
)
return self.session_cache[tool_input]
async def _arun(self, tool_input: str) -> str:
"""Execute the query, return the results or an error message."""
if cache := self._check_cache(tool_input):
return cache
try:
self.session_cache[tool_input] = await self.powerbi.arun(command=tool_input)
except Exception as exc: # pylint: disable=broad-except
if "bad request" in str(exc).lower():
self.session_cache[tool_input] = BAD_REQUEST_RESPONSE
elif "unauthorized" in str(exc).lower():
self.session_cache[
tool_input
] = "Unauthorized. Try changing your authentication, do not retry."
else:
self.session_cache[tool_input] = str(exc)
return self.session_cache[tool_input]
if "results" in self.session_cache[tool_input]:
self.session_cache[tool_input] = json_to_md(
self.session_cache[tool_input]["results"][0]["tables"][0]["rows"]
)
return self.session_cache[tool_input]
class InfoPowerBITool(BaseTool):
"""Tool for getting metadata about a PowerBI Dataset."""
name = "schema_powerbi"
description = """
Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables.
Be sure that the tables actually exist by calling list_tables_powerbi first!
Example Input: "table1, table2, table3"
""" # noqa: E501
powerbi: PowerBIDataset = Field(exclude=True)
class Config:
"""Configuration for this pydantic object."""
arbitrary_types_allowed = True
def _run(self, tool_input: str) -> str:
"""Get the schema for tables in a comma-separated list."""
return self.powerbi.get_table_info(tool_input.split(", "))
async def _arun(self, tool_input: str) -> str:
"""Get the schema for tables in a comma-separated list, asynchronously."""
return await self.powerbi.aget_table_info(tool_input.split(", "))
class ListPowerBITool(BaseTool):
"""Tool for getting tables names."""
name = "list_tables_powerbi"
description = "Input is an empty string, output is a comma separated list of tables in the database." # noqa: E501 # pylint: disable=C0301
powerbi: PowerBIDataset = Field(exclude=True)
class Config:
"""Configuration for this pydantic object."""
arbitrary_types_allowed = True
def _run(self, *args: Any, **kwargs: Any) -> str:
"""Get the names of the tables."""
return ", ".join(self.powerbi.get_table_names())
async def _arun(self, *args: Any, **kwargs: Any) -> str:
"""Get the names of the tables."""
return ", ".join(self.powerbi.get_table_names())
class InputToQueryTool(BaseTool):
"""Use an LLM to parse the question to a DAX query."""
name = "question_to_query_powerbi"
description = """
Use this tool to create the DAX query from a question, the input is a fully formed question related to the powerbi dataset. Always use this tool before executing a query with query_powerbi!
Example Input: "How many records are in table1?"
""" # noqa: E501
llm_chain: LLMChain
powerbi: PowerBIDataset = Field(exclude=True)
template: str = QUESTION_TO_QUERY
examples: str = DEFAULT_FEWSHOT_EXAMPLES
class Config:
"""Configuration for this pydantic object."""
arbitrary_types_allowed = True
@validator("llm_chain")
def validate_llm_chain_input_variables( # pylint: disable=E0213
cls, llm_chain: LLMChain
) -> LLMChain:
"""Make sure the LLM chain has the correct input variables."""
if llm_chain.prompt.input_variables != [
"tool_input",
"tables",
"schemas",
"examples",
]:
raise ValueError(
"LLM chain for InputToQueryTool must have input variables ['tool_input', 'tables', 'schemas', 'examples']" # noqa: C0301 E501 # pylint: disable=C0301
)
return llm_chain
def _run(self, tool_input: str) -> str:
"""Use the LLM to check the query."""
return self.llm_chain.predict(
tool_input=tool_input,
tables=self.powerbi.get_table_names(),
schemas=self.powerbi.get_schemas(),
examples=self.examples,
)
async def _arun(self, tool_input: str) -> str:
"""Use the LLM to turn the question into a query, asynchronously."""
return await self.llm_chain.apredict(
tool_input=tool_input,
tables=self.powerbi.get_table_names(),
schemas=self.powerbi.get_schemas(),
examples=self.examples,
)
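Taken together, the intended tool order mirrors the prompt's instructions; a sketch of one manual pass, assuming `toolkit` is a configured PowerBIToolkit with valid credentials:

# Sketch: list tables, fetch schemas, translate the question, then execute.
tools = {tool.name: tool for tool in toolkit.get_tools()}
tables = tools["list_tables_powerbi"].run("")
schemas = tools["schema_powerbi"].run(tables)
query = tools["question_to_query_powerbi"].run("How many records are in table1?")
result = tools["query_powerbi"].run(query)
print(result)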

@@ -8,6 +8,7 @@ from langchain.utilities.google_places_api import GooglePlacesAPIWrapper
from langchain.utilities.google_search import GoogleSearchAPIWrapper
from langchain.utilities.google_serper import GoogleSerperAPIWrapper
from langchain.utilities.openweathermap import OpenWeatherMapAPIWrapper
from langchain.utilities.powerbi import PowerBIDataset
from langchain.utilities.python import PythonREPL
from langchain.utilities.searx_search import SearxSearchWrapper
from langchain.utilities.serpapi import SerpAPIWrapper
@@ -29,4 +30,5 @@ __all__ = [
"WikipediaAPIWrapper",
"OpenWeatherMapAPIWrapper",
"PythonREPL",
"PowerBIDataset",
]

@@ -0,0 +1,235 @@
"""Wrapper around a Power BI endpoint."""
from __future__ import annotations
import logging
import os
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Union
import aiohttp
import requests
from aiohttp import ServerTimeoutError
from pydantic import BaseModel, Field, root_validator
from requests.exceptions import Timeout
from langchain.tools.powerbi.prompt import BAD_REQUEST_RESPONSE, UNAUTHORIZED_RESPONSE
_LOGGER = logging.getLogger(__name__)
if TYPE_CHECKING:
from azure.core.exceptions import ClientAuthenticationError
from azure.identity import ChainedTokenCredential
from azure.identity._internal import InteractiveCredential
BASE_URL = os.getenv("POWERBI_BASE_URL", "https://api.powerbi.com/v1.0/myorg")
class PowerBIDataset(BaseModel):
"""Create PowerBI engine from dataset ID and credential or token.
Use either the credential or a supplied token to authenticate.
If both are supplied, the credential is used to generate a token.
The impersonated_user_name is the UPN of a user to be impersonated.
If the model is not RLS enabled, this will be ignored.
"""
group_id: Optional[str]
dataset_id: str
table_names: List[str]
credential: Optional[Union[ChainedTokenCredential, InteractiveCredential]] = None
token: Optional[str] = None
impersonated_user_name: Optional[str] = None
sample_rows_in_table_info: int = Field(1, gt=0, le=10)
aiosession: Optional[aiohttp.ClientSession] = None
schemas: Dict[str, str] = Field(default_factory=dict, init=False)
class Config:
"""Configuration for this pydantic object."""
arbitrary_types_allowed = True
@root_validator(pre=True, allow_reuse=True)
def token_or_credential_present(cls, values: Dict[str, Any]) -> Dict[str, Any]:
"""Validate that at least one of token and credentials is present."""
if "token" in values or "credential" in values:
return values
raise ValueError("Please provide either a credential or a token.")
@property
def request_url(self) -> str:
"""Get the request url."""
if self.group_id:
return f"{BASE_URL}/groups/{self.group_id}/datasets/{self.dataset_id}/executeQueries"  # noqa: E501 # pylint: disable=C0301
return f"{BASE_URL}/datasets/{self.dataset_id}/executeQueries"
@property
def headers(self) -> Dict[str, str]:
"""Get the headers, generating a token when needed."""
# Imported at runtime: the module-level import is guarded by TYPE_CHECKING
# because azure-identity is an optional dependency.
from azure.core.exceptions import ClientAuthenticationError
token = None
if self.token:
token = self.token
if self.credential:
try:
token = self.credential.get_token(
"https://analysis.windows.net/powerbi/api/.default"
).token
except Exception as exc:  # pylint: disable=broad-exception-caught
raise ClientAuthenticationError(
"Could not get a token from the supplied credentials."
) from exc
if not token:
raise ClientAuthenticationError("No credential or token supplied.")
return {
"Content-Type": "application/json",
"Authorization": "Bearer " + token,
}
def get_table_names(self) -> Iterable[str]:
"""Get names of tables available."""
return self.table_names
def get_schemas(self) -> str:
"""Get the available schemas."""
if self.schemas:
return ", ".join([f"{key}: {value}" for key, value in self.schemas.items()])
return "No known schemas yet. Use the schema_powerbi tool first."
@property
def table_info(self) -> str:
"""Information about all tables in the database."""
return self.get_table_info()
def _get_tables_to_query(
self, table_names: Optional[Union[List[str], str]] = None
) -> List[str]:
"""Get the tables names that need to be queried."""
if table_names is not None:
if (
isinstance(table_names, list)
and len(table_names) > 0
and table_names[0] != ""
):
return table_names
if isinstance(table_names, str) and table_names != "":
return [table_names]
return self.table_names
def _get_tables_todo(self, tables_todo: List[str]) -> List[str]:
"""Get the tables that do not have a schema yet."""
# Build a new list instead of mutating the one being iterated over.
return [table for table in tables_todo if table not in self.schemas]
def _get_schema_for_tables(self, table_names: List[str]) -> str:
"""Create a string of the table schemas for the supplied tables."""
schemas = [
schema for table, schema in self.schemas.items() if table in table_names
]
return ", ".join(schemas)
def get_table_info(
self, table_names: Optional[Union[List[str], str]] = None
) -> str:
"""Get information about specified tables."""
tables_requested = self._get_tables_to_query(table_names)
tables_todo = self._get_tables_todo(tables_requested)
for table in tables_todo:
try:
result = self.run(
f"EVALUATE TOPN({self.sample_rows_in_table_info}, {table})"
)
except Timeout:
_LOGGER.warning("Timeout while getting table info for %s", table)
continue
except Exception as exc: # pylint: disable=broad-exception-caught
if "bad request" in str(exc).lower():
return BAD_REQUEST_RESPONSE
if "unauthorized" in str(exc).lower():
return UNAUTHORIZED_RESPONSE
return str(exc)
self.schemas[table] = json_to_md(result["results"][0]["tables"][0]["rows"])
return self._get_schema_for_tables(tables_requested)
async def aget_table_info(
self, table_names: Optional[Union[List[str], str]] = None
) -> str:
"""Get information about specified tables."""
tables_requested = self._get_tables_to_query(table_names)
tables_todo = self._get_tables_todo(tables_requested)
for table in tables_todo:
try:
result = await self.arun(
f"EVALUATE TOPN({self.sample_rows_in_table_info}, {table})"
)
except ServerTimeoutError:
_LOGGER.warning("Timeout while getting table info for %s", table)
continue
except Exception as exc: # pylint: disable=broad-exception-caught
if "bad request" in str(exc).lower():
return BAD_REQUEST_RESPONSE
if "unauthorized" in str(exc).lower():
return UNAUTHORIZED_RESPONSE
return str(exc)
self.schemas[table] = json_to_md(result["results"][0]["tables"][0]["rows"])
return self._get_schema_for_tables(tables_requested)
def run(self, command: str) -> Any:
"""Execute a DAX command and return a json representing the results."""
result = requests.post(
self.request_url,
json={
"queries": [{"query": command}],
"impersonatedUserName": self.impersonated_user_name,
"serializerSettings": {"includeNulls": True},
},
headers=self.headers,
timeout=10,
)
result.raise_for_status()
return result.json()
async def arun(self, command: str) -> Any:
"""Execute a DAX command and return the result asynchronously."""
json_content = {
"queries": [{"query": command}],
"impersonatedUserName": self.impersonated_user_name,
"serializerSettings": {"includeNulls": True},
}
if self.aiosession:
async with self.aiosession.post(
self.request_url, headers=self.headers, json=json_content, timeout=10
) as response:
response.raise_for_status()
response_json = await response.json()
return response_json
async with aiohttp.ClientSession() as session:
async with session.post(
self.request_url, headers=self.headers, json=json_content, timeout=10
) as response:
response.raise_for_status()
response_json = await response.json()
return response_json
def json_to_md(
json_contents: List[Dict[str, Union[str, int, float]]],
table_name: Optional[str] = None,
) -> str:
"""Converts a JSON object to a markdown table."""
output_md = ""
headers = json_contents[0].keys()
for header in headers:
# str.replace returns a new string, so assign the result back.
header = header.replace("[", ".").replace("]", "")
if table_name:
header = header.replace(f"{table_name}.", "")
output_md += f"| {header} "
output_md += "|\n"
for row in json_contents:
for value in row.values():
output_md += f"| {value} "
output_md += "|\n"
return output_md
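For example, two rows from an executeQueries result render as follows (a sketch; the column names are illustrative):

# Sketch: Power BI returns column names as "table[column]"; json_to_md strips
# the table prefix when table_name is supplied.
rows = [
    {"table1[id]": 1, "table1[name]": "foo"},
    {"table1[id]": 2, "table1[name]": "bar"},
]
print(json_to_md(rows, table_name="table1"))
# | id | name |
# | 1 | foo |
# | 2 | bar |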

poetry.lock generated

@@ -566,6 +566,45 @@ dev = ["coverage (>=5,<6)", "flake8 (>=3,<4)", "pytest (>=6,<7)", "sphinx-copybu
docs = ["sphinx-copybutton (>=0.4,<0.5)", "sphinx-rtd-theme (>=1.0,<2.0)", "sphinx-tabs (>=3,<4)", "sphinxcontrib-mermaid (>=0.7,<0.8)"]
test = ["coverage (>=5,<6)", "pytest (>=6,<7)"]
[[package]]
name = "azure-core"
version = "1.26.4"
description = "Microsoft Azure Core Library for Python"
category = "main"
optional = true
python-versions = ">=3.7"
files = [
{file = "azure-core-1.26.4.zip", hash = "sha256:075fe06b74c3007950dd93d49440c2f3430fd9b4a5a2756ec8c79454afc989c6"},
{file = "azure_core-1.26.4-py3-none-any.whl", hash = "sha256:d9664b4bc2675d72fba461a285ac43ae33abb2967014a955bf136d9703a2ab3c"},
]
[package.dependencies]
requests = ">=2.18.4"
six = ">=1.11.0"
typing-extensions = ">=4.3.0"
[package.extras]
aio = ["aiohttp (>=3.0)"]
[[package]]
name = "azure-identity"
version = "1.12.0"
description = "Microsoft Azure Identity Library for Python"
category = "main"
optional = true
python-versions = ">=3.7"
files = [
{file = "azure-identity-1.12.0.zip", hash = "sha256:7f9b1ae7d97ea7af3f38dd09305e19ab81a1e16ab66ea186b6579d85c1ca2347"},
{file = "azure_identity-1.12.0-py3-none-any.whl", hash = "sha256:2a58ce4a209a013e37eaccfd5937570ab99e9118b3e1acf875eed3a85d541b92"},
]
[package.dependencies]
azure-core = ">=1.11.0,<2.0.0"
cryptography = ">=2.5"
msal = ">=1.12.0,<2.0.0"
msal-extensions = ">=0.3.0,<2.0.0"
six = ">=1.12.0"
[[package]]
name = "babel"
version = "2.12.1"
@@ -3814,6 +3853,45 @@ files = [
{file = "more_itertools-9.1.0-py3-none-any.whl", hash = "sha256:d2bc7f02446e86a68911e58ded76d6561eea00cddfb2a91e7019bbb586c799f3"},
]
[[package]]
name = "msal"
version = "1.21.0"
description = "The Microsoft Authentication Library (MSAL) for Python library enables your app to access the Microsoft Cloud by supporting authentication of users with Microsoft Azure Active Directory accounts (AAD) and Microsoft Accounts (MSA) using industry standard OAuth2 and OpenID Connect."
category = "main"
optional = true
python-versions = "*"
files = [
{file = "msal-1.21.0-py2.py3-none-any.whl", hash = "sha256:e8444617c1eccdff7bb73f5d4f94036002accea4a2c05f8f39c9efb5bd2b0c6a"},
{file = "msal-1.21.0.tar.gz", hash = "sha256:96b5c867830fd116e5f7d0ec8ef1b238b4cda4d1aea86d8fecf518260e136fbf"},
]
[package.dependencies]
cryptography = ">=0.6,<41"
PyJWT = {version = ">=1.0.0,<3", extras = ["crypto"]}
requests = ">=2.0.0,<3"
[package.extras]
broker = ["pymsalruntime (>=0.13.2,<0.14)"]
[[package]]
name = "msal-extensions"
version = "1.0.0"
description = "Microsoft Authentication Library extensions (MSAL EX) provides a persistence API that can save your data on disk, encrypted on Windows, macOS and Linux. Concurrent data access will be coordinated by a file lock mechanism."
category = "main"
optional = true
python-versions = "*"
files = [
{file = "msal-extensions-1.0.0.tar.gz", hash = "sha256:c676aba56b0cce3783de1b5c5ecfe828db998167875126ca4b47dc6436451354"},
{file = "msal_extensions-1.0.0-py2.py3-none-any.whl", hash = "sha256:91e3db9620b822d0ed2b4d1850056a0f133cba04455e62f11612e40f5502f2ee"},
]
[package.dependencies]
msal = ">=0.4.1,<2.0.0"
portalocker = [
{version = ">=1.0,<3", markers = "python_version >= \"3.5\" and platform_system != \"Windows\""},
{version = ">=1.6,<3", markers = "python_version >= \"3.5\" and platform_system == \"Windows\""},
]
[[package]]
name = "multidict"
version = "6.0.4"
@@ -5308,6 +5386,26 @@ files = [
dev = ["pre-commit", "tox"]
testing = ["pytest", "pytest-benchmark"]
[[package]]
name = "portalocker"
version = "2.7.0"
description = "Wraps the portalocker recipe for easy usage"
category = "main"
optional = true
python-versions = ">=3.5"
files = [
{file = "portalocker-2.7.0-py2.py3-none-any.whl", hash = "sha256:a07c5b4f3985c3cf4798369631fb7011adb498e2a46d8440efc75a8f29a0f983"},
{file = "portalocker-2.7.0.tar.gz", hash = "sha256:032e81d534a88ec1736d03f780ba073f047a06c478b06e2937486f334e955c51"},
]
[package.dependencies]
pywin32 = {version = ">=226", markers = "platform_system == \"Windows\""}
[package.extras]
docs = ["sphinx (>=1.7.1)"]
redis = ["redis"]
tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "pytest-timeout (>=2.1.0)", "redis", "sphinx (>=6.0.0)"]
[[package]]
name = "posthog"
version = "3.0.0"
@@ -5872,6 +5970,9 @@ files = [
{file = "PyJWT-2.6.0.tar.gz", hash = "sha256:69285c7e31fc44f68a1feb309e948e0df53259d579295e6cfe2b1792329f05fd"},
]
[package.dependencies]
cryptography = {version = ">=3.4.0", optional = true, markers = "extra == \"crypto\""}
[package.extras]
crypto = ["cryptography (>=3.4.0)"]
dev = ["coverage[toml] (==5.0.4)", "cryptography (>=3.4.0)", "pre-commit", "pytest (>=6.0.0,<7.0.0)", "sphinx (>=4.5.0,<5.0.0)", "sphinx-rtd-theme", "zope.interface"]
@@ -9167,7 +9268,7 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\"
cffi = ["cffi (>=1.11)"]
[extras]
all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search"]
all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search", "arxiv", "azure-identity"]
cohere = ["cohere"]
llms = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "manifest-ml", "torch", "transformers"]
openai = ["openai"]
@@ -9176,4 +9277,4 @@ qdrant = ["qdrant-client"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<4.0"
content-hash = "19a145090188b0b446c68ca33599f4d4943bf9fb1312bcfa98a23268101e1323"
content-hash = "8b0be7a924d83d9afc5e21e95aa529258a3ae916418e0c1c159732291a615af8"

@@ -61,6 +61,7 @@ psycopg2-binary = {version = "^2.9.5", optional = true}
#boto3 = {version = "^1.26.96", optional = true} # TODO: fix it, commented because the version failed with deeplake
pyowm = {version = "^3.3.0", optional = true}
async-timeout = {version = "^4.0.0", python = "<3.11"}
azure-identity = {version = "^1.12.0", optional=true}
gptcache = {version = ">=0.1.7", optional = true}
atlassian-python-api = {version = "^3.36.0", optional=true}
pytesseract = {version = "^0.3.10", optional=true}
@@ -68,6 +69,7 @@ html2text = {version="^2020.1.16", optional=true}
numexpr = "^2.8.4"
duckduckgo-search = {version="^2.8.6", optional=true}
[tool.poetry.group.docs.dependencies]
autodoc_pydantic = "^1.8.0"
myst_parser = "^0.18.1"
@@ -140,7 +142,7 @@ llms = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "manifes
qdrant = ["qdrant-client"]
openai = ["openai"]
cohere = ["cohere"]
all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence_transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "boto3", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search", "arxiv"]
all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence_transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "boto3", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search", "arxiv", "azure-identity"]
[tool.ruff]
select = [
