"""Tools for interacting with a Power BI dataset."""
|
|
import logging
|
|
from time import perf_counter
|
|
from typing import Any, Dict, Optional, Tuple
|
|
|
|
from langchain_core.callbacks import (
|
|
AsyncCallbackManagerForToolRun,
|
|
CallbackManagerForToolRun,
|
|
)
|
|
from langchain_core.pydantic_v1 import Field, validator
|
|
from langchain_core.tools import BaseTool
|
|
|
|
from langchain_community.chat_models.openai import _import_tiktoken
|
|
from langchain_community.tools.powerbi.prompt import (
|
|
BAD_REQUEST_RESPONSE,
|
|
DEFAULT_FEWSHOT_EXAMPLES,
|
|
RETRY_RESPONSE,
|
|
)
|
|
from langchain_community.utilities.powerbi import PowerBIDataset, json_to_md
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class QueryPowerBITool(BaseTool):
    """Tool for querying a Power BI Dataset."""

    name: str = "query_powerbi"
    description: str = """
    Input to this tool is a detailed question about the dataset, output is a result from the dataset. It will try to answer the question using the dataset, and if it cannot, it will ask for clarification.

    Example Input: "How many rows are in table1?"
    """  # noqa: E501
    llm_chain: Any
    powerbi: PowerBIDataset = Field(exclude=True)
    examples: Optional[str] = DEFAULT_FEWSHOT_EXAMPLES
    session_cache: Dict[str, Any] = Field(default_factory=dict, exclude=True)
    max_iterations: int = 5
    output_token_limit: int = 4000
    tiktoken_model_name: Optional[str] = None  # e.g. "cl100k_base"

    class Config:
        """Configuration for this pydantic object."""

        arbitrary_types_allowed = True

    @validator("llm_chain")
    def validate_llm_chain_input_variables(  # pylint: disable=E0213
        cls, llm_chain: Any
    ) -> Any:
        """Make sure the LLM chain has the correct input variables."""
        for var in llm_chain.prompt.input_variables:
            if var not in ["tool_input", "tables", "schemas", "examples"]:
                raise ValueError(
                    "LLM chain for QueryPowerBITool must have input variables "
                    "['tool_input', 'tables', 'schemas', 'examples'], "
                    f"found {llm_chain.prompt.input_variables}"
                )
        return llm_chain
    def _check_cache(self, tool_input: str) -> Optional[str]:
        """Check if the input is present in the cache.

        Returns the cached result (which may be an error or bad-request
        response stored by an earlier run); returns None if not present."""
        if tool_input not in self.session_cache:
            return None
        return self.session_cache[tool_input]
    def _run(
        self,
        tool_input: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
        **kwargs: Any,
    ) -> str:
        """Execute the query, return the results or an error message."""
        if cache := self._check_cache(tool_input):
            logger.debug("Found cached result for %s: %s", tool_input, cache)
            return cache

        try:
            logger.info("Running PBI Query Tool with input: %s", tool_input)
            query = self.llm_chain.predict(
                tool_input=tool_input,
                tables=self.powerbi.get_table_names(),
                schemas=self.powerbi.get_schemas(),
                examples=self.examples,
                callbacks=run_manager.get_child() if run_manager else None,
            )
        except Exception as exc:  # pylint: disable=broad-except
            self.session_cache[tool_input] = f"Error on call to LLM: {exc}"
            return self.session_cache[tool_input]
        if query == "I cannot answer this":
            self.session_cache[tool_input] = query
            return self.session_cache[tool_input]
        logger.info("PBI Query:\n%s", query)
        start_time = perf_counter()
        pbi_result = self.powerbi.run(command=query)
        end_time = perf_counter()
        logger.debug("PBI Result: %s", pbi_result)
        logger.debug(f"PBI Query duration: {end_time - start_time:0.6f}")
        result, error = self._parse_output(pbi_result)
        if error is not None and "TokenExpired" in error:
            self.session_cache[tool_input] = (
                "Authentication token expired or invalid, "
                "please try to reauthenticate."
            )
            return self.session_cache[tool_input]

        iterations = kwargs.get("iterations", 0)
        if error and iterations < self.max_iterations:
            return self._run(
                tool_input=RETRY_RESPONSE.format(
                    tool_input=tool_input, query=query, error=error
                ),
                run_manager=run_manager,
                iterations=iterations + 1,
            )

        self.session_cache[tool_input] = (
            result if result else BAD_REQUEST_RESPONSE.format(error=error)
        )
        return self.session_cache[tool_input]
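    # NOTE: when _parse_output reports an error, the failed query and the error
    # message are fed back to the LLM via RETRY_RESPONSE, up to max_iterations
    # attempts; the final answer (or error) is memoized per question in
    # session_cache. The async variant below follows the same flow.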
    async def _arun(
        self,
        tool_input: str,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
        **kwargs: Any,
    ) -> str:
        """Execute the query, return the results or an error message."""
        if cache := self._check_cache(tool_input):
            logger.debug("Found cached result for %s: %s", tool_input, cache)
            return f"{cache}, from cache, you have already asked this question."
        try:
            logger.info("Running PBI Query Tool with input: %s", tool_input)
            query = await self.llm_chain.apredict(
                tool_input=tool_input,
                tables=self.powerbi.get_table_names(),
                schemas=self.powerbi.get_schemas(),
                examples=self.examples,
                callbacks=run_manager.get_child() if run_manager else None,
            )
        except Exception as exc:  # pylint: disable=broad-except
            self.session_cache[tool_input] = f"Error on call to LLM: {exc}"
            return self.session_cache[tool_input]

        if query == "I cannot answer this":
            self.session_cache[tool_input] = query
            return self.session_cache[tool_input]
        logger.info("PBI Query: %s", query)
        start_time = perf_counter()
        pbi_result = await self.powerbi.arun(command=query)
        end_time = perf_counter()
        logger.debug("PBI Result: %s", pbi_result)
        logger.debug(f"PBI Query duration: {end_time - start_time:0.6f}")
        result, error = self._parse_output(pbi_result)
        if error is not None and ("TokenExpired" in error or "TokenError" in error):
            self.session_cache[tool_input] = (
                "Authentication token expired or invalid, please try to "
                "reauthenticate or check the scope of the credential."
            )
            return self.session_cache[tool_input]

        iterations = kwargs.get("iterations", 0)
        if error and iterations < self.max_iterations:
            return await self._arun(
                tool_input=RETRY_RESPONSE.format(
                    tool_input=tool_input, query=query, error=error
                ),
                run_manager=run_manager,
                iterations=iterations + 1,
            )

        self.session_cache[tool_input] = (
            result if result else BAD_REQUEST_RESPONSE.format(error=error)
        )
        return self.session_cache[tool_input]
    def _parse_output(
        self, pbi_result: Dict[str, Any]
    ) -> Tuple[Optional[str], Optional[Any]]:
        """Parse the output of the query to a markdown table."""
        if "results" in pbi_result:
            rows = pbi_result["results"][0]["tables"][0]["rows"]
            if len(rows) == 0:
                logger.info("0 records in result, query was valid.")
                return (
                    None,
                    "0 rows returned; this might be correct, but please validate that all filter values are correct.",  # noqa: E501
                )
            result = json_to_md(rows)
            too_long, length = self._result_too_large(result)
            if too_long:
                return (
                    f"Result too large, please try to be more specific or use the `TOPN` function. The result is {length} tokens long, the limit is {self.output_token_limit} tokens.",  # noqa: E501
                    None,
                )
            return result, None

        if "error" in pbi_result:
            if (
                "pbi.error" in pbi_result["error"]
                and "details" in pbi_result["error"]["pbi.error"]
            ):
                return None, pbi_result["error"]["pbi.error"]["details"][0]["detail"]
            return None, pbi_result["error"]
        return None, pbi_result
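    # NOTE: _parse_output assumes the response shape of the Power BI
    # ExecuteQueries REST endpoint, roughly:
    #   {"results": [{"tables": [{"rows": [{"Column": value, ...}, ...]}]}]}
    # and, on failure:
    #   {"error": {"pbi.error": {"details": [{"detail": "..."}]}}}
    # Any other payload is returned verbatim as the error value.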
    def _result_too_large(self, result: str) -> Tuple[bool, int]:
        """Tokenize the output of the query and check it against the token limit."""
        if self.tiktoken_model_name:
            tiktoken_ = _import_tiktoken()
            encoding = tiktoken_.encoding_for_model(self.tiktoken_model_name)
            length = len(encoding.encode(result))
            logger.info("Result length: %s", length)
            return length > self.output_token_limit, length
        return False, 0

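# Illustrative usage sketch (a minimal example, assuming an LLMChain built
# elsewhere and an Azure credential with dataset read access; the identifiers
# below are hypothetical):
#
#     from langchain_community.utilities.powerbi import PowerBIDataset
#
#     dataset = PowerBIDataset(
#         dataset_id="<dataset-guid>",
#         table_names=["table1"],
#         credential=credential,  # an azure.identity TokenCredential
#     )
#     tool = QueryPowerBITool(llm_chain=llm_chain, powerbi=dataset)
#     answer = tool.run("How many rows are in table1?")
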
class InfoPowerBITool(BaseTool):
    """Tool for getting metadata about a Power BI Dataset."""

    name: str = "schema_powerbi"
    description: str = """
    Input to this tool is a comma-separated list of tables, output is the schema and sample rows for those tables.
    Be sure that the tables actually exist by calling list_tables_powerbi first!

    Example Input: "table1, table2, table3"
    """  # noqa: E501
    powerbi: PowerBIDataset = Field(exclude=True)

    class Config:
        """Configuration for this pydantic object."""

        arbitrary_types_allowed = True

    def _run(
        self,
        tool_input: str,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Get the schema for tables in a comma-separated list."""
        return self.powerbi.get_table_info(tool_input.split(", "))

    async def _arun(
        self,
        tool_input: str,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> str:
        """Get the schema for tables in a comma-separated list."""
        return await self.powerbi.aget_table_info(tool_input.split(", "))

class ListPowerBITool(BaseTool):
    """Tool for getting table names."""

    name: str = "list_tables_powerbi"
    description: str = "Input is an empty string, output is a comma separated list of tables in the database."  # noqa: E501 # pylint: disable=C0301
    powerbi: PowerBIDataset = Field(exclude=True)

    class Config:
        """Configuration for this pydantic object."""

        arbitrary_types_allowed = True

    def _run(
        self,
        tool_input: Optional[str] = None,
        run_manager: Optional[CallbackManagerForToolRun] = None,
    ) -> str:
        """Get the names of the tables."""
        return ", ".join(self.powerbi.get_table_names())

    async def _arun(
        self,
        tool_input: Optional[str] = None,
        run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
    ) -> str:
        """Get the names of the tables."""
        return ", ".join(self.powerbi.get_table_names())
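
# The three tools are intended to be used together by an agent: ListPowerBITool
# to discover table names, InfoPowerBITool to fetch schemas and sample rows for
# those tables, and QueryPowerBITool to generate and execute a DAX query. A
# minimal sketch (assuming `llm_chain` and `dataset` as in the example above):
#
#     tools = [
#         QueryPowerBITool(llm_chain=llm_chain, powerbi=dataset),
#         InfoPowerBITool(powerbi=dataset),
#         ListPowerBITool(powerbi=dataset),
#     ]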