From 2a4b32dee24c22159805f643b87eece107224951 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Tue, 6 Jun 2023 14:22:49 -0700 Subject: [PATCH] Revise DATABRICKS_API_TOKEN as DATABRICKS_TOKEN (#5796) In the [Databricks integration](https://python.langchain.com/en/latest/integrations/databricks.html) and [Databricks LLM](https://python.langchain.com/en/latest/modules/models/llms/integrations/databricks.html), we suggested that users set the ENV variable `DATABRICKS_API_TOKEN`. However, this is inconsistent with the other Databricks library. To make it consistent, this PR changes the variable from `DATABRICKS_API_TOKEN` to `DATABRICKS_TOKEN`. After changes, there is no more `DATABRICKS_API_TOKEN` in the doc ``` $ git grep DATABRICKS_API_TOKEN|wc -l 0 $ git grep DATABRICKS_TOKEN|wc -l 8 ``` cc @hwchase17 @dev2049 @mengxr since you have reviewed the previous PRs. --- docs/integrations/databricks.ipynb | 2 +- docs/modules/models/llms/integrations/databricks.ipynb | 4 ++-- langchain/llms/databricks.py | 6 +++--- langchain/sql_database.py | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/integrations/databricks.ipynb b/docs/integrations/databricks.ipynb index 4cce07f7..e3636150 100644 --- a/docs/integrations/databricks.ipynb +++ b/docs/integrations/databricks.ipynb @@ -58,7 +58,7 @@ "### Optional Parameters\n", "There following parameters are optional. When executing the method in a Databricks notebook, you don't need to provide them in most of the cases.\n", "* `host`: The Databricks workspace hostname, excluding 'https://' part. Defaults to 'DATABRICKS_HOST' environment variable or current workspace if in a Databricks notebook.\n", - "* `api_token`: The Databricks personal access token for accessing the Databricks SQL warehouse or the cluster. 
Defaults to 'DATABRICKS_API_TOKEN' environment variable or a temporary one is generated if in a Databricks notebook.\n", + "* `api_token`: The Databricks personal access token for accessing the Databricks SQL warehouse or the cluster. Defaults to 'DATABRICKS_TOKEN' environment variable or a temporary one is generated if in a Databricks notebook.\n", "* `warehouse_id`: The warehouse ID in the Databricks SQL.\n", "* `cluster_id`: The cluster ID in the Databricks Runtime. If running in a Databricks notebook and both 'warehouse_id' and 'cluster_id' are None, it uses the ID of the cluster the notebook is attached to.\n", "* `engine_args`: The arguments to be used when connecting Databricks.\n", diff --git a/docs/modules/models/llms/integrations/databricks.ipynb b/docs/modules/models/llms/integrations/databricks.ipynb index 68425cf4..cc0c1a96 100644 --- a/docs/modules/models/llms/integrations/databricks.ipynb +++ b/docs/modules/models/llms/integrations/databricks.ipynb @@ -163,14 +163,14 @@ ], "source": [ "# Otherwise, you can manually specify the Databricks workspace hostname and personal access token \n", - "# or set `DATABRICKS_HOST` and `DATABRICKS_API_TOKEN` environment variables, respectively.\n", + "# or set `DATABRICKS_HOST` and `DATABRICKS_TOKEN` environment variables, respectively.\n", "# See https://docs.databricks.com/dev-tools/auth.html#databricks-personal-access-tokens\n", "# We strongly recommend not exposing the API token explicitly inside a notebook.\n", "# You can use Databricks secret manager to store your API token securely.\n", "# See https://docs.databricks.com/dev-tools/databricks-utils.html#secrets-utility-dbutilssecrets\n", "\n", "import os\n", - "os.environ[\"DATABRICKS_API_TOKEN\"] = dbutils.secrets.get(\"myworkspace\", \"api_token\")\n", + "os.environ[\"DATABRICKS_TOKEN\"] = dbutils.secrets.get(\"myworkspace\", \"api_token\")\n", "\n", "llm = Databricks(host=\"myworkspace.cloud.databricks.com\", endpoint_name=\"dolly\")\n", "\n", diff --git 
a/langchain/llms/databricks.py b/langchain/llms/databricks.py index d3ba3a16..b0e0007c 100644 --- a/langchain/llms/databricks.py +++ b/langchain/llms/databricks.py @@ -114,7 +114,7 @@ def get_default_api_token() -> str: """Gets the default Databricks personal access token. Raises an error if the token cannot be automatically determined. """ - if api_token := os.getenv("DATABRICKS_API_TOKEN"): + if api_token := os.getenv("DATABRICKS_TOKEN"): return api_token try: api_token = get_repl_context().apiToken @@ -123,7 +123,7 @@ def get_default_api_token() -> str: except Exception as e: raise ValueError( "api_token was not set and cannot be automatically inferred. Set " - f"environment variable 'DATABRICKS_API_TOKEN'. Received error: {e}" + f"environment variable 'DATABRICKS_TOKEN'. Received error: {e}" ) # TODO: support Databricks CLI profile return api_token @@ -186,7 +186,7 @@ class Databricks(LLM): """Databricks personal access token. If not provided, the default value is determined by - * the ``DATABRICKS_API_TOKEN`` environment variable if present, or + * the ``DATABRICKS_TOKEN`` environment variable if present, or * an automatically generated temporary token if running inside a Databricks notebook attached to an interactive cluster in "single user" or "no isolation shared" mode. diff --git a/langchain/sql_database.py b/langchain/sql_database.py index 088e0aea..088d7374 100644 --- a/langchain/sql_database.py +++ b/langchain/sql_database.py @@ -150,7 +150,7 @@ class SQLDatabase: hostname. Defaults to None. api_token (Optional[str]): The Databricks personal access token for accessing the Databricks SQL warehouse or the cluster. If not provided, - it attempts to fetch from 'DATABRICKS_API_TOKEN'. If still unavailable + it attempts to fetch from 'DATABRICKS_TOKEN'. If still unavailable and running in a Databricks notebook, a temporary token for the current user is generated. Defaults to None. warehouse_id (Optional[str]): The warehouse ID in the Databricks SQL. 
If @@ -197,7 +197,7 @@ class SQLDatabase: default_api_token = context.apiToken if context else None if api_token is None: api_token = utils.get_from_env( - "api_token", "DATABRICKS_API_TOKEN", default_api_token + "api_token", "DATABRICKS_TOKEN", default_api_token ) if warehouse_id is None and cluster_id is None: