From 199cc700a344a2b15dff3a8924746a5ceb1aad7e Mon Sep 17 00:00:00 2001 From: Natalie Date: Tue, 30 May 2023 18:25:22 -0500 Subject: [PATCH] Ability to specify credentials when using Google BigQuery as a data loader (#5466) # Adds ability to specify credentials when using Google BigQuery as a data loader Fixes #5465. Adds ability to set credentials which must be of the `google.auth.credentials.Credentials` type. This argument is optional and will default to `None`. Co-authored-by: Dev 2049 --- langchain/document_loaders/bigquery.py | 27 ++++++++++++++++++++++++-- poetry.lock | 7 +++---- pyproject.toml | 2 ++ 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/langchain/document_loaders/bigquery.py b/langchain/document_loaders/bigquery.py index b9e1fe1d..a185bb3c 100644 --- a/langchain/document_loaders/bigquery.py +++ b/langchain/document_loaders/bigquery.py @@ -1,8 +1,13 @@ -from typing import List, Optional +from __future__ import annotations + +from typing import TYPE_CHECKING, List, Optional from langchain.docstore.document import Document from langchain.document_loaders.base import BaseLoader +if TYPE_CHECKING: + from google.auth.credentials import Credentials + class BigQueryLoader(BaseLoader): """Loads a query result from BigQuery into a list of documents. @@ -11,6 +16,7 @@ class BigQueryLoader(BaseLoader): are written into the `page_content` of the document. The `metadata_columns` are written into the `metadata` of the document. By default, all columns are written into the `page_content` and none into the `metadata`. + """ def __init__( @@ -19,11 +25,28 @@ class BigQueryLoader(BaseLoader): project: Optional[str] = None, page_content_columns: Optional[List[str]] = None, metadata_columns: Optional[List[str]] = None, + credentials: Optional[Credentials] = None, ): + """Initialize BigQuery document loader. + + Args: + query: The query to run in BigQuery. + project: Optional. The project to run the query in. + page_content_columns: Optional. 
The columns to write into the `page_content` + of the document. + metadata_columns: Optional. The columns to write into the `metadata` of the + document. + credentials : google.auth.credentials.Credentials, optional + Credentials for accessing Google APIs. Use this parameter to override + default credentials, such as to use Compute Engine + (`google.auth.compute_engine.Credentials`) or Service Account + (`google.oauth2.service_account.Credentials`) credentials directly. + """ self.query = query self.project = project self.page_content_columns = page_content_columns self.metadata_columns = metadata_columns + self.credentials = credentials def load(self) -> List[Document]: try: @@ -34,7 +57,7 @@ class BigQueryLoader(BaseLoader): "Please install it with `pip install google-cloud-bigquery`." ) from ex - bq_client = bigquery.Client(self.project) + bq_client = bigquery.Client(credentials=self.credentials, project=self.project) query_result = bq_client.query(self.query).result() docs: List[Document] = [] diff --git a/poetry.lock b/poetry.lock index d3806c37..61730d1b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.4.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand. 
[[package]] name = "absl-py" @@ -6950,7 +6950,6 @@ files = [ {file = "pylance-0.4.12-cp38-abi3-macosx_10_15_x86_64.whl", hash = "sha256:2b86fb8dccc03094c0db37bef0d91bda60e8eb0d1eddf245c6971450c8d8a53f"}, {file = "pylance-0.4.12-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:0bc82914b13204187d673b5f3d45f93219c38a0e9d0542ba251074f639669789"}, {file = "pylance-0.4.12-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5a4bcce77f99ecd4cbebbadb01e58d5d8138d40eb56bdcdbc3b20b0475e7a472"}, - {file = "pylance-0.4.12-cp38-abi3-win_amd64.whl", hash = "sha256:9616931c5300030adb9626d22515710a127d1e46a46737a7a0f980b52f13627c"}, ] [package.dependencies] @@ -10949,7 +10948,7 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\ cffi = ["cffi (>=1.11)"] [extras] -all = ["O365", "aleph-alpha-client", "anthropic", "arxiv", "atlassian-python-api", "azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-cosmos", "azure-identity", "beautifulsoup4", "clickhouse-connect", "cohere", "deeplake", "docarray", "duckduckgo-search", "elasticsearch", "faiss-cpu", "google-api-python-client", "google-search-results", "gptcache", "html2text", "huggingface_hub", "jina", "jinja2", "jq", "lancedb", "langkit", "lark", "lxml", "manifest-ml", "momento", "neo4j", "networkx", "nlpcloud", "nltk", "nomic", "openai", "openlm", "opensearch-py", "pdfminer-six", "pexpect", "pgvector", "pinecone-client", "pinecone-text", "psycopg2-binary", "pymongo", "pyowm", "pypdf", "pytesseract", "pyvespa", "qdrant-client", "redis", "requests-toolbelt", "sentence-transformers", "spacy", "steamship", "tensorflow-text", "tiktoken", "torch", "transformers", "weaviate-client", "wikipedia", "wolframalpha"] +all = ["O365", "aleph-alpha-client", "anthropic", "arxiv", "atlassian-python-api", "azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-cosmos", "azure-identity", "beautifulsoup4", "clickhouse-connect", "cohere", 
"deeplake", "docarray", "duckduckgo-search", "elasticsearch", "faiss-cpu", "google-api-python-client", "google-auth", "google-search-results", "gptcache", "html2text", "huggingface_hub", "jina", "jinja2", "jq", "lancedb", "langkit", "lark", "lxml", "manifest-ml", "momento", "neo4j", "networkx", "nlpcloud", "nltk", "nomic", "openai", "openlm", "opensearch-py", "pdfminer-six", "pexpect", "pgvector", "pinecone-client", "pinecone-text", "psycopg2-binary", "pymongo", "pyowm", "pypdf", "pytesseract", "pyvespa", "qdrant-client", "redis", "requests-toolbelt", "sentence-transformers", "spacy", "steamship", "tensorflow-text", "tiktoken", "torch", "transformers", "weaviate-client", "wikipedia", "wolframalpha"] azure = ["azure-ai-formrecognizer", "azure-ai-vision", "azure-cognitiveservices-speech", "azure-core", "azure-cosmos", "azure-identity", "openai"] cohere = ["cohere"] docarray = ["docarray"] @@ -10963,4 +10962,4 @@ text-helpers = ["chardet"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "6a28a31679ae3bdb156121ff7c09bfb1f691345f445196eb0384f08e031c84d3" +content-hash = "379bfcf130acc24f2f8408e2bb7e3ae9d769070e6bf5f66868491bddb1b2fc53" diff --git a/pyproject.toml b/pyproject.toml index a5c9dd89..95853b3a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,7 @@ pymongo = {version = "^4.3.3", optional = true} clickhouse-connect = {version="^0.5.14", optional=true} weaviate-client = {version = "^3", optional = true} google-api-python-client = {version = "2.70.0", optional = true} +google-auth = {version = "^2.18.1", optional = true} wolframalpha = {version = "5.0.0", optional = true} anthropic = {version = "^0.2.6", optional = true} qdrant-client = {version = "^1.1.2", optional = true, python = ">=3.8.1,<3.12"} @@ -239,6 +240,7 @@ all = [ "weaviate-client", "redis", "google-api-python-client", + "google-auth", "wolframalpha", "qdrant-client", "tensorflow-text",