langchain/libs/community/langchain_community/document_loaders/bigquery.py
Bagatur ed58eeb9c5
community[major], core[patch], langchain[patch], experimental[patch]: Create langchain-community (#14463)
Moved the following modules to new package langchain-community in a backwards compatible fashion:

```
mv langchain/langchain/adapters community/langchain_community
mv langchain/langchain/callbacks community/langchain_community/callbacks
mv langchain/langchain/chat_loaders community/langchain_community
mv langchain/langchain/chat_models community/langchain_community
mv langchain/langchain/document_loaders community/langchain_community
mv langchain/langchain/docstore community/langchain_community
mv langchain/langchain/document_transformers community/langchain_community
mv langchain/langchain/embeddings community/langchain_community
mv langchain/langchain/graphs community/langchain_community
mv langchain/langchain/llms community/langchain_community
mv langchain/langchain/memory/chat_message_histories community/langchain_community
mv langchain/langchain/retrievers community/langchain_community
mv langchain/langchain/storage community/langchain_community
mv langchain/langchain/tools community/langchain_community
mv langchain/langchain/utilities community/langchain_community
mv langchain/langchain/vectorstores community/langchain_community
mv langchain/langchain/agents/agent_toolkits community/langchain_community
mv langchain/langchain/cache.py community/langchain_community
mv langchain/langchain/adapters community/langchain_community
mv langchain/langchain/callbacks community/langchain_community/callbacks
mv langchain/langchain/chat_loaders community/langchain_community
mv langchain/langchain/chat_models community/langchain_community
mv langchain/langchain/document_loaders community/langchain_community
mv langchain/langchain/docstore community/langchain_community
mv langchain/langchain/document_transformers community/langchain_community
mv langchain/langchain/embeddings community/langchain_community
mv langchain/langchain/graphs community/langchain_community
mv langchain/langchain/llms community/langchain_community
mv langchain/langchain/memory/chat_message_histories community/langchain_community
mv langchain/langchain/retrievers community/langchain_community
mv langchain/langchain/storage community/langchain_community
mv langchain/langchain/tools community/langchain_community
mv langchain/langchain/utilities community/langchain_community
mv langchain/langchain/vectorstores community/langchain_community
mv langchain/langchain/agents/agent_toolkits community/langchain_community
mv langchain/langchain/cache.py community/langchain_community
```

Moved the following to core
```
mv langchain/langchain/utils/json_schema.py core/langchain_core/utils
mv langchain/langchain/utils/html.py core/langchain_core/utils
mv langchain/langchain/utils/strings.py core/langchain_core/utils
cat langchain/langchain/utils/env.py >> core/langchain_core/utils/env.py
rm langchain/langchain/utils/env.py
```

See .scripts/community_split/script_integrations.sh for all changes
2023-12-11 13:53:30 -08:00

95 lines
3.6 KiB
Python

from __future__ import annotations
from typing import TYPE_CHECKING, List, Optional
from langchain_core.documents import Document
from langchain_community.document_loaders.base import BaseLoader
from langchain_community.utilities.vertexai import get_client_info
if TYPE_CHECKING:
from google.auth.credentials import Credentials
class BigQueryLoader(BaseLoader):
"""Load from the Google Cloud Platform `BigQuery`.
Each document represents one row of the result. The `page_content_columns`
are written into the `page_content` of the document. The `metadata_columns`
are written into the `metadata` of the document. By default, all columns
are written into the `page_content` and none into the `metadata`.
"""
def __init__(
self,
query: str,
project: Optional[str] = None,
page_content_columns: Optional[List[str]] = None,
metadata_columns: Optional[List[str]] = None,
credentials: Optional[Credentials] = None,
):
"""Initialize BigQuery document loader.
Args:
query: The query to run in BigQuery.
project: Optional. The project to run the query in.
page_content_columns: Optional. The columns to write into the `page_content`
of the document.
metadata_columns: Optional. The columns to write into the `metadata` of the
document.
credentials : google.auth.credentials.Credentials, optional
Credentials for accessing Google APIs. Use this parameter to override
default credentials, such as to use Compute Engine
(`google.auth.compute_engine.Credentials`) or Service Account
(`google.oauth2.service_account.Credentials`) credentials directly.
"""
self.query = query
self.project = project
self.page_content_columns = page_content_columns
self.metadata_columns = metadata_columns
self.credentials = credentials
def load(self) -> List[Document]:
try:
from google.cloud import bigquery
except ImportError as ex:
raise ImportError(
"Could not import google-cloud-bigquery python package. "
"Please install it with `pip install google-cloud-bigquery`."
) from ex
bq_client = bigquery.Client(
credentials=self.credentials,
project=self.project,
client_info=get_client_info(module="bigquery"),
)
if not bq_client.project:
error_desc = (
"GCP project for Big Query is not set! Either provide a "
"`project` argument during BigQueryLoader instantiation, "
"or set a default project with `gcloud config set project` "
"command."
)
raise ValueError(error_desc)
query_result = bq_client.query(self.query).result()
docs: List[Document] = []
page_content_columns = self.page_content_columns
metadata_columns = self.metadata_columns
if page_content_columns is None:
page_content_columns = [column.name for column in query_result.schema]
if metadata_columns is None:
metadata_columns = []
for row in query_result:
page_content = "\n".join(
f"{k}: {v}" for k, v in row.items() if k in page_content_columns
)
metadata = {k: v for k, v in row.items() if k in metadata_columns}
doc = Document(page_content=page_content, metadata=metadata)
docs.append(doc)
return docs