feat: Add Client Info to available Google Cloud Clients (#12168)

- This is used internally to gather aggregate usage metrics for the
LangChain integrations (a minimal sketch of the pattern follows the list below)

- Note: This cannot be added to some of the Vertex AI integrations at
this time because the SDK doesn't allow overriding the
[`ClientInfo`](https://googleapis.dev/python/google-api-core/latest/client_info.html#module-google.api_core.client_info)

- Added to:
  - BigQuery
  - Google Cloud Storage
  - Document AI
  - Vertex AI Model Garden
  - Document AI Warehouse
  - Vertex AI Search
  - Vertex AI Matching Engine (Cloud Storage Client)
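For reference, a minimal sketch of the pattern applied throughout this PR, shown here with the BigQuery client (the project name is a placeholder):

```python
from google.cloud import bigquery

from langchain.utilities.vertexai import get_client_info

# Each client touched by this PR is now built with a LangChain-tagged
# ClientInfo, so its API calls report a "langchain/<version>-<module>"
# user agent.
client = bigquery.Client(
    project="my-project",
    client_info=get_client_info(module="bigquery"),
)
```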
 
@baskaryan, @eyurtsev, @hwchase17

---------

Co-authored-by: Eugene Yurtsev <eyurtsev@gmail.com>
Commit 69d9eae5cd (parent 69f5f82804), authored by Holt Skinner, committed via GitHub

@@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, List, Optional
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader
from langchain.utilities.vertexai import get_client_info
if TYPE_CHECKING:
from google.auth.credentials import Credentials
@@ -57,7 +58,11 @@ class BigQueryLoader(BaseLoader):
"Please install it with `pip install google-cloud-bigquery`."
) from ex
bq_client = bigquery.Client(credentials=self.credentials, project=self.project)
bq_client = bigquery.Client(
credentials=self.credentials,
project=self.project,
client_info=get_client_info(module="bigquery"),
)
if not bq_client.project:
error_desc = (
"GCP project for Big Query is not set! Either provide a "

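Nothing changes for callers of the loader; the BigQuery client it builds internally now carries the `ClientInfo` shown above. A hedged usage sketch (query and project are placeholders):

```python
from langchain.document_loaders import BigQueryLoader

# Placeholder query/project; the bigquery.Client created inside load()
# is now constructed with client_info=get_client_info(module="bigquery").
loader = BigQueryLoader(
    query="SELECT title, abstract FROM `my-project.my_dataset.articles` LIMIT 5",
    project="my-project",
)
docs = loader.load()
```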
@@ -3,6 +3,7 @@ from typing import Callable, List, Optional
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader
from langchain.document_loaders.gcs_file import GCSFileLoader
from langchain.utilities.vertexai import get_client_info
class GCSDirectoryLoader(BaseLoader):
@@ -18,7 +19,7 @@ class GCSDirectoryLoader(BaseLoader):
"""Initialize with bucket and key name.
Args:
project_name: The name of the project for the GCS bucket.
project_name: The ID of the project for the GCS bucket.
bucket: The name of the GCS bucket.
prefix: The prefix of the GCS bucket.
loader_func: A loader function that instantiates a loader based on a
@@ -39,7 +40,10 @@ class GCSDirectoryLoader(BaseLoader):
"Could not import google-cloud-storage python package. "
"Please install it with `pip install google-cloud-storage`."
)
client = storage.Client(project=self.project_name)
client = storage.Client(
project=self.project_name,
client_info=get_client_info(module="google-cloud-storage"),
)
docs = []
for blob in client.list_blobs(self.bucket, prefix=self.prefix):
# we shall just skip directories since GCSFileLoader creates

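The directory loader behaves the same from the caller's side; a sketch with placeholder project, bucket, and prefix:

```python
from langchain.document_loaders import GCSDirectoryLoader

# Placeholder names; the storage.Client that lists the blobs is now created
# with the LangChain client_info.
loader = GCSDirectoryLoader(project_name="my-project", bucket="my-bucket", prefix="reports/")
docs = loader.load()
```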
@@ -5,6 +5,7 @@ from typing import Callable, List, Optional
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader
from langchain.document_loaders.unstructured import UnstructuredFileLoader
from langchain.utilities.vertexai import get_client_info
class GCSFileLoader(BaseLoader):
@@ -57,7 +58,9 @@ class GCSFileLoader(BaseLoader):
)
# Initialise a client
storage_client = storage.Client(self.project_name)
storage_client = storage.Client(
self.project_name, client_info=get_client_info("google-cloud-storage")
)
# Create a bucket object for our bucket
bucket = storage_client.get_bucket(self.bucket)
# Create a blob object from the filepath

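Same story for a single object; a sketch with placeholder names:

```python
from langchain.document_loaders import GCSFileLoader

# Placeholder names; only the user agent of the underlying storage.Client changes.
loader = GCSFileLoader(project_name="my-project", bucket="my-bucket", blob="reports/2023-q3.pdf")
docs = loader.load()
```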
@@ -12,6 +12,7 @@ from typing import TYPE_CHECKING, Iterator, List, Optional, Sequence
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseBlobParser
from langchain.document_loaders.blob_loaders import Blob
from langchain.utilities.vertexai import get_client_info
from langchain.utils.iter import batch_iterate
if TYPE_CHECKING:
@@ -89,7 +90,10 @@ class DocAIParser(BaseBlobParser):
options = ClientOptions(
api_endpoint=f"{location}-documentai.googleapis.com"
)
self._client = DocumentProcessorServiceClient(client_options=options)
self._client = DocumentProcessorServiceClient(
client_options=options,
client_info=get_client_info(module="document-ai"),
)
def lazy_parse(self, blob: Blob) -> Iterator[Document]:
"""Parses a blob lazily.

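A rough usage sketch of the parser. Only `location` is visible in the hunk above; the import path, the `processor_name` and `gcs_output_path` keywords, and the GCS URIs are assumptions to check against the `DocAIParser` constructor:

```python
from langchain.document_loaders.blob_loaders import Blob
from langchain.document_loaders.parsers import DocAIParser  # assumed import path

# processor_name and gcs_output_path are assumed keyword names with placeholder
# values; the DocumentProcessorServiceClient behind them now carries the
# LangChain client_info.
parser = DocAIParser(
    location="us",
    processor_name="projects/PROJECT_ID/locations/us/processors/PROCESSOR_ID",
    gcs_output_path="gs://my-bucket/docai-output/",
)
docs = list(parser.lazy_parse(Blob(path="gs://my-bucket/invoices/invoice.pdf")))
```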
@@ -25,6 +25,7 @@ from langchain.schema import (
)
from langchain.schema.output import GenerationChunk
from langchain.utilities.vertexai import (
get_client_info,
init_vertexai,
raise_vertex_import_error,
)
@@ -370,9 +371,12 @@ class VertexAIModelGarden(_VertexAIBase, BaseLLM):
client_options = ClientOptions(
api_endpoint=f"{values['location']}-aiplatform.googleapis.com"
)
values["client"] = PredictionServiceClient(client_options=client_options)
client_info = get_client_info(module="vertex-ai-model-garden")
values["client"] = PredictionServiceClient(
client_options=client_options, client_info=client_info
)
values["async_client"] = PredictionServiceAsyncClient(
client_options=client_options
client_options=client_options, client_info=client_info
)
return values

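Both the sync and async prediction clients now share one `client_info`. A hedged sketch of the Model Garden LLM (project, endpoint id, and location are placeholders):

```python
from langchain.llms import VertexAIModelGarden

# Placeholder values; both PredictionServiceClient instances are now created
# with client_info=get_client_info(module="vertex-ai-model-garden").
llm = VertexAIModelGarden(
    project="my-project",
    endpoint_id="1234567890",
    location="us-central1",
)
print(llm("Tell me about Vertex AI Model Garden."))
```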
@@ -5,6 +5,7 @@ from langchain.callbacks.manager import CallbackManagerForRetrieverRun
from langchain.docstore.document import Document
from langchain.pydantic_v1 import root_validator
from langchain.schema import BaseRetriever
from langchain.utilities.vertexai import get_client_info
from langchain.utils import get_from_dict_or_env
if TYPE_CHECKING:
@@ -29,23 +30,21 @@ class GoogleDocumentAIWarehouseRetriever(BaseRetriever):
"""
location: str = "us"
"GCP location where DocAI Warehouse is placed."
"""Google Cloud location where Document AI Warehouse is placed."""
project_number: str
"GCP project number, should contain digits only."
"""Google Cloud project number, should contain digits only."""
schema_id: Optional[str] = None
"DocAI Warehouse schema to queary against. If nothing is provided, all documents "
"in the project will be searched."
"""Document AI Warehouse schema to query against.
If nothing is provided, all documents in the project will be searched."""
qa_size_limit: int = 5
"The limit on the number of documents returned."
"""The limit on the number of documents returned."""
client: "DocumentServiceClient" = None #: :meta private:
@root_validator()
def validate_environment(cls, values: Dict) -> Dict:
"""Validates the environment."""
try: # noqa: F401
from google.cloud.contentwarehouse_v1 import (
DocumentServiceClient,
)
from google.cloud.contentwarehouse_v1 import DocumentServiceClient
except ImportError as exc:
raise ImportError(
"google.cloud.contentwarehouse is not installed."
@@ -55,7 +54,9 @@ class GoogleDocumentAIWarehouseRetriever(BaseRetriever):
values["project_number"] = get_from_dict_or_env(
values, "project_number", "PROJECT_NUMBER"
)
values["client"] = DocumentServiceClient()
values["client"] = DocumentServiceClient(
client_info=get_client_info(module="document-ai-warehouse")
)
return values
def _prepare_request_metadata(self, user_ldap: str) -> "RequestMetadata":

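A hedged sketch of the retriever. The fields are the ones shown in the diff; the `user_ldap` keyword is inferred from `_prepare_request_metadata` above, and the import path is an assumption:

```python
from langchain.retrievers import GoogleDocumentAIWarehouseRetriever  # assumed import path

# project_number and qa_size_limit are the fields documented above; the values
# are placeholders, and the DocumentServiceClient now carries the LangChain
# client_info.
retriever = GoogleDocumentAIWarehouseRetriever(project_number="123456789012", qa_size_limit=5)
docs = retriever.get_relevant_documents("When does the contract terminate?", user_ldap="jdoe")
```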
@@ -6,6 +6,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
from langchain.pydantic_v1 import BaseModel, Extra, Field, root_validator
from langchain.schema import BaseRetriever, Document
from langchain.utilities.vertexai import get_client_info
from langchain.utils import get_from_dict_or_env
if TYPE_CHECKING:
@@ -260,7 +261,9 @@ class GoogleVertexAISearchRetriever(BaseRetriever, _BaseGoogleVertexAISearchRetr
# For more information, refer to:
# https://cloud.google.com/generative-ai-app-builder/docs/locations#specify_a_multi-region_for_your_data_store
self._client = SearchServiceClient(
credentials=self.credentials, client_options=self.client_options
credentials=self.credentials,
client_options=self.client_options,
client_info=get_client_info(module="vertex-ai-search"),
)
self._serving_config = self._client.serving_config_path(
@@ -387,7 +390,9 @@ class GoogleVertexAIMultiTurnSearchRetriever(
)
self._client = ConversationalSearchServiceClient(
credentials=self.credentials, client_options=self.client_options
credentials=self.credentials,
client_options=self.client_options,
client_info=get_client_info(module="vertex-ai-search"),
)
self._serving_config = self._client.serving_config_path(

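Both search retrievers tag their clients with the same module name. A hedged construction sketch (the ids are placeholders):

```python
from langchain.retrievers import GoogleVertexAISearchRetriever

# Placeholder project and data store ids; the SearchServiceClient is now
# created with client_info=get_client_info(module="vertex-ai-search").
retriever = GoogleVertexAISearchRetriever(
    project_id="my-project",
    data_store_id="my-data-store",
    location_id="global",
)
docs = retriever.get_relevant_documents("quarterly revenue")
```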
@@ -1,7 +1,9 @@
"""Utilities to init Vertex AI."""
from importlib import metadata
from typing import TYPE_CHECKING, Optional
if TYPE_CHECKING:
from google.api_core.gapic_v1.client_info import ClientInfo
from google.auth.credentials import Credentials
@@ -46,3 +48,30 @@ def init_vertexai(
location=location,
credentials=credentials,
)
def get_client_info(module: Optional[str] = None) -> "ClientInfo":
r"""Returns a custom user agent header.
Args:
module (Optional[str]):
Optional. The module for a custom user agent header.
Returns:
google.api_core.gapic_v1.client_info.ClientInfo
"""
try:
from google.api_core.gapic_v1.client_info import ClientInfo
except ImportError as exc:
raise ImportError(
"Could not import ClientInfo. Please, install it with "
"pip install google-api-core"
) from exc
langchain_version = metadata.version("langchain")
client_library_version = (
f"{langchain_version}-{module}" if module else langchain_version
)
return ClientInfo(
client_library_version=client_library_version,
user_agent=f"langchain/{client_library_version}",
)

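A quick check of what the helper returns, grounded in the implementation above; only the concrete version numbers are illustrative:

```python
from langchain.utilities.vertexai import get_client_info

info = get_client_info(module="bigquery")
# client_library_version is "<langchain-version>-bigquery", e.g. "0.0.320-bigquery";
# user_agent is "langchain/<langchain-version>-bigquery".
print(info.client_library_version, info.user_agent)

# Without a module, both fall back to the bare langchain version,
# e.g. user_agent == "langchain/0.0.320".
print(get_client_info().user_agent)
```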
@@ -9,6 +9,7 @@ from typing import TYPE_CHECKING, Any, Iterable, List, Optional, Type
from langchain.schema.document import Document
from langchain.schema.embeddings import Embeddings
from langchain.schema.vectorstore import VectorStore
from langchain.utilities.vertexai import get_client_info
if TYPE_CHECKING:
from google.cloud import storage
@@ -419,7 +420,11 @@ class MatchingEngine(VectorStore):
from google.cloud import storage
return storage.Client(credentials=credentials, project=project_id)
return storage.Client(
credentials=credentials,
project=project_id,
client_info=get_client_info(module="vertex-ai-matching-engine"),
)
@classmethod
def _init_aiplatform(
