Datadog logs loader (#7356)

### Description
Created a Loader to get a list of specific logs from Datadog Logs.

### Dependencies
`datadog_api_client` is required.

### Twitter handle
[kzk_maeda](https://twitter.com/kzk_maeda)

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in:
Kazuki Maeda 2023-07-10 17:27:55 +09:00 committed by GitHub
parent 7d29bb2c02
commit 92b4418c8c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 254 additions and 0 deletions

View File

@ -0,0 +1,19 @@
# Datadog Logs
>[Datadog](https://www.datadoghq.com/) is a monitoring and analytics platform for cloud-scale applications.
## Installation and Setup
```bash
pip install datadog_api_client
```
We must initialize the loader with the Datadog API key and APP key, and we need to set up the query to extract the desired logs.
## Document Loader
See a [usage example](/docs/modules/data_connection/document_loaders/integrations/datadog_logs.html).
```python
from langchain.document_loaders import DatadogLogsLoader
```

View File

@ -0,0 +1,96 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# Datadog Logs\n",
"\n",
">[Datadog](https://www.datadoghq.com/) is a monitoring and analytics platform for cloud-scale applications.\n",
"\n",
"This loader fetches the logs from your applications in Datadog using the `datadog_api_client` Python package. You must initialize the loader with your `Datadog API key` and `APP key`, and you need to pass in the query to extract the desired logs."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import DatadogLogsLoader"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#!pip install datadog-api-client"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"query = \"service:agent status:error\"\n",
"\n",
"loader = DatadogLogsLoader(\n",
" query=query,\n",
" api_key=DD_API_KEY,\n",
" app_key=DD_APP_KEY,\n",
" from_time=1688732708951, # Optional, timestamp in milliseconds\n",
" to_time=1688736308951, # Optional, timestamp in milliseconds\n",
" limit=100, # Optional, default is 100\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='message: grep: /etc/datadog-agent/system-probe.yaml: No such file or directory', metadata={'id': 'AgAAAYkwpLImvkjRpQAAAAAAAAAYAAAAAEFZa3dwTUFsQUFEWmZfLU5QdElnM3dBWQAAACQAAAAAMDE4OTMwYTQtYzk3OS00MmJjLTlhNDAtOTY4N2EwY2I5ZDdk', 'status': 'error', 'service': 'agent', 'tags': ['accessible-from-goog-gke-node', 'allow-external-ingress-high-ports', 'allow-external-ingress-http', 'allow-external-ingress-https', 'container_id:c7d8ecd27b5b3cfdf3b0df04b8965af6f233f56b7c3c2ffabfab5e3b6ccbd6a5', 'container_name:lab_datadog_1', 'datadog.pipelines:false', 'datadog.submission_auth:private_api_key', 'docker_image:datadog/agent:7.41.1', 'env:dd101-dev', 'hostname:lab-host', 'image_name:datadog/agent', 'image_tag:7.41.1', 'instance-id:7497601202021312403', 'instance-type:custom-1-4096', 'instruqt_aws_accounts:', 'instruqt_azure_subscriptions:', 'instruqt_gcp_projects:', 'internal-hostname:lab-host.d4rjybavkary.svc.cluster.local', 'numeric_project_id:3390740675', 'p-d4rjybavkary', 'project:instruqt-prod', 'service:agent', 'short_image:agent', 'source:agent', 'zone:europe-west1-b'], 'timestamp': datetime.datetime(2023, 7, 7, 13, 57, 27, 206000, tzinfo=tzutc())}),\n",
" Document(page_content='message: grep: /etc/datadog-agent/system-probe.yaml: No such file or directory', metadata={'id': 'AgAAAYkwpLImvkjRpgAAAAAAAAAYAAAAAEFZa3dwTUFsQUFEWmZfLU5QdElnM3dBWgAAACQAAAAAMDE4OTMwYTQtYzk3OS00MmJjLTlhNDAtOTY4N2EwY2I5ZDdk', 'status': 'error', 'service': 'agent', 'tags': ['accessible-from-goog-gke-node', 'allow-external-ingress-high-ports', 'allow-external-ingress-http', 'allow-external-ingress-https', 'container_id:c7d8ecd27b5b3cfdf3b0df04b8965af6f233f56b7c3c2ffabfab5e3b6ccbd6a5', 'container_name:lab_datadog_1', 'datadog.pipelines:false', 'datadog.submission_auth:private_api_key', 'docker_image:datadog/agent:7.41.1', 'env:dd101-dev', 'hostname:lab-host', 'image_name:datadog/agent', 'image_tag:7.41.1', 'instance-id:7497601202021312403', 'instance-type:custom-1-4096', 'instruqt_aws_accounts:', 'instruqt_azure_subscriptions:', 'instruqt_gcp_projects:', 'internal-hostname:lab-host.d4rjybavkary.svc.cluster.local', 'numeric_project_id:3390740675', 'p-d4rjybavkary', 'project:instruqt-prod', 'service:agent', 'short_image:agent', 'source:agent', 'zone:europe-west1-b'], 'timestamp': datetime.datetime(2023, 7, 7, 13, 57, 27, 206000, tzinfo=tzutc())})]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"documents = loader.load()\n",
"documents"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.11"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -30,6 +30,7 @@ from langchain.document_loaders.confluence import ConfluenceLoader
from langchain.document_loaders.conllu import CoNLLULoader
from langchain.document_loaders.csv_loader import CSVLoader, UnstructuredCSVLoader
from langchain.document_loaders.cube_semantic import CubeSemanticLoader
from langchain.document_loaders.datadog_logs import DatadogLogsLoader
from langchain.document_loaders.dataframe import DataFrameLoader
from langchain.document_loaders.diffbot import DiffbotLoader
from langchain.document_loaders.directory import DirectoryLoader
@ -179,6 +180,7 @@ __all__ = [
"CollegeConfidentialLoader",
"ConfluenceLoader",
"CubeSemanticLoader",
"DatadogLogsLoader",
"DataFrameLoader",
"DiffbotLoader",
"DirectoryLoader",

View File

@ -0,0 +1,137 @@
"""Load Datadog logs."""
from datetime import datetime, timedelta
from typing import List, Optional
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader
class DatadogLogsLoader(BaseLoader):
"""Loads a query result from Datadog into a list of documents.
Logs are written into the `page_content` and into the `metadata`.
"""
def __init__(
self,
query: str,
api_key: str,
app_key: str,
from_time: Optional[int] = None,
to_time: Optional[int] = None,
limit: int = 100,
) -> None:
"""Initialize Datadog document loader.
Requirements:
- Must have datadog_api_client installed. Install with `pip install datadog_api_client`.
Args:
query: The query to run in Datadog.
api_key: The Datadog API key.
app_key: The Datadog APP key.
from_time: Optional. The start of the time range to query.
Supports date math and regular timestamps (milliseconds) like '1688732708951'
Defaults to 20 minutes ago.
to_time: Optional. The end of the time range to query.
Supports date math and regular timestamps (milliseconds) like '1688732708951'
Defaults to now.
limit: The maximum number of logs to return.
Defaults to 100.
""" # noqa: E501
try:
from datadog_api_client import Configuration
except ImportError as ex:
raise ImportError(
"Could not import datadog_api_client python package. "
"Please install it with `pip install datadog_api_client`."
) from ex
self.query = query
configuration = Configuration()
configuration.api_key["apiKeyAuth"] = api_key
configuration.api_key["appKeyAuth"] = app_key
self.configuration = configuration
self.from_time = from_time
self.to_time = to_time
self.limit = limit
def parse_log(self, log: dict) -> Document:
"""
Create Document objects from Datadog log items.
"""
attributes = log.get("attributes", {})
metadata = {
"id": log.get("id", ""),
"status": attributes.get("status"),
"service": attributes.get("service", ""),
"tags": attributes.get("tags", []),
"timestamp": attributes.get("timestamp", ""),
}
message = attributes.get("message", "")
inside_attributes = attributes.get("attributes", {})
content_dict = {**inside_attributes, "message": message}
content = ", ".join(f"{k}: {v}" for k, v in content_dict.items())
return Document(page_content=content, metadata=metadata)
def load(self) -> List[Document]:
"""
Get logs from Datadog.
Returns:
A list of Document objects.
- page_content
- metadata
- id
- service
- status
- tags
- timestamp
"""
try:
from datadog_api_client import ApiClient
from datadog_api_client.v2.api.logs_api import LogsApi
from datadog_api_client.v2.model.logs_list_request import LogsListRequest
from datadog_api_client.v2.model.logs_list_request_page import (
LogsListRequestPage,
)
from datadog_api_client.v2.model.logs_query_filter import LogsQueryFilter
from datadog_api_client.v2.model.logs_sort import LogsSort
except ImportError as ex:
raise ImportError(
"Could not import datadog_api_client python package. "
"Please install it with `pip install datadog_api_client`."
) from ex
now = datetime.now()
twenty_minutes_before = now - timedelta(minutes=20)
now_timestamp = int(now.timestamp() * 1000)
twenty_minutes_before_timestamp = int(twenty_minutes_before.timestamp() * 1000)
_from = (
self.from_time
if self.from_time is not None
else twenty_minutes_before_timestamp
)
body = LogsListRequest(
filter=LogsQueryFilter(
query=self.query,
_from=_from,
to=f"{self.to_time if self.to_time is not None else now_timestamp}",
),
sort=LogsSort.TIMESTAMP_ASCENDING,
page=LogsListRequestPage(
limit=self.limit,
),
)
with ApiClient(configuration=self.configuration) as api_client:
api_instance = LogsApi(api_client)
response = api_instance.list_logs(body=body).to_dict()
docs: List[Document] = []
for row in response["data"]:
docs.append(self.parse_log(row))
return docs