Datadog logs loader (#7356)
### Description
Created a Loader to get a list of specific logs from Datadog Logs.

### Dependencies
`datadog_api_client` is required.

### Twitter handle
[kzk_maeda](https://twitter.com/kzk_maeda)

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in: parent 7d29bb2c02, commit 92b4418c8c
19  docs/extras/ecosystem/integrations/datadog_logs.mdx  Normal file

@@ -0,0 +1,19 @@
# Datadog Logs

>[Datadog](https://www.datadoghq.com/) is a monitoring and analytics platform for cloud-scale applications.

## Installation and Setup

```bash
pip install datadog_api_client
```

Initialize the loader with your Datadog API key and APP key, and set up the query to extract the desired logs.

## Document Loader

See a [usage example](/docs/modules/data_connection/document_loaders/integrations/datadog_logs.html).

```python
from langchain.document_loaders import DatadogLogsLoader
```
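A minimal initialization sketch (not part of the docs page itself), assuming the keys are supplied via environment variables; the variable names `DD_API_KEY` and `DD_APP_KEY` are illustrative:

```python
import os

from langchain.document_loaders import DatadogLogsLoader

# Hypothetical environment variable names; adapt to however you store credentials.
loader = DatadogLogsLoader(
    query="service:agent status:error",
    api_key=os.environ["DD_API_KEY"],
    app_key=os.environ["DD_APP_KEY"],
)
documents = loader.load()
```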
@@ -0,0 +1,96 @@
{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Datadog Logs\n",
    "\n",
    ">[Datadog](https://www.datadoghq.com/) is a monitoring and analytics platform for cloud-scale applications.\n",
    "\n",
    "This loader fetches the logs from your applications in Datadog using the `datadog_api_client` Python package. You must initialize the loader with your `Datadog API key` and `APP key`, and you need to pass in the query to extract the desired logs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.document_loaders import DatadogLogsLoader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!pip install datadog-api-client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = \"service:agent status:error\"\n",
    "\n",
    "loader = DatadogLogsLoader(\n",
    "    query=query,\n",
    "    api_key=DD_API_KEY,\n",
    "    app_key=DD_APP_KEY,\n",
    "    from_time=1688732708951,  # Optional, timestamp in milliseconds\n",
    "    to_time=1688736308951,  # Optional, timestamp in milliseconds\n",
    "    limit=100,  # Optional, default is 100\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(page_content='message: grep: /etc/datadog-agent/system-probe.yaml: No such file or directory', metadata={'id': 'AgAAAYkwpLImvkjRpQAAAAAAAAAYAAAAAEFZa3dwTUFsQUFEWmZfLU5QdElnM3dBWQAAACQAAAAAMDE4OTMwYTQtYzk3OS00MmJjLTlhNDAtOTY4N2EwY2I5ZDdk', 'status': 'error', 'service': 'agent', 'tags': ['accessible-from-goog-gke-node', 'allow-external-ingress-high-ports', 'allow-external-ingress-http', 'allow-external-ingress-https', 'container_id:c7d8ecd27b5b3cfdf3b0df04b8965af6f233f56b7c3c2ffabfab5e3b6ccbd6a5', 'container_name:lab_datadog_1', 'datadog.pipelines:false', 'datadog.submission_auth:private_api_key', 'docker_image:datadog/agent:7.41.1', 'env:dd101-dev', 'hostname:lab-host', 'image_name:datadog/agent', 'image_tag:7.41.1', 'instance-id:7497601202021312403', 'instance-type:custom-1-4096', 'instruqt_aws_accounts:', 'instruqt_azure_subscriptions:', 'instruqt_gcp_projects:', 'internal-hostname:lab-host.d4rjybavkary.svc.cluster.local', 'numeric_project_id:3390740675', 'p-d4rjybavkary', 'project:instruqt-prod', 'service:agent', 'short_image:agent', 'source:agent', 'zone:europe-west1-b'], 'timestamp': datetime.datetime(2023, 7, 7, 13, 57, 27, 206000, tzinfo=tzutc())}),\n",
       " Document(page_content='message: grep: /etc/datadog-agent/system-probe.yaml: No such file or directory', metadata={'id': 'AgAAAYkwpLImvkjRpgAAAAAAAAAYAAAAAEFZa3dwTUFsQUFEWmZfLU5QdElnM3dBWgAAACQAAAAAMDE4OTMwYTQtYzk3OS00MmJjLTlhNDAtOTY4N2EwY2I5ZDdk', 'status': 'error', 'service': 'agent', 'tags': ['accessible-from-goog-gke-node', 'allow-external-ingress-high-ports', 'allow-external-ingress-http', 'allow-external-ingress-https', 'container_id:c7d8ecd27b5b3cfdf3b0df04b8965af6f233f56b7c3c2ffabfab5e3b6ccbd6a5', 'container_name:lab_datadog_1', 'datadog.pipelines:false', 'datadog.submission_auth:private_api_key', 'docker_image:datadog/agent:7.41.1', 'env:dd101-dev', 'hostname:lab-host', 'image_name:datadog/agent', 'image_tag:7.41.1', 'instance-id:7497601202021312403', 'instance-type:custom-1-4096', 'instruqt_aws_accounts:', 'instruqt_azure_subscriptions:', 'instruqt_gcp_projects:', 'internal-hostname:lab-host.d4rjybavkary.svc.cluster.local', 'numeric_project_id:3390740675', 'p-d4rjybavkary', 'project:instruqt-prod', 'service:agent', 'short_image:agent', 'source:agent', 'zone:europe-west1-b'], 'timestamp': datetime.datetime(2023, 7, 7, 13, 57, 27, 206000, tzinfo=tzutc())})]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "documents = loader.load()\n",
    "documents"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.11"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
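The `from_time`/`to_time` arguments in the notebook are epoch timestamps in milliseconds. A small sketch of deriving them from `datetime` objects, mirroring the conversion the loader itself performs internally; the `start`/`end` names are illustrative:

```python
from datetime import datetime, timedelta

# Datadog's logs API takes the time range as epoch milliseconds.
end = datetime.now()
start = end - timedelta(hours=1)

from_time = int(start.timestamp() * 1000)  # e.g. 1688732708951
to_time = int(end.timestamp() * 1000)
```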
langchain/document_loaders/__init__.py

@@ -30,6 +30,7 @@ from langchain.document_loaders.confluence import ConfluenceLoader
from langchain.document_loaders.conllu import CoNLLULoader
from langchain.document_loaders.csv_loader import CSVLoader, UnstructuredCSVLoader
from langchain.document_loaders.cube_semantic import CubeSemanticLoader
from langchain.document_loaders.datadog_logs import DatadogLogsLoader
from langchain.document_loaders.dataframe import DataFrameLoader
from langchain.document_loaders.diffbot import DiffbotLoader
from langchain.document_loaders.directory import DirectoryLoader

@@ -179,6 +180,7 @@ __all__ = [
    "CollegeConfidentialLoader",
    "ConfluenceLoader",
    "CubeSemanticLoader",
    "DatadogLogsLoader",
    "DataFrameLoader",
    "DiffbotLoader",
    "DirectoryLoader",
137  langchain/document_loaders/datadog_logs.py  Normal file

@@ -0,0 +1,137 @@
"""Load Datadog logs."""
from datetime import datetime, timedelta
from typing import List, Optional

from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader


class DatadogLogsLoader(BaseLoader):
    """Loads a query result from Datadog into a list of documents.

    Logs are written into the `page_content` and into the `metadata`.
    """

    def __init__(
        self,
        query: str,
        api_key: str,
        app_key: str,
        from_time: Optional[int] = None,
        to_time: Optional[int] = None,
        limit: int = 100,
    ) -> None:
        """Initialize Datadog document loader.

        Requirements:
            - Must have datadog_api_client installed. Install with `pip install datadog_api_client`.

        Args:
            query: The query to run in Datadog.
            api_key: The Datadog API key.
            app_key: The Datadog APP key.
            from_time: Optional. The start of the time range to query.
                Supports date math and regular timestamps (milliseconds) like '1688732708951'.
                Defaults to 20 minutes ago.
            to_time: Optional. The end of the time range to query.
                Supports date math and regular timestamps (milliseconds) like '1688732708951'.
                Defaults to now.
            limit: The maximum number of logs to return.
                Defaults to 100.
        """  # noqa: E501
        try:
            from datadog_api_client import Configuration
        except ImportError as ex:
            raise ImportError(
                "Could not import datadog_api_client python package. "
                "Please install it with `pip install datadog_api_client`."
            ) from ex

        self.query = query
        configuration = Configuration()
        configuration.api_key["apiKeyAuth"] = api_key
        configuration.api_key["appKeyAuth"] = app_key
        self.configuration = configuration
        self.from_time = from_time
        self.to_time = to_time
        self.limit = limit

    def parse_log(self, log: dict) -> Document:
        """
        Create Document objects from Datadog log items.
        """
        attributes = log.get("attributes", {})
        metadata = {
            "id": log.get("id", ""),
            "status": attributes.get("status"),
            "service": attributes.get("service", ""),
            "tags": attributes.get("tags", []),
            "timestamp": attributes.get("timestamp", ""),
        }

        # The log message plus any nested custom attributes become the content.
        message = attributes.get("message", "")
        inside_attributes = attributes.get("attributes", {})
        content_dict = {**inside_attributes, "message": message}
        content = ", ".join(f"{k}: {v}" for k, v in content_dict.items())

        return Document(page_content=content, metadata=metadata)

    def load(self) -> List[Document]:
        """
        Get logs from Datadog.

        Returns:
            A list of Document objects.
                - page_content
                - metadata
                    - id
                    - service
                    - status
                    - tags
                    - timestamp
        """
        try:
            from datadog_api_client import ApiClient
            from datadog_api_client.v2.api.logs_api import LogsApi
            from datadog_api_client.v2.model.logs_list_request import LogsListRequest
            from datadog_api_client.v2.model.logs_list_request_page import (
                LogsListRequestPage,
            )
            from datadog_api_client.v2.model.logs_query_filter import LogsQueryFilter
            from datadog_api_client.v2.model.logs_sort import LogsSort
        except ImportError as ex:
            raise ImportError(
                "Could not import datadog_api_client python package. "
                "Please install it with `pip install datadog_api_client`."
            ) from ex

        # Default to the last 20 minutes, expressed as epoch milliseconds.
        now = datetime.now()
        twenty_minutes_before = now - timedelta(minutes=20)
        now_timestamp = int(now.timestamp() * 1000)
        twenty_minutes_before_timestamp = int(twenty_minutes_before.timestamp() * 1000)
        _from = (
            self.from_time
            if self.from_time is not None
            else twenty_minutes_before_timestamp
        )

        # LogsQueryFilter expects string values for the time bounds.
        body = LogsListRequest(
            filter=LogsQueryFilter(
                query=self.query,
                _from=f"{_from}",
                to=f"{self.to_time if self.to_time is not None else now_timestamp}",
            ),
            sort=LogsSort.TIMESTAMP_ASCENDING,
            page=LogsListRequestPage(
                limit=self.limit,
            ),
        )

        with ApiClient(configuration=self.configuration) as api_client:
            api_instance = LogsApi(api_client)
            response = api_instance.list_logs(body=body).to_dict()

        docs: List[Document] = []
        for row in response["data"]:
            docs.append(self.parse_log(row))

        return docs
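To see what `parse_log` produces without calling the API, here is a hedged sketch using a hand-built log item whose shape mirrors the fields the method reads (`id`, `attributes.status`, `attributes.message`, the nested `attributes.attributes`); the concrete values are made up:

```python
from langchain.document_loaders import DatadogLogsLoader

# Illustrative values only; the structure matches one entry of the
# "data" list that list_logs(...).to_dict() returns.
sample_log = {
    "id": "log-id-123",
    "attributes": {
        "status": "error",
        "service": "agent",
        "tags": ["env:dev", "service:agent"],
        "timestamp": "2023-07-07T13:57:27.206Z",
        "message": "grep: /etc/datadog-agent/system-probe.yaml: No such file or directory",
        "attributes": {"hostname": "lab-host"},
    },
}

# Placeholder credentials; parse_log itself never contacts Datadog.
loader = DatadogLogsLoader(
    query="service:agent", api_key="<DD_API_KEY>", app_key="<DD_APP_KEY>"
)
doc = loader.parse_log(sample_log)
print(doc.page_content)
# hostname: lab-host, message: grep: /etc/datadog-agent/system-probe.yaml: No such file or directory
print(doc.metadata["service"])  # agent
```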