Datadog logs loader (#7356)
### Description
Created a Loader to get a list of specific logs from Datadog Logs.

### Dependencies
`datadog_api_client` is required.

### Twitter handle
[kzk_maeda](https://twitter.com/kzk_maeda)

---------

Co-authored-by: Bagatur <baskaryan@gmail.com>
This commit is contained in: parent 7d29bb2c02, commit 92b4418c8c
19  docs/extras/ecosystem/integrations/datadog_logs.mdx  Normal file

@@ -0,0 +1,19 @@
# Datadog Logs

>[Datadog](https://www.datadoghq.com/) is a monitoring and analytics platform for cloud-scale applications.

## Installation and Setup

```bash
pip install datadog_api_client
```

Initialize the loader with your Datadog API key and APP key, and set up the query to extract the desired logs.

## Document Loader

See a [usage example](/docs/modules/data_connection/document_loaders/integrations/datadog_logs.html).

```python
from langchain.document_loaders import DatadogLogsLoader
```
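A minimal initialization sketch (not part of the docs page itself), assuming the keys are supplied via environment variables; the variable names `DD_API_KEY` and `DD_APP_KEY` are illustrative:

```python
import os

from langchain.document_loaders import DatadogLogsLoader

# Hypothetical environment variable names; adapt to however you store credentials.
loader = DatadogLogsLoader(
    query="service:agent status:error",
    api_key=os.environ["DD_API_KEY"],
    app_key=os.environ["DD_APP_KEY"],
)
documents = loader.load()
```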
@@ -0,0 +1,96 @@
{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Datadog Logs\n",
    "\n",
    ">[Datadog](https://www.datadoghq.com/) is a monitoring and analytics platform for cloud-scale applications.\n",
    "\n",
    "This loader fetches the logs from your applications in Datadog using the `datadog_api_client` Python package. You must initialize the loader with your `Datadog API key` and `APP key`, and you need to pass in the query to extract the desired logs."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.document_loaders import DatadogLogsLoader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!pip install datadog-api-client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "query = \"service:agent status:error\"\n",
    "\n",
    "loader = DatadogLogsLoader(\n",
    "    query=query,\n",
    "    api_key=DD_API_KEY,\n",
    "    app_key=DD_APP_KEY,\n",
    "    from_time=1688732708951,  # Optional, timestamp in milliseconds\n",
    "    to_time=1688736308951,  # Optional, timestamp in milliseconds\n",
    "    limit=100,  # Optional, default is 100\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(page_content='message: grep: /etc/datadog-agent/system-probe.yaml: No such file or directory', metadata={'id': 'AgAAAYkwpLImvkjRpQAAAAAAAAAYAAAAAEFZa3dwTUFsQUFEWmZfLU5QdElnM3dBWQAAACQAAAAAMDE4OTMwYTQtYzk3OS00MmJjLTlhNDAtOTY4N2EwY2I5ZDdk', 'status': 'error', 'service': 'agent', 'tags': ['accessible-from-goog-gke-node', 'allow-external-ingress-high-ports', 'allow-external-ingress-http', 'allow-external-ingress-https', 'container_id:c7d8ecd27b5b3cfdf3b0df04b8965af6f233f56b7c3c2ffabfab5e3b6ccbd6a5', 'container_name:lab_datadog_1', 'datadog.pipelines:false', 'datadog.submission_auth:private_api_key', 'docker_image:datadog/agent:7.41.1', 'env:dd101-dev', 'hostname:lab-host', 'image_name:datadog/agent', 'image_tag:7.41.1', 'instance-id:7497601202021312403', 'instance-type:custom-1-4096', 'instruqt_aws_accounts:', 'instruqt_azure_subscriptions:', 'instruqt_gcp_projects:', 'internal-hostname:lab-host.d4rjybavkary.svc.cluster.local', 'numeric_project_id:3390740675', 'p-d4rjybavkary', 'project:instruqt-prod', 'service:agent', 'short_image:agent', 'source:agent', 'zone:europe-west1-b'], 'timestamp': datetime.datetime(2023, 7, 7, 13, 57, 27, 206000, tzinfo=tzutc())}),\n",
       " Document(page_content='message: grep: /etc/datadog-agent/system-probe.yaml: No such file or directory', metadata={'id': 'AgAAAYkwpLImvkjRpgAAAAAAAAAYAAAAAEFZa3dwTUFsQUFEWmZfLU5QdElnM3dBWgAAACQAAAAAMDE4OTMwYTQtYzk3OS00MmJjLTlhNDAtOTY4N2EwY2I5ZDdk', 'status': 'error', 'service': 'agent', 'tags': ['accessible-from-goog-gke-node', 'allow-external-ingress-high-ports', 'allow-external-ingress-http', 'allow-external-ingress-https', 'container_id:c7d8ecd27b5b3cfdf3b0df04b8965af6f233f56b7c3c2ffabfab5e3b6ccbd6a5', 'container_name:lab_datadog_1', 'datadog.pipelines:false', 'datadog.submission_auth:private_api_key', 'docker_image:datadog/agent:7.41.1', 'env:dd101-dev', 'hostname:lab-host', 'image_name:datadog/agent', 'image_tag:7.41.1', 'instance-id:7497601202021312403', 'instance-type:custom-1-4096', 'instruqt_aws_accounts:', 'instruqt_azure_subscriptions:', 'instruqt_gcp_projects:', 'internal-hostname:lab-host.d4rjybavkary.svc.cluster.local', 'numeric_project_id:3390740675', 'p-d4rjybavkary', 'project:instruqt-prod', 'service:agent', 'short_image:agent', 'source:agent', 'zone:europe-west1-b'], 'timestamp': datetime.datetime(2023, 7, 7, 13, 57, 27, 206000, tzinfo=tzutc())})]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "documents = loader.load()\n",
    "documents"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.11"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
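The `from_time`/`to_time` arguments in the notebook are epoch timestamps in milliseconds. A small sketch of deriving them from `datetime` objects, mirroring the conversion the loader itself performs internally; the `start`/`end` names are illustrative:

```python
from datetime import datetime, timedelta

# Datadog's logs API takes the time range as epoch milliseconds.
end = datetime.now()
start = end - timedelta(hours=1)

from_time = int(start.timestamp() * 1000)  # e.g. 1688732708951
to_time = int(end.timestamp() * 1000)
```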
langchain/document_loaders/__init__.py

@@ -30,6 +30,7 @@ from langchain.document_loaders.confluence import ConfluenceLoader
from langchain.document_loaders.conllu import CoNLLULoader
from langchain.document_loaders.csv_loader import CSVLoader, UnstructuredCSVLoader
from langchain.document_loaders.cube_semantic import CubeSemanticLoader
from langchain.document_loaders.datadog_logs import DatadogLogsLoader
from langchain.document_loaders.dataframe import DataFrameLoader
from langchain.document_loaders.diffbot import DiffbotLoader
from langchain.document_loaders.directory import DirectoryLoader

@@ -179,6 +180,7 @@ __all__ = [
    "CollegeConfidentialLoader",
    "ConfluenceLoader",
    "CubeSemanticLoader",
    "DatadogLogsLoader",
    "DataFrameLoader",
    "DiffbotLoader",
    "DirectoryLoader",
137  langchain/document_loaders/datadog_logs.py  Normal file

@@ -0,0 +1,137 @@
"""Load Datadog logs."""
from datetime import datetime, timedelta
from typing import List, Optional

from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader


class DatadogLogsLoader(BaseLoader):
    """Loads a query result from Datadog into a list of documents.

    Logs are written into the `page_content` and into the `metadata`.
    """

    def __init__(
        self,
        query: str,
        api_key: str,
        app_key: str,
        from_time: Optional[int] = None,
        to_time: Optional[int] = None,
        limit: int = 100,
    ) -> None:
        """Initialize Datadog document loader.

        Requirements:
            - Must have datadog_api_client installed. Install with `pip install datadog_api_client`.

        Args:
            query: The query to run in Datadog.
            api_key: The Datadog API key.
            app_key: The Datadog APP key.
            from_time: Optional. The start of the time range to query.
                Supports date math and regular timestamps (milliseconds) like '1688732708951'.
                Defaults to 20 minutes ago.
            to_time: Optional. The end of the time range to query.
                Supports date math and regular timestamps (milliseconds) like '1688732708951'.
                Defaults to now.
            limit: The maximum number of logs to return.
                Defaults to 100.
        """  # noqa: E501
        try:
            from datadog_api_client import Configuration
        except ImportError as ex:
            raise ImportError(
                "Could not import datadog_api_client python package. "
                "Please install it with `pip install datadog_api_client`."
            ) from ex

        self.query = query
        configuration = Configuration()
        configuration.api_key["apiKeyAuth"] = api_key
        configuration.api_key["appKeyAuth"] = app_key
        self.configuration = configuration
        self.from_time = from_time
        self.to_time = to_time
        self.limit = limit

    def parse_log(self, log: dict) -> Document:
        """
        Create Document objects from Datadog log items.
        """
        attributes = log.get("attributes", {})
        metadata = {
            "id": log.get("id", ""),
            "status": attributes.get("status"),
            "service": attributes.get("service", ""),
            "tags": attributes.get("tags", []),
            "timestamp": attributes.get("timestamp", ""),
        }

        # The log message plus any nested custom attributes become the content.
        message = attributes.get("message", "")
        inside_attributes = attributes.get("attributes", {})
        content_dict = {**inside_attributes, "message": message}
        content = ", ".join(f"{k}: {v}" for k, v in content_dict.items())

        return Document(page_content=content, metadata=metadata)

    def load(self) -> List[Document]:
        """
        Get logs from Datadog.

        Returns:
            A list of Document objects.
                - page_content
                - metadata
                    - id
                    - service
                    - status
                    - tags
                    - timestamp
        """
        try:
            from datadog_api_client import ApiClient
            from datadog_api_client.v2.api.logs_api import LogsApi
            from datadog_api_client.v2.model.logs_list_request import LogsListRequest
            from datadog_api_client.v2.model.logs_list_request_page import (
                LogsListRequestPage,
            )
            from datadog_api_client.v2.model.logs_query_filter import LogsQueryFilter
            from datadog_api_client.v2.model.logs_sort import LogsSort
        except ImportError as ex:
            raise ImportError(
                "Could not import datadog_api_client python package. "
                "Please install it with `pip install datadog_api_client`."
            ) from ex

        # Default to the last 20 minutes, expressed as epoch milliseconds.
        now = datetime.now()
        twenty_minutes_before = now - timedelta(minutes=20)
        now_timestamp = int(now.timestamp() * 1000)
        twenty_minutes_before_timestamp = int(twenty_minutes_before.timestamp() * 1000)
        _from = (
            self.from_time
            if self.from_time is not None
            else twenty_minutes_before_timestamp
        )

        # LogsQueryFilter expects string values for the time bounds.
        body = LogsListRequest(
            filter=LogsQueryFilter(
                query=self.query,
                _from=f"{_from}",
                to=f"{self.to_time if self.to_time is not None else now_timestamp}",
            ),
            sort=LogsSort.TIMESTAMP_ASCENDING,
            page=LogsListRequestPage(
                limit=self.limit,
            ),
        )

        with ApiClient(configuration=self.configuration) as api_client:
            api_instance = LogsApi(api_client)
            response = api_instance.list_logs(body=body).to_dict()

        docs: List[Document] = []
        for row in response["data"]:
            docs.append(self.parse_log(row))

        return docs
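To see what `parse_log` produces without calling the API, here is a hedged sketch using a hand-built log item whose shape mirrors the fields the method reads (`id`, `attributes.status`, `attributes.message`, the nested `attributes.attributes`); the concrete values are made up:

```python
from langchain.document_loaders import DatadogLogsLoader

# Illustrative values only; the structure matches one entry of the
# "data" list that list_logs(...).to_dict() returns.
sample_log = {
    "id": "log-id-123",
    "attributes": {
        "status": "error",
        "service": "agent",
        "tags": ["env:dev", "service:agent"],
        "timestamp": "2023-07-07T13:57:27.206Z",
        "message": "grep: /etc/datadog-agent/system-probe.yaml: No such file or directory",
        "attributes": {"hostname": "lab-host"},
    },
}

# Placeholder credentials; parse_log itself never contacts Datadog.
loader = DatadogLogsLoader(
    query="service:agent", api_key="<DD_API_KEY>", app_key="<DD_APP_KEY>"
)
doc = loader.parse_log(sample_log)
print(doc.page_content)
# hostname: lab-host, message: grep: /etc/datadog-agent/system-probe.yaml: No such file or directory
print(doc.metadata["service"])  # agent
```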