From 96b605ce88ea91416198d96e6e05d091314ade24 Mon Sep 17 00:00:00 2001
From: Kurt Kartaltepe <kkartaltepe@gmail.com>
Date: Thu, 29 Dec 2022 13:54:31 -0800
Subject: [PATCH] Add perf counters based metrics for intel platforms

Unlike AMD or Nvidia, Intel historically only exposes GPU telemetry through
Linux perf counters. As with all perf counters on modern Linux, these
require root or CAP_PERFMON.

This adds a small helper program that can be setuid/setcap'ed with the
appropriate permissions to export the telemetry that mangohud is
interested in. Most of the gpu metrics were informed by the existing
intel_gpu_top tool we are using, however this also exports power metrics
that the intel_gpu_top tool does not support for dgpu configurations and
ensures that we select the appropriate card for all metrics in a
multi-gpu system.

This tool also exports CPU power metrics from the perf counters, which
allows power monitoring for Intel and Zen+ AMD CPUs. This isn't hooked
up in this PR, since CPU and GPU metrics collection seem to be separated
in the rest of MangoHud.

Mostly this is just a PoC to show what avoiding taking a third-party
dependency on the rarely packaged intel_gpu_top might require. The meson
setup in particular is just something thrown together that builds.
---
 meson_options.txt |   1 +
 src/app/intel.cpp | 230 ++++++++++++++++++++++++++++++++++++++++++++++
 src/gpu.h         |   2 +-
 src/intel.cpp     |  82 ++++++-----------
 src/meson.build   |  12 +++
 src/overlay.cpp   |   6 +-
 6 files changed, 278 insertions(+), 55 deletions(-)
 create mode 100644 src/app/intel.cpp
diff --git a/meson_options.txt b/meson_options.txt
index 5f7d11b..50ba289 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -9,6 +9,7 @@ option('with_wayland', type : 'feature', value : 'disabled')
 option('with_dbus', type : 'feature', value : 'enabled')
 option('loglevel', type: 'combo', choices : ['trace', 'debug', 'info', 'warn', 'err', 'critical', 'off'], value : 'info', description: 'Max log level in non-debug build')
 option('mangoapp', type: 'boolean', value : false)
+option('mango_intel_stats', type: 'feature', value : 'enabled', description: 'Enable helper binary for gathering stats for intel hardware on linux')
 option('mangohudctl', type: 'boolean', value : false)
 option('mangoapp_layer', type: 'boolean', value : false)
 option('tests', type: 'feature', value: 'auto', description: 'Run tests')
diff --git a/src/app/intel.cpp b/src/app/intel.cpp
new file mode 100644
index 0000000..34b4e85
--- /dev/null
+++ b/src/app/intel.cpp
@@ -0,0 +1,230 @@
+#include <nlohmann/json.hpp>
+
+#include <dirent.h>
+#include <iostream>
+#include <libdrm/i915_drm.h>
+#include <linux/perf_event.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <thread>
+#include <unistd.h>
+#include <cinttypes>
+
+// Cut down from file_utils.cpp: returns the names of the sub-directories of
+// `root` whose name starts with `prefix` (all of them when `prefix` is null).
+// "." and ".." are skipped; a `root` that cannot be opened yields an empty list.
+static std::vector<std::string> lsdir(const char *root, const char *prefix) {
+  std::vector<std::string> list;
+  DIR *dirp = opendir(root);
+  if (!dirp) {
+    return list;
+  }
+
+  while (struct dirent *dp = readdir(dirp)) {
+    const std::string name(dp->d_name);
+    if (name == "." || name == "..")
+      continue;
+    // rfind(prefix, 0) can only match at index 0, i.e. it is a "starts with" test.
+    if (prefix && name.rfind(prefix, 0) != 0)
+      continue;
+    // Keep only entries whose d_type says directory.
+    if (dp->d_type == DT_DIR)
+      list.push_back(name);
+  }
+
+  closedir(dirp);
+  return list;
+}
+
+static inline int perf_event_open(struct perf_event_attr *attr, pid_t pid,
+                                  int cpu, int group_fd, unsigned long flags) { // thin wrapper around the raw syscall
+  attr->size = sizeof(*attr); // stamp the attr with the struct size this binary was built against
+  return syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags); // arguments are forwarded unchanged
+}
+
+// Reads the perf event-source type id that sysfs exposes for the i915 PMU of
+// the GPU at `bus_id` (':' already replaced by '_'). Returns 0 on any failure.
+static uint64_t i915_perf_device_type(const char *bus_id) {
+  char path[80] = {0};
+  // Ref: igt-gpu-tools for magic. This one bus id uses the plain "i915" entry;
+  // every other device is looked up under its bus id suffix.
+  if (strcmp(bus_id, "0000_00_02.0") == 0)
+    snprintf(path, sizeof(path), "/sys/bus/event_source/devices/i915/type");
+  else
+    snprintf(path, sizeof(path), "/sys/bus/event_source/devices/i915_%s/type", bus_id);
+
+  FILE *fp = fopen(path, "r");
+  if (fp == nullptr)
+    return 0;
+  uint64_t type = 0;
+  const bool parsed = fscanf(fp, "%" PRIu64, &type) == 1;
+  fclose(fp);
+  return parsed ? type : 0;
+}
+
+// Opens a system-wide (pid == -1) counter for `config` on PMU `type`. EINVAL is
+// treated as "wrong cpu", so cpus 0..15 are tried in turn. Returns the fd or -1.
+static inline int i915_perf_event_open(uint64_t type, uint64_t config) {
+  struct perf_event_attr attr = {0};
+  attr.type = type;
+  attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED; // read() -> {value, time}
+  attr.config = config;
+  attr.use_clockid = 1;
+  attr.clockid = CLOCK_MONOTONIC;
+
+  const int no_group = -1; // group_fd is an int; -1 asks for no event group
+  const int ncpu = 16;     // hopefully one of these works.
+  int ret = -1;
+  for (int cpu = 0; cpu < ncpu; ++cpu) {
+    ret = perf_event_open(&attr, -1, cpu, no_group, 0);
+    if (ret >= 0 || errno != EINVAL)
+      break; // opened, or failed for a reason another cpu will not fix
+  }
+  return ret;
+}
+
+// Opens a counter of the "power" PMU. `metric` is a file name found under
+// /sys/devices/power/events, e.g. "energy-pkg". Returns a perf fd, or -1.
+static int power_perf_event_open(const char *metric) {
+  // Scans a single uint64_t out of a sysfs file using the given scanf format.
+  const auto scan_u64 = [](const char *path, const char *fmt, uint64_t *out) {
+    FILE *fp = fopen(path, "r");
+    if (fp == nullptr)
+      return false;
+    const int matched = fscanf(fp, fmt, out);
+    fclose(fp);
+    return matched == 1;
+  };
+
+  uint64_t type, config;
+  if (!scan_u64("/sys/devices/power/type", "%" PRIu64, &type))
+    return -1;
+
+  char buf[80] = {0};
+  snprintf(buf, sizeof(buf), "/sys/devices/power/events/%s", metric);
+  // The event file is expected to read "event=<hex>"; the hex value is the config.
+  if (!scan_u64(buf, "event=%" PRIx64, &config))
+    return -1;
+
+  return i915_perf_event_open(type, config);
+}
+
+// One sampled metric and its two most recent readings. All readings start at
+// zero so counter_value() can tell that fewer than two samples were taken.
+struct counter {
+  union { int fd; FILE *fp; }; // sample source: perf event fd or sysfs stream
+  uint64_t val = 0;      // latest counter reading
+  uint64_t ts = 0;       // timestamp of `val`
+  uint64_t val_prev = 0; // reading before `val`
+  uint64_t ts_prev = 0;  // timestamp of `val_prev`
+};
+
+// Records a new sample: in[0] is the counter value, in[1] its timestamp.
+static void counter_update(struct counter *c, uint64_t in[4]) {
+  c->val_prev = c->val;
+  c->ts_prev = c->ts;
+  c->val = in[0];
+  c->ts = in[1];
+}
+
+// Rate of change between the last two samples, multiplied by `scale`. Returns 0
+// until two samples exist, if the value went backwards, or if no time elapsed.
+static float counter_value(struct counter c, float scale) {
+  if (c.ts_prev == 0 || c.val < c.val_prev || c.ts <= c.ts_prev)
+    return 0.0; // just try again next time; this also avoids dividing by zero.
+  const float dt = c.ts - c.ts_prev;
+  return (c.val - c.val_prev) / dt * scale;
+}
+
+// Returns the path component directly above "/drm/" in the /sys/class/drm/<drm_dev>
+// symlink (the PCI address), e.g. "0000:09:00.0" for ".../0000:09:00.0/drm/card0".
+static std::string find_pci_busid(const char *drm_dev) {
+  char link[PATH_MAX] = {0};
+  const std::string node = std::string("/sys/class/drm/") + drm_dev;
+  const ssize_t count = readlink(node.c_str(), link, sizeof(link));
+  const std::string drm_link(link, count > 0 ? count : 0); // readlink() does not NUL-terminate
+  const size_t drm_pos = drm_link.rfind("/drm/");
+  if (drm_pos == std::string::npos || drm_pos == 0) {
+    fprintf(stderr, "Invalid drm device: %s\n", drm_dev);
+    exit(1);
+  }
+  const size_t pci_start = drm_link.rfind('/', drm_pos - 1) + 1; // npos + 1 == 0
+  return drm_link.substr(pci_start, drm_pos - pci_start);
+}
+
+// Usage: mango_intel_stats <drm card>. Prints one JSON line of metrics every 500 ms.
+int main(int argc, char **argv) {
+  if (argc < 2) {
+    fprintf(stderr, "Not enough arguments. usage: mango_intel_stats \"card0\"\n");
+    exit(1);
+  }
+  char *drm_dev = argv[1];
+  std::string pci_path = find_pci_busid(drm_dev);
+  std::replace(pci_path.begin(), pci_path.end(), ':', '_'); // just sysfs things.
+  // 0 means the i915 PMU type could not be read. 0 is also PERF_TYPE_HARDWARE, so
+  // the GPU events are skipped then instead of being opened on the CPU PMU.
+  const uint64_t type = i915_perf_device_type(pci_path.c_str());
+
+  // Counters for this tool, value-initialised so no stale stack data is read.
+  struct counter busy_gpu{}, freq_gpu{}, energy_dgpu{}, energy_igpu{}, energy_cpu{};
+
+  freq_gpu.fd = type ? i915_perf_event_open(type, I915_PMU_ACTUAL_FREQUENCY) : -1;
+  if (type && freq_gpu.fd < 0 && errno == EACCES) { // errno is only meaningful on failure
+    fprintf(
+        stderr,
+        "Permission denied on perf events.\nThis binary is meant to have "
+        "CAP_PERFMON\n via: sudo setcap cap_perfmon=+ep mango_intel_stats\n");
+    exit(1);
+  }
+  busy_gpu.fd =
+      type ? i915_perf_event_open(type, I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0)) : -1;
+  energy_cpu.fd = power_perf_event_open("energy-pkg");
+  energy_igpu.fd = power_perf_event_open("energy-gpu");
+
+  energy_dgpu.fp = nullptr; // dGPU energy: stays null unless a hwmon file opens
+  std::string hwmon_path =
+      std::string("/sys/class/drm/") + drm_dev + "/device/hwmon/";
+  const auto dirs = lsdir(hwmon_path.c_str(), "hwmon");
+  for (const auto &dir : dirs) {
+    energy_dgpu.fp = fopen((hwmon_path + dir + "/energy1_input").c_str(), "r");
+    if (energy_dgpu.fp)
+      break;
+  }
+
+  while (true) {
+    nlohmann::json j = nlohmann::json::object(); // an empty sample prints "{}" rather than "null"
+    uint64_t res[4] = {0};
+    if (read(busy_gpu.fd, res, sizeof(res)) > 0) {
+      counter_update(&busy_gpu, res);
+      j["gpu_busy_pct"] = counter_value(busy_gpu, 100.0);
+    }
+    if (read(freq_gpu.fd, res, sizeof(res)) > 0) {
+      counter_update(&freq_gpu, res);
+      j["gpu_clock_mhz"] = counter_value(freq_gpu, 1e9);
+    }
+    if (read(energy_cpu.fd, res, sizeof(res)) > 0) {
+      counter_update(&energy_cpu, res);
+      // https://lwn.net/Articles/573602/, in practice intel and amd use 1/2^32J
+      j["cpu_power_w"] = counter_value(energy_cpu, 2.328e-1);
+    }
+    if (read(energy_igpu.fd, res, sizeof(res)) > 0) {
+      counter_update(&energy_igpu, res);
+      j["igpu_power_w"] = counter_value(energy_igpu, 2.328e-1);
+    }
+    if (energy_dgpu.fp && fscanf(energy_dgpu.fp, "%" PRIu64, res) == 1) {
+      rewind(energy_dgpu.fp);
+      fflush(energy_dgpu.fp);
+      // We can just depend on old clock readings in res[1]
+      /*
+      struct timespec t = {0};
+      clock_gettime(CLOCK_MONOTONIC, &t);
+      res[1] = (t.tv_sec % (3600 * 24)) * 1e9 + t.tv_nsec;
+      */
+      counter_update(&energy_dgpu, res);
+      j["dgpu_power_w"] = counter_value(energy_dgpu, 1e3);
+    }
+    std::cout << j << std::endl; // flush every sample so a reader on a pipe sees it at once
+    std::this_thread::sleep_for(std::chrono::milliseconds(500));
+  }
+}
diff --git a/src/gpu.h b/src/gpu.h
index 07ceeb7..d6d4f6e 100644
--- a/src/gpu.h
+++ b/src/gpu.h
@@ -48,7 +48,7 @@ extern struct gpuInfo gpu_info;
 
 void getNvidiaGpuInfo(const struct overlay_params& params);
 void getAmdGpuInfo(void);
-void getIntelGpuInfo();
+void getIntelGpuInfo(const char *drm_dev);
 bool checkNvidia(const char *pci_dev);
 extern void nvapi_util();
 extern bool checkNVAPI();
diff --git a/src/intel.cpp b/src/intel.cpp
index 3a4e988..126f4e7 100644
--- a/src/intel.cpp
+++ b/src/intel.cpp
@@ -8,74 +8,52 @@ using json = nlohmann::json;
 
 static bool init_intel = false;
 struct gpuInfo gpu_info_intel {};
+static char drm_dev[10];
 
-static void intelGpuThread(bool runtime){
+static void intelGpuThread(){ // thread body: runs mango_intel_stats and copies its metrics into gpu_info_intel
     init_intel = true;
-    static char stdout_buffer[1024];
-    static FILE* intel_gpu_top;
-    if (runtime)
-        intel_gpu_top = popen("steam-runtime-launch-client --alongside-steam --host -- intel_gpu_top -J -s 500", "r");
-    else
-        intel_gpu_top = popen("intel_gpu_top -J -s 500", "r");
+    static char stdout_buffer[4096];
 
-    int num_line = 0;
-    std::string buf;
-    int num_iterations = 0;
-    while (fgets(stdout_buffer, sizeof(stdout_buffer), intel_gpu_top)) {
-        if (num_line > 0)
-            buf += stdout_buffer;
-
-        num_line++;
-        if (strlen(stdout_buffer) < 4 && !strchr(stdout_buffer, '{') && !strchr(stdout_buffer, ',')) {
-            if (buf[0] != '{')
-                buf = "{\n" + buf;
-
-            if (num_iterations > 0){
-                buf += "\n}";
-                json j = json::parse(buf);
-                if  (j.contains("engines"))
-                    if (j["engines"].contains("Render/3D/0"))
-                        if (j["engines"]["Render/3D/0"].contains("busy"))
-                            gpu_info_intel.load = j["engines"]["Render/3D/0"]["busy"].get<int>();
-
-                if (j.contains("frequency"))
-                    if (j["frequency"].contains("actual"))
-                        gpu_info_intel.CoreClock = j["frequency"]["actual"].get<int>();
-                if (j.contains("power")){
-                    if (j["power"].contains("GPU"))
-                        gpu_info_intel.powerUsage = j["power"]["GPU"].get<float>();
-                    if (j["power"].contains("Package"))
-                        gpu_info_intel.apu_cpu_power = j["power"]["Package"].get<float>();
-                }
-
-            }
-            buf = "";
-            num_line = 0;
+    // The helper is expected to print one JSON object per line; popen() may fail and return null.
+    const std::string cmd = std::string("mango_intel_stats ") + drm_dev;
+    FILE* mango_intel_stats = popen(cmd.c_str() , "r");
+    while (mango_intel_stats && fgets(stdout_buffer, sizeof(stdout_buffer), mango_intel_stats)) {
+        if ( stdout_buffer[0] != '{' || stdout_buffer[strlen(stdout_buffer)-1] != '\n') {
+            SPDLOG_ERROR("Overran 4k buffer for fgets output. Expect sadness:\n {}", stdout_buffer);
+            continue;
         }
-        num_iterations++;
+
+        json j = json::parse(stdout_buffer, nullptr, false); // no-throw parse: a bad line has no keys
+        if  (j.contains("gpu_busy_pct"))
+            gpu_info_intel.load = j["gpu_busy_pct"].get<int>();
+        if  (j.contains("gpu_clock_mhz"))
+            gpu_info_intel.CoreClock = j["gpu_clock_mhz"].get<int>();
+        if  (j.contains("igpu_power_w"))
+            gpu_info_intel.powerUsage = j["igpu_power_w"].get<float>();
+        if  (j.contains("dgpu_power_w"))
+            gpu_info_intel.powerUsage = j["dgpu_power_w"].get<float>();
+        if  (j.contains("cpu_power_w"))
+            gpu_info_intel.apu_cpu_power = j["cpu_power_w"].get<float>();
     }
 
-    int exitcode = pclose(intel_gpu_top) / 256;
+    int exitcode = mango_intel_stats ? pclose(mango_intel_stats) / 256 : 127; // 127: helper never started
     if (exitcode > 0){
         if (exitcode == 127)
-        SPDLOG_INFO("Failed to open '{}'", "intel_gpu_top");
+            SPDLOG_INFO("Failed to open '{}'", "mango_intel_stats");
 
         if (exitcode == 1)
-        SPDLOG_INFO("Missing permissions for '{}'", "intel_gpu_top");
+            SPDLOG_INFO("Missing permissions for '{}'", "mango_intel_stats");
 
         SPDLOG_INFO("Disabling gpu_stats");
         _params->enabled[OVERLAY_PARAM_ENABLED_gpu_stats] = false;
     }
 }
 
-void getIntelGpuInfo(){
-    if (!init_intel){
-        static bool runtime = false;
-        static struct stat buffer;
-        if (stat("/run/pressure-vessel", &buffer) == 0)
-            runtime = true;
-
-        std::thread(intelGpuThread, runtime).detach();
+void getIntelGpuInfo(const char *_drm_dev){
+    if (!init_intel) {
+        assert(strlen(_drm_dev) < sizeof(drm_dev)-1);
+        strcpy(drm_dev,  _drm_dev);
+        std::thread(intelGpuThread).detach();
     }
 
     gpu_info = gpu_info_intel;
diff --git a/src/meson.build b/src/meson.build
index e1cdbea..3c8f6ab 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -255,6 +255,18 @@ mangoapp = executable(
 )
 endif
 
+if get_option('mango_intel_stats').enabled() and is_unixy # perf-counter helper binary for Intel GPUs
+  drm_dep = cc.find_library('drm') # library name is given without its "lib" prefix
+  warning('The mango_intel_stats binary must have CAP_PERFMON or setuid to be useful')
+  warning('You must do this manually post-install: setcap cap_perfmon=+ep /path/to/mango_intel_stats')
+  mangointel = executable(
+    'mango_intel_stats',
+    files('app/intel.cpp'),
+    install : true,
+    dependencies : [drm_dep, json_dep],
+  )
+endif
+
 if get_option('mangoapp_layer')
   mangoapp_layer = shared_library(
     'MangoApp',
diff --git a/src/overlay.cpp b/src/overlay.cpp
index 11ef001..c034a91 100644
--- a/src/overlay.cpp
+++ b/src/overlay.cpp
@@ -127,8 +127,9 @@ void update_hw_info(const struct overlay_params& params, uint32_t vendorID)
       if (vendorID == 0x10de)
          getNvidiaGpuInfo(params);
 
-      if (vendorID== 0x8086)
-         getIntelGpuInfo();
+      if (vendorID== 0x8086) {
+         getIntelGpuInfo(drm_dev.c_str());
+      }
    }
 
 #ifdef __linux__
@@ -824,6 +825,7 @@ void init_gpu_stats(uint32_t& vendorID, uint32_t reported_deviceID, overlay_para
              continue; // filter display adapters
          }
          path = drm + dir;
+         drm_dev = dir;
 
          SPDLOG_DEBUG("drm path check: {}", path);
          if (pci_bus_parsed && pci_dev) {