Add perf-counter-based metrics for Intel platforms

Unlike AMD or Nvidia, Intel has historically exposed GPU telemetry only
through Linux perf counters. As with all perf counters on modern Linux,
these require root or CAP_PERFMON.

This adds a small helper program that can be setuid/setcap'ed with the
appropriate permissions to export the telemetry that mangohud is
interested in. Most of the gpu metrics were informed by the existing
intel_gpu_top tool we are using, however this also exports power metrics
that the intel_gpu_top tool does not support for dgpu configurations and
ensures that we select the appropriate card for all metrics in a
multi-gpu system.

This tool also exports CPU power metrics from the perf counters, which
allows power monitoring for Intel and Zen+ AMD CPUs. This isn't hooked
up in this PR, since CPU and GPU metrics collection appear to be
separated in the rest of MangoHud.

Mostly this is just a PoC to show what avoiding taking a third-party
dependency on the rarely packaged intel_gpu_top might require. The meson
setup in particular is just something thrown together that builds.
Kurt Kartaltepe 2 years ago
parent 38604927d7
commit 96b605ce88

@ -9,6 +9,7 @@ option('with_wayland', type : 'feature', value : 'disabled')
option('with_dbus', type : 'feature', value : 'enabled')
option('loglevel', type: 'combo', choices : ['trace', 'debug', 'info', 'warn', 'err', 'critical', 'off'], value : 'info', description: 'Max log level in non-debug build')
option('mangoapp', type: 'boolean', value : false)
option('mango_intel_stats', type: 'feature', value : 'enabled', description: 'Enable helper binary for gathering stats for intel hardware on linux')
option('mangohudctl', type: 'boolean', value : false)
option('mangoapp_layer', type: 'boolean', value : false)
option('tests', type: 'feature', value: 'auto', description: 'Run tests')

@ -0,0 +1,230 @@
#include <nlohmann/json.hpp>
#include <dirent.h>
#include <iostream>
#include <libdrm/i915_drm.h>
#include <linux/perf_event.h>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <thread>
#include <unistd.h>
#include <cinttypes>
// cut down from file_utils.cpp
// Directory-listing helper: opens `root`, walks its entries with readdir()
// and returns a vector of strings. An unopenable directory yields an empty
// vector.
// NOTE(review): several statements of this helper are not visible in this
// view (nothing here appends to `list` or closes `dirp`) — confirm against
// the full file before relying on this description.
static std::vector<std::string> lsdir(const char *root, const char *prefix) {
std::vector<std::string> list;
struct dirent *dp;
DIR *dirp = opendir(root);
if (!dirp) {
return list;
while ((dp = readdir(dirp))) {
// Matches entries that do not start with `prefix` (when one is given) and
// the "." / ".." pseudo-entries. `!find(prefix) == 0` parses as
// `(!find(prefix)) == 0`, i.e. it is true whenever find() returned non-zero.
if ((prefix && !std::string(dp->d_name).find(prefix) == 0) ||
!strcmp(dp->d_name, ".") || !strcmp(dp->d_name, ".."))
// Dispatch on the entry type reported by readdir().
switch (dp->d_type) {
case DT_DIR:
return list;
// Issues the perf_event_open system call directly through syscall().
// The `size` field of `attr` is filled in here, so callers only need to set
// the fields that describe the event. Returns whatever the syscall returns
// (narrowed to int).
static inline int perf_event_open(struct perf_event_attr *attr, pid_t pid,
                                  int cpu, int group_fd, unsigned long flags) {
  // Record the structure size before handing the attr to the kernel.
  attr->size = sizeof(struct perf_event_attr);
  const long rc = syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags);
  return static_cast<int>(rc);
}
// Reads the perf event-source type number for an i915 device from sysfs.
//   bus_id: PCI bus id with ':' already replaced by '_' (see main()).
// Returns the parsed number, or 0 when the sysfs file cannot be opened or
// does not contain an unsigned integer.
// NOTE(review): no fclose() for `fd` is visible in this view — confirm the
// handle is released in the full file.
static uint64_t i915_perf_device_type(const char *bus_id) {
FILE *fd;
char buf[80] = {0};
uint64_t type;
// The device at 0000:00:02.0 uses the plain "i915" event source; any other
// bus id uses the "i915_<bus_id>" source.
bool is_igpu = strcmp(bus_id, "0000_00_02.0") == 0; // Ref: igt-gpu-tools for magic.
const char *dgpu_path = "/sys/bus/event_source/devices/i915_%s/type";
const char *igpu_path = "/sys/bus/event_source/devices/i915/type";
// igpu_path has no conversion specifier, so bus_id is simply unused there.
snprintf(buf, sizeof(buf), is_igpu ? igpu_path : dgpu_path, bus_id);
if ((fd = fopen(buf, "r")) == nullptr)
return 0;
if (fscanf(fd, "%" PRIu64, &type) != 1)
type = 0;
return type;
// Opens a perf event of the given event-source `type` and `config`, timed
// against CLOCK_MONOTONIC. Tries CPUs 0..15 in turn and stops at the first
// attempt that does not fail with EINVAL.
// Returns the result of the last perf_event_open() attempt (negative on
// failure).
// NOTE(review): `format` is not defined anywhere in this view — its
// definition appears to be missing here; confirm in the full file.
static inline int i915_perf_event_open(uint64_t type, uint64_t config) {
// -1 is passed on as the group_fd argument; it is narrowed from uint64_t to
// int at the call below.
uint64_t group = -1;
struct perf_event_attr attr = {0};
attr.type = type;
attr.read_format = format;
attr.config = config;
attr.use_clockid = 1;
attr.clockid = CLOCK_MONOTONIC;
int cpu = 0, ncpu = 16; // hopefully one of these works.
int ret;
do {
// pid = -1 together with an explicit cpu index.
ret = perf_event_open(&attr, -1, cpu++, group, 0);
} while ((ret < 0 && errno == EINVAL) &&
(cpu < ncpu)); // find a cpu to open on.
return ret;
// Opens a perf event for one metric of the "power" event source.
//   metric: file name under /sys/devices/power/events/ (main() passes
//           "energy-pkg" and "energy-gpu").
// The event-source type is read from /sys/devices/power/type and the event
// config from the metric file, which is expected to look like "event=<hex>".
// Returns -1 if either file cannot be opened or parsed; otherwise the result
// of i915_perf_event_open().
// NOTE(review): no fclose() for either handle is visible in this view —
// confirm in the full file.
static int power_perf_event_open(const char *metric) {
FILE *fd;
char buf[80] = {0};
uint64_t type, config;
snprintf(buf, sizeof(buf), "/sys/devices/power/type");
if ((fd = fopen(buf, "r")) == nullptr)
return -1;
int scanned = fscanf(fd, "%" PRIu64, &type);
if (scanned != 1)
return -1;
snprintf(buf, sizeof(buf), "/sys/devices/power/events/%s", metric);
if ((fd = fopen(buf, "r")) == nullptr)
return -1;
// The config value is written in hexadecimal after the "event=" prefix.
scanned = fscanf(fd, "event=%" PRIx64, &config);
if (scanned != 1)
return -1;
return i915_perf_event_open(type, config);
// One monotonically increasing counter together with its previous sample, so
// that a rate (delta value / delta time) can be derived between two reads.
struct counter {
  // Where the counter is read from: a perf event file descriptor, or a stdio
  // stream for counters that are read from a text file.
  union {
    int fd;
    FILE *fp;
  };
  // Samples default to zero so that a counter declared without an initializer
  // starts with "no history" (ts_prev == 0) instead of indeterminate values.
  // NOTE(review): default member initializers keep this type an aggregate
  // (needed for `= {0}`) only from C++14 on — confirm the build's cpp_std.
  uint64_t val = 0;      // latest counter value
  uint64_t ts = 0;       // timestamp of the latest value
  uint64_t val_prev = 0; // counter value of the previous sample
  uint64_t ts_prev = 0;  // timestamp of the previous sample (0 = none yet)
};

// Shifts the current sample of `c` into the "previous" slot and stores a new
// one. Only in[0] (value) and in[1] (timestamp) are used.
static void counter_update(struct counter *c, uint64_t in[4]) {
  c->val_prev = c->val;
  c->ts_prev = c->ts;
  c->val = in[0];
  c->ts = in[1];
}

// Returns (delta value / delta time) * scale for the last two samples of `c`.
// Returns 0 when no rate can be computed: there is no previous sample yet,
// the value went backwards, or the timestamp did not advance. The last case
// avoids a division by zero (inf/NaN in the output) and a bogus rate when the
// timestamp source wraps around.
static float counter_value(struct counter c, float scale) {
  const bool no_history = c.ts_prev == 0;
  const bool value_went_back = c.val < c.val_prev;
  const bool time_not_advanced = c.ts <= c.ts_prev;
  if (no_history || value_went_back || time_not_advanced) {
    return 0.0f; // just try again next time.
  }
  const float t = static_cast<float>(c.ts - c.ts_prev);
  const float d = static_cast<float>(c.val - c.val_prev);
  return d / t * scale;
}
// Resolves the symlink /sys/class/drm/<drm_dev> and returns the last PCI
// address component of its target (e.g. "0000:09:00.0" for the example link
// below).
//   drm_dev: DRM node name such as "card0".
// NOTE(review): whatever follows the error message on the failure path is not
// visible in this view — confirm how an invalid device is handled.
static std::string find_pci_busid(const char *drm_dev) {
// Zero-filled so the readlink() result can be treated as a C string.
char drm_link_c[PATH_MAX] = {0};
ssize_t count = readlink((std::string("/sys/class/drm/") + drm_dev).c_str(),
drm_link_c, PATH_MAX);
if (count <= 0) {
fprintf(stderr, "Invalid drm device: %s\n", drm_dev);
std::string drm_link(drm_link_c);
// Example link format:
// ../../devices/pci0000:00/0000:00:03.1/0000:07:00.0/0000:08:01.0/0000:09:00.0/drm/card0
// Search backwards for '/' starting 14 characters before the end of the
// link; for the example above that starting point lies inside the final PCI
// address, so the match is the '/' in front of it.
ssize_t pci_start = drm_link.rfind("/", count - 14) + 1;
ssize_t pci_end = drm_link.find("/", pci_start);
return drm_link.substr(pci_start, pci_end - pci_start);
// Entry point of the stats helper. Takes a DRM node name (e.g. "card0") as
// its first argument, opens the perf/hwmon counters for that device, then
// loops forever printing one JSON object per iteration to stdout with the
// keys gpu_busy_pct, gpu_clock_mhz, cpu_power_w, igpu_power_w and
// dgpu_power_w (each key only when its counter could be read).
// NOTE(review): several statements in this function are truncated or missing
// in this view (e.g. the call that prints the permission message and the
// arguments of the busy_gpu open) — confirm against the full file.
int main(int argc, char **argv) {
if (argc < 2) {
fprintf(stderr, "Not enough arguments. usage: mango_intel_stats \"card0\"\n");
char *drm_dev = argv[1];
std::string pci_path = find_pci_busid(drm_dev);
// The i915 event-source name uses '_' where the PCI address has ':'.
std::replace(pci_path.begin(), pci_path.end(), ':',
'_'); // just sysfs things.
uint64_t type = i915_perf_device_type(pci_path.c_str());
// Counters for this tool.
struct counter busy_gpu, freq_gpu, energy_dgpu, energy_igpu, energy_cpu;
freq_gpu.fd = i915_perf_event_open(type, I915_PMU_ACTUAL_FREQUENCY);
// The first open doubles as the permission probe for perf events.
if (errno == EACCES) {
"Permission denied on perf events.\nThis binary is meant to have "
"CAP_PERFMON\n via: sudo setcap cap_perfmon=+ep mango_intel_stats\n");
busy_gpu.fd = i915_perf_event_open(
energy_cpu.fd = power_perf_event_open("energy-pkg");
energy_igpu.fd = power_perf_event_open("energy-gpu");
energy_dgpu = {0};
// Energy for this card is looked up under the card's own hwmon directory
// rather than through perf.
std::string hwmon_path =
std::string("/sys/class/drm/") + drm_dev + "/device/hwmon/";
const auto dirs = lsdir(hwmon_path.c_str(), "hwmon");
for (const auto &dir : dirs) {
FILE *fp = fopen((hwmon_path + dir + "/energy1_input").c_str(), "r");
if (fp) {
energy_dgpu.fp = fp;
// Sampling loop: read every counter, turn it into a rate and emit JSON.
while (true) {
nlohmann::json j;
// res[0] is used as the counter value and res[1] as its timestamp.
uint64_t res[4] = {0};
if (read(busy_gpu.fd, res, sizeof(res)) > 0) {
counter_update(&busy_gpu, res);
j["gpu_busy_pct"] = counter_value(busy_gpu, 100.0);
if (read(freq_gpu.fd, res, sizeof(res)) > 0) {
counter_update(&freq_gpu, res);
j["gpu_clock_mhz"] = counter_value(freq_gpu, 1e9);
if (read(energy_cpu.fd, res, sizeof(res)) > 0) {
counter_update(&energy_cpu, res);
//, in practice intel and amd use 1/2^32J
// 2.328e-1 is approximately 1e9 / 2^32.
j["cpu_power_w"] = counter_value(energy_cpu, 2.328e-1);
if (read(energy_igpu.fd, res, sizeof(res)) > 0) {
counter_update(&energy_igpu, res);
j["igpu_power_w"] = counter_value(energy_igpu, 2.328e-1);
if (energy_dgpu.fp && fscanf(energy_dgpu.fp, "%" PRIu64, res) == 1) {
// We can just depend on old clock readings in res[1]
struct timespec t = {0};
clock_gettime(CLOCK_MONOTONIC, &t);
// Timestamp in nanoseconds; the seconds part wraps every 24 hours.
res[1] = (t.tv_sec % (3600 * 24)) * 1e9 + t.tv_nsec;
counter_update(&energy_dgpu, res);
// NOTE(review): the 1e3 scale presumably assumes energy1_input is in
// microjoules — confirm against the hwmon documentation.
j["dgpu_power_w"] = counter_value(energy_dgpu, 1e3);
// std::endl flushes, so each JSON object is written out as one line.
std::cout << j << std::endl;

@ -48,7 +48,7 @@ extern struct gpuInfo gpu_info;
void getNvidiaGpuInfo(const struct overlay_params& params);
void getAmdGpuInfo(void);
void getIntelGpuInfo();
void getIntelGpuInfo(const char *drm_dev);
bool checkNvidia(const char *pci_dev);
extern void nvapi_util();
extern bool checkNVAPI();

@ -8,74 +8,52 @@ using json = nlohmann::json;
static bool init_intel = false;
struct gpuInfo gpu_info_intel {};
static char drm_dev[10];
// Worker that runs an external stats program through popen(), parses its
// JSON output and copies the values into gpu_info_intel. When the program
// exits with a non-zero code, gpu_stats is disabled in the overlay params.
// NOTE(review): this span shows two variants side by side — one that launches
// `intel_gpu_top -J -s 500` and assembles multi-line JSON, and one that
// launches `mango_intel_stats <drm_dev>` and parses one JSON object per line.
// Confirm which lines belong to the current version of the file.
static void intelGpuThread(bool runtime){
static void intelGpuThread(){
init_intel = true;
static char stdout_buffer[1024];
static FILE* intel_gpu_top;
if (runtime)
intel_gpu_top = popen("steam-runtime-launch-client --alongside-steam --host -- intel_gpu_top -J -s 500", "r");
intel_gpu_top = popen("intel_gpu_top -J -s 500", "r");
int num_line = 0;
std::string buf;
int num_iterations = 0;
while (fgets(stdout_buffer, sizeof(stdout_buffer), intel_gpu_top)) {
if (num_line > 0)
buf += stdout_buffer;
// A short line containing neither '{' nor ',' is treated as the end of one
// JSON object.
if (strlen(stdout_buffer) < 4 && !strchr(stdout_buffer, '{') && !strchr(stdout_buffer, ',')) {
if (buf[0] != '{')
buf = "{\n" + buf;
if (num_iterations > 0){
buf += "\n}";
json j = json::parse(buf);
if (j.contains("engines"))
if (j["engines"].contains("Render/3D/0"))
if (j["engines"]["Render/3D/0"].contains("busy"))
gpu_info_intel.load = j["engines"]["Render/3D/0"]["busy"].get<int>();
if (j.contains("frequency"))
if (j["frequency"].contains("actual"))
gpu_info_intel.CoreClock = j["frequency"]["actual"].get<int>();
if (j.contains("power")){
if (j["power"].contains("GPU"))
gpu_info_intel.powerUsage = j["power"]["GPU"].get<float>();
if (j["power"].contains("Package"))
gpu_info_intel.apu_cpu_power = j["power"]["Package"].get<float>();
buf = "";
num_line = 0;
static char stdout_buffer[4096];
// The helper receives the DRM node name as its only argument.
std::string cmd("mango_intel_stats ");
cmd += drm_dev;
FILE* mango_intel_stats = popen(cmd.c_str() , "r");
while (fgets(stdout_buffer, sizeof(stdout_buffer), mango_intel_stats)) {
// A complete record starts with '{' and ends with a newline; anything else
// means fgets() did not return one whole JSON line.
if ( stdout_buffer[0] != '{' || stdout_buffer[strlen(stdout_buffer)-1] != '\n') {
SPDLOG_ERROR("Overran 4k buffer for fgets output. Expect sadness:\n {}", stdout_buffer);
json j = json::parse(stdout_buffer);
if (j.contains("gpu_busy_pct"))
gpu_info_intel.load = j["gpu_busy_pct"].get<int>();
if (j.contains("gpu_clock_mhz"))
gpu_info_intel.CoreClock = j["gpu_clock_mhz"].get<int>();
// igpu_power_w and dgpu_power_w both feed powerUsage; when both keys are
// present the dgpu value is assigned last.
if (j.contains("igpu_power_w"))
gpu_info_intel.powerUsage = j["igpu_power_w"].get<float>();
if (j.contains("dgpu_power_w"))
gpu_info_intel.powerUsage = j["dgpu_power_w"].get<float>();
if (j.contains("cpu_power_w"))
gpu_info_intel.apu_cpu_power = j["cpu_power_w"].get<float>();
// pclose() status divided by 256 is used as the child's exit code.
int exitcode = pclose(intel_gpu_top) / 256;
int exitcode = pclose(mango_intel_stats) / 256;
if (exitcode > 0){
if (exitcode == 127)
SPDLOG_INFO("Failed to open '{}'", "intel_gpu_top");
SPDLOG_INFO("Failed to open '{}'", "mango_intel_stats");
if (exitcode == 1)
SPDLOG_INFO("Missing permissions for '{}'", "intel_gpu_top");
SPDLOG_INFO("Missing permissions for '{}'", "mango_intel_stats");
SPDLOG_INFO("Disabling gpu_stats");
_params->enabled[OVERLAY_PARAM_ENABLED_gpu_stats] = false;
// Publishes the most recent Intel readings by copying gpu_info_intel into
// gpu_info, after one-time setup guarded by init_intel.
// NOTE(review): this span shows two variants side by side — a parameterless
// one that checks for /run/pressure-vessel and starts intelGpuThread, and one
// that takes the DRM node name and stores it in the file-level drm_dev
// buffer. Confirm which lines belong to the current version of the file.
void getIntelGpuInfo(){
if (!init_intel){
static bool runtime = false;
static struct stat buffer;
if (stat("/run/pressure-vessel", &buffer) == 0)
runtime = true;
std::thread(intelGpuThread, runtime).detach();
void getIntelGpuInfo(const char *_drm_dev){
if (!init_intel) {
// The length check is only an assert, so it guards the strcpy() below only
// in builds where assertions are enabled.
assert(strlen(_drm_dev) < sizeof(drm_dev)-1);
strcpy(drm_dev, _drm_dev);
gpu_info = gpu_info_intel;

@ -255,6 +255,18 @@ mangoapp = executable(
if get_option('mango_intel_stats').enabled() and is_unixy
drm_dep = cc.find_library('libdrm')
warning('The mango_intel_stats binary must have CAP_PERFMON or setuid to be useful')
warning('You must do this manually post-install: setcap cap_perfmon=+ep /path/to/mango_intel_stats')
mangointel = executable(
install : true,
dependencies : [drm_dep, json_dep],
if get_option('mangoapp_layer')
mangoapp_layer = shared_library(

@ -127,8 +127,9 @@ void update_hw_info(const struct overlay_params& params, uint32_t vendorID)
if (vendorID == 0x10de)
if (vendorID== 0x8086)
if (vendorID== 0x8086) {
#ifdef __linux__
@ -824,6 +825,7 @@ void init_gpu_stats(uint32_t& vendorID, uint32_t reported_deviceID, overlay_para
continue; // filter display adapters
path = drm + dir;
drm_dev = dir;
SPDLOG_DEBUG("drm path check: {}", path);
if (pci_bus_parsed && pci_dev) {
