diff --git a/meson_options.txt b/meson_options.txt
index 5f7d11b..50ba289 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -9,6 +9,7 @@ option('with_wayland', type : 'feature', value : 'disabled')
 option('with_dbus', type : 'feature', value : 'enabled')
 option('loglevel', type: 'combo', choices : ['trace', 'debug', 'info', 'warn', 'err', 'critical', 'off'], value : 'info', description: 'Max log level in non-debug build')
 option('mangoapp', type: 'boolean', value : false)
+option('mango_intel_stats', type: 'feature', value : 'enabled', description: 'Enable helper binary for gathering stats for intel hardware on linux')
 option('mangohudctl', type: 'boolean', value : false)
 option('mangoapp_layer', type: 'boolean', value : false)
 option('tests', type: 'feature', value: 'auto', description: 'Run tests')
diff --git a/src/app/intel.cpp b/src/app/intel.cpp
new file mode 100644
index 0000000..34b4e85
--- /dev/null
+++ b/src/app/intel.cpp
@@ -0,0 +1,287 @@
+// mango_intel_stats: samples Intel GPU/CPU counters (i915 and "power" perf
+// events, hwmon energy files) and prints one JSON object per line on stdout
+// every 500 ms. Usage: mango_intel_stats <drm device, e.g. card0>
+
+// NOTE(review): include path providing the I915_PMU_* macros is assumed; confirm.
+#include <drm/i915_drm.h>
+
+#include <dirent.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <linux/perf_event.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <cerrno>
+#include <chrono>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <string>
+#include <thread>
+#include <vector>
+
+#include <nlohmann/json.hpp>
+
+// cut down from file_utils.cpp
+// Returns the names of the sub-directories of `root` that start with `prefix`
+// (all of them when `prefix` is null), skipping "." and "..". The list is
+// empty when `root` cannot be opened.
+static std::vector<std::string> lsdir(const char *root, const char *prefix) {
+    std::vector<std::string> list;
+
+    DIR *dirp = opendir(root);
+    if (!dirp) {
+        return list;
+    }
+
+    const size_t prefix_len = prefix ? strlen(prefix) : 0;
+    while (struct dirent *dp = readdir(dirp)) {
+        if (!strcmp(dp->d_name, ".") || !strcmp(dp->d_name, ".."))
+            continue;
+        // Keep only entries whose name begins with the requested prefix.
+        if (prefix && strncmp(dp->d_name, prefix, prefix_len) != 0)
+            continue;
+
+        if (dp->d_type == DT_DIR)
+            list.push_back(dp->d_name);
+    }
+
+    closedir(dirp);
+    return list;
+}
+
+// Thin wrapper around the perf_event_open syscall; fills in attr->size.
+static inline int perf_event_open(struct perf_event_attr *attr, pid_t pid,
+                                  int cpu, int group_fd, unsigned long flags) {
+    attr->size = sizeof(*attr);
+    return syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+// Reads the perf PMU type of the i915 device from sysfs. `bus_id` is the PCI
+// address with ':' replaced by '_'. Returns 0 when the type cannot be read.
+static uint64_t i915_perf_device_type(const char *bus_id) {
+    char path[80] = {0};
+    uint64_t type = 0;
+
+    // Ref: igt-gpu-tools for magic.
+    const bool is_igpu = strcmp(bus_id, "0000_00_02.0") == 0;
+    if (is_igpu)
+        snprintf(path, sizeof(path), "/sys/bus/event_source/devices/i915/type");
+    else
+        snprintf(path, sizeof(path),
+                 "/sys/bus/event_source/devices/i915_%s/type", bus_id);
+
+    FILE *fp = fopen(path, "r");
+    if (fp == nullptr)
+        return 0;
+
+    if (fscanf(fp, "%" SCNu64, &type) != 1)
+        type = 0;
+
+    fclose(fp);
+    return type;
+}
+
+// Opens a perf counter for PMU `type` / event `config`, trying CPUs 0..15
+// until one accepts the event. Returns the perf fd, or a negative value with
+// errno left by the last attempt.
+static inline int i915_perf_event_open(uint64_t type, uint64_t config) {
+    struct perf_event_attr attr = {};
+
+    attr.type = type;
+    attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED;
+    attr.config = config;
+    attr.use_clockid = 1;
+    attr.clockid = CLOCK_MONOTONIC;
+
+    const int group_fd = -1;  // not part of an event group
+    const int ncpu = 16;      // hopefully one of these works.
+    int cpu = 0;
+    int ret;
+    do {
+        ret = perf_event_open(&attr, -1, cpu++, group_fd, 0);
+    } while ((ret < 0 && errno == EINVAL) &&
+             (cpu < ncpu));  // find a cpu to open on.
+
+    return ret;
+}
+
+// Opens the perf event `metric` of the "power" PMU, reading the PMU type and
+// event code from /sys/devices/power. Returns the perf fd or -1 on failure.
+static int power_perf_event_open(const char *metric) {
+    char path[80] = {0};
+    uint64_t type = 0, config = 0;
+
+    FILE *fp = fopen("/sys/devices/power/type", "r");
+    if (fp == nullptr)
+        return -1;
+
+    int scanned = fscanf(fp, "%" SCNu64, &type);
+    fclose(fp);
+    if (scanned != 1)
+        return -1;
+
+    snprintf(path, sizeof(path), "/sys/devices/power/events/%s", metric);
+    if ((fp = fopen(path, "r")) == nullptr)
+        return -1;
+
+    scanned = fscanf(fp, "event=%" SCNx64, &config);
+    fclose(fp);
+    if (scanned != 1)
+        return -1;
+
+    return i915_perf_event_open(type, config);
+}
+
+// One sampled counter: its data source plus the two most recent readings.
+struct counter {
+    union {
+        int fd;    // perf event fd, for the perf-backed counters
+        FILE *fp;  // open hwmon file, for the discrete-GPU energy counter
+    };
+    uint64_t val;       // latest raw value
+    uint64_t ts;        // timestamp belonging to val
+    uint64_t val_prev;  // raw value before that
+    uint64_t ts_prev;   // timestamp belonging to val_prev
+};
+
+// Moves the current reading into the *_prev fields and stores a new one:
+// in[0] is the raw value, in[1] its timestamp.
+static void counter_update(struct counter *c, uint64_t in[4]) {
+    c->val_prev = c->val;
+    c->ts_prev = c->ts;
+    c->val = in[0];
+    c->ts = in[1];
+}
+
+// Returns (value delta / time delta) * scale for the last two readings, or 0
+// while no usable pair of readings exists.
+static float counter_value(struct counter c, float scale) {
+    // No previous sample, counter went backwards, or time did not advance
+    // (which would divide by zero): just try again next time.
+    if (c.val < c.val_prev || c.ts_prev == 0 || c.ts <= c.ts_prev) {
+        return 0.0;
+    }
+    float t = c.ts - c.ts_prev;
+    float d = c.val - c.val_prev;
+    return d / t * scale;
+}
+
+// Resolves the /sys/class/drm/<drm_dev> symlink and returns the path
+// component directly in front of "/drm/" (the PCI address in the example
+// below). Exits with status 1 when the link cannot be read or has no such
+// component.
+static std::string find_pci_busid(const char *drm_dev) {
+    char drm_link_c[PATH_MAX] = {0};
+    // readlink() does not NUL-terminate; keep one byte spare and use its length.
+    const ssize_t count =
+        readlink((std::string("/sys/class/drm/") + drm_dev).c_str(), drm_link_c,
+                 sizeof(drm_link_c) - 1);
+    if (count <= 0) {
+        fprintf(stderr, "Invalid drm device: %s\n", drm_dev);
+        exit(1);
+    }
+    const std::string drm_link(drm_link_c, static_cast<size_t>(count));
+    // Example link format:
+    // ../../devices/pci0000:00/0000:00:03.1/0000:07:00.0/0000:08:01.0/0000:09:00.0/drm/card0
+    const size_t pci_end = drm_link.rfind("/drm/");
+    if (pci_end == std::string::npos || pci_end == 0) {
+        fprintf(stderr, "Invalid drm device: %s\n", drm_dev);
+        exit(1);
+    }
+    // rfind() yields npos when no '/' precedes; npos + 1 wraps to index 0.
+    const size_t pci_start = drm_link.rfind('/', pci_end - 1) + 1;
+    return drm_link.substr(pci_start, pci_end - pci_start);
+}
+
+int main(int argc, char **argv) {
+    if (argc < 2) {
+        fprintf(stderr, "Not enough arguments. usage: mango_intel_stats \"card0\"\n");
+        exit(1);
+    }
+    char *drm_dev = argv[1];
+    std::string pci_path = find_pci_busid(drm_dev);
+    std::replace(pci_path.begin(), pci_path.end(), ':',
+                 '_');  // just sysfs things.
+    uint64_t type = i915_perf_device_type(pci_path.c_str());
+
+    // Counters for this tool, zero-initialised so the first counter_update()
+    // and counter_value() calls never read indeterminate fields.
+    struct counter busy_gpu = {}, freq_gpu = {}, energy_dgpu = {},
+                   energy_igpu = {}, energy_cpu = {};
+
+    freq_gpu.fd = i915_perf_event_open(type, I915_PMU_ACTUAL_FREQUENCY);
+    // Only trust errno when the open actually failed.
+    if (freq_gpu.fd < 0 && errno == EACCES) {
+        fprintf(
+            stderr,
+            "Permission denied on perf events.\nThis binary is meant to have "
+            "CAP_PERFMON\n via: sudo setcap cap_perfmon=+ep mango_intel_stats\n");
+        exit(1);
+    }
+    busy_gpu.fd = i915_perf_event_open(
+        type, I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0));
+    energy_cpu.fd = power_perf_event_open("energy-pkg");
+    energy_igpu.fd = power_perf_event_open("energy-gpu");
+
+    // Discrete GPUs: use the first hwmon directory that has energy1_input.
+    energy_dgpu.fp = nullptr;
+    std::string hwmon_path =
+        std::string("/sys/class/drm/") + drm_dev + "/device/hwmon/";
+    const auto dirs = lsdir(hwmon_path.c_str(), "hwmon");
+    for (const auto &dir : dirs) {
+        FILE *fp = fopen((hwmon_path + dir + "/energy1_input").c_str(), "r");
+        if (fp) {
+            energy_dgpu.fp = fp;
+            break;
+        }
+    }
+
+    while (true) {
+        // Start from an object so a sample with no readable counter is
+        // printed as "{}" instead of "null".
+        nlohmann::json j = nlohmann::json::object();
+        uint64_t res[4] = {0};
+        if (read(busy_gpu.fd, res, sizeof(res)) > 0) {
+            counter_update(&busy_gpu, res);
+            j["gpu_busy_pct"] = counter_value(busy_gpu, 100.0);
+        }
+        if (read(freq_gpu.fd, res, sizeof(res)) > 0) {
+            counter_update(&freq_gpu, res);
+            j["gpu_clock_mhz"] = counter_value(freq_gpu, 1e9);
+        }
+        if (read(energy_cpu.fd, res, sizeof(res)) > 0) {
+            counter_update(&energy_cpu, res);
+            // https://lwn.net/Articles/573602/, in practice intel and amd use 1/2^32J
+            j["cpu_power_w"] = counter_value(energy_cpu, 2.328e-1);
+        }
+        if (read(energy_igpu.fd, res, sizeof(res)) > 0) {
+            counter_update(&energy_igpu, res);
+            j["igpu_power_w"] = counter_value(energy_igpu, 2.328e-1);
+        }
+        if (energy_dgpu.fp) {
+            // Re-read from the start; the flush is meant to drop stdio's buffered
+            // copy so every sample comes from sysfs again.
+            rewind(energy_dgpu.fp);
+            fflush(energy_dgpu.fp);
+            if (fscanf(energy_dgpu.fp, "%" SCNu64, &res[0]) == 1) {
+                // hwmon supplies no timestamp, so stamp the reading here
+                // instead of reusing whatever an earlier read left in res[1].
+                res[1] = std::chrono::duration_cast<std::chrono::nanoseconds>(
+                             std::chrono::steady_clock::now().time_since_epoch())
+                             .count();
+                counter_update(&energy_dgpu, res);
+                j["dgpu_power_w"] = counter_value(energy_dgpu, 1e3);
+            }
+        }
+        // std::endl on purpose: the reader consumes this line by line via a pipe.
+        std::cout << j << std::endl;
+        std::this_thread::sleep_for(std::chrono::milliseconds(500));
+    }
+}
diff --git a/src/gpu.h b/src/gpu.h
index 07ceeb7..d6d4f6e 100644
--- a/src/gpu.h
+++ b/src/gpu.h
@@ -48,7 +48,7 @@ extern struct gpuInfo gpu_info;
 
 void getNvidiaGpuInfo(const struct overlay_params& params);
 void getAmdGpuInfo(void);
-void getIntelGpuInfo();
+void getIntelGpuInfo(const char *drm_dev);
 bool checkNvidia(const char *pci_dev);
 extern void nvapi_util();
 extern bool checkNVAPI();
diff --git a/src/intel.cpp b/src/intel.cpp
index 3a4e988..126f4e7 100644
--- a/src/intel.cpp
+++ b/src/intel.cpp
@@ -8,74 +8,77 @@ using json = nlohmann::json;
 
 static bool init_intel = false;
 struct gpuInfo gpu_info_intel {};
+// DRM device name (e.g. "card0") passed to the mango_intel_stats helper.
+static std::string drm_dev;
+
+// Copies j[key] into `out` when the key is present and holds a number.
+template <typename T>
+static void read_number(const json &j, const char *key, T &out) {
+    const auto it = j.find(key);
+    if (it != j.end() && it->is_number())
+        it->get_to(out);
+}
 
-static void intelGpuThread(bool runtime){
+// Worker thread: runs `mango_intel_stats <drm_dev>` and copies every JSON line
+// it prints into gpu_info_intel until the helper exits.
+static void intelGpuThread(){
     init_intel = true;
-    static char stdout_buffer[1024];
-    static FILE* intel_gpu_top;
-    if (runtime)
-        intel_gpu_top = popen("steam-runtime-launch-client --alongside-steam --host -- intel_gpu_top -J -s 500", "r");
-    else
-        intel_gpu_top = popen("intel_gpu_top -J -s 500", "r");
+    static char stdout_buffer[4096];
 
-    int num_line = 0;
-    std::string buf;
-    int num_iterations = 0;
-    while (fgets(stdout_buffer, sizeof(stdout_buffer), intel_gpu_top)) {
-        if (num_line > 0)
-            buf += stdout_buffer;
-
-        num_line++;
-        if (strlen(stdout_buffer) < 4 && !strchr(stdout_buffer, '{') && !strchr(stdout_buffer, ',')) {
-            if (buf[0] != '{')
-                buf = "{\n" + buf;
-
-            if (num_iterations > 0){
-                buf += "\n}";
-                json j = json::parse(buf);
-                if (j.contains("engines"))
-                    if (j["engines"].contains("Render/3D/0"))
-                        if (j["engines"]["Render/3D/0"].contains("busy"))
-                            gpu_info_intel.load = j["engines"]["Render/3D/0"]["busy"].get();
-
-                if (j.contains("frequency"))
-                    if (j["frequency"].contains("actual"))
-                        gpu_info_intel.CoreClock = j["frequency"]["actual"].get();
-                if (j.contains("power")){
-                    if (j["power"].contains("GPU"))
-                        gpu_info_intel.powerUsage = j["power"]["GPU"].get();
-                    if (j["power"].contains("Package"))
-                        gpu_info_intel.apu_cpu_power = j["power"]["Package"].get();
-                }
-
-            }
-            buf = "";
-            num_line = 0;
+    std::string cmd("mango_intel_stats ");
+    cmd += drm_dev;
+    FILE* mango_intel_stats = popen(cmd.c_str() , "r");
+    if (!mango_intel_stats) {
+        SPDLOG_ERROR("Failed to run '{}'", cmd);
+        SPDLOG_INFO("Disabling gpu_stats");
+        _params->enabled[OVERLAY_PARAM_ENABLED_gpu_stats] = false;
+        return;
+    }
+
+    while (fgets(stdout_buffer, sizeof(stdout_buffer), mango_intel_stats)) {
+        const size_t len = strlen(stdout_buffer);
+        if (len == 0 || stdout_buffer[0] != '{' || stdout_buffer[len - 1] != '\n') {
+            SPDLOG_ERROR("Overran 4k buffer for fgets output. Expect sadness:\n {}", stdout_buffer);
+            continue;
         }
-        num_iterations++;
+
+        // Non-throwing parse: an exception escaping this detached thread
+        // would terminate the whole process.
+        const json j = json::parse(stdout_buffer, nullptr, false);
+        if (!j.is_object())
+            continue;
+
+        read_number(j, "gpu_busy_pct", gpu_info_intel.load);
+        read_number(j, "gpu_clock_mhz", gpu_info_intel.CoreClock);
+        // If both power keys are present the dGPU value, read last, wins.
+        read_number(j, "igpu_power_w", gpu_info_intel.powerUsage);
+        read_number(j, "dgpu_power_w", gpu_info_intel.powerUsage);
+        read_number(j, "cpu_power_w", gpu_info_intel.apu_cpu_power);
     }
 
-    int exitcode = pclose(intel_gpu_top) / 256;
+    int exitcode = pclose(mango_intel_stats) / 256;
     if (exitcode > 0){
         if (exitcode == 127)
-        SPDLOG_INFO("Failed to open '{}'", "intel_gpu_top");
+        SPDLOG_INFO("Failed to open '{}'", "mango_intel_stats");
 
         if (exitcode == 1)
-        SPDLOG_INFO("Missing permissions for '{}'", "intel_gpu_top");
+        SPDLOG_INFO("Missing permissions for '{}'", "mango_intel_stats");
 
         SPDLOG_INFO("Disabling gpu_stats");
         _params->enabled[OVERLAY_PARAM_ENABLED_gpu_stats] = false;
     }
 }
 
-void getIntelGpuInfo(){
-    if (!init_intel){
-        static bool runtime = false;
-        static struct stat buffer;
-        if (stat("/run/pressure-vessel", &buffer) == 0)
-            runtime = true;
-
-        std::thread(intelGpuThread, runtime).detach();
+// Starts the worker on first use and copies the latest sample to gpu_info.
+void getIntelGpuInfo(const char *_drm_dev){
+    if (!init_intel) {
+        // Stored as std::string: no fixed-size buffer to overflow when
+        // assertions are compiled out.
+        drm_dev = _drm_dev ? _drm_dev : "";
+        // Mark as started before spawning so a second call cannot launch
+        // another worker while the first one is still starting up.
+        init_intel = true;
+        std::thread(intelGpuThread).detach();
     }
 
     gpu_info = gpu_info_intel;
diff --git a/src/meson.build b/src/meson.build
index e1cdbea..3c8f6ab 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -255,6 +255,18 @@ mangoapp = executable(
   )
 endif
 
+if get_option('mango_intel_stats').enabled() and is_unixy
+  drm_dep = cc.find_library('drm')
+  warning('The mango_intel_stats binary must have CAP_PERFMON or setuid to be useful')
+  warning('You must do this manually post-install: setcap cap_perfmon=+ep /path/to/mango_intel_stats')
+  mangointel = executable(
+    'mango_intel_stats',
+    files('app/intel.cpp'),
+    install : true,
+    dependencies : [drm_dep, json_dep],
+  )
+endif
+
 if get_option('mangoapp_layer')
   mangoapp_layer = shared_library(
     'MangoApp',
diff --git a/src/overlay.cpp b/src/overlay.cpp
index 11ef001..c034a91 100644
--- a/src/overlay.cpp
+++ b/src/overlay.cpp
@@ -127,8 +127,9 @@ void update_hw_info(const struct overlay_params& params, uint32_t vendorID)
       if (vendorID == 0x10de)
          getNvidiaGpuInfo(params);
 
-      if (vendorID== 0x8086)
-         getIntelGpuInfo();
+      if (vendorID== 0x8086) {
+         getIntelGpuInfo(drm_dev.c_str());
+      }
    }
 
 #ifdef __linux__
@@ -824,6 +825,7 @@ void init_gpu_stats(uint32_t& vendorID, uint32_t reported_deviceID, overlay_para
                continue; // filter display adapters
             }
             path = drm + dir;
+            drm_dev = dir;
             SPDLOG_DEBUG("drm path check: {}", path);
 
             if (pci_bus_parsed && pci_dev) {