From 5e1493a0cb47532624ce0c50ede2428876f075a5 Mon Sep 17 00:00:00 2001
From: jackun <jack.un@gmail.com>
Date: Sat, 18 Sep 2021 02:43:36 +0300
Subject: [PATCH] Support multiple GPUs

---
 src/gpu.cpp          | 219 ++++++++++++++++---------------
 src/gpu.h            | 128 +++++++++++++++---
 src/gpu_radeon.cpp   |  50 ++++---
 src/hud_elements.cpp | 199 +++++++++++++++++-----------
 src/hud_elements.h   |   3 +-
 src/nvapi.cpp        |  32 +++--
 src/nvctrl.cpp       |  33 +++--
 src/nvidia_info.h    |   4 +-
 src/nvml.cpp         | 118 ++++++++++++-----
 src/overlay.cpp      | 303 +++++++++++++++++++++++++------------------
 src/overlay.h        |   1 +
 src/overlay_params.h |   1 +
 12 files changed, 679 insertions(+), 412 deletions(-)
diff --git a/src/gpu.cpp b/src/gpu.cpp
index e764685..1b826b0 100644
--- a/src/gpu.cpp
+++ b/src/gpu.cpp
@@ -7,12 +7,13 @@
 #include <spdlog/spdlog.h>
 #include "nvctrl.h"
 #include "timing.hpp"
+#include "file_utils.h"
 #ifdef HAVE_NVML
 #include "nvidia_info.h"
 #endif
 
 #ifdef HAVE_LIBDRM_AMDGPU
-//#include "auth.h"
+#include "auth.h"
 #include <xf86drm.h>
 #include <libdrm/amdgpu_drm.h>
 #include <libdrm/amdgpu.h>
@@ -22,120 +23,118 @@
 
 using namespace std::chrono_literals;
 
-struct gpuInfo gpu_info {};
-amdgpu_files amdgpu {};
-decltype(&getAmdGpuInfo) getAmdGpuInfo_actual = nullptr;
+std::shared_ptr<GpuInfo> g_active_gpu;
+std::unordered_map<std::string /*device*/, std::shared_ptr<struct GpuInfo>> g_gpu_infos;
 
-bool checkNvidia(const char *pci_dev){
-    bool nvSuccess = false;
-#ifdef HAVE_NVML
-    nvSuccess = checkNVML(pci_dev) && getNVMLInfo();
-#endif
+bool NVCtrlInfo::init()
+{
 #ifdef HAVE_XNVCTRL
-    if (!nvSuccess)
-        nvSuccess = checkXNVCtrl();
+    // FIXME correct device index
+    return checkXNVCtrl();
+#else
+    return false;
 #endif
-#ifdef _WIN32
-    if (!nvSuccess)
-        nvSuccess = checkNVAPI();
-#endif
-    return nvSuccess;
 }
 
-void getNvidiaGpuInfo(){
-#ifdef HAVE_NVML
-    if (nvmlSuccess){
-        getNVMLInfo();
-        gpu_info.load = nvidiaUtilization.gpu;
-        gpu_info.temp = nvidiaTemp;
-        gpu_info.memoryUsed = nvidiaMemory.used / (1024.f * 1024.f * 1024.f);
-        gpu_info.CoreClock = nvidiaCoreClock;
-        gpu_info.MemClock = nvidiaMemClock;
-        gpu_info.powerUsage = nvidiaPowerUsage / 1000;
-        gpu_info.memoryTotal = nvidiaMemory.total / (1024.f * 1024.f * 1024.f);
-        return;
-    }
-#endif
+void NVCtrlInfo::update()
+{
 #ifdef HAVE_XNVCTRL
     if (nvctrlSuccess) {
         getNvctrlInfo();
-        gpu_info.load = nvctrl_info.load;
-        gpu_info.temp = nvctrl_info.temp;
-        gpu_info.memoryUsed = nvctrl_info.memoryUsed / (1024.f);
-        gpu_info.CoreClock = nvctrl_info.CoreClock;
-        gpu_info.MemClock = nvctrl_info.MemClock;
-        gpu_info.powerUsage = 0;
-        gpu_info.memoryTotal = nvctrl_info.memoryTotal;
+        s.load = nvctrl_info.load;
+        s.temp = nvctrl_info.temp;
+        s.memory_used = nvctrl_info.memoryUsed / (1024.f);
+        s.core_clock = nvctrl_info.CoreClock;
+        s.memory_clock = nvctrl_info.MemClock;
+        s.power_usage = 0;
+        s.memory_total = nvctrl_info.memoryTotal;
         return;
     }
 #endif
-#ifdef _WIN32
-nvapi_util();
-#endif
 }
 
-void getAmdGpuInfo(){
-    if (amdgpu.busy) {
-        rewind(amdgpu.busy);
-        fflush(amdgpu.busy);
-        int value = 0;
-        if (fscanf(amdgpu.busy, "%d", &value) != 1)
-            value = 0;
-        gpu_info.load = value;
+bool AMDGPUHWMonInfo::init()
+{
+    auto path  = sysfs_path + "/device";
+    handles.busy = fopen((path + "/gpu_busy_percent").c_str(), "r");
+    handles.vram_total = fopen((path + "/mem_info_vram_total").c_str(), "r");
+    handles.vram_used = fopen((path + "/mem_info_vram_used").c_str(), "r");
+
+    path += "/hwmon/";
+    std::string tempFolder;
+    if (find_folder(path, "hwmon", tempFolder)) {
+        handles.core_clock = fopen((path + tempFolder + "/freq1_input").c_str(), "r");
+        handles.memory_clock = fopen((path + tempFolder + "/freq2_input").c_str(), "r");
+        handles.temp = fopen((path + tempFolder + "/temp1_input").c_str(), "r");
+        handles.power_usage = fopen((path + tempFolder + "/power1_average").c_str(), "r");
     }
 
-    if (amdgpu.temp) {
-        rewind(amdgpu.temp);
-        fflush(amdgpu.temp);
+    return handles.busy && handles.temp && handles.vram_total && handles.vram_used;
+}
+
+void AMDGPUHWMonInfo::update()
+{
+    if (handles.busy) {
+        rewind(handles.busy);
+        fflush(handles.busy);
         int value = 0;
-        if (fscanf(amdgpu.temp, "%d", &value) != 1)
+        if (fscanf(handles.busy, "%d", &value) != 1)
             value = 0;
-        gpu_info.temp = value / 1000;
+        s.load = value;
+    }
+
+    if (handles.temp) {
+        rewind(handles.temp);
+        fflush(handles.temp);
+        int value = 0;
+        if (fscanf(handles.temp, "%d", &value) != 1)
+            value = 0;
+        s.temp = value / 1000;
     }
 
     int64_t value = 0;
 
-    if (amdgpu.vram_total) {
-        rewind(amdgpu.vram_total);
-        fflush(amdgpu.vram_total);
-        if (fscanf(amdgpu.vram_total, "%" PRId64, &value) != 1)
+    if (handles.vram_total) {
+        rewind(handles.vram_total);
+        fflush(handles.vram_total);
+        if (fscanf(handles.vram_total, "%" PRId64, &value) != 1)
             value = 0;
-        gpu_info.memoryTotal = float(value) / (1024 * 1024 * 1024);
+        s.memory_total = float(value) / (1024 * 1024 * 1024);
     }
 
-    if (amdgpu.vram_used) {
-        rewind(amdgpu.vram_used);
-        fflush(amdgpu.vram_used);
-        if (fscanf(amdgpu.vram_used, "%" PRId64, &value) != 1)
+    if (handles.vram_used) {
+        rewind(handles.vram_used);
+        fflush(handles.vram_used);
+        if (fscanf(handles.vram_used, "%" PRId64, &value) != 1)
             value = 0;
-        gpu_info.memoryUsed = float(value) / (1024 * 1024 * 1024);
+        s.memory_used = float(value) / (1024 * 1024 * 1024);
     }
 
-    if (amdgpu.core_clock) {
-        rewind(amdgpu.core_clock);
-        fflush(amdgpu.core_clock);
-        if (fscanf(amdgpu.core_clock, "%" PRId64, &value) != 1)
+    if (handles.core_clock) {
+        rewind(handles.core_clock);
+        fflush(handles.core_clock);
+        if (fscanf(handles.core_clock, "%" PRId64, &value) != 1)
             value = 0;
 
-        gpu_info.CoreClock = value / 1000000;
+        s.core_clock = value / 1000000;
     }
 
-    if (amdgpu.memory_clock) {
-        rewind(amdgpu.memory_clock);
-        fflush(amdgpu.memory_clock);
-        if (fscanf(amdgpu.memory_clock, "%" PRId64, &value) != 1)
+    if (handles.memory_clock) {
+        rewind(handles.memory_clock);
+        fflush(handles.memory_clock);
+        if (fscanf(handles.memory_clock, "%" PRId64, &value) != 1)
             value = 0;
 
-        gpu_info.MemClock = value / 1000000;
+        s.memory_clock = value / 1000000;
     }
 
-    if (amdgpu.power_usage) {
-        rewind(amdgpu.power_usage);
-        fflush(amdgpu.power_usage);
-        if (fscanf(amdgpu.power_usage, "%" PRId64, &value) != 1)
+    if (handles.power_usage) {
+        rewind(handles.power_usage);
+        fflush(handles.power_usage);
+        if (fscanf(handles.power_usage, "%" PRId64, &value) != 1)
             value = 0;
 
-        gpu_info.powerUsage = value / 1000000;
+        s.power_usage = value / 1000000;
     }
 }
 
@@ -148,7 +147,7 @@ static int getgrbm_amdgpu(amdgpu_device_handle dev, uint32_t *out) {
                                     0xffffffff, 0, out);
 }
 
-struct amdgpu_handles
+struct amdgpu_handles : public gpu_handles
 {
     amdgpu_device_handle dev;
     int fd;
@@ -208,21 +207,20 @@ struct amdgpu_handles
     }
 };
 
-typedef std::unique_ptr<amdgpu_handles> amdgpu_ptr;
-static amdgpu_ptr amdgpu_dev;
-
-void amdgpu_set_sampling_period(uint32_t period)
+void amdgpu_set_sampling_period(gpu_handles* dev, uint32_t period) // no-op when dev is null
 {
+    auto amdgpu_dev = static_cast<amdgpu_handles*>(dev); // checked downcast; dev must be an amdgpu_handles
     if (amdgpu_dev)
         amdgpu_dev->set_sampling_period(period);
 }
 
-bool amdgpu_open(const char *path) {
+static amdgpu_handles* amdgpu_open(const char* path)
+{
     int fd = open(path, O_RDWR | O_CLOEXEC);
 
     if (fd < 0) {
         SPDLOG_ERROR("Failed to open DRM device: {}", strerror(errno));
-        return false;
+        return nullptr;
     }
 
     drmVersionPtr ver = drmGetVersion(fd);
@@ -230,69 +228,82 @@ bool amdgpu_open(const char *path) {
     if (!ver) {
         SPDLOG_ERROR("Failed to query driver version: {}", strerror(errno));
         close(fd);
-        return false;
+        return nullptr;
     }
 
     if (strcmp(ver->name, "amdgpu") || !DRM_ATLEAST_VERSION(ver, 3, 11)) {
         SPDLOG_ERROR("Unsupported driver/version: {} {}.{}.{}", ver->name, ver->version_major, ver->version_minor, ver->version_patchlevel);
         close(fd);
         drmFreeVersion(ver);
-        return false;
+        return nullptr;
     }
     drmFreeVersion(ver);
 
-/*
     if (!authenticate_drm(fd)) {
         close(fd);
-        return false;
+        return nullptr;
     }
-*/
 
     uint32_t drm_major, drm_minor;
     amdgpu_device_handle dev;
     if (amdgpu_device_initialize(fd, &drm_major, &drm_minor, &dev)){
         SPDLOG_ERROR("Failed to initialize amdgpu device: {}", strerror(errno));
         close(fd);
+        return nullptr;
+    }
+
+    return new amdgpu_handles(dev, fd, drm_major, drm_minor);
+}
+
+bool AMDGPUInfo::init()
+{
+    int idx = -1;
+    // Take the first decimal number found in sysfs_path as the DRM card index.
+    if (sscanf(sysfs_path.c_str(), "%*[^0-9]%d", &idx) != 1 || idx < 0)
+        return false;
+
+    const std::string dri_path = "/dev/dri/card" + std::to_string(idx);
+    device = amdgpu_open(dri_path.c_str()); // amdgpu_handles* converts implicitly to its gpu_handles base
+    if (!device)
+    {
+        SPDLOG_WARN("Failed to open device '{}' with libdrm", dri_path);
         return false;
     }
 
-    amdgpu_dev = std::make_unique<amdgpu_handles>(dev, fd, drm_major, drm_minor);
     return true;
 }
 
-void getAmdGpuInfo_libdrm()
+
+void AMDGPUInfo::update() // refreshes `s` from libdrm queries; fields whose query fails keep their old value
 {
     uint64_t value = 0;
     uint32_t value32 = 0;
+    auto amdgpu_dev = static_cast<amdgpu_handles*>(device); // downcast to the type stored by init()
 
     if (!amdgpu_dev || !DRM_ATLEAST_VERSION(amdgpu_dev, 3, 11))
-    {
-        getAmdGpuInfo();
-        getAmdGpuInfo_actual = getAmdGpuInfo;
         return;
-    }
 
     if (!amdgpu_query_info(amdgpu_dev->dev, AMDGPU_INFO_VRAM_USAGE, sizeof(uint64_t), &value))
-        gpu_info.memoryUsed = float(value) / (1024 * 1024 * 1024);
+        s.memory_used = float(value) / (1024 * 1024 * 1024); // bytes -> GiB
 
     // FIXME probably not correct sensor
     if (!amdgpu_query_info(amdgpu_dev->dev, AMDGPU_INFO_MEMORY, sizeof(uint64_t), &value))
-        gpu_info.memoryTotal = float(value) / (1024 * 1024 * 1024);
+        s.memory_total = float(value) / (1024 * 1024 * 1024); // bytes -> GiB
 
     if (!amdgpu_query_sensor_info(amdgpu_dev->dev, AMDGPU_INFO_SENSOR_GFX_SCLK, sizeof(uint32_t), &value32))
-        gpu_info.CoreClock = value32;
+        s.core_clock = value32;
 
     if (!amdgpu_query_sensor_info(amdgpu_dev->dev, AMDGPU_INFO_SENSOR_GFX_MCLK, sizeof(uint32_t), &value32)) // XXX Doesn't work on APUs
-        gpu_info.MemClock = value32;
+        s.memory_clock = value32;
 
     //if (!amdgpu_query_sensor_info(amdgpu_dev->dev, AMDGPU_INFO_SENSOR_GPU_LOAD, sizeof(uint32_t), &value32))
-    //    gpu_info.load = value32;
-    gpu_info.load = amdgpu_dev->gui_percent;
+    //    s.load = value32;
+    s.load = amdgpu_dev->gui_percent; // load comes from the handle's own gui_percent value instead
 
     if (!amdgpu_query_sensor_info(amdgpu_dev->dev, AMDGPU_INFO_SENSOR_GPU_TEMP, sizeof(uint32_t), &value32))
-        gpu_info.temp = value32 / 1000;
+        s.temp = value32 / 1000;
 
     if (!amdgpu_query_sensor_info(amdgpu_dev->dev, AMDGPU_INFO_SENSOR_GPU_AVG_POWER, sizeof(uint32_t), &value32))
-        gpu_info.powerUsage = value32;
+        s.power_usage = value32;
 }
 #endif
diff --git a/src/gpu.h b/src/gpu.h
index 55df774..d5e026d 100644
--- a/src/gpu.h
+++ b/src/gpu.h
@@ -4,6 +4,9 @@
 
 #include <cstdio>
 #include <cstdint>
+#include <unordered_map>
+#include <memory>
+#include <string>
 
 enum {
     GRBM_STATUS = 0x8010,
@@ -20,33 +23,118 @@ struct amdgpu_files
     FILE *power_usage;
 };
 
-extern amdgpu_files amdgpu;
-
-struct gpuInfo{
-    int load;
-    int temp;
-    float memoryUsed;
-    float memoryTotal;
-    int MemClock;
-    int CoreClock;
-    int powerUsage;
+struct gpu_handles // common base for backend-specific device handles
+{
+    virtual ~gpu_handles() = default; // virtual so a handle can be deleted through gpu_handles*
 };
 
-extern struct gpuInfo gpu_info;
+// Abstract per-GPU telemetry source: a backend implements init()/update() and fills `s`.
+struct GpuInfo
+{
+    GpuInfo(const std::string& sysfs, const std::string& pci)
+    : sysfs_path(sysfs), pci_device(pci)
+    {}
+    virtual ~GpuInfo() = default;
+    virtual void update() = 0; // refresh the readings stored in `s`
+    virtual bool init() = 0;   // prepare the backend; returns false on failure
+
+    std::string sysfs_path;
+    std::string pci_device;
+    std::string dev_name;
+    bool inited {}; // value-initialised so it is never read while indeterminate
+
+    struct {
+        int load;           // shown by the HUD with a "%" suffix
+        int temp;           // shown by the HUD with a "°C" suffix
+        float memory_used;  // shown by the HUD in GiB
+        float memory_total; // same unit as memory_used
+        int memory_clock;   // shown by the HUD in MHz
+        int core_clock;     // shown by the HUD in MHz
+        int power_usage;    // shown by the HUD in W
+    } s {};
+
+    uint32_t vendorID {}, deviceID {};
+    gpu_handles* device {}; // backend device handle; released by the subclasses that own one
+};
+
+extern std::shared_ptr<GpuInfo> g_active_gpu;
+
+struct NVMLInfo : public GpuInfo
+{
+    NVMLInfo(const std::string& sysfs, const std::string& pci) : GpuInfo(sysfs, pci) {}
+    void update() override; // implements GpuInfo::update()
+    bool init() override;   // implements GpuInfo::init()
+};
+
+struct NVCtrlInfo : public GpuInfo
+{
+    NVCtrlInfo(const std::string& sysfs, const std::string& pci) : GpuInfo(sysfs, pci) {}
+    void update() override; // implements GpuInfo::update()
+    bool init() override;   // implements GpuInfo::init()
+};
+
+struct NVAPIInfo : public GpuInfo
+{
+    NVAPIInfo() : GpuInfo({}, {}) {} // constructed with empty sysfs and PCI strings
+    void update() override; // implements GpuInfo::update()
+    bool init() override;   // implements GpuInfo::init()
+};
+
+// AMDGPU telemetry backend whose init() stores an opened device handle in `device`;
+// that handle is deleted again when this object is destroyed.
+struct AMDGPUInfo : public GpuInfo
+{
+    AMDGPUInfo(const std::string& sysfs, const std::string& pci) : GpuInfo(sysfs, pci) {}
+    // `device` is a raw owning pointer here, so release it explicitly.
+    ~AMDGPUInfo() override { delete device; }
+    void update() override;
+    bool init() override;
+};
+
+// AMDGPU telemetry backend that reads sysfs/hwmon files; owns the FILE streams in `handles`.
+struct AMDGPUHWMonInfo : public GpuInfo
+{
+    AMDGPUHWMonInfo(const std::string& sysfs, const std::string& pci) : GpuInfo(sysfs, pci) {}
+    ~AMDGPUHWMonInfo() override
+    {
+        delete device;
+        // Close every stream that init() managed to open; the others are still null.
+        FILE* const streams[] = {
+            handles.busy,
+            handles.temp,
+            handles.vram_total,
+            handles.vram_used,
+            handles.core_clock,
+            handles.memory_clock,
+            handles.power_usage,
+        };
+        for (FILE* stream : streams)
+            if (stream)
+                fclose(stream);
+        handles = {};
+    }
+
+    void update() override;
+    bool init() override;
+
+    amdgpu_files handles {};
+};
+
+struct RadeonInfo : public GpuInfo
+{
+    RadeonInfo(const std::string& sysfs, const std::string& pci) : GpuInfo(sysfs, pci) {}
+    void update() override; // implements GpuInfo::update()
+    bool init() override;   // implements GpuInfo::init()
+};
+
+extern std::unordered_map<std::string /*device*/, std::shared_ptr<struct GpuInfo>> g_gpu_infos;
 
-void getNvidiaGpuInfo(void);
-void getAmdGpuInfo(void);
 #ifdef HAVE_LIBDRM
-void getRadeonInfo_libdrm();
-bool radeon_open(const char *path);
-void radeon_set_sampling_period(uint32_t period);
+void radeon_set_sampling_period(gpu_handles* dev, uint32_t period);
 #endif
 #ifdef HAVE_LIBDRM_AMDGPU
-void getAmdGpuInfo_libdrm();
-bool amdgpu_open(const char *path);
-void amdgpu_set_sampling_period(uint32_t period);
+void amdgpu_set_sampling_period(gpu_handles* dev, uint32_t period);
 #endif
-extern decltype(&getAmdGpuInfo) getAmdGpuInfo_actual;
 bool checkNvidia(const char *pci_dev);
 extern void nvapi_util();
 extern bool checkNVAPI();
diff --git a/src/gpu_radeon.cpp b/src/gpu_radeon.cpp
index 4114e3e..d37e416 100644
--- a/src/gpu_radeon.cpp
+++ b/src/gpu_radeon.cpp
@@ -119,16 +119,14 @@ struct radeon_handles
     }
 };
 
-typedef std::unique_ptr<radeon_handles> radeon_ptr;
-static radeon_ptr radeon_dev;
-
-void radeon_set_sampling_period(uint32_t period)
+void radeon_set_sampling_period(gpu_handles* dev, uint32_t period) // forwards `period` to the radeon handle; no-op when dev is null
 {
+    auto radeon_dev = reinterpret_cast<radeon_handles*>(dev); // dev must be a pointer produced by radeon_open() (as stored by RadeonInfo::init)
     if (radeon_dev)
         radeon_dev->set_sampling_period(period);
 }
 
-bool radeon_open(const char *path)
+static radeon_handles* radeon_open(const char *path)
 {
     uint32_t drm_major = 0, drm_minor = 0;
 
@@ -136,7 +134,7 @@ bool radeon_open(const char *path)
 
     if (fd < 0) {
         SPDLOG_ERROR("Failed to open DRM device: {}", strerror(errno));
-        return false;
+        return nullptr;
     }
 
     drmVersionPtr ver = drmGetVersion(fd);
@@ -144,7 +142,7 @@ bool radeon_open(const char *path)
     if (!ver) {
         SPDLOG_ERROR("Failed to query driver version: {}", strerror(errno));
         close(fd);
-        return false;
+        return nullptr;
     }
 
     if (strcmp(ver->name, "radeon") || !DRM_ATLEAST_VERSION(ver, 2, 42)) {
@@ -152,7 +150,7 @@ bool radeon_open(const char *path)
                      ver->name, ver->version_major, ver->version_minor, ver->version_patchlevel);
         close(fd);
         drmFreeVersion(ver);
-        return false;
+        return nullptr;
     }
 
     drm_major = ver->version_major;
@@ -161,42 +159,60 @@ bool radeon_open(const char *path)
 
     if (!authenticate_drm(fd)) {
         close(fd);
+        return nullptr;
+    }
+
+    return new radeon_handles(fd, drm_major, drm_minor);
+}
+
+bool RadeonInfo::init()
+{
+    int idx = -1;
+
+    if (sscanf(sysfs_path.c_str(), "%*[^0-9]%d", &idx) != 1 || idx < 0)
+        return false;
+
+    const std::string dri_path = "/dev/dri/card" + std::to_string(idx);
+    device = reinterpret_cast<gpu_handles*>(radeon_open(dri_path.c_str()));
+    if (!device)
+    {
+        SPDLOG_WARN("Failed to open device '{}' with libdrm", dri_path);
         return false;
     }
 
-    radeon_dev = std::make_unique<radeon_handles>(fd, drm_major, drm_minor);
     return true;
 }
 
-void getRadeonInfo_libdrm()
+void RadeonInfo::update()
 {
     uint64_t value = 0;
     uint32_t value32 = 0;
 
+    auto radeon_dev = reinterpret_cast<radeon_handles*>(device);
     if (!radeon_dev)
         return;
 
-    gpu_info.load = radeon_dev->gui_percent;
+    s.load = radeon_dev->gui_percent;
 
     // TODO one shot?
     struct drm_radeon_gem_info buffer {};
     int ret = 0;
     if (!(ret = ioctl(radeon_dev->fd, DRM_IOCTL_RADEON_GEM_INFO, &buffer)))
-        gpu_info.memoryTotal = buffer.vram_size / (1024.f * 1024.f * 1024.f);
+        s.memory_total = buffer.vram_size / (1024.f * 1024.f * 1024.f);
     else
         SPDLOG_ERROR("DRM_IOCTL_RADEON_GEM_INFO failed: {}", ret);
 
     if (!get_radeon_drm_value(radeon_dev->fd, RADEON_INFO_VRAM_USAGE, &value))
-        gpu_info.memoryUsed = value / (1024.f * 1024.f * 1024.f);
+        s.memory_used = value / (1024.f * 1024.f * 1024.f);
 
     if (!get_radeon_drm_value(radeon_dev->fd, RADEON_INFO_CURRENT_GPU_SCLK, &value32))
-        gpu_info.CoreClock = value32;
+        s.core_clock = value32;
 
     if (!get_radeon_drm_value(radeon_dev->fd, RADEON_INFO_CURRENT_GPU_MCLK, &value32))
-        gpu_info.MemClock = value32;
+        s.memory_clock = value32;
 
     if (!get_radeon_drm_value(radeon_dev->fd, RADEON_INFO_CURRENT_GPU_TEMP, &value32))
-        gpu_info.temp = value32 / 1000;
+        s.temp = value32 / 1000;
 
-    gpu_info.powerUsage = 0;
+    s.power_usage = 0;
 }
diff --git a/src/hud_elements.cpp b/src/hud_elements.cpp
index 6ba6f97..a42f725 100644
--- a/src/hud_elements.cpp
+++ b/src/hud_elements.cpp
@@ -130,64 +130,126 @@ void HudElements::version(){
     }
 }
 
-void HudElements::gpu_stats(){
-    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_stats]){
-        ImGui::TableNextRow(); ImGui::TableNextColumn();
-        const char* gpu_text;
+static void per_gpu_vram(GpuInfo* gpu)
+{   // Draws the "VRAM" row for one GPU: used memory and, when enabled, the memory clock.
+    if (!HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_vram])
+        return;
+    const auto unit_label = [](const char* label) { // small-font unit right after the value
+        ImGui::SameLine(0, 1.0f);
+        ImGui::PushFont(HUDElements.sw_stats->font1);
+        ImGui::Text("%s", label);
+        ImGui::PopFont();
+    };
+    ImGui::TableNextRow(); ImGui::TableNextColumn();
+    ImGui::TextColored(HUDElements.colors.vram, "VRAM");
+    ImGui::TableNextColumn();
+    right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%.1f", gpu->s.memory_used);
+    unit_label("GiB");
+    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_mem_clock]){
+        ImGui::TableNextColumn();
+        right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%i", gpu->s.memory_clock);
+        unit_label("MHz");
+    }
+}
+
+void HudElements::vram(){
+    // With show_all_gpus the VRAM rows are drawn per GPU by per_gpu_stats(), not here.
+    const bool all_gpus = HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_show_all_gpus];
+    if (all_gpus || !g_active_gpu)
+        return;
+    per_gpu_vram(g_active_gpu.get());
+}
+
+static void per_gpu_stats(GpuInfo* gpu, bool single){
+    ImGui::TableNextRow(); ImGui::TableNextColumn();
+    const char* gpu_text;
+    if (single)
+    {
         if (HUDElements.params->gpu_text.empty())
             gpu_text = "GPU";
         else
             gpu_text = HUDElements.params->gpu_text.c_str();
         ImGui::TextColored(HUDElements.colors.gpu, "%s", gpu_text);
         ImGui::TableNextColumn();
-        auto text_color = HUDElements.colors.text;
-        if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_load_change]){
-            struct LOAD_DATA gpu_data = {
-                HUDElements.colors.gpu_load_low,
-                HUDElements.colors.gpu_load_med,
-                HUDElements.colors.gpu_load_high,
-                HUDElements.params->gpu_load_value[0],
-                HUDElements.params->gpu_load_value[1]
-            };
-
-            auto load_color = change_on_load_temp(gpu_data, gpu_info.load);
-            right_aligned_text(load_color, HUDElements.ralign_width, "%i", gpu_info.load);
-            ImGui::SameLine(0, 1.0f);
-            ImGui::TextColored(load_color,"%%");
-        }
-        else {
-            right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu_info.load);
-            ImGui::SameLine(0, 1.0f);
-            ImGui::TextColored(text_color,"%%");
-            // ImGui::SameLine(150);
-            // ImGui::Text("%s", "%");
-        }
-        if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_temp]){
-            ImGui::TableNextColumn();
-            right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu_info.temp);
-            ImGui::SameLine(0, 1.0f);
-            ImGui::Text("°C");
-        }
-        if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_core_clock] || HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_power]){
-            ImGui::TableNextRow(); ImGui::TableNextColumn();
-        }
-        if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_core_clock]){
-            ImGui::TableNextColumn();
-            right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu_info.CoreClock);
-            ImGui::SameLine(0, 1.0f);
-            ImGui::PushFont(HUDElements.sw_stats->font1);
-            ImGui::Text("MHz");
-            ImGui::PopFont();
-        }
-        if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_power]) {
-            ImGui::TableNextColumn();
-            right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu_info.powerUsage);
-            ImGui::SameLine(0, 1.0f);
-            ImGui::PushFont(HUDElements.sw_stats->font1);
-            ImGui::Text("W");
-            ImGui::PopFont();
-        }
     }
+    else
+    {
+        ImGui::TextColored(HUDElements.colors.gpu, "%s", gpu->dev_name.c_str());
+        ImGui::TableNextRow(); ImGui::TableNextColumn();
+    }
+
+    auto text_color = HUDElements.colors.text;
+    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_load_change]){
+        struct LOAD_DATA gpu_data = {
+            HUDElements.colors.gpu_load_low,
+            HUDElements.colors.gpu_load_med,
+            HUDElements.colors.gpu_load_high,
+            HUDElements.params->gpu_load_value[0],
+            HUDElements.params->gpu_load_value[1]
+        };
+
+        auto load_color = change_on_load_temp(gpu_data, gpu->s.load);
+        right_aligned_text(load_color, HUDElements.ralign_width, "%i", gpu->s.load);
+        ImGui::SameLine(0, 1.0f);
+        ImGui::TextColored(load_color,"%%");
+    }
+    else {
+        right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu->s.load);
+        ImGui::SameLine(0, 1.0f);
+        ImGui::TextColored(text_color,"%%");
+        // ImGui::SameLine(150);
+        // ImGui::Text("%s", "%");
+    }
+    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_temp]){
+        ImGui::TableNextColumn();
+        right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu->s.temp);
+        ImGui::SameLine(0, 1.0f);
+        ImGui::Text("°C");
+    }
+
+    if (single && (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_core_clock] || HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_power])){
+        ImGui::TableNextRow(); ImGui::TableNextColumn();
+    }
+
+    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_core_clock]){
+        ImGui::TableNextColumn();
+        right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu->s.core_clock);
+        ImGui::SameLine(0, 1.0f);
+        ImGui::PushFont(HUDElements.sw_stats->font1);
+        ImGui::Text("MHz");
+        ImGui::PopFont();
+    }
+    if (!single && HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_core_clock]) {
+        ImGui::TableNextRow();
+        ImGui::TableNextColumn();
+    }
+    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_power]) {
+        ImGui::TableNextColumn();
+        right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu->s.power_usage);
+        ImGui::SameLine(0, 1.0f);
+        ImGui::PushFont(HUDElements.sw_stats->font1);
+        ImGui::Text("W");
+        ImGui::PopFont();
+    }
+
+    if (!single)
+        per_gpu_vram(gpu);
+}
+
+void HudElements::gpu_stats(){
+    // Draws every known GPU when show_all_gpus is on, otherwise only the active one.
+    const auto& enabled = HUDElements.params->enabled;
+    if (!enabled[OVERLAY_PARAM_ENABLED_gpu_stats])
+        return;
+
+    if (!enabled[OVERLAY_PARAM_ENABLED_show_all_gpus]) {
+        if (g_active_gpu)
+            per_gpu_stats(g_active_gpu.get(), true);
+        return;
+    }
+
+    for (const auto& entry : g_gpu_infos)
+        per_gpu_stats(entry.second.get(), false);
 }
 
 void HudElements::cpu_stats(){
@@ -326,27 +388,6 @@ void HudElements::io_stats(){
     }
 }
 
-void HudElements::vram(){
-    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_vram]){
-        ImGui::TableNextRow(); ImGui::TableNextColumn();
-        ImGui::TextColored(HUDElements.colors.vram, "VRAM");
-        ImGui::TableNextColumn();
-        right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%.1f", gpu_info.memoryUsed);
-        ImGui::SameLine(0,1.0f);
-        ImGui::PushFont(HUDElements.sw_stats->font1);
-        ImGui::Text("GiB");
-        ImGui::PopFont();
-        if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_mem_clock]){
-            ImGui::TableNextColumn();
-            right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%i", gpu_info.MemClock);
-            ImGui::SameLine(0, 1.0f);
-            ImGui::PushFont(HUDElements.sw_stats->font1);
-            ImGui::Text("MHz");
-            ImGui::PopFont();
-        }
-    }
-}
-
 void HudElements::ram(){
 #ifdef __linux__
     if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_ram]){
@@ -451,11 +492,11 @@ void HudElements::fps(){
 }
 
 void HudElements::gpu_name(){
-    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_name] && !HUDElements.sw_stats->gpuName.empty()){
+    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_name] && g_active_gpu && !g_active_gpu->dev_name.empty()){
         ImGui::TableNextRow(); ImGui::TableNextColumn();
         ImGui::PushFont(HUDElements.sw_stats->font1);
         ImGui::TextColored(HUDElements.colors.engine,
-            "%s", HUDElements.sw_stats->gpuName.c_str());
+            "%s", g_active_gpu->dev_name.c_str());
         ImGui::PopFont();
     }
 }
@@ -703,7 +744,7 @@ void HudElements::graphs(){
     ImGui::PushFont(HUDElements.sw_stats->font1);
     if (value == "cpu_load"){
         for (auto& it : graph_data){
-            arr.push_back(float(it.cpu_load));
+            arr.push_back(it.cpu_load);
             arr.erase(arr.begin());
         }
         HUDElements.max = 100; HUDElements.min = 0;
@@ -771,13 +812,13 @@ void HudElements::graphs(){
         ImGui::TextColored(HUDElements.colors.engine, "%s", "GPU Mem Clock");
     }
 
-    if (value == "vram"){
+    if (value == "vram" && g_active_gpu){
         for (auto& it : graph_data){
-            arr.push_back(float(it.gpu_vram_used));
+            arr.push_back(it.gpu_vram_used);
             arr.erase(arr.begin());
         }
 
-        HUDElements.max = gpu_info.memoryTotal;
+        HUDElements.max = g_active_gpu->s.memory_total;
         HUDElements.min = 0;
         ImGui::TextColored(HUDElements.colors.engine, "%s", "VRAM");
     }
@@ -786,7 +827,7 @@ void HudElements::graphs(){
         if (!HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_ram])
             HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_ram] = true;
         for (auto& it : graph_data){
-            arr.push_back(float(it.ram_used));
+            arr.push_back(it.ram_used);
             arr.erase(arr.begin());
         }
 
diff --git a/src/hud_elements.h b/src/hud_elements.h
index 6b001a6..dd2826b 100644
--- a/src/hud_elements.h
+++ b/src/hud_elements.h
@@ -23,7 +23,8 @@ class HudElements{
         Clock::time_point last_exec;
         std::vector<std::pair<std::string, std::string>> options;
         std::vector<std::pair<void(*)(), std::string >> ordered_functions;
-        int min, max, gpu_core_max, gpu_mem_max, cpu_temp_max, gpu_temp_max;
+        float min, max;
+        int gpu_core_max, gpu_mem_max, cpu_temp_max, gpu_temp_max;
         const std::vector<std::string> permitted_params = {
             "gpu_load", "cpu_load", "gpu_core_clock", "gpu_mem_clock",
             "vram", "ram", "cpu_temp", "gpu_temp"
diff --git a/src/nvapi.cpp b/src/nvapi.cpp
index 19e909e..c20a1ad 100644
--- a/src/nvapi.cpp
+++ b/src/nvapi.cpp
@@ -40,28 +40,34 @@ bool checkNVAPI(){
     NvAPI_Initialize = (NvAPI_Initialize_t) (*NvAPI_QueryInterface)(0x0150E828);
     NvAPI_EnumPhysicalGPUs = (NvAPI_EnumPhysicalGPUs_t) (*NvAPI_QueryInterface)(0xE5AC921F);
     NvAPI_GPU_GetUsages = (NvAPI_GPU_GetUsages_t) (*NvAPI_QueryInterface)(0x189A1FDF);
-    if (NvAPI_Initialize == NULL || NvAPI_EnumPhysicalGPUs == NULL ||
-        NvAPI_EnumPhysicalGPUs == NULL || NvAPI_GPU_GetUsages == NULL)
+
+    if (!NvAPI_Initialize || !NvAPI_EnumPhysicalGPUs || !NvAPI_EnumPhysicalGPUs || !NvAPI_GPU_GetUsages)
     {
         std::cerr << "Couldn't get functions in nvapi.dll" << std::endl;
         return 2;
     }
-    (*NvAPI_Initialize)();
-    
-    int         *gpuHandles[NVAPI_MAX_PHYSICAL_GPUS] = { NULL };
+    NvAPI_Initialize();
+
+    NvAPI_EnumPhysicalGPUs(gpuHandles, &gpuCount);
 
     return true;
 }
 
-void nvapi_util()
-{  
+bool NVAPIInfo::init()
+{
+    // Skip the probe once it has succeeded; otherwise (re)try checkNVAPI().
+    init_nvapi_bool = init_nvapi_bool || checkNVAPI();
+    return init_nvapi_bool;
+}
+
+void NVAPIInfo::update() // refreshes s.load from NVAPI usage data of the first enumerated GPU
+{
     if (!init_nvapi_bool){
         init_nvapi_bool = checkNVAPI();
     }
-    
-    gpuUsages[0] = (NVAPI_MAX_USAGES_PER_GPU * 4) | 0x10000;
-    (*NvAPI_EnumPhysicalGPUs)(gpuHandles, &gpuCount);
-    (*NvAPI_GPU_GetUsages)(gpuHandles[0], gpuUsages);
-    gpu_info.load = gpuUsages[3];
 
-}
\ No newline at end of file
+    if (!init_nvapi_bool) return; // NVAPI unavailable: the function pointers below may be null
+    gpuUsages[0] = (NVAPI_MAX_USAGES_PER_GPU * 4) | 0x10000;
+    NvAPI_GPU_GetUsages(gpuHandles[0], gpuUsages);
+    s.load = gpuUsages[3]; // store into this object's own stats, like the other backends do
+}
diff --git a/src/nvctrl.cpp b/src/nvctrl.cpp
index daedd80..9d9bfd7 100644
--- a/src/nvctrl.cpp
+++ b/src/nvctrl.cpp
@@ -17,17 +17,22 @@ static std::unique_ptr<Display, std::function<void(Display*)>> display;
 struct nvctrlInfo nvctrl_info;
 bool nvctrlSuccess = false;
 
-static bool find_nv_x11(libnvctrl_loader& nvctrl, Display*& dpy)
+static bool find_nv_x11(libnvctrl_loader& nvctrl, Display*& dpy, int& scr)
 {
     char buf[8] {};
     for (int i = 0; i < 16; i++) {
         snprintf(buf, sizeof(buf), ":%d", i);
         Display *d = g_x11->XOpenDisplay(buf);
         if (d) {
-            if (nvctrl.XNVCTRLIsNvScreen(d, 0)) {
-                dpy = d;
-                SPDLOG_DEBUG("XNVCtrl is using display {}", buf);
-                return true;
+            int nscreens = ScreenCount(d); //FIXME yes, no, maybe?
+            for (int screen = 0; screen < nscreens; screen++)
+            {
+                if (nvctrl.XNVCTRLIsNvScreen(d, screen)) {
+                    dpy = d;
+                    scr = screen;
+                    SPDLOG_DEBUG("XNVCtrl is using display {}", buf);
+                    return true;
+                }
             }
             g_x11->XCloseDisplay(d);
         }
@@ -46,20 +51,15 @@ bool checkXNVCtrl()
         return false;
     }
 
-    Display *dpy;
-    nvctrlSuccess = find_nv_x11(nvctrl, dpy);
+    Display *dpy = nullptr;
+    int screen = 0;
+    nvctrlSuccess = find_nv_x11(nvctrl, dpy, screen);
 
     if (!nvctrlSuccess) {
         SPDLOG_ERROR("XNVCtrl didn't find the correct display");
         return false;
     }
 
-    auto local_x11 = g_x11;
-    display = { dpy,
-        [local_x11](Display *dpy) {
-            local_x11->XCloseDisplay(dpy);
-        }
-    };
     // get device id at init
     int64_t pci_id;
     nvctrl.XNVCTRLQueryTargetAttribute64(display.get(),
@@ -70,6 +70,13 @@ bool checkXNVCtrl()
                     &pci_id);
     deviceID = (pci_id & 0xFFFF);
 
+    auto local_x11 = g_x11;
+    display = { dpy,
+        [local_x11](Display *dpy) {
+            local_x11->XCloseDisplay(dpy);
+        }
+    };
+
     return true;
 }
 
diff --git a/src/nvidia_info.h b/src/nvidia_info.h
index a21aa8d..362242a 100644
--- a/src/nvidia_info.h
+++ b/src/nvidia_info.h
@@ -11,7 +11,7 @@ extern struct nvmlUtilization_st nvidiaUtilization;
 extern struct nvmlMemory_st nvidiaMemory;
 extern bool nvmlSuccess;
 
-bool checkNVML(const char* pciBusId);
-bool getNVMLInfo(void);
+bool checkNVML(void); // one-time NVML init; matches the zero-argument definition in nvml.cpp
+bool getNVMLInfo(nvmlDevice_t device); // polls `device` into the nvidia* globals declared above
 
 #endif //MANGOHUD_NVIDIA_INFO_H
diff --git a/src/nvml.cpp b/src/nvml.cpp
index d5848fe..71b0a8b 100644
--- a/src/nvml.cpp
+++ b/src/nvml.cpp
@@ -12,50 +12,100 @@ unsigned int nvidiaTemp = 0, nvidiaCoreClock = 0, nvidiaMemClock = 0, nvidiaPowe
 struct nvmlUtilization_st nvidiaUtilization;
 struct nvmlMemory_st nvidiaMemory {};
 
-bool checkNVML(const char* pciBusId){
+static std::unique_ptr<libnvml_loader, std::function<void(libnvml_loader*)>> nvml_shutdown;
+
+bool checkNVML()
+{
     auto& nvml = get_libnvml_loader();
-    if (nvml.IsLoaded()){
-        result = nvml.nvmlInit();
-        if (NVML_SUCCESS != result) {
-            SPDLOG_ERROR("Nvidia module not loaded");
-        } else {
-            nvmlReturn_t ret = NVML_ERROR_UNKNOWN;
-            if (pciBusId && ((ret = nvml.nvmlDeviceGetHandleByPciBusId(pciBusId, &nvidiaDevice)) != NVML_SUCCESS)) {
-                SPDLOG_ERROR("Getting device handle by PCI bus ID failed: {}", nvml.nvmlErrorString(ret));
-                SPDLOG_ERROR("Using index 0.");
-            }
-
-            if (ret != NVML_SUCCESS)
-                ret = nvml.nvmlDeviceGetHandleByIndex(0, &nvidiaDevice);
-
-            if (ret != NVML_SUCCESS)
-                SPDLOG_ERROR("Getting device handle failed: {}", nvml.nvmlErrorString(ret));
-
-            nvmlSuccess = (ret == NVML_SUCCESS);
-            if (ret == NVML_SUCCESS)
-                nvml.nvmlDeviceGetPciInfo_v3(nvidiaDevice, &nvidiaPciInfo);
-
-            return nvmlSuccess;
-        }
-    } else {
+    if (!nvml.IsLoaded())
+    {
         SPDLOG_ERROR("Failed to load NVML");
+        return false;
     }
 
-    return false;
+    if (nvmlSuccess)
+        return nvmlSuccess;
+
+    result = nvml.nvmlInit();
+    if (NVML_SUCCESS != result)
+    {
+        SPDLOG_ERROR("Nvidia module not loaded");
+        return false;
+    }
+
+    nvml_shutdown = { &nvml,
+        [](libnvml_loader *nvml) -> void {
+            nvml->nvmlShutdown();
+        }
+    };
+    nvmlSuccess = true;
+    return nvmlSuccess;
 }
 
-bool getNVMLInfo(){
+bool getNVMLInfo(nvmlDevice_t device){
     nvmlReturn_t response;
     auto& nvml = get_libnvml_loader();
-    response = nvml.nvmlDeviceGetUtilizationRates(nvidiaDevice, &nvidiaUtilization);
-    nvml.nvmlDeviceGetTemperature(nvidiaDevice, NVML_TEMPERATURE_GPU, &nvidiaTemp);
-    nvml.nvmlDeviceGetMemoryInfo(nvidiaDevice, &nvidiaMemory);
-    nvml.nvmlDeviceGetClockInfo(nvidiaDevice, NVML_CLOCK_GRAPHICS, &nvidiaCoreClock);
-    nvml.nvmlDeviceGetClockInfo(nvidiaDevice, NVML_CLOCK_MEM, &nvidiaMemClock);
-    nvml.nvmlDeviceGetPowerUsage(nvidiaDevice, &nvidiaPowerUsage);
-    deviceID = nvidiaPciInfo.pciDeviceId >> 16;
+    response = nvml.nvmlDeviceGetUtilizationRates(device, &nvidiaUtilization);
+    nvml.nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &nvidiaTemp);
+    nvml.nvmlDeviceGetMemoryInfo(device, &nvidiaMemory);
+    nvml.nvmlDeviceGetClockInfo(device, NVML_CLOCK_GRAPHICS, &nvidiaCoreClock);
+    nvml.nvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &nvidiaMemClock);
+    nvml.nvmlDeviceGetPowerUsage(device, &nvidiaPowerUsage);
 
     if (response == NVML_ERROR_NOT_SUPPORTED)
         nvmlSuccess = false;
     return nvmlSuccess;
 }
+
+// Resolve the NVML handle for this GPU: by PCI bus id first, otherwise by scanning
+// NVML devices for a matching PCI device id (the last handle read is kept if none match).
+bool NVMLInfo::init()
+{
+    nvmlDevice_t nvml_dev = nullptr;
+    if (!checkNVML())
+        return false;
+
+    auto& nvml = get_libnvml_loader();
+    nvmlReturn_t ret = nvml.nvmlDeviceGetHandleByPciBusId(pci_device.c_str(), &nvml_dev);
+    if (ret != NVML_SUCCESS)
+    {
+        SPDLOG_ERROR("Getting device handle by PCI bus ID failed: {}", nvml.nvmlErrorString(ret));
+
+        unsigned int deviceCount = 0;
+        if (nvml.nvmlDeviceGetCount(&deviceCount) != NVML_SUCCESS)
+            deviceCount = 0;
+
+        for (unsigned i = 0; i < deviceCount; i++)
+        {
+            // index by i so every enumerated device is compared, not only device 0
+            ret = nvml.nvmlDeviceGetHandleByIndex(i, &nvml_dev);
+            if (ret != NVML_SUCCESS)
+                SPDLOG_ERROR("Getting device {} handle failed: {}", i, nvml.nvmlErrorString(ret));
+            else if (nvml.nvmlDeviceGetPciInfo_v3(nvml_dev, &nvidiaPciInfo) == NVML_SUCCESS
+                     && this->deviceID == nvidiaPciInfo.pciDeviceId >> 16)
+                break;
+        }
+    }
+
+    // no handle at all: fail instead of storing an unset pointer for update()
+    if (!nvml_dev)
+        return false;
+
+    device = reinterpret_cast<gpu_handles*>(nvml_dev);
+    return true;
+}
+
+// Polls NVML for this device and copies the readings into `s`.
+void NVMLInfo::update()
+{
+    if (!nvmlSuccess)
+        return;
+    getNVMLInfo(reinterpret_cast<nvmlDevice_t>(device));
+    s.load = nvidiaUtilization.gpu;
+    s.temp = nvidiaTemp;
+    s.core_clock = nvidiaCoreClock;
+    s.memory_clock = nvidiaMemClock;
+    s.power_usage = nvidiaPowerUsage / 1000;
+    s.memory_used = nvidiaMemory.used / (1024.f * 1024.f * 1024.f);
+    s.memory_total = nvidiaMemory.total / (1024.f * 1024.f * 1024.f);
+}
diff --git a/src/overlay.cpp b/src/overlay.cpp
index 8bd5cc3..918bcaf 100644
--- a/src/overlay.cpp
+++ b/src/overlay.cpp
@@ -58,11 +58,8 @@ void update_hw_info(struct swapchain_stats& sw_stats, struct overlay_params& par
 #endif
    }
    if (params.enabled[OVERLAY_PARAM_ENABLED_gpu_stats] || logger->is_active()) {
-      if (vendorID == 0x1002 && getAmdGpuInfo_actual)
-         getAmdGpuInfo_actual();
-
-      if (vendorID == 0x10de)
-         getNvidiaGpuInfo();
+      for (auto& gpu : g_gpu_infos)
+         gpu.second->update();
    }
 
 #ifdef __linux__
@@ -76,12 +73,15 @@ void update_hw_info(struct swapchain_stats& sw_stats, struct overlay_params& par
       getIoStats(&sw_stats.io);
 #endif
 
-   currentLogData.gpu_load = gpu_info.load;
-   currentLogData.gpu_temp = gpu_info.temp;
-   currentLogData.gpu_core_clock = gpu_info.CoreClock;
-   currentLogData.gpu_mem_clock = gpu_info.MemClock;
-   currentLogData.gpu_vram_used = gpu_info.memoryUsed;
-   currentLogData.gpu_power = gpu_info.powerUsage;
+   if (g_active_gpu)
+   {
+      currentLogData.gpu_load = g_active_gpu->s.load;
+      currentLogData.gpu_temp = g_active_gpu->s.temp;
+      currentLogData.gpu_core_clock = g_active_gpu->s.core_clock;
+      currentLogData.gpu_mem_clock = g_active_gpu->s.memory_clock;
+      currentLogData.gpu_vram_used = g_active_gpu->s.memory_used;
+      currentLogData.gpu_power = g_active_gpu->s.power_usage;
+   }
 #ifdef __linux__
    currentLogData.ram_used = memused;
 #endif
@@ -544,6 +544,118 @@ struct pci_bus {
    int func;
 };
 
+// Populates g_gpu_infos with one stats backend per GPU whose init() succeeds.
+// Windows: a single NVAPI backend is registered under the key "nvapi_0".
+// Linux: every /sys/class/drm/card* entry that has a device/vendor file is
+// inspected; the backend is picked from the kernel module name (amdgpu, radeon,
+// nvidia) and stored under the PCI device name taken from the device symlink.
+// params: force_amdgpu_hwmon skips the libdrm backend, fps_sampling_period is
+// handed to amdgpu_set_sampling_period().
+static void enumerate_gpus(overlay_params& params)
+{
+   // _WIN32 and __linux__ are predefined by the compiler itself on those targets;
+   // WIN32 and __gnu_linux__ are not guaranteed with every toolchain/libc.
+#ifdef _WIN32
+   auto gpu = std::make_shared<NVAPIInfo>();
+   if (gpu->init())
+      g_gpu_infos["nvapi_0"] = gpu;
+   return;
+#endif
+
+#ifdef __linux__
+   string path;
+   string drm = "/sys/class/drm/";
+
+   auto dirs = ls(drm.c_str(), "card");
+   for (auto& dir : dirs) {
+      path = drm + dir;
+
+      // skip display outputs
+      if (!file_exists(path + "/device/vendor"))
+         continue;
+
+      // both ids are parsed as hexadecimal
+      string vendor = read_line(path + "/device/vendor");
+      uint32_t vendor_id = strtoul(vendor.c_str(), NULL, 16);
+
+      string device = read_line(path + "/device/device");
+      uint32_t device_id = strtoul(device.c_str(), NULL, 16); // OGL might fail so read from sysfs
+
+      // keep only the last path component of the symlink target
+      string pci_device = read_symlink((path + "/device").c_str());
+      auto pos = pci_device.find_last_of('/');
+      pci_device = pci_device.substr(pos != std::string::npos ? pos + 1 : 0);
+      SPDLOG_DEBUG("PCI device symlink: '{}' {}", pci_device, path);
+
+      string module = get_basename(read_symlink(path + "/device/driver/module"));
+      SPDLOG_DEBUG("using device path: '{}', module: '{}'", path, module);
+
+      auto dev_name = get_device_name(vendor_id, device_id);
+
+      // Stamp the sysfs ids on a backend, then initialize it. The ids go in first
+      // because NVMLInfo::init() compares this->deviceID while searching for its device.
+      auto init_gpu = [&](const auto& gpu) {
+         gpu->vendorID = vendor_id;
+         gpu->deviceID = device_id;
+         gpu->dev_name = dev_name;
+         return gpu->init();
+      };
+
+      if (module == "amdgpu")
+      {
+         // libdrm backend unless force_amdgpu_hwmon is set, hwmon sysfs otherwise
+#ifdef HAVE_LIBDRM_AMDGPU
+         if (!params.enabled[OVERLAY_PARAM_ENABLED_force_amdgpu_hwmon])
+         {
+            auto gpu = std::make_shared<AMDGPUInfo>(path, pci_device);
+            if (init_gpu(gpu))
+            {
+               amdgpu_set_sampling_period(gpu->device, params.fps_sampling_period);
+               g_gpu_infos[pci_device] = gpu;
+
+               SPDLOG_DEBUG("Using libdrm with {}", pci_device);
+               continue;
+            }
+
+            // libdrm could not be used: fall through to the hwmon sysfs backend
+            SPDLOG_WARN("Falling back to using hwmon sysfs.");
+         }
+#endif
+
+         if (!file_exists(path + "/device/gpu_busy_percent"))
+            continue;
+
+         auto gpu = std::make_shared<AMDGPUHWMonInfo>(path, pci_device);
+         if (init_gpu(gpu))
+            g_gpu_infos[pci_device] = gpu;
+      }
+      else if (module == "radeon")
+      {
+         // radeon is only handled when built with libdrm
+#ifdef HAVE_LIBDRM
+         auto gpu = std::make_shared<RadeonInfo>(path, pci_device);
+         if (init_gpu(gpu))
+            g_gpu_infos[pci_device] = gpu;
+#endif
+      }
+      else if (module == "nvidia")
+      {
+         // NVML is tried first; XNVCtrl is only used when NVML cannot be initialized
+         auto nvml_gpu = std::make_shared<NVMLInfo>(path, pci_device);
+         if (init_gpu(nvml_gpu))
+         {
+            g_gpu_infos[pci_device] = nvml_gpu;
+            continue;
+         }
+
+         auto nvctrl_gpu = std::make_shared<NVCtrlInfo>(path, pci_device);
+         if (init_gpu(nvctrl_gpu))
+            g_gpu_infos[pci_device] = nvctrl_gpu;
+      }
+   }
+#endif
+}
+
 void init_gpu_stats(uint32_t& vendorID, uint32_t target_device_id, overlay_params& params)
 {
    //if (!params.enabled[OVERLAY_PARAM_ENABLED_gpu_stats])
@@ -578,129 +690,55 @@ void init_gpu_stats(uint32_t& vendorID, uint32_t target_device_id, overlay_param
       }
    }
 
-   // NVIDIA or Intel but maybe has Optimus
-   if (vendorID == 0x8086
-      || vendorID == 0x10de) {
+   if (!g_gpu_infos.size())
+      enumerate_gpus(params);
 
-      if(checkNvidia(pci_dev))
-         vendorID = 0x10de;
-      else
-         params.enabled[OVERLAY_PARAM_ENABLED_gpu_stats] = false;
+   if (pci_bus_parsed && pci_dev && g_gpu_infos.find(params.pci_dev) != g_gpu_infos.end())
+   {
+      g_active_gpu = g_gpu_infos[params.pci_dev];
    }
-
-#ifdef __linux__
-   if (vendorID == 0x8086 || vendorID == 0x1002
-       || gpu.find("Radeon") != std::string::npos
-       || gpu.find("AMD") != std::string::npos) {
-      string path;
-      string drm = "/sys/class/drm/";
-      getAmdGpuInfo_actual = getAmdGpuInfo;
-      bool using_libdrm = false;
-
-      auto dirs = ls(drm.c_str(), "card");
-      for (auto& dir : dirs) {
-         path = drm + dir;
-
-         SPDLOG_DEBUG("device path check: {}/device/vendor", path);
-
-         string device = read_line(path + "/device/device");
-         deviceID = strtol(device.c_str(), NULL, 16); // OGL might fail so read from sysfs
-
-         string vendor = read_line(path + "/device/vendor");
-         trim(vendor);
-         if (vendor != "0x1002")
-            continue;
-
-         if (pci_bus_parsed && pci_dev) {
-            string pci_device = read_symlink((path + "/device").c_str());
-            SPDLOG_DEBUG("PCI device symlink: '{}'", pci_device);
-            if (!ends_with(pci_device, pci_dev)) {
-               SPDLOG_WARN("skipping GPU, PCI ID doesn't match: {}", pci_device);
-               continue;
-            }
-         }
-         // Don't skip if intel, maybe there's a dgpu
-         else if (vendorID != 0x8086 && target_device_id && target_device_id != deviceID)
+   else if (vendorID == 0x8086 /*&& target_device_id && target_device_id != deviceID*/)
+   {
+      for (auto& info : g_gpu_infos)
+      {
+         const auto& gpu = info.second;
+         if (gpu->vendorID != 0x8086 && gpu->deviceID == target_device_id)
          {
-            SPDLOG_WARN("expected device id {:04X}, got {:04X}, skipping", target_device_id, deviceID);
-            continue;
-         }
-
-         string module = get_basename(read_symlink(path + "/device/driver/module"));
-         SPDLOG_DEBUG("using device path: '{}', module: '{}'", path, module);
-
-         int idx = -1;
-         //TODO make neater
-         int res = sscanf(path.c_str(), "%*[^0-9]%d", &idx);
-         (void)res;
-         std::string dri_path = "/dev/dri/card" + std::to_string(idx);
-
-         if (module == "amdgpu")
-         {
-#ifdef HAVE_LIBDRM_AMDGPU
-            if (!params.enabled[OVERLAY_PARAM_ENABLED_force_amdgpu_hwmon] && res == 1 && amdgpu_open(dri_path.c_str())) {
-               vendorID = 0x1002;
-               using_libdrm = true;
-               getAmdGpuInfo_actual = getAmdGpuInfo_libdrm;
-               amdgpu_set_sampling_period(params.fps_sampling_period);
-
-               SPDLOG_DEBUG("Using libdrm");
-               // fall through and open sysfs handles for fallback or check DRM version beforehand
-            } else if (!params.enabled[OVERLAY_PARAM_ENABLED_force_amdgpu_hwmon]) {
-               SPDLOG_WARN("Failed to open device '/dev/dri/card{}' with libdrm, falling back to using hwmon sysfs.", idx);
-            }
-#endif
-         }
-#ifdef HAVE_LIBDRM
-         else if (module == "radeon")
-         {
-            if (res == 1 && radeon_open(dri_path.c_str())) {
-               vendorID = 0x1002;
-               using_libdrm = true;
-               getAmdGpuInfo_actual = getRadeonInfo_libdrm;
-               radeon_set_sampling_period(params.fps_sampling_period);
-            } else {
-               SPDLOG_WARN("Failed to open device '/dev/dri/card{}' with libdrm.", idx);
-               params.enabled[OVERLAY_PARAM_ENABLED_gpu_stats] = false;
-               break;
-            }
-         }
-#endif
-
-         if (!file_exists(path + "/device/gpu_busy_percent"))
-            continue;
-
-         path += "/device";
-         if (!amdgpu.busy)
-            amdgpu.busy = fopen((path + "/gpu_busy_percent").c_str(), "r");
-         if (!amdgpu.vram_total)
-            amdgpu.vram_total = fopen((path + "/mem_info_vram_total").c_str(), "r");
-         if (!amdgpu.vram_used)
-            amdgpu.vram_used = fopen((path + "/mem_info_vram_used").c_str(), "r");
-
-         path += "/hwmon/";
-         string tempFolder;
-         if (find_folder(path, "hwmon", tempFolder)) {
-            if (!amdgpu.core_clock)
-               amdgpu.core_clock = fopen((path + tempFolder + "/freq1_input").c_str(), "r");
-            if (!amdgpu.memory_clock)
-               amdgpu.memory_clock = fopen((path + tempFolder + "/freq2_input").c_str(), "r");
-            if (!amdgpu.temp)
-               amdgpu.temp = fopen((path + tempFolder + "/temp1_input").c_str(), "r");
-            if (!amdgpu.power_usage)
-               amdgpu.power_usage = fopen((path + tempFolder + "/power1_average").c_str(), "r");
-
-            vendorID = 0x1002;
+            g_active_gpu = gpu;
             break;
          }
       }
-
-      // don't bother then
-      if (!using_libdrm && !amdgpu.busy && !amdgpu.temp && !amdgpu.vram_total && !amdgpu.vram_used) {
-         params.enabled[OVERLAY_PARAM_ENABLED_gpu_stats] = false;
+//       SPDLOG_WARN("expected device id {:04X}, got {:04X}, skipping", target_device_id, deviceID);
+   }
+   else
+   {
+      for (auto& info : g_gpu_infos)
+      {
+         auto& gpu = info.second;
+         if (gpu->vendorID == vendorID && gpu->deviceID == target_device_id)
+         {
+            g_active_gpu = gpu;
+            break;
+         }
       }
    }
+
+#ifdef WIN32
+   //TODO windows' gpu stats
+   if (g_gpu_infos.size())
+      g_active_gpu = g_gpu_infos.begin()->second;
 #endif
+
+   // for compatibility
+   if (g_active_gpu)
+   {
+      vendorID = g_active_gpu->vendorID;
+      deviceID = g_active_gpu->deviceID;
+   }
+
+   if (g_active_gpu)
+      SPDLOG_INFO("Selected GPU: {}, {}, 0x{:X}:0x{:X}", g_active_gpu->sysfs_path, g_active_gpu->dev_name, vendorID, deviceID);
+
    if (!params.permit_upload)
       SPDLOG_INFO("Uploading is disabled (permit_upload = 0)");
 }
@@ -795,13 +833,13 @@ void init_system_info(){
       SPDLOG_DEBUG("Cpu:{}", cpu);
       SPDLOG_DEBUG("Kernel:{}", kernel);
       SPDLOG_DEBUG("Os:{}", os);
-      SPDLOG_DEBUG("Gpu:{}", gpu);
+//       SPDLOG_DEBUG("Gpu:{}", gpu);
       SPDLOG_DEBUG("Driver:{}", driver);
       SPDLOG_DEBUG("CPU Scheduler:{}", cpusched);
 #endif
 }
 
-void get_device_name(int32_t vendorID, int32_t deviceID, struct swapchain_stats& sw_stats)
+std::string get_device_name(int32_t vendorID, int32_t deviceID)
 {
 #ifdef __linux__
    if (pci_ids.find(vendorID) == pci_ids.end())
@@ -815,7 +853,14 @@ void get_device_name(int32_t vendorID, int32_t deviceID, struct swapchain_stats&
       for (char c: chars)
          desc.erase(remove(desc.begin(), desc.end(), c), desc.end());
    }
-   gpu = sw_stats.gpuName = desc;
-   trim(sw_stats.gpuName); trim(gpu);
+   trim(desc);
+   return desc;
+#else
+   return {};
 #endif
 }
+
+void get_device_name(int32_t vendorID, int32_t deviceID, struct swapchain_stats& sw_stats)
+{
+   gpu = sw_stats.gpuName = get_device_name(vendorID, deviceID);
+}
diff --git a/src/overlay.h b/src/overlay.h
index 9442722..a97f9d3 100644
--- a/src/overlay.h
+++ b/src/overlay.h
@@ -116,6 +116,7 @@ void init_cpu_stats(overlay_params& params);
 void check_keybinds(struct swapchain_stats& sw_stats, struct overlay_params& params, uint32_t vendorID);
 void init_system_info(void);
 void FpsLimiter(struct fps_limit& stats);
+std::string get_device_name(int32_t vendorID, int32_t deviceID);
 void get_device_name(int32_t vendorID, int32_t deviceID, struct swapchain_stats& sw_stats);
 void calculate_benchmark_data();
 void create_fonts(const overlay_params& params, ImFont*& small_font, ImFont*& text_font);
diff --git a/src/overlay_params.h b/src/overlay_params.h
index 36aef58..07b216b 100644
--- a/src/overlay_params.h
+++ b/src/overlay_params.h
@@ -33,6 +33,7 @@ typedef unsigned long KeySym;
    OVERLAY_PARAM_BOOL(gpu_temp)                      \
    OVERLAY_PARAM_BOOL(cpu_stats)                     \
    OVERLAY_PARAM_BOOL(gpu_stats)                     \
+   OVERLAY_PARAM_BOOL(show_all_gpus)                 \
    OVERLAY_PARAM_BOOL(ram)                           \
    OVERLAY_PARAM_BOOL(swap)                          \
    OVERLAY_PARAM_BOOL(vram)                          \