Support multiple gpus

3 years ago · 5e1493a0cb
parent 0bbdb4398d
commit 5e1493a0cb
12 changed files with 673 additions and 406 deletions
--- a/src/gpu.cpp
+++ b/src/gpu.cpp
@ -7,12 +7,13 @@
 #include <spdlog/spdlog.h>
 #include "nvctrl.h"
 #include "timing.hpp"
+#include "file_utils.h"
 #ifdef HAVE_NVML
 #include "nvidia_info.h"
 #endif

 #ifdef HAVE_LIBDRM_AMDGPU
-//#include "auth.h"
+#include "auth.h"
 #include <xf86drm.h>
 #include <libdrm/amdgpu_drm.h>
 #include <libdrm/amdgpu.h>
@ -22,120 +23,118 @@

 using namespace std::chrono_literals;

-struct gpuInfo gpu_info {};
-amdgpu_files amdgpu {};
-decltype(&getAmdGpuInfo) getAmdGpuInfo_actual = nullptr;
+std::shared_ptr<GpuInfo> g_active_gpu;
+std::unordered_map<std::string /*device*/, std::shared_ptr<struct GpuInfo>> g_gpu_infos;

-bool checkNvidia(const char *pci_dev){
-    bool nvSuccess = false;
-#ifdef HAVE_NVML
-    nvSuccess = checkNVML(pci_dev) && getNVMLInfo();
-#endif
+bool NVCtrlInfo::init()
+{
 #ifdef HAVE_XNVCTRL
-    if (!nvSuccess)
-        nvSuccess = checkXNVCtrl();
+    // FIXME correct device index
+    return checkXNVCtrl();
+#else
+    return false;
 #endif
-#ifdef _WIN32
-    if (!nvSuccess)
-        nvSuccess = checkNVAPI();
-#endif
-    return nvSuccess;
 }

-void getNvidiaGpuInfo(){
-#ifdef HAVE_NVML
-    if (nvmlSuccess){
-        getNVMLInfo();
-        gpu_info.load = nvidiaUtilization.gpu;
-        gpu_info.temp = nvidiaTemp;
-        gpu_info.memoryUsed = nvidiaMemory.used / (1024.f * 1024.f * 1024.f);
-        gpu_info.CoreClock = nvidiaCoreClock;
-        gpu_info.MemClock = nvidiaMemClock;
-        gpu_info.powerUsage = nvidiaPowerUsage / 1000;
-        gpu_info.memoryTotal = nvidiaMemory.total / (1024.f * 1024.f * 1024.f);
-        return;
-    }
-#endif
+void NVCtrlInfo::update()
+{
 #ifdef HAVE_XNVCTRL
    if (nvctrlSuccess) {
        getNvctrlInfo();
-        gpu_info.load = nvctrl_info.load;
-        gpu_info.temp = nvctrl_info.temp;
-        gpu_info.memoryUsed = nvctrl_info.memoryUsed / (1024.f);
-        gpu_info.CoreClock = nvctrl_info.CoreClock;
-        gpu_info.MemClock = nvctrl_info.MemClock;
-        gpu_info.powerUsage = 0;
-        gpu_info.memoryTotal = nvctrl_info.memoryTotal;
+        s.load = nvctrl_info.load;
+        s.temp = nvctrl_info.temp;
+        s.memory_used = nvctrl_info.memoryUsed / (1024.f);
+        s.core_clock = nvctrl_info.CoreClock;
+        s.memory_clock = nvctrl_info.MemClock;
+        s.power_usage = 0;
+        s.memory_total = nvctrl_info.memoryTotal;
        return;
    }
 #endif
-#ifdef _WIN32
-nvapi_util();
-#endif
 }

-void getAmdGpuInfo(){
-    if (amdgpu.busy) {
-        rewind(amdgpu.busy);
-        fflush(amdgpu.busy);
+bool AMDGPUHWMonInfo::init() // Opens the sysfs/hwmon files that update() reads; returns true only if the required ones opened.
+{
+    auto path  = sysfs_path + "/device"; // per-device attribute directory below this GPU's sysfs path
+    handles.busy = fopen((path + "/gpu_busy_percent").c_str(), "r");
+    handles.vram_total = fopen((path + "/mem_info_vram_total").c_str(), "r");
+    handles.vram_used = fopen((path + "/mem_info_vram_used").c_str(), "r");
+
+    path += "/hwmon/";
+    std::string tempFolder;
+    if (find_folder(path, "hwmon", tempFolder)) { // presumably resolves the "hwmonN" subdirectory name into tempFolder - confirm against find_folder()
+        handles.core_clock = fopen((path + tempFolder + "/freq1_input").c_str(), "r");
+        handles.memory_clock = fopen((path + tempFolder + "/freq2_input").c_str(), "r");
+        handles.temp = fopen((path + tempFolder + "/temp1_input").c_str(), "r");
+        handles.power_usage = fopen((path + tempFolder + "/power1_average").c_str(), "r");
+    }
+
+    return handles.busy && handles.temp && handles.vram_total && handles.vram_used; // clocks and power are optional: update() null-checks every handle before reading
+}
+
+void AMDGPUHWMonInfo::update()
+{
+    if (handles.busy) {
+        rewind(handles.busy);
+        fflush(handles.busy);
        int value = 0;
-        if (fscanf(amdgpu.busy, "%d", &value) != 1)
+        if (fscanf(handles.busy, "%d", &value) != 1)
            value = 0;
-        gpu_info.load = value;
+        s.load = value;
    }

-    if (amdgpu.temp) {
-        rewind(amdgpu.temp);
-        fflush(amdgpu.temp);
+    if (handles.temp) {
+        rewind(handles.temp);
+        fflush(handles.temp);
        int value = 0;
-        if (fscanf(amdgpu.temp, "%d", &value) != 1)
+        if (fscanf(handles.temp, "%d", &value) != 1)
            value = 0;
-        gpu_info.temp = value / 1000;
+        s.temp = value / 1000;
    }

    int64_t value = 0;

-    if (amdgpu.vram_total) {
-        rewind(amdgpu.vram_total);
-        fflush(amdgpu.vram_total);
-        if (fscanf(amdgpu.vram_total, "%" PRId64, &value) != 1)
+    if (handles.vram_total) {
+        rewind(handles.vram_total);
+        fflush(handles.vram_total);
+        if (fscanf(handles.vram_total, "%" PRId64, &value) != 1)
            value = 0;
-        gpu_info.memoryTotal = float(value) / (1024 * 1024 * 1024);
+        s.memory_total = float(value) / (1024 * 1024 * 1024);
    }

-    if (amdgpu.vram_used) {
-        rewind(amdgpu.vram_used);
-        fflush(amdgpu.vram_used);
-        if (fscanf(amdgpu.vram_used, "%" PRId64, &value) != 1)
+    if (handles.vram_used) {
+        rewind(handles.vram_used);
+        fflush(handles.vram_used);
+        if (fscanf(handles.vram_used, "%" PRId64, &value) != 1)
            value = 0;
-        gpu_info.memoryUsed = float(value) / (1024 * 1024 * 1024);
+        s.memory_used = float(value) / (1024 * 1024 * 1024);
    }

-    if (amdgpu.core_clock) {
-        rewind(amdgpu.core_clock);
-        fflush(amdgpu.core_clock);
-        if (fscanf(amdgpu.core_clock, "%" PRId64, &value) != 1)
+    if (handles.core_clock) {
+        rewind(handles.core_clock);
+        fflush(handles.core_clock);
+        if (fscanf(handles.core_clock, "%" PRId64, &value) != 1)
            value = 0;

-        gpu_info.CoreClock = value / 1000000;
+        s.core_clock = value / 1000000;
    }

-    if (amdgpu.memory_clock) {
-        rewind(amdgpu.memory_clock);
-        fflush(amdgpu.memory_clock);
-        if (fscanf(amdgpu.memory_clock, "%" PRId64, &value) != 1)
+    if (handles.memory_clock) {
+        rewind(handles.memory_clock);
+        fflush(handles.memory_clock);
+        if (fscanf(handles.memory_clock, "%" PRId64, &value) != 1)
            value = 0;

-        gpu_info.MemClock = value / 1000000;
+        s.memory_clock = value / 1000000;
    }

-    if (amdgpu.power_usage) {
-        rewind(amdgpu.power_usage);
-        fflush(amdgpu.power_usage);
-        if (fscanf(amdgpu.power_usage, "%" PRId64, &value) != 1)
+    if (handles.power_usage) {
+        rewind(handles.power_usage);
+        fflush(handles.power_usage);
+        if (fscanf(handles.power_usage, "%" PRId64, &value) != 1)
            value = 0;

-        gpu_info.powerUsage = value / 1000000;
+        s.power_usage = value / 1000000;
    }
 }

@ -148,7 +147,7 @@ static int getgrbm_amdgpu(amdgpu_device_handle dev, uint32_t *out) {
                                    0xffffffff, 0, out);
 }

-struct amdgpu_handles
+struct amdgpu_handles : public gpu_handles
 {
    amdgpu_device_handle dev;
    int fd;
@ -208,21 +207,20 @@ struct amdgpu_handles
    }
 };

-typedef std::unique_ptr<amdgpu_handles> amdgpu_ptr;
-static amdgpu_ptr amdgpu_dev;
-
-void amdgpu_set_sampling_period(uint32_t period)
+void amdgpu_set_sampling_period(gpu_handles* dev, uint32_t period) // Forwards a new sampling period to the given libdrm amdgpu device; no-op when dev is null.
 {
+    auto amdgpu_dev = static_cast<amdgpu_handles*>(dev); // checked-at-compile-time downcast: amdgpu_handles derives from gpu_handles; caller must pass a handle created by amdgpu_open()
     if (amdgpu_dev)
         amdgpu_dev->set_sampling_period(period);
 }

-bool amdgpu_open(const char *path) {
+static amdgpu_handles* amdgpu_open(const char* path)
+{
    int fd = open(path, O_RDWR | O_CLOEXEC);

    if (fd < 0) {
        SPDLOG_ERROR("Failed to open DRM device: {}", strerror(errno));
-        return false;
+        return nullptr;
    }

    drmVersionPtr ver = drmGetVersion(fd);
@ -230,69 +228,82 @@ bool amdgpu_open(const char *path) {
    if (!ver) {
        SPDLOG_ERROR("Failed to query driver version: {}", strerror(errno));
        close(fd);
-        return false;
+        return nullptr;
    }

    if (strcmp(ver->name, "amdgpu") || !DRM_ATLEAST_VERSION(ver, 3, 11)) {
        SPDLOG_ERROR("Unsupported driver/version: {} {}.{}.{}", ver->name, ver->version_major, ver->version_minor, ver->version_patchlevel);
        close(fd);
        drmFreeVersion(ver);
-        return false;
+        return nullptr;
    }
    drmFreeVersion(ver);

-/*
    if (!authenticate_drm(fd)) {
        close(fd);
-        return false;
+        return nullptr;
    }
-*/

    uint32_t drm_major, drm_minor;
    amdgpu_device_handle dev;
    if (amdgpu_device_initialize(fd, &drm_major, &drm_minor, &dev)){
        SPDLOG_ERROR("Failed to initialize amdgpu device: {}", strerror(errno));
        close(fd);
+        return nullptr;
+    }
+
+    return new amdgpu_handles(dev, fd, drm_major, drm_minor);
+}
+
+bool AMDGPUInfo::init()
+{
+    int idx = -1;
+
+    if (sscanf(sysfs_path.c_str(), "%*[^0-9]%d", &idx) != 1 || idx < 0)
+        return false;
+
+    const std::string dri_path = "/dev/dri/card" + std::to_string(idx);
+    device = reinterpret_cast<gpu_handles*>(amdgpu_open(dri_path.c_str()));
+    if (!device)
+    {
+        SPDLOG_WARN("Failed to open device '{}' with libdrm", dri_path);
        return false;
    }

-    amdgpu_dev = std::make_unique<amdgpu_handles>(dev, fd, drm_major, drm_minor);
    return true;
 }

-void getAmdGpuInfo_libdrm()
+
+void AMDGPUInfo::update()
 {
    uint64_t value = 0;
    uint32_t value32 = 0;
+    auto amdgpu_dev = reinterpret_cast<amdgpu_handles*>(device);

    if (!amdgpu_dev || !DRM_ATLEAST_VERSION(amdgpu_dev, 3, 11))
-    {
-        getAmdGpuInfo();
-        getAmdGpuInfo_actual = getAmdGpuInfo;
        return;
-    }

    if (!amdgpu_query_info(amdgpu_dev->dev, AMDGPU_INFO_VRAM_USAGE, sizeof(uint64_t), &value))
-        gpu_info.memoryUsed = float(value) / (1024 * 1024 * 1024);
+        s.memory_used = float(value) / (1024 * 1024 * 1024);

    // FIXME probably not correct sensor
    if (!amdgpu_query_info(amdgpu_dev->dev, AMDGPU_INFO_MEMORY, sizeof(uint64_t), &value))
-        gpu_info.memoryTotal = float(value) / (1024 * 1024 * 1024);
+        s.memory_total = float(value) / (1024 * 1024 * 1024);

    if (!amdgpu_query_sensor_info(amdgpu_dev->dev, AMDGPU_INFO_SENSOR_GFX_SCLK, sizeof(uint32_t), &value32))
-        gpu_info.CoreClock = value32;
+        s.core_clock = value32;

    if (!amdgpu_query_sensor_info(amdgpu_dev->dev, AMDGPU_INFO_SENSOR_GFX_MCLK, sizeof(uint32_t), &value32)) // XXX Doesn't work on APUs
-        gpu_info.MemClock = value32;
+        s.memory_clock = value32;

    //if (!amdgpu_query_sensor_info(amdgpu_dev->dev, AMDGPU_INFO_SENSOR_GPU_LOAD, sizeof(uint32_t), &value32))
-    //    gpu_info.load = value32;
-    gpu_info.load = amdgpu_dev->gui_percent;
+    //    load = value32;
+    s.load = amdgpu_dev->gui_percent;

    if (!amdgpu_query_sensor_info(amdgpu_dev->dev, AMDGPU_INFO_SENSOR_GPU_TEMP, sizeof(uint32_t), &value32))
-        gpu_info.temp = value32 / 1000;
+        s.temp = value32 / 1000;

    if (!amdgpu_query_sensor_info(amdgpu_dev->dev, AMDGPU_INFO_SENSOR_GPU_AVG_POWER, sizeof(uint32_t), &value32))
-        gpu_info.powerUsage = value32;
+        s.power_usage = value32;
 }
 #endif
--- a/src/gpu.h
+++ b/src/gpu.h
@ -4,6 +4,9 @@

 #include <cstdio>
 #include <cstdint>
+#include <unordered_map>
+#include <memory>
+#include <string>

 enum {
    GRBM_STATUS = 0x8010,
@ -20,33 +23,118 @@ struct amdgpu_files
    FILE *power_usage;
 };

-extern amdgpu_files amdgpu;
+struct gpu_handles // Common base for backend-specific device handles, stored type-erased in GpuInfo::device.
+{
+    virtual ~gpu_handles() {}; // virtual so a derived handle can be destroyed through a gpu_handles*
+};
+
+struct GpuInfo // Abstract per-GPU statistics source; each backend implements init() and update().
+{
+    GpuInfo(const std::string& sysfs, const std::string& pci)
+    : sysfs_path(sysfs)
+    , pci_device(pci)
+    {}
+    virtual ~GpuInfo() {}
+    virtual void update() = 0; // refreshes the values in `s`
+    virtual bool init() = 0; // prepares the backend; returns false when it cannot be used
+
+    std::string sysfs_path; // sysfs path of the device; the libdrm backends parse the card index out of it
+    std::string pci_device;
+    std::string dev_name; // label the HUD prints for this GPU
+    bool inited {}; // value-initialized like the other members so it is never read indeterminate
+
+    struct { // latest sample, written by update(); the HUD prints these with the suffixes noted below
+        int load; // shown with "%"
+        int temp; // shown with "°C"
+        float memory_used; // shown with "GiB"
+        float memory_total; // used as the upper bound of the VRAM graph
+        int memory_clock; // shown with "MHz"
+        int core_clock; // shown with "MHz"
+        int power_usage; // shown with "W"
+    } s {};
+
+    uint32_t vendorID {}, deviceID {};
+    gpu_handles* device {}; // raw backend handle; AMDGPUInfo and AMDGPUHWMonInfo delete it in their destructors
+};
+
+extern std::shared_ptr<GpuInfo> g_active_gpu;

-struct gpuInfo{
-    int load;
-    int temp;
-    float memoryUsed;
-    float memoryTotal;
-    int MemClock;
-    int CoreClock;
-    int powerUsage;
+struct NVMLInfo : public GpuInfo // GpuInfo backend declared for NVML; member definitions live outside this header.
+{
+    NVMLInfo(const std::string& sysfs, const std::string& pci) : GpuInfo(sysfs, pci) {}
+    void update() override; // override makes the compiler reject any drift from GpuInfo's signature
+    bool init() override;
+};
+
+struct NVCtrlInfo : public GpuInfo // GpuInfo backend that reads NVIDIA statistics through XNVCtrl.
+{
+    NVCtrlInfo(const std::string& sysfs, const std::string& pci) : GpuInfo(sysfs, pci) {}
+    void update() override; // override makes the compiler reject any drift from GpuInfo's signature
+    bool init() override;
+};
+
+struct NVAPIInfo : public GpuInfo // GpuInfo backend that reads NVIDIA load through NVAPI.
+{
+    NVAPIInfo() : GpuInfo({}, {}) {} // constructed with empty sysfs and PCI strings
+    void update() override; // override makes the compiler reject any drift from GpuInfo's signature
+    bool init() override;
+};
+
+struct AMDGPUInfo : public GpuInfo // GpuInfo backend that queries the amdgpu kernel driver through libdrm.
+{
+    AMDGPUInfo(const std::string& sysfs, const std::string& pci) : GpuInfo(sysfs, pci) {}
+    ~AMDGPUInfo() override
+    {
+        delete device; // init() stores the handle returned by amdgpu_open() here; deleting nullptr is a no-op if init() never succeeded
+    }
+    void update() override; // override makes the compiler reject any drift from GpuInfo's signature
+    bool init() override;
+};
+
+struct AMDGPUHWMonInfo : public GpuInfo // GpuInfo backend that reads amdgpu statistics from sysfs/hwmon files.
+{
+    AMDGPUHWMonInfo(const std::string& sysfs, const std::string& pci) : GpuInfo(sysfs, pci) {}
+    ~AMDGPUHWMonInfo() override // closes every file that init() managed to open
+    {
+        delete device; // no visible code in this backend assigns device, so this is normally a no-op on nullptr
+        if (handles.busy)
+            fclose(handles.busy);
+        if (handles.temp)
+            fclose(handles.temp);
+        if (handles.vram_total)
+            fclose(handles.vram_total);
+        if (handles.vram_used)
+            fclose(handles.vram_used);
+        if (handles.core_clock)
+            fclose(handles.core_clock);
+        if (handles.memory_clock)
+            fclose(handles.memory_clock);
+        if (handles.power_usage)
+            fclose(handles.power_usage);
+        handles = {};
+    }
+
+    void update() override; // override makes the compiler reject any drift from GpuInfo's signature
+    bool init() override;
+
+    amdgpu_files handles {}; // FILE* per statistic; a null entry means that statistic is unavailable
+};
+
+struct RadeonInfo : public GpuInfo // GpuInfo backend that queries the radeon kernel driver through libdrm.
+{
+    RadeonInfo(const std::string& sysfs, const std::string& pci) : GpuInfo(sysfs, pci) {} // NOTE(review): init() stores a new radeon_handles in device but no destructor here releases it - confirm radeon_handles derives from gpu_handles before adding `delete device`
+    void update() override; // override makes the compiler reject any drift from GpuInfo's signature
+    bool init() override;
 };

-extern struct gpuInfo gpu_info;
+extern std::unordered_map<std::string /*device*/, std::shared_ptr<struct GpuInfo>> g_gpu_infos;

-void getNvidiaGpuInfo(void);
-void getAmdGpuInfo(void);
 #ifdef HAVE_LIBDRM
-void getRadeonInfo_libdrm();
-bool radeon_open(const char *path);
-void radeon_set_sampling_period(uint32_t period);
+void radeon_set_sampling_period(gpu_handles* dev, uint32_t period);
 #endif
 #ifdef HAVE_LIBDRM_AMDGPU
-void getAmdGpuInfo_libdrm();
-bool amdgpu_open(const char *path);
-void amdgpu_set_sampling_period(uint32_t period);
+void amdgpu_set_sampling_period(gpu_handles* dev, uint32_t period);
 #endif
-extern decltype(&getAmdGpuInfo) getAmdGpuInfo_actual;
 bool checkNvidia(const char *pci_dev);
 extern void nvapi_util();
 extern bool checkNVAPI();
--- a/src/gpu_radeon.cpp
+++ b/src/gpu_radeon.cpp
@ -119,16 +119,14 @@ struct radeon_handles
    }
 };

-typedef std::unique_ptr<radeon_handles> radeon_ptr;
-static radeon_ptr radeon_dev;
-
-void radeon_set_sampling_period(uint32_t period)
+void radeon_set_sampling_period(gpu_handles* dev, uint32_t period) // Forwards a new sampling period to the given radeon device; no-op when dev is null.
 {
+    auto radeon_dev = reinterpret_cast<radeon_handles*>(dev); // NOTE(review): only valid if dev really points at a radeon_handles; if radeon_handles derives from gpu_handles a static_cast would be the safer cast - confirm
     if (radeon_dev)
         radeon_dev->set_sampling_period(period);
 }

-bool radeon_open(const char *path)
+static radeon_handles* radeon_open(const char *path)
 {
    uint32_t drm_major = 0, drm_minor = 0;

@ -136,7 +134,7 @@ bool radeon_open(const char *path)

    if (fd < 0) {
        SPDLOG_ERROR("Failed to open DRM device: {}", strerror(errno));
-        return false;
+        return nullptr;
    }

    drmVersionPtr ver = drmGetVersion(fd);
@ -144,7 +142,7 @@ bool radeon_open(const char *path)
    if (!ver) {
        SPDLOG_ERROR("Failed to query driver version: {}", strerror(errno));
        close(fd);
-        return false;
+        return nullptr;
    }

    if (strcmp(ver->name, "radeon") || !DRM_ATLEAST_VERSION(ver, 2, 42)) {
@ -152,7 +150,7 @@ bool radeon_open(const char *path)
                     ver->name, ver->version_major, ver->version_minor, ver->version_patchlevel);
        close(fd);
        drmFreeVersion(ver);
-        return false;
+        return nullptr;
    }

    drm_major = ver->version_major;
@ -161,42 +159,60 @@ bool radeon_open(const char *path)

    if (!authenticate_drm(fd)) {
        close(fd);
+        return nullptr;
+    }
+
+    return new radeon_handles(fd, drm_major, drm_minor);
+}
+
+bool RadeonInfo::init()
+{
+    int idx = -1;
+
+    if (sscanf(sysfs_path.c_str(), "%*[^0-9]%d", &idx) != 1 || idx < 0)
+        return false;
+
+    const std::string dri_path = "/dev/dri/card" + std::to_string(idx);
+    device = reinterpret_cast<gpu_handles*>(radeon_open(dri_path.c_str()));
+    if (!device)
+    {
+        SPDLOG_WARN("Failed to open device '{}' with libdrm", dri_path);
        return false;
    }

-    radeon_dev = std::make_unique<radeon_handles>(fd, drm_major, drm_minor);
    return true;
 }

-void getRadeonInfo_libdrm()
+void RadeonInfo::update()
 {
    uint64_t value = 0;
    uint32_t value32 = 0;

+    auto radeon_dev = reinterpret_cast<radeon_handles*>(device);
    if (!radeon_dev)
        return;

-    gpu_info.load = radeon_dev->gui_percent;
+    s.load = radeon_dev->gui_percent;

    // TODO one shot?
    struct drm_radeon_gem_info buffer {};
    int ret = 0;
    if (!(ret = ioctl(radeon_dev->fd, DRM_IOCTL_RADEON_GEM_INFO, &buffer)))
-        gpu_info.memoryTotal = buffer.vram_size / (1024.f * 1024.f * 1024.f);
+        s.memory_total = buffer.vram_size / (1024.f * 1024.f * 1024.f);
    else
        SPDLOG_ERROR("DRM_IOCTL_RADEON_GEM_INFO failed: {}", ret);

    if (!get_radeon_drm_value(radeon_dev->fd, RADEON_INFO_VRAM_USAGE, &value))
-        gpu_info.memoryUsed = value / (1024.f * 1024.f * 1024.f);
+        s.memory_used = value / (1024.f * 1024.f * 1024.f);

    if (!get_radeon_drm_value(radeon_dev->fd, RADEON_INFO_CURRENT_GPU_SCLK, &value32))
-        gpu_info.CoreClock = value32;
+        s.core_clock = value32;

    if (!get_radeon_drm_value(radeon_dev->fd, RADEON_INFO_CURRENT_GPU_MCLK, &value32))
-        gpu_info.MemClock = value32;
+        s.memory_clock = value32;

    if (!get_radeon_drm_value(radeon_dev->fd, RADEON_INFO_CURRENT_GPU_TEMP, &value32))
-        gpu_info.temp = value32 / 1000;
+        s.temp = value32 / 1000;

-    gpu_info.powerUsage = 0;
+    s.power_usage = 0;
 }
--- a/src/hud_elements.cpp
+++ b/src/hud_elements.cpp
@ -130,64 +130,126 @@ void HudElements::version(){
    }
 }

-void HudElements::gpu_stats(){
-    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_stats]){
-        ImGui::TableNextRow(); ImGui::TableNextColumn();
-        const char* gpu_text;
+static void per_gpu_vram(GpuInfo* gpu) // Draws the VRAM row (used memory, optionally memory clock) for one GPU; gpu must be non-null.
+{
+    if (!HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_vram])
+        return;
+    ImGui::TableNextRow(); ImGui::TableNextColumn();
+    ImGui::TextColored(HUDElements.colors.vram, "VRAM");
+    ImGui::TableNextColumn();
+    right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%.1f", gpu->s.memory_used);
+    ImGui::SameLine(0,1.0f);
+    ImGui::PushFont(HUDElements.sw_stats->font1); // unit suffix is drawn with font1
+    ImGui::Text("GiB");
+    ImGui::PopFont();
+    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_mem_clock]){ // memory clock goes in the next column of the same row
+        ImGui::TableNextColumn();
+        right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%i", gpu->s.memory_clock);
+        ImGui::SameLine(0, 1.0f);
+        ImGui::PushFont(HUDElements.sw_stats->font1);
+        ImGui::Text("MHz");
+        ImGui::PopFont();
+    }
+}
+
+void HudElements::vram(){ // HUD element: VRAM row for the active GPU only.
+    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_show_all_gpus]) // in all-GPUs mode per_gpu_stats() draws a VRAM row per GPU instead
+        return;
+
+    if (g_active_gpu) // nothing is drawn when no active GPU is set
+        per_gpu_vram(g_active_gpu.get());
+}
+
+static void per_gpu_stats(GpuInfo* gpu, bool single){
+    ImGui::TableNextRow(); ImGui::TableNextColumn();
+    const char* gpu_text;
+    if (single)
+    {
        if (HUDElements.params->gpu_text.empty())
            gpu_text = "GPU";
        else
            gpu_text = HUDElements.params->gpu_text.c_str();
        ImGui::TextColored(HUDElements.colors.gpu, "%s", gpu_text);
        ImGui::TableNextColumn();
-        auto text_color = HUDElements.colors.text;
-        if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_load_change]){
-            struct LOAD_DATA gpu_data = {
-                HUDElements.colors.gpu_load_low,
-                HUDElements.colors.gpu_load_med,
-                HUDElements.colors.gpu_load_high,
-                HUDElements.params->gpu_load_value[0],
-                HUDElements.params->gpu_load_value[1]
-            };
+    }
+    else
+    {
+        ImGui::TextColored(HUDElements.colors.gpu, "%s", gpu->dev_name.c_str());
+        ImGui::TableNextRow(); ImGui::TableNextColumn();
+    }

-            auto load_color = change_on_load_temp(gpu_data, gpu_info.load);
-            right_aligned_text(load_color, HUDElements.ralign_width, "%i", gpu_info.load);
-            ImGui::SameLine(0, 1.0f);
-            ImGui::TextColored(load_color,"%%");
-        }
-        else {
-            right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu_info.load);
-            ImGui::SameLine(0, 1.0f);
-            ImGui::TextColored(text_color,"%%");
-            // ImGui::SameLine(150);
-            // ImGui::Text("%s", "%");
-        }
-        if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_temp]){
-            ImGui::TableNextColumn();
-            right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu_info.temp);
-            ImGui::SameLine(0, 1.0f);
-            ImGui::Text("°C");
-        }
-        if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_core_clock] || HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_power]){
-            ImGui::TableNextRow(); ImGui::TableNextColumn();
-        }
-        if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_core_clock]){
-            ImGui::TableNextColumn();
-            right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu_info.CoreClock);
-            ImGui::SameLine(0, 1.0f);
-            ImGui::PushFont(HUDElements.sw_stats->font1);
-            ImGui::Text("MHz");
-            ImGui::PopFont();
-        }
-        if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_power]) {
-            ImGui::TableNextColumn();
-            right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu_info.powerUsage);
-            ImGui::SameLine(0, 1.0f);
-            ImGui::PushFont(HUDElements.sw_stats->font1);
-            ImGui::Text("W");
-            ImGui::PopFont();
-        }
+    auto text_color = HUDElements.colors.text;
+    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_load_change]){
+        struct LOAD_DATA gpu_data = {
+            HUDElements.colors.gpu_load_low,
+            HUDElements.colors.gpu_load_med,
+            HUDElements.colors.gpu_load_high,
+            HUDElements.params->gpu_load_value[0],
+            HUDElements.params->gpu_load_value[1]
+        };
+
+        auto load_color = change_on_load_temp(gpu_data, gpu->s.load);
+        right_aligned_text(load_color, HUDElements.ralign_width, "%i", gpu->s.load);
+        ImGui::SameLine(0, 1.0f);
+        ImGui::TextColored(load_color,"%%");
+    }
+    else {
+        right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu->s.load);
+        ImGui::SameLine(0, 1.0f);
+        ImGui::TextColored(text_color,"%%");
+        // ImGui::SameLine(150);
+        // ImGui::Text("%s", "%");
+    }
+    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_temp]){
+        ImGui::TableNextColumn();
+        right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu->s.temp);
+        ImGui::SameLine(0, 1.0f);
+        ImGui::Text("°C");
+    }
+
+    if (single && (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_core_clock] || HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_power])){
+        ImGui::TableNextRow(); ImGui::TableNextColumn();
+    }
+
+    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_core_clock]){
+        ImGui::TableNextColumn();
+        right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu->s.core_clock);
+        ImGui::SameLine(0, 1.0f);
+        ImGui::PushFont(HUDElements.sw_stats->font1);
+        ImGui::Text("MHz");
+        ImGui::PopFont();
+    }
+    if (!single && HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_core_clock]) {
+        ImGui::TableNextRow();
+        ImGui::TableNextColumn();
    }
+    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_power]) {
+        ImGui::TableNextColumn();
+        right_aligned_text(text_color, HUDElements.ralign_width, "%i", gpu->s.power_usage);
+        ImGui::SameLine(0, 1.0f);
+        ImGui::PushFont(HUDElements.sw_stats->font1);
+        ImGui::Text("W");
+        ImGui::PopFont();
+    }
+
+    if (!single)
+        per_gpu_vram(gpu);
+}
+
+void HudElements::gpu_stats(){ // HUD element: GPU statistics for every known GPU or for the active GPU only.
+    auto p = HUDElements.params;
+    if (!p->enabled[OVERLAY_PARAM_ENABLED_gpu_stats])
+        return;
+
+    if (p->enabled[OVERLAY_PARAM_ENABLED_show_all_gpus])
+    {
+        for (const auto& g : g_gpu_infos) // g_gpu_infos is an unordered_map, so the order of the GPU rows is unspecified
+            per_gpu_stats(g.second.get(), false); // false = multi-GPU layout, labelled with the device name
+        return;
+    }
+
+    if (g_active_gpu)
+        per_gpu_stats(g_active_gpu.get(), true); // true = single-GPU layout using the configured gpu_text label
 }

 void HudElements::cpu_stats(){
@ -326,27 +388,6 @@ void HudElements::io_stats(){
    }
 }

-void HudElements::vram(){
-    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_vram]){
-        ImGui::TableNextRow(); ImGui::TableNextColumn();
-        ImGui::TextColored(HUDElements.colors.vram, "VRAM");
-        ImGui::TableNextColumn();
-        right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%.1f", gpu_info.memoryUsed);
-        ImGui::SameLine(0,1.0f);
-        ImGui::PushFont(HUDElements.sw_stats->font1);
-        ImGui::Text("GiB");
-        ImGui::PopFont();
-        if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_mem_clock]){
-            ImGui::TableNextColumn();
-            right_aligned_text(HUDElements.colors.text, HUDElements.ralign_width, "%i", gpu_info.MemClock);
-            ImGui::SameLine(0, 1.0f);
-            ImGui::PushFont(HUDElements.sw_stats->font1);
-            ImGui::Text("MHz");
-            ImGui::PopFont();
-        }
-    }
-}
-
 void HudElements::ram(){
 #ifdef __linux__
    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_ram]){
@ -451,11 +492,11 @@ void HudElements::fps(){
 }

 void HudElements::gpu_name(){
-    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_name] && !HUDElements.sw_stats->gpuName.empty()){
+    if (HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_gpu_name] && g_active_gpu && !g_active_gpu->dev_name.empty()){
        ImGui::TableNextRow(); ImGui::TableNextColumn();
        ImGui::PushFont(HUDElements.sw_stats->font1);
        ImGui::TextColored(HUDElements.colors.engine,
-            "%s", HUDElements.sw_stats->gpuName.c_str());
+            "%s", g_active_gpu->dev_name.c_str());
        ImGui::PopFont();
    }
 }
@ -703,7 +744,7 @@ void HudElements::graphs(){
    ImGui::PushFont(HUDElements.sw_stats->font1);
    if (value == "cpu_load"){
        for (auto& it : graph_data){
-            arr.push_back(float(it.cpu_load));
+            arr.push_back(it.cpu_load);
            arr.erase(arr.begin());
        }
        HUDElements.max = 100; HUDElements.min = 0;
@ -771,13 +812,13 @@ void HudElements::graphs(){
        ImGui::TextColored(HUDElements.colors.engine, "%s", "GPU Mem Clock");
    }

-    if (value == "vram"){
+    if (value == "vram" && g_active_gpu){
        for (auto& it : graph_data){
-            arr.push_back(float(it.gpu_vram_used));
+            arr.push_back(it.gpu_vram_used);
            arr.erase(arr.begin());
        }

-        HUDElements.max = gpu_info.memoryTotal;
+        HUDElements.max = g_active_gpu->s.memory_total;
        HUDElements.min = 0;
        ImGui::TextColored(HUDElements.colors.engine, "%s", "VRAM");
    }
@ -786,7 +827,7 @@ void HudElements::graphs(){
        if (!HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_ram])
            HUDElements.params->enabled[OVERLAY_PARAM_ENABLED_ram] = true;
        for (auto& it : graph_data){
-            arr.push_back(float(it.ram_used));
+            arr.push_back(it.ram_used);
            arr.erase(arr.begin());
        }

--- a/src/hud_elements.h
+++ b/src/hud_elements.h
@ -23,7 +23,8 @@ class HudElements{
        Clock::time_point last_exec;
        std::vector<std::pair<std::string, std::string>> options;
        std::vector<std::pair<void(*)(), std::string >> ordered_functions;
-        int min, max, gpu_core_max, gpu_mem_max, cpu_temp_max, gpu_temp_max;
+        float min, max;
+        int gpu_core_max, gpu_mem_max, cpu_temp_max, gpu_temp_max;
        const std::vector<std::string> permitted_params = {
            "gpu_load", "cpu_load", "gpu_core_clock", "gpu_mem_clock",
            "vram", "ram", "cpu_temp", "gpu_temp"
--- a/src/nvapi.cpp
+++ b/src/nvapi.cpp
@ -40,28 +40,34 @@ bool checkNVAPI(){
    NvAPI_Initialize = (NvAPI_Initialize_t) (*NvAPI_QueryInterface)(0x0150E828);
    NvAPI_EnumPhysicalGPUs = (NvAPI_EnumPhysicalGPUs_t) (*NvAPI_QueryInterface)(0xE5AC921F);
    NvAPI_GPU_GetUsages = (NvAPI_GPU_GetUsages_t) (*NvAPI_QueryInterface)(0x189A1FDF);
-    if (NvAPI_Initialize == NULL || NvAPI_EnumPhysicalGPUs == NULL ||
-        NvAPI_EnumPhysicalGPUs == NULL || NvAPI_GPU_GetUsages == NULL)
+
+    if (!NvAPI_Initialize || !NvAPI_EnumPhysicalGPUs || !NvAPI_EnumPhysicalGPUs || !NvAPI_GPU_GetUsages)
    {
        std::cerr << "Couldn't get functions in nvapi.dll" << std::endl;
        return 2;
    }
-    (*NvAPI_Initialize)();
-    
-    int         *gpuHandles[NVAPI_MAX_PHYSICAL_GPUS] = { NULL };
+    NvAPI_Initialize();
+
+    NvAPI_EnumPhysicalGPUs(gpuHandles, &gpuCount);

    return true;
 }

-void nvapi_util()
-{  
+bool NVAPIInfo::init() // Runs checkNVAPI() until it has succeeded once; returns the cached result.
+{
+    if (!init_nvapi_bool) // retried on every call while the previous attempt failed
+        init_nvapi_bool = checkNVAPI();
+    return init_nvapi_bool;
+}
+
+void NVAPIInfo::update()
+{
    if (!init_nvapi_bool){
        init_nvapi_bool = checkNVAPI();
    }
-    
-    gpuUsages[0] = (NVAPI_MAX_USAGES_PER_GPU * 4) | 0x10000;
-    (*NvAPI_EnumPhysicalGPUs)(gpuHandles, &gpuCount);
-    (*NvAPI_GPU_GetUsages)(gpuHandles[0], gpuUsages);
-    gpu_info.load = gpuUsages[3];

-}
+    gpuUsages[0] = (NVAPI_MAX_USAGES_PER_GPU * 4) | 0x10000;
+    NvAPI_GPU_GetUsages(gpuHandles[0], gpuUsages);
+    if (g_active_gpu)
+        g_active_gpu->s.load = gpuUsages[3];
+}
--- a/src/nvctrl.cpp
+++ b/src/nvctrl.cpp
@ -17,17 +17,22 @@ static std::unique_ptr<Display, std::function<void(Display*)>> display;
 struct nvctrlInfo nvctrl_info;
 bool nvctrlSuccess = false;

-static bool find_nv_x11(libnvctrl_loader& nvctrl, Display*& dpy)
+static bool find_nv_x11(libnvctrl_loader& nvctrl, Display*& dpy, int& scr)
 {
    char buf[8] {};
    for (int i = 0; i < 16; i++) {
        snprintf(buf, sizeof(buf), ":%d", i);
        Display *d = g_x11->XOpenDisplay(buf);
        if (d) {
-            if (nvctrl.XNVCTRLIsNvScreen(d, 0)) {
-                dpy = d;
-                SPDLOG_DEBUG("XNVCtrl is using display {}", buf);
-                return true;
+            int nscreens = ScreenCount(d); //FIXME yes, no, maybe?
+            for (int screen = 0; screen < nscreens; screen++)
+            {
+                if (nvctrl.XNVCTRLIsNvScreen(d, screen)) {
+                    dpy = d;
+                    scr = screen;
+                    SPDLOG_DEBUG("XNVCtrl is using display {}", buf);
+                    return true;
+                }
            }
            g_x11->XCloseDisplay(d);
        }
@ -46,20 +51,15 @@ bool checkXNVCtrl()
        return false;
    }

-    Display *dpy;
-    nvctrlSuccess = find_nv_x11(nvctrl, dpy);
+    Display *dpy = nullptr;
+    int screen = 0;
+    nvctrlSuccess = find_nv_x11(nvctrl, dpy, screen);

    if (!nvctrlSuccess) {
        SPDLOG_ERROR("XNVCtrl didn't find the correct display");
        return false;
    }

-    auto local_x11 = g_x11;
-    display = { dpy,
-        [local_x11](Display *dpy) {
-            local_x11->XCloseDisplay(dpy);
-        }
-    };
    // get device id at init
    int64_t pci_id;
    nvctrl.XNVCTRLQueryTargetAttribute64(display.get(),
@ -70,6 +70,13 @@ bool checkXNVCtrl()
                    &pci_id);
    deviceID = (pci_id & 0xFFFF);

+    auto local_x11 = g_x11;
+    display = { dpy,
+        [local_x11](Display *dpy) {
+            local_x11->XCloseDisplay(dpy);
+        }
+    };
+
    return true;
 }

--- a/src/nvidia_info.h
+++ b/src/nvidia_info.h
@ -11,7 +11,7 @@ extern struct nvmlUtilization_st nvidiaUtilization;
 extern struct nvmlMemory_st nvidiaMemory;
 extern bool nvmlSuccess;

-bool checkNVML(const char* pciBusId);
-bool getNVMLInfo(void);
+bool checkNVML(); // matches the definition in nvml.cpp: initializes NVML and takes no arguments
+bool getNVMLInfo(nvmlDevice_t device); // polls `device`; readings land in the nvidia* globals declared in this header

 #endif //MANGOHUD_NVIDIA_INFO_H
--- a/src/nvml.cpp
+++ b/src/nvml.cpp
@ -12,50 +12,100 @@ unsigned int nvidiaTemp = 0, nvidiaCoreClock = 0, nvidiaMemClock = 0, nvidiaPowe
 struct nvmlUtilization_st nvidiaUtilization;
 struct nvmlMemory_st nvidiaMemory {};

-bool checkNVML(const char* pciBusId){
-    auto& nvml = get_libnvml_loader();
-    if (nvml.IsLoaded()){
-        result = nvml.nvmlInit();
-        if (NVML_SUCCESS != result) {
-            SPDLOG_ERROR("Nvidia module not loaded");
-        } else {
-            nvmlReturn_t ret = NVML_ERROR_UNKNOWN;
-            if (pciBusId && ((ret = nvml.nvmlDeviceGetHandleByPciBusId(pciBusId, &nvidiaDevice)) != NVML_SUCCESS)) {
-                SPDLOG_ERROR("Getting device handle by PCI bus ID failed: {}", nvml.nvmlErrorString(ret));
-                SPDLOG_ERROR("Using index 0.");
-            }
-
-            if (ret != NVML_SUCCESS)
-                ret = nvml.nvmlDeviceGetHandleByIndex(0, &nvidiaDevice);
+static std::unique_ptr<libnvml_loader, std::function<void(libnvml_loader*)>> nvml_shutdown; // shutdown guard: its deleter calls nvmlShutdown() and never frees the loader

-            if (ret != NVML_SUCCESS)
-                SPDLOG_ERROR("Getting device handle failed: {}", nvml.nvmlErrorString(ret));
+bool checkNVML() // Returns true when the NVML loader is loaded and nvmlInit() succeeded, now or while nvmlSuccess was already set; device lookup is no longer done here.
+{
+    auto& nvml = get_libnvml_loader();
+    if (!nvml.IsLoaded())
+    {
+        SPDLOG_ERROR("Failed to load NVML");
+        return false;
+    }

-            nvmlSuccess = (ret == NVML_SUCCESS);
-            if (ret == NVML_SUCCESS)
-                nvml.nvmlDeviceGetPciInfo_v3(nvidiaDevice, &nvidiaPciInfo);
+    if (nvmlSuccess) // nvmlSuccess is still set from an earlier call: skip calling nvmlInit() again
+        return nvmlSuccess;

-            return nvmlSuccess;
-        }
-    } else {
-        SPDLOG_ERROR("Failed to load NVML");
+    result = nvml.nvmlInit();
+    if (NVML_SUCCESS != result)
+    {
+        SPDLOG_ERROR("Nvidia module not loaded");
+        return false;
    }

-    return false;
+    nvml_shutdown = { &nvml, // pointer to the loader returned by get_libnvml_loader(); the deleter below only shuts NVML down
+        [](libnvml_loader *nvml) -> void {
+            nvml->nvmlShutdown();
+        }
+    };
+    nvmlSuccess = true;
+    return nvmlSuccess;
 }

-bool getNVMLInfo(){
+bool getNVMLInfo(nvmlDevice_t device){ // Polls `device` and stores the readings in the file-level nvidia* variables; returns nvmlSuccess.
    nvmlReturn_t response;
    auto& nvml = get_libnvml_loader();
-    response = nvml.nvmlDeviceGetUtilizationRates(nvidiaDevice, &nvidiaUtilization);
-    nvml.nvmlDeviceGetTemperature(nvidiaDevice, NVML_TEMPERATURE_GPU, &nvidiaTemp);
-    nvml.nvmlDeviceGetMemoryInfo(nvidiaDevice, &nvidiaMemory);
-    nvml.nvmlDeviceGetClockInfo(nvidiaDevice, NVML_CLOCK_GRAPHICS, &nvidiaCoreClock);
-    nvml.nvmlDeviceGetClockInfo(nvidiaDevice, NVML_CLOCK_MEM, &nvidiaMemClock);
-    nvml.nvmlDeviceGetPowerUsage(nvidiaDevice, &nvidiaPowerUsage);
-    deviceID = nvidiaPciInfo.pciDeviceId >> 16;
+    response = nvml.nvmlDeviceGetUtilizationRates(device, &nvidiaUtilization); // only this call's status is inspected below; NVML_ERROR_NOT_SUPPORTED clears the shared nvmlSuccess flag
+    nvml.nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &nvidiaTemp); // return codes of this and the following queries are ignored
+    nvml.nvmlDeviceGetMemoryInfo(device, &nvidiaMemory);
+    nvml.nvmlDeviceGetClockInfo(device, NVML_CLOCK_GRAPHICS, &nvidiaCoreClock);
+    nvml.nvmlDeviceGetClockInfo(device, NVML_CLOCK_MEM, &nvidiaMemClock);
+    nvml.nvmlDeviceGetPowerUsage(device, &nvidiaPowerUsage);

    if (response == NVML_ERROR_NOT_SUPPORTED)
        nvmlSuccess = false;
    return nvmlSuccess;
 }
+
+// Binds this NVMLInfo to its NVML device handle.
+//
+// The handle is looked up by PCI bus id (pci_device) first. If that fails,
+// all NVML devices are enumerated and the one whose PCI device id equals
+// this->deviceID is used; when none matches, the first device that yielded a
+// handle is used as a last resort (the legacy "use index 0" behaviour).
+//
+// Returns false when NVML cannot be initialized or no device handle could be
+// obtained at all, so `device` never receives an uninitialized handle.
+//
+// NOTE(review): the deviceID match only works if deviceID is populated before
+// init() is called -- confirm against the caller.
+bool NVMLInfo::init()
+{
+    if (!checkNVML())
+        return false;
+
+    auto& nvml = get_libnvml_loader();
+    nvmlDevice_t nvml_dev {};
+    nvmlReturn_t ret = nvml.nvmlDeviceGetHandleByPciBusId(pci_device.c_str(), &nvml_dev);
+
+    if (ret != NVML_SUCCESS)
+    {
+        SPDLOG_ERROR("Getting device handle by PCI bus ID failed: {}", nvml.nvmlErrorString(ret));
+
+        unsigned int deviceCount = 0;
+        ret = nvml.nvmlDeviceGetCount(&deviceCount);
+        if (ret != NVML_SUCCESS)
+        {
+            SPDLOG_ERROR("Getting device count failed: {}", nvml.nvmlErrorString(ret));
+            return false;
+        }
+
+        bool have_handle = false;
+        bool matched = false;
+        for (unsigned i = 0; i < deviceCount; i++)
+        {
+            nvmlDevice_t candidate {};
+            // Query device i; the previous code always asked for index 0.
+            ret = nvml.nvmlDeviceGetHandleByIndex(i, &candidate);
+            if (ret != NVML_SUCCESS)
+            {
+                SPDLOG_ERROR("Getting device {} handle failed: {}", i, nvml.nvmlErrorString(ret));
+                continue;
+            }
+
+            // Remember the first usable handle as the last-resort fallback.
+            if (!have_handle)
+            {
+                nvml_dev = candidate;
+                have_handle = true;
+            }
+
+            if (nvml.nvmlDeviceGetPciInfo_v3(candidate, &nvidiaPciInfo) == NVML_SUCCESS
+                && this->deviceID == (nvidiaPciInfo.pciDeviceId >> 16))
+            {
+                nvml_dev = candidate;
+                matched = true;
+                break;
+            }
+        }
+
+        if (!have_handle)
+        {
+            SPDLOG_ERROR("No usable NVML device found for {}", pci_device);
+            return false;
+        }
+
+        if (!matched)
+            SPDLOG_WARN("No NVML device matches device id {:04X}, using the first available device.", this->deviceID);
+    }
+
+    device = reinterpret_cast<gpu_handles*>(nvml_dev);
+    return true;
+}
+
+// Refreshes this GPU's stats (`s`) from NVML. Does nothing while the shared
+// nvmlSuccess flag is cleared.
+void NVMLInfo::update()
+{
+    if (!nvmlSuccess)
+        return;
+
+    // Poll the device; the readings land in the file-level nvidia* variables.
+    getNVMLInfo(reinterpret_cast<nvmlDevice_t>(device));
+
+    // Utilization, temperature and clocks are copied through unchanged.
+    s.load = nvidiaUtilization.gpu;
+    s.temp = nvidiaTemp;
+    s.core_clock = nvidiaCoreClock;
+    s.memory_clock = nvidiaMemClock;
+
+    // Power keeps the original integer division by 1000.
+    s.power_usage = nvidiaPowerUsage / 1000;
+
+    // Memory figures are scaled down by 1024^3.
+    s.memory_used = nvidiaMemory.used / (1024.f * 1024.f * 1024.f);
+    s.memory_total = nvidiaMemory.total / (1024.f * 1024.f * 1024.f);
+}
--- a/src/overlay.cpp
+++ b/src/overlay.cpp
@ -58,11 +58,8 @@ void update_hw_info(struct swapchain_stats& sw_stats, struct overlay_params& par
 #endif
   }
   if (params.enabled[OVERLAY_PARAM_ENABLED_gpu_stats] || logger->is_active()) {
-      if (vendorID == 0x1002 && getAmdGpuInfo_actual)
-         getAmdGpuInfo_actual();
-
-      if (vendorID == 0x10de)
-         getNvidiaGpuInfo();
+      for (auto& gpu : g_gpu_infos)
+         gpu.second->update();
   }

 #ifdef __linux__
@ -76,12 +73,15 @@ void update_hw_info(struct swapchain_stats& sw_stats, struct overlay_params& par
      getIoStats(&sw_stats.io);
 #endif

-   currentLogData.gpu_load = gpu_info.load;
-   currentLogData.gpu_temp = gpu_info.temp;
-   currentLogData.gpu_core_clock = gpu_info.CoreClock;
-   currentLogData.gpu_mem_clock = gpu_info.MemClock;
-   currentLogData.gpu_vram_used = gpu_info.memoryUsed;
-   currentLogData.gpu_power = gpu_info.powerUsage;
+   if (g_active_gpu)
+   {
+      currentLogData.gpu_load = g_active_gpu->s.load;
+      currentLogData.gpu_temp = g_active_gpu->s.temp;
+      currentLogData.gpu_core_clock = g_active_gpu->s.core_clock;
+      currentLogData.gpu_mem_clock = g_active_gpu->s.memory_clock;
+      currentLogData.gpu_vram_used = g_active_gpu->s.memory_used;
+      currentLogData.gpu_power = g_active_gpu->s.power_usage;
+   }
 #ifdef __linux__
   currentLogData.ram_used = memused;
 #endif
@ -544,6 +544,118 @@ struct pci_bus {
   int func;
 };

+static void enumerate_gpus(overlay_params& params) // Fills g_gpu_infos with one successfully initialized backend per GPU: NVAPI under WIN32, otherwise the /sys/class/drm cards under __gnu_linux__, keyed by PCI device name.
+{
+#ifdef WIN32
+   auto gpu = std::make_shared<NVAPIInfo>();
+   if (gpu->init())
+      g_gpu_infos["nvapi_0"] = gpu;
+   return; // nothing else is probed in the WIN32 build
+#endif

+#ifdef __gnu_linux__
+   string path;
+   string drm = "/sys/class/drm/";

+   auto dirs = ls(drm.c_str(), "card");
+   for (auto& dir : dirs) {
+      path = drm + dir;

+      // skip display outputs
+      if (!file_exists(path + "/device/vendor"))
+         continue;

+      string vendor = read_line(path + "/device/vendor");
+      uint32_t vendor_id = strtoul(vendor.c_str(), NULL, 16); // sysfs value is parsed as hexadecimal

+      string device = read_line(path + "/device/device");
+      uint32_t device_id = strtoul(device.c_str(), NULL, 16); // OGL might fail so read from sysfs

+      string pci_device = read_symlink((path + "/device").c_str());
+      auto pos = pci_device.find_last_of('/');
+      pci_device = pci_device.substr(pos != std::string::npos ? pos + 1 : 0); // keep only the last path component of the symlink target; it becomes the g_gpu_infos key
+      SPDLOG_DEBUG("PCI device symlink: '{}' {}", pci_device, path);

+      string module = get_basename(read_symlink(path + "/device/driver/module")); // kernel module name selects the backend below
+      SPDLOG_DEBUG("using device path: '{}', module: '{}'", path, module);

+      auto dev_name = get_device_name(vendor_id, device_id);
+      if (module == "amdgpu")
+      {
+#ifdef HAVE_LIBDRM_AMDGPU
+         if (!params.enabled[OVERLAY_PARAM_ENABLED_force_amdgpu_hwmon]) // libdrm is tried first unless force_amdgpu_hwmon is enabled
+         {
+            auto gpu = std::make_shared<AMDGPUInfo>(path, pci_device);
+            if (gpu->init())
+            {
+               gpu->vendorID = vendor_id;
+               gpu->deviceID = device_id;
+               gpu->dev_name = dev_name;
+               amdgpu_set_sampling_period(gpu->device, params.fps_sampling_period);
+               g_gpu_infos[pci_device] = gpu;

+               SPDLOG_DEBUG("Using libdrm with {}", pci_device);
+               continue; // libdrm backend registered; skip the hwmon fallback below
+            }
+            // fall through and open sysfs handles for fallback or check DRM version beforehand
+            else
+            {
+               SPDLOG_WARN("Falling back to using hwmon sysfs.");
+            }
+         }
+#endif

+         if (!file_exists(path + "/device/gpu_busy_percent")) // the hwmon backend is only attempted when gpu_busy_percent exists
+            continue;

+         auto gpu = std::make_shared<AMDGPUHWMonInfo>(path, pci_device);
+         if (gpu->init())
+         {
+            gpu->vendorID = vendor_id;
+            gpu->deviceID = device_id;
+            gpu->dev_name = dev_name;
+            g_gpu_infos[pci_device] = gpu;
+         }
+      }
+      else if (module == "radeon")
+      {
+#ifdef HAVE_LIBDRM
+         auto gpu = std::make_shared<RadeonInfo>(path, pci_device);
+         if (gpu->init())
+         {
+            gpu->vendorID = vendor_id;
+            gpu->deviceID = device_id;
+            gpu->dev_name = dev_name;
+            g_gpu_infos[pci_device] = gpu;
+         }
+#endif
+      }
+      else if (module == "nvidia")
+      {
+         auto gpu = std::make_shared<NVMLInfo>(path, pci_device); // NVML is tried first; XNVCtrl below is used only when NVML init fails
+         if (gpu->init()) // NOTE(review): vendorID/deviceID are assigned only after init() returns, yet NVMLInfo::init() compares this->deviceID in its fallback path -- confirm the intended order
+         {
+            gpu->vendorID = vendor_id;
+            gpu->deviceID = device_id;
+            gpu->dev_name = dev_name;
+            g_gpu_infos[pci_device] = gpu;
+         }
+         else
+         {
+            auto gpu = std::make_shared<NVCtrlInfo>(path, pci_device); // shadows the failed NVMLInfo `gpu` from the enclosing scope
+            if (gpu->init())
+            {
+               gpu->vendorID = vendor_id;
+               gpu->deviceID = device_id;
+               gpu->dev_name = dev_name;
+               g_gpu_infos[pci_device] = gpu;
+            }
+         }
+      }
+   }
+#endif
+}
+
 void init_gpu_stats(uint32_t& vendorID, uint32_t target_device_id, overlay_params& params)
 {
   //if (!params.enabled[OVERLAY_PARAM_ENABLED_gpu_stats])
@ -578,129 +690,55 @@ void init_gpu_stats(uint32_t& vendorID, uint32_t target_device_id, overlay_param
      }
   }

-   // NVIDIA or Intel but maybe has Optimus
-   if (vendorID == 0x8086
-      || vendorID == 0x10de) {
+   if (!g_gpu_infos.size())
+      enumerate_gpus(params);

-      if(checkNvidia(pci_dev))
-         vendorID = 0x10de;
-      else
-         params.enabled[OVERLAY_PARAM_ENABLED_gpu_stats] = false;
+   if (pci_bus_parsed && pci_dev && g_gpu_infos.find(params.pci_dev) != g_gpu_infos.end())
+   {
+      g_active_gpu = g_gpu_infos[params.pci_dev];
   }
-
-#ifdef __linux__
-   if (vendorID == 0x8086 || vendorID == 0x1002
-       || gpu.find("Radeon") != std::string::npos
-       || gpu.find("AMD") != std::string::npos) {
-      string path;
-      string drm = "/sys/class/drm/";
-      getAmdGpuInfo_actual = getAmdGpuInfo;
-      bool using_libdrm = false;
-
-      auto dirs = ls(drm.c_str(), "card");
-      for (auto& dir : dirs) {
-         path = drm + dir;
-
-         SPDLOG_DEBUG("device path check: {}/device/vendor", path);
-
-         string device = read_line(path + "/device/device");
-         deviceID = strtol(device.c_str(), NULL, 16); // OGL might fail so read from sysfs
-
-         string vendor = read_line(path + "/device/vendor");
-         trim(vendor);
-         if (vendor != "0x1002")
-            continue;
-
-         if (pci_bus_parsed && pci_dev) {
-            string pci_device = read_symlink((path + "/device").c_str());
-            SPDLOG_DEBUG("PCI device symlink: '{}'", pci_device);
-            if (!ends_with(pci_device, pci_dev)) {
-               SPDLOG_WARN("skipping GPU, PCI ID doesn't match: {}", pci_device);
-               continue;
-            }
-         }
-         // Don't skip if intel, maybe there's a dgpu
-         else if (vendorID != 0x8086 && target_device_id && target_device_id != deviceID)
-         {
-            SPDLOG_WARN("expected device id {:04X}, got {:04X}, skipping", target_device_id, deviceID);
-            continue;
-         }
-
-         string module = get_basename(read_symlink(path + "/device/driver/module"));
-         SPDLOG_DEBUG("using device path: '{}', module: '{}'", path, module);
-
-         int idx = -1;
-         //TODO make neater
-         int res = sscanf(path.c_str(), "%*[^0-9]%d", &idx);
-         (void)res;
-         std::string dri_path = "/dev/dri/card" + std::to_string(idx);
-
-         if (module == "amdgpu")
+   else if (vendorID == 0x8086 /*&& target_device_id && target_device_id != deviceID*/)
+   {
+      for (auto& info : g_gpu_infos)
+      {
+         const auto& gpu = info.second;
+         if (gpu->vendorID != 0x8086 && gpu->deviceID == target_device_id)
         {
-#ifdef HAVE_LIBDRM_AMDGPU
-            if (!params.enabled[OVERLAY_PARAM_ENABLED_force_amdgpu_hwmon] && res == 1 && amdgpu_open(dri_path.c_str())) {
-               vendorID = 0x1002;
-               using_libdrm = true;
-               getAmdGpuInfo_actual = getAmdGpuInfo_libdrm;
-               amdgpu_set_sampling_period(params.fps_sampling_period);
-
-               SPDLOG_DEBUG("Using libdrm");
-               // fall through and open sysfs handles for fallback or check DRM version beforehand
-            } else if (!params.enabled[OVERLAY_PARAM_ENABLED_force_amdgpu_hwmon]) {
-               SPDLOG_WARN("Failed to open device '/dev/dri/card{}' with libdrm, falling back to using hwmon sysfs.", idx);
-            }
-#endif
+            g_active_gpu = gpu;
+            break;
         }
-#ifdef HAVE_LIBDRM
-         else if (module == "radeon")
+      }
+//       SPDLOG_WARN("expected device id {:04X}, got {:04X}, skipping", target_device_id, deviceID);
+   }
+   else
+   {
+      for (auto& info : g_gpu_infos)
+      {
+         auto& gpu = info.second;
+         if (gpu->vendorID == vendorID && gpu->deviceID == target_device_id)
         {
-            if (res == 1 && radeon_open(dri_path.c_str())) {
-               vendorID = 0x1002;
-               using_libdrm = true;
-               getAmdGpuInfo_actual = getRadeonInfo_libdrm;
-               radeon_set_sampling_period(params.fps_sampling_period);
-            } else {
-               SPDLOG_WARN("Failed to open device '/dev/dri/card{}' with libdrm.", idx);
-               params.enabled[OVERLAY_PARAM_ENABLED_gpu_stats] = false;
-               break;
-            }
-         }
-#endif
-
-         if (!file_exists(path + "/device/gpu_busy_percent"))
-            continue;
-
-         path += "/device";
-         if (!amdgpu.busy)
-            amdgpu.busy = fopen((path + "/gpu_busy_percent").c_str(), "r");
-         if (!amdgpu.vram_total)
-            amdgpu.vram_total = fopen((path + "/mem_info_vram_total").c_str(), "r");
-         if (!amdgpu.vram_used)
-            amdgpu.vram_used = fopen((path + "/mem_info_vram_used").c_str(), "r");
-
-         path += "/hwmon/";
-         string tempFolder;
-         if (find_folder(path, "hwmon", tempFolder)) {
-            if (!amdgpu.core_clock)
-               amdgpu.core_clock = fopen((path + tempFolder + "/freq1_input").c_str(), "r");
-            if (!amdgpu.memory_clock)
-               amdgpu.memory_clock = fopen((path + tempFolder + "/freq2_input").c_str(), "r");
-            if (!amdgpu.temp)
-               amdgpu.temp = fopen((path + tempFolder + "/temp1_input").c_str(), "r");
-            if (!amdgpu.power_usage)
-               amdgpu.power_usage = fopen((path + tempFolder + "/power1_average").c_str(), "r");
-
-            vendorID = 0x1002;
+            g_active_gpu = gpu;
            break;
         }
      }
-
-      // don't bother then
-      if (!using_libdrm && !amdgpu.busy && !amdgpu.temp && !amdgpu.vram_total && !amdgpu.vram_used) {
-         params.enabled[OVERLAY_PARAM_ENABLED_gpu_stats] = false;
-      }
   }
+
+#ifdef WIN32
+   //TODO windows' gpu stats
+   if (g_gpu_infos.size())
+      g_active_gpu = g_gpu_infos.begin()->second;
 #endif
+
+   // for compatibility
+   if (g_active_gpu)
+   {
+      vendorID = g_active_gpu->vendorID;
+      deviceID = g_active_gpu->deviceID;
+   }
+
+   if (g_active_gpu)
+      SPDLOG_INFO("Selected GPU: {}, {}, 0x{:X}:0x{:X}", g_active_gpu->sysfs_path, g_active_gpu->dev_name, vendorID, deviceID);
+
   if (!params.permit_upload)
      SPDLOG_INFO("Uploading is disabled (permit_upload = 0)");
 }
@ -795,13 +833,13 @@ void init_system_info(){
      SPDLOG_DEBUG("Cpu:{}", cpu);
      SPDLOG_DEBUG("Kernel:{}", kernel);
      SPDLOG_DEBUG("Os:{}", os);
-      SPDLOG_DEBUG("Gpu:{}", gpu);
+//       SPDLOG_DEBUG("Gpu:{}", gpu);
      SPDLOG_DEBUG("Driver:{}", driver);
      SPDLOG_DEBUG("CPU Scheduler:{}", cpusched);
 #endif
 }

-void get_device_name(int32_t vendorID, int32_t deviceID, struct swapchain_stats& sw_stats)
+std::string get_device_name(int32_t vendorID, int32_t deviceID)
 {
 #ifdef __linux__
   if (pci_ids.find(vendorID) == pci_ids.end())
@ -815,7 +853,14 @@ void get_device_name(int32_t vendorID, int32_t deviceID, struct swapchain_stats&
      for (char c: chars)
         desc.erase(remove(desc.begin(), desc.end(), c), desc.end());
   }
-   gpu = sw_stats.gpuName = desc;
-   trim(sw_stats.gpuName); trim(gpu);
+   trim(desc);
+   return desc;
+#else
+   return {};
 #endif
 }
+
+// Compatibility overload: resolves the device name once and publishes it to
+// both sw_stats.gpuName and the file-level `gpu` string.
+void get_device_name(int32_t vendorID, int32_t deviceID, struct swapchain_stats& sw_stats)
+{
+   sw_stats.gpuName = get_device_name(vendorID, deviceID);
+   gpu = sw_stats.gpuName;
+}
--- a/src/overlay.h
+++ b/src/overlay.h
@ -116,6 +116,7 @@ void init_cpu_stats(overlay_params& params);
 void check_keybinds(struct swapchain_stats& sw_stats, struct overlay_params& params, uint32_t vendorID);
 void init_system_info(void);
 void FpsLimiter(struct fps_limit& stats);
+std::string get_device_name(int32_t vendorID, int32_t deviceID);
 void get_device_name(int32_t vendorID, int32_t deviceID, struct swapchain_stats& sw_stats);
 void calculate_benchmark_data();
 void create_fonts(const overlay_params& params, ImFont*& small_font, ImFont*& text_font);
--- a/src/overlay_params.h
+++ b/src/overlay_params.h
@ -33,6 +33,7 @@ typedef unsigned long KeySym;
   OVERLAY_PARAM_BOOL(gpu_temp)                      \
   OVERLAY_PARAM_BOOL(cpu_stats)                     \
   OVERLAY_PARAM_BOOL(gpu_stats)                     \
+   OVERLAY_PARAM_BOOL(show_all_gpus)                 \
   OVERLAY_PARAM_BOOL(ram)                           \
   OVERLAY_PARAM_BOOL(swap)                          \
   OVERLAY_PARAM_BOOL(vram)                          \