diff --git a/src/amdgpu.cpp b/src/amdgpu.cpp
index 1f9394f6..b301274b 100644
--- a/src/amdgpu.cpp
+++ b/src/amdgpu.cpp
@@ -24,8 +24,10 @@ bool amdgpu_verify_metrics(const std::string& path)
     metrics_table_header header {};
     FILE *f;
     f = fopen(path.c_str(), "rb");
-    if (!f)
+    if (!f) { /* fopen() failed: log at debug level, then report failure */
+        SPDLOG_DEBUG("Failed to read the metrics header of '{}'", path);
         return false;
+    }
 
     if (fread(&header, sizeof(header), 1, f) == 0)
     {
@@ -56,7 +58,7 @@ bool amdgpu_verify_metrics(const std::string& path)
 #define IS_VALID_METRIC(FIELD) (FIELD != 0xffff)
 void amdgpu_get_instant_metrics(struct amdgpu_common_metrics *metrics) {
     FILE *f;
-    void *buf[MAX(sizeof(struct gpu_metrics_v1_3), sizeof(struct gpu_metrics_v2_3))/sizeof(void*)+1];
+    void *buf[MAX(sizeof(struct gpu_metrics_v1_3), sizeof(struct gpu_metrics_v2_4))/sizeof(void*)+1]; /* holds the larger of the v1_3 and v2_4 tables, rounded up to whole pointers */
     struct metrics_table_header* header = (metrics_table_header*)buf;
 
     f = fopen(metrics_path.c_str(), "rb");
diff --git a/src/amdgpu.h b/src/amdgpu.h
index 71e975b0..7cfcead1 100644
--- a/src/amdgpu.h
+++ b/src/amdgpu.h
@@ -162,6 +162,76 @@ struct gpu_metrics_v2_3 {
     uint16_t average_temperature_l3[2];
 };
 
+/* gpu_metrics_v2_4 table layout; amdgpu_get_instant_metrics() sizes its read buffer with sizeof() of this struct. Field order and widths presumably mirror the kernel's definition -- TODO confirm before changing. */
+struct gpu_metrics_v2_4 {
+    struct metrics_table_header common_header;
+
+    /* Temperature (unit: centi-Celsius) */
+    uint16_t temperature_gfx;
+    uint16_t temperature_soc;
+    uint16_t temperature_core[8];
+    uint16_t temperature_l3[2];
+
+    /* Utilization (unit: centi) */
+    uint16_t average_gfx_activity;
+    uint16_t average_mm_activity;
+
+    /* Driver attached timestamp (in ns) */
+    uint64_t system_clock_counter;
+
+    /* Power/Energy (unit: mW) */
+    uint16_t average_socket_power;
+    uint16_t average_cpu_power;
+    uint16_t average_soc_power;
+    uint16_t average_gfx_power;
+    uint16_t average_core_power[8];
+
+    /* Average clocks (unit: MHz) */
+    uint16_t average_gfxclk_frequency;
+    uint16_t average_socclk_frequency;
+    uint16_t average_uclk_frequency;
+    uint16_t average_fclk_frequency;
+    uint16_t average_vclk_frequency;
+    uint16_t average_dclk_frequency;
+
+    /* Current clocks (unit: MHz) */
+    uint16_t current_gfxclk;
+    uint16_t current_socclk;
+    uint16_t current_uclk;
+    uint16_t current_fclk;
+    uint16_t current_vclk;
+    uint16_t current_dclk;
+    uint16_t current_coreclk[8];
+    uint16_t current_l3clk[2];
+
+    /* Throttle status (ASIC dependent) */
+    uint32_t throttle_status;
+
+    /* Fans */
+    uint16_t fan_pwm;
+
+    uint16_t padding[3]; /* explicit padding; presumably keeps the uint64_t below 8-byte aligned -- TODO confirm */
+
+    /* Throttle status (ASIC independent) */
+    uint64_t indep_throttle_status;
+
+    /* Average Temperature (unit: centi-Celsius) */
+    uint16_t average_temperature_gfx;
+    uint16_t average_temperature_soc;
+    uint16_t average_temperature_core[8];
+    uint16_t average_temperature_l3[2];
+
+    /* Power/Voltage (unit: mV) */
+    uint16_t average_cpu_voltage;
+    uint16_t average_soc_voltage;
+    uint16_t average_gfx_voltage;
+
+    /* Power/Current (unit: mA) */
+    uint16_t average_cpu_current;
+    uint16_t average_soc_current;
+    uint16_t average_gfx_current;
+};
+
 /* This structure is used to communicate the latest values of the amdgpu metrics.
  * The direction of communication is amdgpu_polling_thread -> amdgpu_get_metrics().
  */