diff --git a/meson.build b/meson.build index f8255e5c..c20b9bec 100644 --- a/meson.build +++ b/meson.build @@ -289,5 +289,22 @@ endif if get_option('tests').enabled() cmocka = subproject('cmocka') cmocka_dep = cmocka.get_variable('cmocka_dep') + + e = executable('amdgpu', 'tests/amdgpu.cpp', + files( + 'src/amdgpu.cpp', + 'src/cpu.cpp', + 'src/gpu.cpp', + 'src/mesa/util/os_time.c', + 'src/file_utils.cpp', + ), + dependencies: [ + cmocka_dep, + spdlog_dep, + dearimgui_dep + ]) + + test('test amdgpu', e, workdir : meson.project_source_root() + '/tests') + endif diff --git a/src/amdgpu.cpp b/src/amdgpu.cpp index 5324a207..ada4d11e 100644 --- a/src/amdgpu.cpp +++ b/src/amdgpu.cpp @@ -1,47 +1,16 @@ #include #include +#include #include "amdgpu.h" #include "gpu.h" #include "cpu.h" #include "overlay.h" -#define METRICS_UPDATE_PERIOD_MS 500 -#define METRICS_POLLING_PERIOD_MS 5 -#define METRICS_SAMPLE_COUNT (METRICS_UPDATE_PERIOD_MS/METRICS_POLLING_PERIOD_MS) - std::string metrics_path = ""; - -/* This structure is used to communicate the latest values of the amdgpu metrics. - * The direction of communication is amdgpu_polling_thread -> amdgpu_get_metrics(). - */ -struct amdgpu_common_metrics { - /* Load level: averaged across the sampling period */ - uint16_t gpu_load_percent; - // uint16_t mem_load_percent; - - /* Power usage: averaged across the sampling period */ - float average_gfx_power_w; - float average_cpu_power_w; - - /* Clocks: latest value of the clock */ - uint16_t current_gfxclk_mhz; - uint16_t current_uclk_mhz; - - /* Temperatures: maximum values over the sampling period */ - uint16_t soc_temp_c; - uint16_t gpu_temp_c; - uint16_t apu_cpu_temp_c; - - /* throttling status */ - bool is_power_throttled; - bool is_current_throttled; - bool is_temp_throttled; - bool is_other_throttled; -} amdgpu_common_metrics; - +struct amdgpu_common_metrics amdgpu_common_metrics; std::mutex amdgpu_common_metrics_m; -bool amdgpu_check_metrics(const std::string& path) +bool amdgpu_verify_metrics(const std::string& path) { metrics_table_header header {}; FILE *f; @@ -141,22 +110,8 @@ void amdgpu_get_instant_metrics(struct amdgpu_common_metrics *metrics) { #define UPDATE_METRIC_MAX(FIELD) do { int cur_max = metrics_buffer[0].FIELD; for (size_t s=1; s < METRICS_SAMPLE_COUNT; s++) { cur_max = MAX(cur_max, metrics_buffer[s].FIELD); }; amdgpu_common_metrics.FIELD = cur_max; } while(0) #define UPDATE_METRIC_LAST(FIELD) do { amdgpu_common_metrics.FIELD = metrics_buffer[METRICS_SAMPLE_COUNT - 1].FIELD; } while(0) -void amdgpu_metrics_polling_thread() { - struct amdgpu_common_metrics metrics_buffer[METRICS_SAMPLE_COUNT]; - bool gpu_load_needs_dividing = false; //some GPUs report load as centipercent - - // Initial poll of the metrics, so that we have values to display as fast as possible - amdgpu_get_instant_metrics(&amdgpu_common_metrics); - if (amdgpu_common_metrics.gpu_load_percent > 100){ - gpu_load_needs_dividing = true; - amdgpu_common_metrics.gpu_load_percent /= 100; - } - - // Set all the fields to 0 by default. Only done once as we're just replacing previous values after - memset(metrics_buffer, 0, sizeof(metrics_buffer)); - - while (1) { - // Get all the samples +void amdgpu_get_samples_and_copy(struct amdgpu_common_metrics metrics_buffer[METRICS_SAMPLE_COUNT], bool gpu_load_needs_dividing) { + // Get all the samples for (size_t cur_sample_id=0; cur_sample_id < METRICS_SAMPLE_COUNT; cur_sample_id++) { amdgpu_get_instant_metrics(&metrics_buffer[cur_sample_id]); @@ -187,6 +142,24 @@ void amdgpu_metrics_polling_thread() { UPDATE_METRIC_MAX(is_temp_throttled); UPDATE_METRIC_MAX(is_other_throttled); amdgpu_common_metrics_m.unlock(); +} + +void amdgpu_metrics_polling_thread() { + struct amdgpu_common_metrics metrics_buffer[METRICS_SAMPLE_COUNT]; + bool gpu_load_needs_dividing = false; //some GPUs report load as centipercent + + // Initial poll of the metrics, so that we have values to display as fast as possible + amdgpu_get_instant_metrics(&amdgpu_common_metrics); + if (amdgpu_common_metrics.gpu_load_percent > 100){ + gpu_load_needs_dividing = true; + amdgpu_common_metrics.gpu_load_percent /= 100; + } + + // Set all the fields to 0 by default. Only done once as we're just replacing previous values after + memset(metrics_buffer, 0, sizeof(metrics_buffer)); + + while (1) { + amdgpu_get_samples_and_copy(metrics_buffer, gpu_load_needs_dividing); } } diff --git a/src/amdgpu.h b/src/amdgpu.h index e8867de6..a0a5f85d 100644 --- a/src/amdgpu.h +++ b/src/amdgpu.h @@ -1,11 +1,15 @@ #pragma once -#include -#include +// #include +// #include #include #include #include -#include +#include +// #include +#define METRICS_UPDATE_PERIOD_MS 500 +#define METRICS_POLLING_PERIOD_MS 5 +#define METRICS_SAMPLE_COUNT (METRICS_UPDATE_PERIOD_MS/METRICS_POLLING_PERIOD_MS) #define NUM_HBM_INSTANCES 4 struct metrics_table_header { @@ -139,6 +143,37 @@ struct gpu_metrics_v2_2 { uint64_t indep_throttle_status; }; -bool amdgpu_check_metrics(const std::string& path); +/* This structure is used to communicate the latest values of the amdgpu metrics. + * The direction of communication is amdgpu_polling_thread -> amdgpu_get_metrics(). + */ +struct amdgpu_common_metrics { + /* Load level: averaged across the sampling period */ + uint16_t gpu_load_percent; + // uint16_t mem_load_percent; + + /* Power usage: averaged across the sampling period */ + float average_gfx_power_w; + float average_cpu_power_w; + + /* Clocks: latest value of the clock */ + uint16_t current_gfxclk_mhz; + uint16_t current_uclk_mhz; + + /* Temperatures: maximum values over the sampling period */ + uint16_t soc_temp_c; + uint16_t gpu_temp_c; + uint16_t apu_cpu_temp_c; + + /* throttling status */ + bool is_power_throttled; + bool is_current_throttled; + bool is_temp_throttled; + bool is_other_throttled; +}; + +bool amdgpu_verify_metrics(const std::string& path); extern void amdgpu_get_metrics(); extern std::string metrics_path; +extern void amdgpu_get_instant_metrics(struct amdgpu_common_metrics *metrics); +extern void amdgpu_metrics_polling_thread(); +extern void amdgpu_get_samples_and_copy(struct amdgpu_common_metrics metrics_buffer[METRICS_SAMPLE_COUNT], bool gpu_load_needs_dividing); \ No newline at end of file diff --git a/tests/gpu_metrics b/tests/gpu_metrics new file mode 100644 index 00000000..db6b5972 Binary files /dev/null and b/tests/gpu_metrics differ diff --git a/tests/gpu_metrics_apu b/tests/gpu_metrics_apu new file mode 100644 index 00000000..c46ed676 Binary files /dev/null and b/tests/gpu_metrics_apu differ diff --git a/tests/gpu_metrics_invalid b/tests/gpu_metrics_invalid new file mode 100644 index 00000000..44372986 Binary files /dev/null and b/tests/gpu_metrics_invalid differ diff --git a/tests/test_amdgpu.cpp b/tests/test_amdgpu.cpp new file mode 100644 index 00000000..5b33ed6f --- /dev/null +++ b/tests/test_amdgpu.cpp @@ -0,0 +1,84 @@ +#include +#include +#include +#include +extern "C" { +#include +} +#include "stdio.h" +#include "../src/amdgpu.h" +#include "../src/cpu.h" +#include "tests.h" + +static void test_amdgpu_verify_metrics(void **state) { + UNUSED(state); + + assert_false(amdgpu_verify_metrics("")); + assert_false(amdgpu_verify_metrics("gpu_metrics_invalid")); + assert_true (amdgpu_verify_metrics("gpu_metrics")); +} + +static void test_amdgpu_get_instant_metrics(void **state){ + UNUSED(state); + struct amdgpu_common_metrics metrics; + + // fail fetch gpu_metrics file + metrics_path = ""; + amdgpu_get_instant_metrics(&metrics); + + // DGPU + metrics_path = "gpu_metrics"; + metrics = {}; + amdgpu_get_instant_metrics(&metrics); + assert_int_equal(metrics.gpu_load_percent, 64); + assert_float_equal(metrics.average_gfx_power_w, 33, 0); + assert_int_equal(metrics.current_gfxclk_mhz, 2165); + assert_int_equal(metrics.current_uclk_mhz, 1000); + assert_int_equal(metrics.gpu_temp_c, 36); + assert_false(metrics.is_power_throttled); + assert_false(metrics.is_current_throttled); + assert_false(metrics.is_temp_throttled); + assert_false(metrics.is_other_throttled); + + metrics_path = "gpu_metrics_apu"; + metrics = {}; + amdgpu_get_instant_metrics(&metrics); + assert_int_equal(metrics.gpu_load_percent, 100); + assert_float_equal(metrics.average_gfx_power_w, 6.161, 0); + assert_int_equal(metrics.current_gfxclk_mhz, 1040); + assert_int_equal(metrics.current_uclk_mhz, 687); + assert_int_equal(metrics.gpu_temp_c, 81); + assert_int_equal(metrics.soc_temp_c, 71); + assert_int_equal(metrics.apu_cpu_temp_c, 655); + assert_true(metrics.is_power_throttled); + assert_false(metrics.is_current_throttled); + assert_false(metrics.is_temp_throttled); + assert_false(metrics.is_other_throttled); +} + +static void test_amdgpu_get_samples_and_copy(void **state){ + UNUSED(state); + + struct amdgpu_common_metrics metrics_buffer[100]; + bool gpu_load_needs_dividing = false; //some GPUs report load as centipercent + amdgpu_get_samples_and_copy(metrics_buffer, gpu_load_needs_dividing); + gpu_load_needs_dividing = true; + amdgpu_get_samples_and_copy(metrics_buffer, gpu_load_needs_dividing); +} + +static void test_amdgpu_get_metrics(void **state){ + UNUSED(state); + + amdgpu_get_metrics(); +} + +const struct CMUnitTest amdgpu_tests[] = { + cmocka_unit_test(test_amdgpu_verify_metrics), + cmocka_unit_test(test_amdgpu_get_instant_metrics), + cmocka_unit_test(test_amdgpu_get_samples_and_copy), + cmocka_unit_test(test_amdgpu_get_metrics) +}; + +int main(void) { + return cmocka_run_group_tests(amdgpu_tests, NULL, NULL); +} \ No newline at end of file diff --git a/tests/tests.h b/tests/tests.h new file mode 100644 index 00000000..4272a6f8 --- /dev/null +++ b/tests/tests.h @@ -0,0 +1,2 @@ +#pragma once +#define UNUSED(x) (void)(x) \ No newline at end of file