amdgpu tests

pull/851/head
FlightlessMango 1 year ago
parent ed6a427751
commit cb7d2a2447

@ -289,5 +289,22 @@ endif
# Optional unit tests: everything below is only configured when the 'tests'
# feature option is enabled.
if get_option('tests').enabled()
# cmocka comes from a Meson subproject, which exposes its dependency object
# under the variable name 'cmocka_dep'.
cmocka = subproject('cmocka')
cmocka_dep = cmocka.get_variable('cmocka_dep')
# The 'amdgpu' test binary is built from the test driver plus the listed
# project sources, compiled directly into the executable.
e = executable('amdgpu', 'tests/amdgpu.cpp',
files(
'src/amdgpu.cpp',
'src/cpu.cpp',
'src/gpu.cpp',
'src/mesa/util/os_time.c',
'src/file_utils.cpp',
),
dependencies: [
cmocka_dep,
spdlog_dep,
dearimgui_dep
])
# Run with <source root>/tests as the working directory: the test driver hands
# relative file names (e.g. 'gpu_metrics') to the code under test.
test('test amdgpu', e, workdir : meson.project_source_root() + '/tests')
endif

@ -1,47 +1,16 @@
#include <spdlog/spdlog.h>
#include <thread>
#include <sys/sysinfo.h>
#include "amdgpu.h"
#include "gpu.h"
#include "cpu.h"
#include "overlay.h"
#define METRICS_UPDATE_PERIOD_MS 500
#define METRICS_POLLING_PERIOD_MS 5
#define METRICS_SAMPLE_COUNT (METRICS_UPDATE_PERIOD_MS/METRICS_POLLING_PERIOD_MS)
std::string metrics_path = "";
/* This structure is used to communicate the latest values of the amdgpu metrics.
* The direction of communication is amdgpu_metrics_polling_thread() -> amdgpu_get_metrics().
* The closing line of this definition also declares the global instance named
* amdgpu_common_metrics, so later uses of the type need the `struct` keyword.
*/
struct amdgpu_common_metrics {
/* Load level: averaged across the sampling period */
uint16_t gpu_load_percent;
// uint16_t mem_load_percent;
/* Power usage: averaged across the sampling period */
float average_gfx_power_w;
float average_cpu_power_w;
/* Clocks: latest value of the clock */
uint16_t current_gfxclk_mhz;
uint16_t current_uclk_mhz;
/* Temperatures: maximum values over the sampling period */
uint16_t soc_temp_c;
uint16_t gpu_temp_c;
uint16_t apu_cpu_temp_c;
/* throttling status */
bool is_power_throttled;
bool is_current_throttled;
bool is_temp_throttled;
bool is_other_throttled;
} amdgpu_common_metrics;
// Shared, most recently published metrics. The variable has the same name as
// its type, hence the explicit `struct` keyword.
struct amdgpu_common_metrics amdgpu_common_metrics;
// Mutex paired with amdgpu_common_metrics; amdgpu_get_samples_and_copy()
// unlocks it once it has finished writing the aggregated fields.
std::mutex amdgpu_common_metrics_m;
bool amdgpu_check_metrics(const std::string& path)
bool amdgpu_verify_metrics(const std::string& path)
{
metrics_table_header header {};
FILE *f;
@ -141,22 +110,8 @@ void amdgpu_get_instant_metrics(struct amdgpu_common_metrics *metrics) {
// Publishes the largest value of FIELD found across all METRICS_SAMPLE_COUNT
// entries of `metrics_buffer` into the global amdgpu_common_metrics.
// Relies on `metrics_buffer` and a MAX() macro being in scope where it is
// expanded. The running maximum is held in an `int`, so FIELD is converted to
// int while it is being compared.
#define UPDATE_METRIC_MAX(FIELD) do { int cur_max = metrics_buffer[0].FIELD; for (size_t s=1; s < METRICS_SAMPLE_COUNT; s++) { cur_max = MAX(cur_max, metrics_buffer[s].FIELD); }; amdgpu_common_metrics.FIELD = cur_max; } while(0)
// Publishes FIELD from the last entry of `metrics_buffer`
// (index METRICS_SAMPLE_COUNT - 1) into the global amdgpu_common_metrics.
#define UPDATE_METRIC_LAST(FIELD) do { amdgpu_common_metrics.FIELD = metrics_buffer[METRICS_SAMPLE_COUNT - 1].FIELD; } while(0)
void amdgpu_metrics_polling_thread() {
struct amdgpu_common_metrics metrics_buffer[METRICS_SAMPLE_COUNT];
bool gpu_load_needs_dividing = false; //some GPUs report load as centipercent
// Initial poll of the metrics, so that we have values to display as fast as possible
amdgpu_get_instant_metrics(&amdgpu_common_metrics);
if (amdgpu_common_metrics.gpu_load_percent > 100){
gpu_load_needs_dividing = true;
amdgpu_common_metrics.gpu_load_percent /= 100;
}
// Set all the fields to 0 by default. Only done once as we're just replacing previous values after
memset(metrics_buffer, 0, sizeof(metrics_buffer));
while (1) {
// Get all the samples
void amdgpu_get_samples_and_copy(struct amdgpu_common_metrics metrics_buffer[METRICS_SAMPLE_COUNT], bool gpu_load_needs_dividing) {
// Get all the samples
for (size_t cur_sample_id=0; cur_sample_id < METRICS_SAMPLE_COUNT; cur_sample_id++) {
amdgpu_get_instant_metrics(&metrics_buffer[cur_sample_id]);
@ -187,6 +142,24 @@ void amdgpu_metrics_polling_thread() {
UPDATE_METRIC_MAX(is_temp_throttled);
UPDATE_METRIC_MAX(is_other_throttled);
amdgpu_common_metrics_m.unlock();
}
void amdgpu_metrics_polling_thread() {
struct amdgpu_common_metrics metrics_buffer[METRICS_SAMPLE_COUNT];
bool gpu_load_needs_dividing = false; //some GPUs report load as centipercent
// Initial poll of the metrics, so that we have values to display as fast as possible
amdgpu_get_instant_metrics(&amdgpu_common_metrics);
if (amdgpu_common_metrics.gpu_load_percent > 100){
gpu_load_needs_dividing = true;
amdgpu_common_metrics.gpu_load_percent /= 100;
}
// Set all the fields to 0 by default. Only done once as we're just replacing previous values after
memset(metrics_buffer, 0, sizeof(metrics_buffer));
while (1) {
amdgpu_get_samples_and_copy(metrics_buffer, gpu_load_needs_dividing);
}
}

@ -1,11 +1,15 @@
#pragma once
#include <fstream>
#include <iostream>
// #include <fstream>
// #include <iostream>
#include <stdio.h>
#include <inttypes.h>
#include <unistd.h>
#include <vector>
#include <string>
// #include <vector>
// Sampling cadence, in milliseconds. NOTE(review): the code that waits between
// samples is not visible from this header - confirm how the two periods are
// applied in amdgpu.cpp.
#define METRICS_UPDATE_PERIOD_MS 500
#define METRICS_POLLING_PERIOD_MS 5
// Number of samples per update window: 500 / 5 = 100 (integer division).
// Also the array length in amdgpu_get_samples_and_copy()'s parameter type.
#define METRICS_SAMPLE_COUNT (METRICS_UPDATE_PERIOD_MS/METRICS_POLLING_PERIOD_MS)
#define NUM_HBM_INSTANCES 4
struct metrics_table_header {
@ -139,6 +143,37 @@ struct gpu_metrics_v2_2 {
uint64_t indep_throttle_status;
};
bool amdgpu_check_metrics(const std::string& path);
/* This structure is used to communicate the latest values of the amdgpu metrics.
* The direction of communication is amdgpu_metrics_polling_thread() -> amdgpu_get_metrics().
* It is also the per-sample record filled in by amdgpu_get_instant_metrics().
*/
struct amdgpu_common_metrics {
/* Load level: averaged across the sampling period */
uint16_t gpu_load_percent;
// uint16_t mem_load_percent;
/* Power usage: averaged across the sampling period */
float average_gfx_power_w;
float average_cpu_power_w;
/* Clocks: latest value of the clock */
uint16_t current_gfxclk_mhz;
uint16_t current_uclk_mhz;
/* Temperatures: maximum values over the sampling period */
uint16_t soc_temp_c;
uint16_t gpu_temp_c;
uint16_t apu_cpu_temp_c;
/* throttling status */
bool is_power_throttled;
bool is_current_throttled;
bool is_temp_throttled;
bool is_other_throttled;
};
// Returns a bool verdict on the metrics file at `path`. The unit tests expect
// false for "" and "gpu_metrics_invalid", and true for "gpu_metrics".
bool amdgpu_verify_metrics(const std::string& path);
// Consumer side of amdgpu_common_metrics (see the comment on that struct).
extern void amdgpu_get_metrics();
// Path of the metrics file; the unit tests assign it before calling
// amdgpu_get_instant_metrics().
extern std::string metrics_path;
// Takes a single reading and stores it in *metrics.
extern void amdgpu_get_instant_metrics(struct amdgpu_common_metrics *metrics);
// Thread body: takes one initial reading, then calls
// amdgpu_get_samples_and_copy() in an endless loop.
extern void amdgpu_metrics_polling_thread();
// Fills metrics_buffer with METRICS_SAMPLE_COUNT readings and publishes the
// aggregated result. gpu_load_needs_dividing is set by the polling thread when
// the first load reading exceeded 100 (load reported as centipercent).
extern void amdgpu_get_samples_and_copy(struct amdgpu_common_metrics metrics_buffer[METRICS_SAMPLE_COUNT], bool gpu_load_needs_dividing);

Binary file not shown.

Binary file not shown.

Binary file not shown.

@ -0,0 +1,84 @@
#include <stdarg.h>
#include <stddef.h>
#include <setjmp.h>
#include <stdint.h>
extern "C" {
#include <cmocka.h>
}
#include "stdio.h"
#include "../src/amdgpu.h"
#include "../src/cpu.h"
#include "tests.h"
// amdgpu_verify_metrics() must reject an empty path and the
// "gpu_metrics_invalid" file, and accept the "gpu_metrics" file.
// The file names are relative to the test's working directory.
static void test_amdgpu_verify_metrics(void **state) {
UNUSED(state);
assert_false(amdgpu_verify_metrics(""));
assert_false(amdgpu_verify_metrics("gpu_metrics_invalid"));
assert_true (amdgpu_verify_metrics("gpu_metrics"));
}
static void test_amdgpu_get_instant_metrics(void **state){
UNUSED(state);
struct amdgpu_common_metrics metrics;
// fail fetch gpu_metrics file
metrics_path = "";
amdgpu_get_instant_metrics(&metrics);
// DGPU
metrics_path = "gpu_metrics";
metrics = {};
amdgpu_get_instant_metrics(&metrics);
assert_int_equal(metrics.gpu_load_percent, 64);
assert_float_equal(metrics.average_gfx_power_w, 33, 0);
assert_int_equal(metrics.current_gfxclk_mhz, 2165);
assert_int_equal(metrics.current_uclk_mhz, 1000);
assert_int_equal(metrics.gpu_temp_c, 36);
assert_false(metrics.is_power_throttled);
assert_false(metrics.is_current_throttled);
assert_false(metrics.is_temp_throttled);
assert_false(metrics.is_other_throttled);
metrics_path = "gpu_metrics_apu";
metrics = {};
amdgpu_get_instant_metrics(&metrics);
assert_int_equal(metrics.gpu_load_percent, 100);
assert_float_equal(metrics.average_gfx_power_w, 6.161, 0);
assert_int_equal(metrics.current_gfxclk_mhz, 1040);
assert_int_equal(metrics.current_uclk_mhz, 687);
assert_int_equal(metrics.gpu_temp_c, 81);
assert_int_equal(metrics.soc_temp_c, 71);
assert_int_equal(metrics.apu_cpu_temp_c, 655);
assert_true(metrics.is_power_throttled);
assert_false(metrics.is_current_throttled);
assert_false(metrics.is_temp_throttled);
assert_false(metrics.is_other_throttled);
}
static void test_amdgpu_get_samples_and_copy(void **state){
UNUSED(state);
struct amdgpu_common_metrics metrics_buffer[100];
bool gpu_load_needs_dividing = false; //some GPUs report load as centipercent
amdgpu_get_samples_and_copy(metrics_buffer, gpu_load_needs_dividing);
gpu_load_needs_dividing = true;
amdgpu_get_samples_and_copy(metrics_buffer, gpu_load_needs_dividing);
}
// Smoke test: amdgpu_get_metrics() only has to return; nothing is asserted.
static void test_amdgpu_get_metrics(void **state){
UNUSED(state);
amdgpu_get_metrics();
}
// Test cases handed to cmocka_run_group_tests() in main(), in listed order.
// test_amdgpu_get_samples_and_copy does not set metrics_path itself, so keep
// it after test_amdgpu_get_instant_metrics, which does.
const struct CMUnitTest amdgpu_tests[] = {
cmocka_unit_test(test_amdgpu_verify_metrics),
cmocka_unit_test(test_amdgpu_get_instant_metrics),
cmocka_unit_test(test_amdgpu_get_samples_and_copy),
cmocka_unit_test(test_amdgpu_get_metrics)
};
// Test-runner entry point: executes every case registered in amdgpu_tests with
// no group-level setup or teardown, and uses cmocka's result as the process
// exit status.
int main(void) {
    const int run_result = cmocka_run_group_tests(amdgpu_tests, NULL, NULL);
    return run_result;
}

@ -0,0 +1,2 @@
#pragma once
// Marks a parameter or variable as deliberately unused by evaluating it and
// discarding the result. The replacement list is fully parenthesised so the
// expansion is a single expression wherever it is used.
#define UNUSED(x) ((void)(x))
Loading…
Cancel
Save