Safe implementations of uart::dec() and divisions

The previous implementation of uart::dec() forced the compiler to emit a software out-of-bounds access check into the code, bloating it a bit. Prevent that by using an iterator instead. The same is true for the ordinary division operator that was used in multiple places in the benchmark code: there, the compiler emitted software checks for division by zero. Prevent this by using `checked_div()`, which lets us implement our own "panic" handling.
6 years ago · c004e78e90
parent 17b582f774
commit c004e78e90
6 changed files with 62 additions and 46 deletions
--- a/0D_cache_performance/README.md
+++ b/0D_cache_performance/README.md
@ -33,8 +33,6 @@ of a `cacheline`, in steps of 8 bytes, aka one processor register at a time. We
 read the value, add 1, and write it back. This whole process is repeated
 `20_000` times.

-### main.rs
-
 The benchmark function is called twice. Once for the cacheable and once for the
 non-cacheable virtual addresses. Remember that both virtual addresses point to
 the _same_ physical DRAM, so the difference in time that we will see will
--- a/0D_cache_performance/kernel8
+++ b/0D_cache_performance/kernel8
--- a/0D_cache_performance/kernel8.img
+++ b/0D_cache_performance/kernel8.img
--- a/0D_cache_performance/src/benchmark.rs
+++ b/0D_cache_performance/src/benchmark.rs
@ -1,8 +1,9 @@
 use core::sync::atomic::{compiler_fence, Ordering};
 use cortex_a::{barrier, regs::*};
+use super::uart;

 /// We assume that addr is cacheline aligned
-pub fn batch_modify(addr: u64) -> u32 {
+fn batch_modify_time(addr: u64) -> Option<u64> {
    const CACHELINE_SIZE_BYTES: usize = 64; // TODO: retrieve this from a system register
    const NUM_CACHELINES_TOUCHED: usize = 5;
    const NUM_BENCH_ITERATIONS: usize = 20_000;
@ -34,6 +35,62 @@ pub fn batch_modify(addr: u64) -> u32 {
    unsafe { barrier::dsb(barrier::SY) };

    let t2 = CNTPCT_EL0.get();
+    let frq = u64::from(CNTFRQ_EL0.get());

-    ((t2 - t1) * 1000 / u64::from(CNTFRQ_EL0.get())) as u32
+    ((t2 - t1) * 1000).checked_div(frq)
+}
+
+pub fn run(uart: &uart::Uart) {
+    const SIZE_2MIB: u64 = 2 * 1024 * 1024;
+    const ERROR_STRING: &str = "Something went wrong!";
+
+    // Start of the __SECOND__ virtual 2 MiB block (counting starts at zero).
+    // NON-cacheable DRAM memory.
+    let non_cacheable_addr: u64 = SIZE_2MIB;
+
+    // Start of the __THIRD__ virtual 2 MiB block.
+    // Cacheable DRAM memory
+    let cacheable_addr: u64 = 2 * SIZE_2MIB;
+
+    uart.puts("Benchmarking non-cacheable DRAM modifications at virtual 0x");
+    uart.hex(non_cacheable_addr as u32);
+    uart.puts(", physical 0x");
+    uart.hex(2 * SIZE_2MIB as u32);
+    uart.puts(":\n");
+
+    let result_nc = match batch_modify_time(non_cacheable_addr) {
+        Some(t) => {
+            uart.dec(t as u32);
+            uart.puts(" miliseconds.\n\n");
+            t
+        },
+        None => {
+            uart.puts(ERROR_STRING);
+            return;
+        }
+    };
+
+    uart.puts("Benchmarking cacheable DRAM modifications at virtual 0x");
+    uart.hex(cacheable_addr as u32);
+    uart.puts(", physical 0x");
+    uart.hex(2 * SIZE_2MIB as u32);
+    uart.puts(":\n");
+
+    let result_c = match batch_modify_time(cacheable_addr) {
+        Some(t) => {
+            uart.dec(t as u32);
+            uart.puts(" miliseconds.\n\n");
+            t
+        },
+        None => {
+            uart.puts(ERROR_STRING);
+            return;
+        }
+    };
+
+    if let Some(t) = (result_nc - result_c).checked_div(result_c) {
+        uart.puts("With caching, the function is ");
+        uart.dec((t * 100) as u32);
+        uart.puts("% faster!\n");
+    }
 }
--- a/0D_cache_performance/src/main.rs
+++ b/0D_cache_performance/src/main.rs
@ -43,43 +43,6 @@ mod mmu;
 mod uart;
 mod benchmark;

-fn do_benchmarks(uart: &uart::Uart) {
-    const SIZE_2MIB: u64 = 2 * 1024 * 1024;
-
-    // Start of the __SECOND__ virtual 2 MiB block (counting starts at zero).
-    // NON-cacheable DRAM memory.
-    let non_cacheable_addr: u64 = SIZE_2MIB;
-
-    // Start of the __THIRD__ virtual 2 MiB block.
-    // Cacheable DRAM memory
-    let cacheable_addr: u64 = 2 * SIZE_2MIB;
-
-    uart.puts("Benchmarking non-cacheable DRAM modifications at virtual 0x");
-    uart.hex(non_cacheable_addr as u32);
-    uart.puts(", physical 0x");
-    uart.hex(2 * SIZE_2MIB as u32);
-    uart.puts(":\n");
-
-    let result_nc = benchmark::batch_modify(non_cacheable_addr);
-    uart.dec(result_nc);
-    uart.puts(" miliseconds.\n\n");
-
-    uart.puts("Benchmarking cacheable DRAM modifications at virtual 0x");
-    uart.hex(cacheable_addr as u32);
-    uart.puts(", physical 0x");
-    uart.hex(2 * SIZE_2MIB as u32);
-    uart.puts(":\n");
-    let result_c = benchmark::batch_modify(cacheable_addr);
-    uart.dec(result_c);
-    uart.puts(" miliseconds.\n\n");
-
-    let percent_diff = (result_nc - result_c) * 100 / result_c;
-
-    uart.puts("With caching, the function is ");
-    uart.dec(percent_diff);
-    uart.puts("% faster!\n");
-}
-
 entry!(kernel_entry);

 fn kernel_entry() -> ! {
@ -102,7 +65,7 @@ fn kernel_entry() -> ! {

    uart.puts("MMU is live \\o/\n\n");

-    do_benchmarks(&uart);
+    benchmark::run(&uart);

    // echo everything back
    loop {
--- a/0D_cache_performance/src/uart.rs
+++ b/0D_cache_performance/src/uart.rs
@ -290,12 +290,10 @@ impl Uart {
    pub fn dec(&self, d: u32) {
        let mut digits: [char; 10] = ['\0'; 10];
        let mut d = d;
-        let mut i: usize = 0;

-        loop {
-            digits[i] = ((d % 10) + 0x30) as u8 as char;
+        for i in digits.iter_mut() {
+            *i = ((d % 10) + 0x30) as u8 as char;

-            i += 1;
            d /= 10;

            if d == 0 {