Safe implementations of uart::dec() and divisions

The previous implementation of uart::dec() forced the compiler to emit a
software out-of-bounds access check into the code, bloating it a bit. Prevent
that by using an iterator instead.

The same is true for the ordinary division operator that was used in multiple
places in the benchmark code. Here, the compiler emitted software checks for
division by zero. Prevent this by using `checked_div()`, which lets us implement
our own "panic" handling.
pull/9/head
Andre Richter 6 years ago
parent 17b582f774
commit c004e78e90
No known key found for this signature in database
GPG Key ID: 2116C1AB102F615E

@ -33,8 +33,6 @@ of a `cacheline`, in steps of 8 bytes, aka one processor register at a time. We
read the value, add 1, and write it back. This whole process is repeated
`20_000` times.
### main.rs
The benchmark function is called twice. Once for the cacheable and once for the
non-cacheable virtual addresses. Remember that both virtual addresses point to
the _same_ physical DRAM, so the difference in time that we will see will

Binary file not shown.

Binary file not shown.

@ -1,8 +1,9 @@
use core::sync::atomic::{compiler_fence, Ordering};
use cortex_a::{barrier, regs::*};
use super::uart;
/// We assume that addr is cacheline aligned
pub fn batch_modify(addr: u64) -> u32 {
fn batch_modify_time(addr: u64) -> Option<u64> {
const CACHELINE_SIZE_BYTES: usize = 64; // TODO: retrieve this from a system register
const NUM_CACHELINES_TOUCHED: usize = 5;
const NUM_BENCH_ITERATIONS: usize = 20_000;
@ -34,6 +35,62 @@ pub fn batch_modify(addr: u64) -> u32 {
unsafe { barrier::dsb(barrier::SY) };
let t2 = CNTPCT_EL0.get();
let frq = u64::from(CNTFRQ_EL0.get());
((t2 - t1) * 1000 / u64::from(CNTFRQ_EL0.get())) as u32
((t2 - t1) * 1000).checked_div(frq)
}
/// Runs the DRAM modification benchmark on the non-cacheable and then on the
/// cacheable virtual mapping, printing each timing and the relative speedup
/// to `uart`.
///
/// If `batch_modify_time()` returns `None` for either run, an error string is
/// printed and the function returns early. The final speedup line is omitted
/// when it cannot be computed without overflow or division by zero (for
/// example when the cacheable run took 0 ms or was not faster).
pub fn run(uart: &uart::Uart) {
    const SIZE_2MIB: u64 = 2 * 1024 * 1024;
    const ERROR_STRING: &str = "Something went wrong!";

    // Start of the __SECOND__ virtual 2 MiB block (counting starts at zero).
    // NON-cacheable DRAM memory.
    let non_cacheable_addr: u64 = SIZE_2MIB;

    // Start of the __THIRD__ virtual 2 MiB block.
    // Cacheable DRAM memory
    let cacheable_addr: u64 = 2 * SIZE_2MIB;

    uart.puts("Benchmarking non-cacheable DRAM modifications at virtual 0x");
    uart.hex(non_cacheable_addr as u32);
    uart.puts(", physical 0x");
    // Both runs report the same physical address: 2 * SIZE_2MIB.
    uart.hex(2 * SIZE_2MIB as u32);
    uart.puts(":\n");

    let result_nc = match batch_modify_time(non_cacheable_addr) {
        Some(t) => {
            uart.dec(t as u32);
            uart.puts(" miliseconds.\n\n");
            t
        }
        None => {
            uart.puts(ERROR_STRING);
            return;
        }
    };

    uart.puts("Benchmarking cacheable DRAM modifications at virtual 0x");
    uart.hex(cacheable_addr as u32);
    uart.puts(", physical 0x");
    uart.hex(2 * SIZE_2MIB as u32);
    uart.puts(":\n");

    let result_c = match batch_modify_time(cacheable_addr) {
        Some(t) => {
            uart.dec(t as u32);
            uart.puts(" miliseconds.\n\n");
            t
        }
        None => {
            uart.puts(ERROR_STRING);
            return;
        }
    };

    // Scale by 100 *before* dividing: integer division truncates, so dividing
    // first would only ever yield multiples of 100 percent. Every step uses a
    // checked operation so that no implicit overflow / division-by-zero panic
    // path is needed.
    let percent_faster = result_nc
        .checked_sub(result_c)
        .and_then(|diff| diff.checked_mul(100))
        .and_then(|scaled| scaled.checked_div(result_c));

    if let Some(percent) = percent_faster {
        uart.puts("With caching, the function is ");
        uart.dec(percent as u32);
        uart.puts("% faster!\n");
    }
}

@ -43,43 +43,6 @@ mod mmu;
mod uart;
mod benchmark;
/// Times `benchmark::batch_modify()` on the non-cacheable and on the cacheable
/// virtual mapping, prints both results in milliseconds, and finally prints
/// how many percent faster the cacheable run was.
fn do_benchmarks(uart: &uart::Uart) {
    const BLOCK_SIZE: u64 = 2 * 1024 * 1024;

    // Prints the banner and addresses for one region, runs the benchmark on
    // it, prints the elapsed time and hands the measurement back.
    let measure = |banner: &str, virt_addr: u64| -> u32 {
        uart.puts(banner);
        uart.hex(virt_addr as u32);
        uart.puts(", physical 0x");
        // The same physical address is reported for both regions.
        uart.hex(2 * BLOCK_SIZE as u32);
        uart.puts(":\n");

        let elapsed_ms = benchmark::batch_modify(virt_addr);
        uart.dec(elapsed_ms);
        uart.puts(" miliseconds.\n\n");

        elapsed_ms
    };

    // Start of the __SECOND__ virtual 2 MiB block (counting starts at zero).
    // NON-cacheable DRAM memory.
    let uncached_ms = measure(
        "Benchmarking non-cacheable DRAM modifications at virtual 0x",
        BLOCK_SIZE,
    );

    // Start of the __THIRD__ virtual 2 MiB block.
    // Cacheable DRAM memory
    let cached_ms = measure(
        "Benchmarking cacheable DRAM modifications at virtual 0x",
        2 * BLOCK_SIZE,
    );

    // Computed before anything is printed, as in the original ordering.
    let speedup_percent = (uncached_ms - cached_ms) * 100 / cached_ms;

    uart.puts("With caching, the function is ");
    uart.dec(speedup_percent);
    uart.puts("% faster!\n");
}
entry!(kernel_entry);
fn kernel_entry() -> ! {
@ -102,7 +65,7 @@ fn kernel_entry() -> ! {
uart.puts("MMU is live \\o/\n\n");
do_benchmarks(&uart);
benchmark::run(&uart);
// echo everything back
loop {

@ -290,12 +290,10 @@ impl Uart {
pub fn dec(&self, d: u32) {
let mut digits: [char; 10] = ['\0'; 10];
let mut d = d;
let mut i: usize = 0;
loop {
digits[i] = ((d % 10) + 0x30) as u8 as char;
for i in digits.iter_mut() {
*i = ((d % 10) + 0x30) as u8 as char;
i += 1;
d /= 10;
if d == 0 {

Loading…
Cancel
Save