diff --git a/0C_virtual_memory/kernel8 b/0C_virtual_memory/kernel8 index 5fab5fd3..727ac409 100755 Binary files a/0C_virtual_memory/kernel8 and b/0C_virtual_memory/kernel8 differ diff --git a/0C_virtual_memory/kernel8.img b/0C_virtual_memory/kernel8.img index d82eb38d..cf0dcb19 100755 Binary files a/0C_virtual_memory/kernel8.img and b/0C_virtual_memory/kernel8.img differ diff --git a/0C_virtual_memory/src/mmu.rs b/0C_virtual_memory/src/mmu.rs index c2bf1500..13048dad 100644 --- a/0C_virtual_memory/src/mmu.rs +++ b/0C_virtual_memory/src/mmu.rs @@ -219,8 +219,8 @@ pub unsafe fn init() { // First, force all previous changes to be seen before the MMU is enabled. barrier::isb(barrier::SY); - // Enable the MMU and turn on caching - SCTLR_EL1.modify(SCTLR_EL1::M::Enable + SCTLR_EL1::C::Cacheable); + // Enable the MMU and turn on data and instruction caching. + SCTLR_EL1.modify(SCTLR_EL1::M::Enable + SCTLR_EL1::C::Cacheable + SCTLR_EL1::I::Cacheable); // Force MMU init to complete before next instruction barrier::isb(barrier::SY); diff --git a/0D_cache_performance/README.md b/0D_cache_performance/README.md index 1f1dee0a..bfaa50b7 100644 --- a/0D_cache_performance/README.md +++ b/0D_cache_performance/README.md @@ -13,7 +13,7 @@ performance. ## Benchmark Let's write a tiny, arbitrary micro-benchmark to showcase the performance of -operating on the same DRAM with caching enabled and disabled. +operating with data on the same DRAM with caching enabled and disabled. ### mmu.rs @@ -31,7 +31,7 @@ block). This time, the block is configured as cacheable. We write a little function that iteratively reads memory of five times the size of a `cacheline`, in steps of 8 bytes, aka one processor register at a time. We read the value, add 1, and write it back. This whole process is repeated -`100_000` times. +`20_000` times. ### main.rs @@ -46,12 +46,12 @@ On my Raspberry, I get the following results: ```text Benchmarking non-cacheable DRAM modifications at virtual 0x00200000, physical 0x00400000: -664 miliseconds. +1040 miliseconds. Benchmarking cacheable DRAM modifications at virtual 0x00400000, physical 0x00400000: -148 miliseconds. +53 miliseconds. -With caching, the function is 348% faster! +With caching, the function is 1862% faster! ``` Impressive, isn't it? diff --git a/0D_cache_performance/kernel8 b/0D_cache_performance/kernel8 index 7c8f3aed..8b80d88b 100755 Binary files a/0D_cache_performance/kernel8 and b/0D_cache_performance/kernel8 differ diff --git a/0D_cache_performance/kernel8.img b/0D_cache_performance/kernel8.img index ec0a16b0..d68c9481 100755 Binary files a/0D_cache_performance/kernel8.img and b/0D_cache_performance/kernel8.img differ diff --git a/0D_cache_performance/src/benchmark.rs b/0D_cache_performance/src/benchmark.rs index e4f7ce1d..ad01dc91 100644 --- a/0D_cache_performance/src/benchmark.rs +++ b/0D_cache_performance/src/benchmark.rs @@ -3,26 +3,25 @@ use cortex_a::{barrier, regs::*}; /// We assume that addr is cacheline aligned pub fn batch_modify(addr: u64) -> u32 { - const CACHELINE_SIZE_BYTES: u64 = 64; // TODO: retrieve this from a system register - const NUM_CACHELINES_TOUCHED: u64 = 5; - const BYTES_PER_U64_REG: usize = 8; - const NUM_BENCH_ITERATIONS: u64 = 100_000; + const CACHELINE_SIZE_BYTES: usize = 64; // TODO: retrieve this from a system register + const NUM_CACHELINES_TOUCHED: usize = 5; + const NUM_BENCH_ITERATIONS: usize = 20_000; - const NUM_BYTES_TOUCHED: u64 = CACHELINE_SIZE_BYTES * NUM_CACHELINES_TOUCHED; + const NUM_BYTES_TOUCHED: usize = CACHELINE_SIZE_BYTES * NUM_CACHELINES_TOUCHED; + let mem = unsafe { core::slice::from_raw_parts_mut(addr as *mut u64, NUM_BYTES_TOUCHED) }; + + // Benchmark starts here let t1 = CNTPCT_EL0.get(); compiler_fence(Ordering::SeqCst); - let mut data_ptr: *mut u64; let mut temp: u64; for _ in 0..NUM_BENCH_ITERATIONS { - for i in (addr..(addr + NUM_BYTES_TOUCHED)).step_by(BYTES_PER_U64_REG) { - data_ptr = i as *mut u64; - + for qword in mem.iter_mut() { unsafe { - temp = core::ptr::read_volatile(data_ptr); - core::ptr::write_volatile(data_ptr, temp + 1); + temp = core::ptr::read_volatile(qword); + core::ptr::write_volatile(qword, temp + 1); } } } diff --git a/0D_cache_performance/src/mmu.rs b/0D_cache_performance/src/mmu.rs index a379c769..df48d434 100644 --- a/0D_cache_performance/src/mmu.rs +++ b/0D_cache_performance/src/mmu.rs @@ -211,8 +211,8 @@ pub unsafe fn init() { // First, force all previous changes to be seen before the MMU is enabled. barrier::isb(barrier::SY); - // Enable the MMU and turn on caching - SCTLR_EL1.modify(SCTLR_EL1::M::Enable + SCTLR_EL1::C::Cacheable); + // Enable the MMU and turn on data and instruction caching. + SCTLR_EL1.modify(SCTLR_EL1::M::Enable + SCTLR_EL1::C::Cacheable + SCTLR_EL1::I::Cacheable); // Force MMU init to complete before next instruction barrier::isb(barrier::SY);