diff --git a/0C_virtual_memory/kernel8 b/0C_virtual_memory/kernel8
index 5fab5fd3..727ac409 100755
Binary files a/0C_virtual_memory/kernel8 and b/0C_virtual_memory/kernel8 differ
diff --git a/0C_virtual_memory/kernel8.img b/0C_virtual_memory/kernel8.img
index d82eb38d..cf0dcb19 100755
Binary files a/0C_virtual_memory/kernel8.img and b/0C_virtual_memory/kernel8.img differ
diff --git a/0C_virtual_memory/src/mmu.rs b/0C_virtual_memory/src/mmu.rs
index c2bf1500..13048dad 100644
--- a/0C_virtual_memory/src/mmu.rs
+++ b/0C_virtual_memory/src/mmu.rs
@@ -219,8 +219,8 @@ pub unsafe fn init() {
     // First, force all previous changes to be seen before the MMU is enabled.
     barrier::isb(barrier::SY);
 
-    // Enable the MMU and turn on caching
-    SCTLR_EL1.modify(SCTLR_EL1::M::Enable + SCTLR_EL1::C::Cacheable);
+    // Enable the MMU and turn on data and instruction caching.
+    SCTLR_EL1.modify(SCTLR_EL1::M::Enable + SCTLR_EL1::C::Cacheable + SCTLR_EL1::I::Cacheable);
 
     // Force MMU init to complete before next instruction
     barrier::isb(barrier::SY);
diff --git a/0D_cache_performance/README.md b/0D_cache_performance/README.md
index 1f1dee0a..bfaa50b7 100644
--- a/0D_cache_performance/README.md
+++ b/0D_cache_performance/README.md
@@ -13,7 +13,7 @@ performance.
 ## Benchmark
 
 Let's write a tiny, arbitrary micro-benchmark to showcase the performance of
-operating on the same DRAM with caching enabled and disabled.
+operating on data in the same DRAM with caching enabled and disabled.
 
 ### mmu.rs
 
@@ -31,7 +31,7 @@ block). This time, the block is configured as cacheable.
 We write a little function that iteratively reads memory of five times the size
 of a `cacheline`, in steps of 8 bytes, aka one processor register at a time. We
 read the value, add 1, and write it back. This whole process is repeated
-`100_000` times.
+`20_000` times.
 
 ### main.rs
 
@@ -46,12 +46,12 @@ On my Raspberry, I get the following results:
 
 ```text
 Benchmarking non-cacheable DRAM modifications at virtual 0x00200000, physical 0x00400000:
-664 miliseconds.
+1040 miliseconds.
 
 Benchmarking cacheable DRAM modifications at virtual 0x00400000, physical 0x00400000:
-148 miliseconds.
+53 miliseconds.
 
-With caching, the function is 348% faster!
+With caching, the function is 1862% faster!
 ```
 
 Impressive, isn't it?
diff --git a/0D_cache_performance/kernel8 b/0D_cache_performance/kernel8
index 7c8f3aed..8b80d88b 100755
Binary files a/0D_cache_performance/kernel8 and b/0D_cache_performance/kernel8 differ
diff --git a/0D_cache_performance/kernel8.img b/0D_cache_performance/kernel8.img
index ec0a16b0..d68c9481 100755
Binary files a/0D_cache_performance/kernel8.img and b/0D_cache_performance/kernel8.img differ
diff --git a/0D_cache_performance/src/benchmark.rs b/0D_cache_performance/src/benchmark.rs
index e4f7ce1d..ad01dc91 100644
--- a/0D_cache_performance/src/benchmark.rs
+++ b/0D_cache_performance/src/benchmark.rs
@@ -3,26 +3,25 @@ use cortex_a::{barrier, regs::*};
 
 /// We assume that addr is cacheline aligned
 pub fn batch_modify(addr: u64) -> u32 {
-    const CACHELINE_SIZE_BYTES: u64 = 64; // TODO: retrieve this from a system register
-    const NUM_CACHELINES_TOUCHED: u64 = 5;
-    const BYTES_PER_U64_REG: usize = 8;
-    const NUM_BENCH_ITERATIONS: u64 = 100_000;
+    const CACHELINE_SIZE_BYTES: usize = 64; // TODO: retrieve this from a system register
+    const NUM_CACHELINES_TOUCHED: usize = 5;
+    const NUM_BENCH_ITERATIONS: usize = 20_000;
 
-    const NUM_BYTES_TOUCHED: u64 = CACHELINE_SIZE_BYTES * NUM_CACHELINES_TOUCHED;
+    const NUM_BYTES_TOUCHED: usize = CACHELINE_SIZE_BYTES * NUM_CACHELINES_TOUCHED;
 
+    // `from_raw_parts_mut` takes its length in u64 elements, not bytes (8 bytes per u64).
+    let mem = unsafe { core::slice::from_raw_parts_mut(addr as *mut u64, NUM_BYTES_TOUCHED / 8) };
+    // Benchmark starts here
     let t1 = CNTPCT_EL0.get();
 
     compiler_fence(Ordering::SeqCst);
 
-    let mut data_ptr: *mut u64;
     let mut temp: u64;
     for _ in 0..NUM_BENCH_ITERATIONS {
-        for i in (addr..(addr + NUM_BYTES_TOUCHED)).step_by(BYTES_PER_U64_REG) {
-            data_ptr = i as *mut u64;
-
+        for qword in mem.iter_mut() {
             unsafe {
-                temp =  core::ptr::read_volatile(data_ptr);
-                core::ptr::write_volatile(data_ptr, temp + 1);
+                temp = core::ptr::read_volatile(qword);
+                core::ptr::write_volatile(qword, temp + 1);
             }
         }
     }
diff --git a/0D_cache_performance/src/mmu.rs b/0D_cache_performance/src/mmu.rs
index a379c769..df48d434 100644
--- a/0D_cache_performance/src/mmu.rs
+++ b/0D_cache_performance/src/mmu.rs
@@ -211,8 +211,8 @@ pub unsafe fn init() {
     // First, force all previous changes to be seen before the MMU is enabled.
     barrier::isb(barrier::SY);
 
-    // Enable the MMU and turn on caching
-    SCTLR_EL1.modify(SCTLR_EL1::M::Enable + SCTLR_EL1::C::Cacheable);
+    // Enable the MMU and turn on data and instruction caching.
+    SCTLR_EL1.modify(SCTLR_EL1::M::Enable + SCTLR_EL1::C::Cacheable + SCTLR_EL1::I::Cacheable);
 
     // Force MMU init to complete before next instruction
     barrier::isb(barrier::SY);