diff --git a/0C_virtual_memory/kernel8 b/0C_virtual_memory/kernel8 new file mode 100755 index 00000000..5fab5fd3 Binary files /dev/null and b/0C_virtual_memory/kernel8 differ diff --git a/0C_virtual_memory/kernel8.img b/0C_virtual_memory/kernel8.img index d8e7e007..d82eb38d 100755 Binary files a/0C_virtual_memory/kernel8.img and b/0C_virtual_memory/kernel8.img differ diff --git a/0C_virtual_memory/src/mmu.rs b/0C_virtual_memory/src/mmu.rs index 375e685d..c2bf1500 100644 --- a/0C_virtual_memory/src/mmu.rs +++ b/0C_virtual_memory/src/mmu.rs @@ -219,8 +219,8 @@ pub unsafe fn init() { // First, force all previous changes to be seen before the MMU is enabled. barrier::isb(barrier::SY); - // Enable the MMU - SCTLR_EL1.modify(SCTLR_EL1::M::Enable); + // Enable the MMU and turn on caching + SCTLR_EL1.modify(SCTLR_EL1::M::Enable + SCTLR_EL1::C::Cacheable); // Force MMU init to complete before next instruction barrier::isb(barrier::SY); diff --git a/0D_cache_performance/.cargo/config b/0D_cache_performance/.cargo/config new file mode 100644 index 00000000..d7fb2ba3 --- /dev/null +++ b/0D_cache_performance/.cargo/config @@ -0,0 +1,6 @@ +[target.aarch64-unknown-none] +rustflags = [ + "-C", "link-arg=-Tlink.ld", + "-C", "target-feature=-fp-armv8", + "-C", "target-cpu=cortex-a53", +] diff --git a/0D_cache_performance/Cargo.lock b/0D_cache_performance/Cargo.lock new file mode 100644 index 00000000..c30853d5 --- /dev/null +++ b/0D_cache_performance/Cargo.lock @@ -0,0 +1,55 @@ +[[package]] +name = "cortex-a" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "register 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "kernel8" +version = "0.1.0" +dependencies = [ + "cortex-a 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "raspi3_boot 0.1.0", + "register 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "panic-abort" +version = 
"0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "r0" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "raspi3_boot" +version = "0.1.0" +dependencies = [ + "cortex-a 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "panic-abort 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "r0 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "register" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "tock-registers 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "tock-registers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[metadata] +"checksum cortex-a 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fe913628f34718bc9e7d0d07e13ca1374638f64f0edc6eb063ec8abe581d395d" +"checksum panic-abort 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6bc796c620f27056d4ffe7c558533fd67ae5af0fd8e919fbe38de803368af73e" +"checksum r0 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e2a38df5b15c8d5c7e8654189744d8e396bddc18ad48041a500ce52d6948941f" +"checksum register 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "157a11ac0b1882ff4a527a92f911dd288df17367faaaa0c36f188cd61ec36fc1" +"checksum tock-registers 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3a385d94f3f62e60445a0adb9ff8d9621faa272234530d4c0f848ec98f88e316" diff --git a/0D_cache_performance/Cargo.toml b/0D_cache_performance/Cargo.toml new file mode 100644 index 00000000..b7f40114 --- /dev/null +++ b/0D_cache_performance/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "kernel8" +version = "0.1.0" +authors = ["Andre Richter "] + +[dependencies] +raspi3_boot = { path = "raspi3_boot" } +cortex-a = "2.2.1" +register = "0.2.0" + 
+[package.metadata.cargo-xbuild] +sysroot_path = "../xbuild_sysroot" diff --git a/0D_cache_performance/Makefile b/0D_cache_performance/Makefile new file mode 100644 index 00000000..26e15973 --- /dev/null +++ b/0D_cache_performance/Makefile @@ -0,0 +1,66 @@ +# +# MIT License +# +# Copyright (c) 2018 Andre Richter +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# + +TARGET = aarch64-unknown-none + +OBJCOPY = cargo objcopy -- +OBJCOPY_PARAMS = --strip-all -O binary + +UTILS_CONTAINER = andrerichter/raspi3-utils +DOCKER_CMD = docker run -it --rm -v $(shell pwd):/work -w /work +DOCKER_TTY = --privileged -v /dev:/dev +QEMU_CMD = qemu-system-aarch64 -M raspi3 -kernel kernel8.img +RASPBOOT_CMD = raspbootcom /dev/ttyUSB0 kernel8.img + +all: clean kernel8.img + +target/$(TARGET)/debug/kernel8: src/main.rs + cargo xbuild --target=$(TARGET) + cp $@ . + +target/$(TARGET)/release/kernel8: src/main.rs + cargo xbuild --target=$(TARGET) --release + cp $@ . 
+ +ifeq ($(DEBUG),1) +kernel8: target/$(TARGET)/debug/kernel8 +else +kernel8: target/$(TARGET)/release/kernel8 +endif + +kernel8.img: kernel8 + $(OBJCOPY) $(OBJCOPY_PARAMS) $< kernel8.img + +qemu: all + $(DOCKER_CMD) $(UTILS_CONTAINER) $(QEMU_CMD) -serial stdio + +raspboot: all + $(DOCKER_CMD) $(DOCKER_TTY) $(UTILS_CONTAINER) $(RASPBOOT_CMD) + +clippy: + cargo xclippy --target=$(TARGET) + +clean: + cargo clean + rm -f kernel8 diff --git a/0D_cache_performance/README.md b/0D_cache_performance/README.md new file mode 100644 index 00000000..1f1dee0a --- /dev/null +++ b/0D_cache_performance/README.md @@ -0,0 +1,57 @@ +# Tutorial 0D - Cache Performance + +Now that we finally have virtual memory capabilities available, we also have +fine grained control over `cacheability`. You've caught a glimpse already in the +last tutorial, where we used page table entries to reference the `MAIR_EL1` +register to indicate the cacheability of a page or block. + +Unfortunately, for the user it is often hard to grasp the advantage of caching +in early stages of OS or bare-metal software development. This tutorial is a +short interlude that tries to give you a feeling of what caching can do for +performance. + +## Benchmark + +Let's write a tiny, arbitrary micro-benchmark to showcase the performance of +operating on the same DRAM with caching enabled and disabled. + +### mmu.rs + +Therefore, we will map the same physical memory via two different virtual +addresses. We set up our pagetables such that the virtual address `0x200000` +points to the physical DRAM at `0x400000`, and we configure it as +`non-cacheable` in the page tables. + +We are still using a `2 MiB` granule, and set up the next block, which starts at +virtual `0x400000`, to point at physical `0x400000` (this is an identity mapped +block). This time, the block is configured as cacheable. 
+ +### benchmark.rs + +We write a little function that iteratively reads memory of five times the size +of a `cacheline`, in steps of 8 bytes, aka one processor register at a time. We +read the value, add 1, and write it back. This whole process is repeated +`100_000` times. + +### main.rs + +The benchmark function is called twice. Once for the cacheable and once for the +non-cacheable virtual addresses. Remember that both virtual addresses point to +the _same_ physical DRAM, so the difference in time that we will see will +showcase how much faster it is to operate on DRAM with caching enabled. + +## Results + +On my Raspberry, I get the following results: + +```text +Benchmarking non-cacheable DRAM modifications at virtual 0x00200000, physical 0x00400000: +664 miliseconds. + +Benchmarking cacheable DRAM modifications at virtual 0x00400000, physical 0x00400000: +148 miliseconds. + +With caching, the function is 348% faster! +``` + +Impressive, isn't it? diff --git a/0D_cache_performance/kernel8 b/0D_cache_performance/kernel8 new file mode 100755 index 00000000..7c8f3aed Binary files /dev/null and b/0D_cache_performance/kernel8 differ diff --git a/0D_cache_performance/kernel8.img b/0D_cache_performance/kernel8.img new file mode 100755 index 00000000..ec0a16b0 Binary files /dev/null and b/0D_cache_performance/kernel8.img differ diff --git a/0D_cache_performance/link.ld b/0D_cache_performance/link.ld new file mode 100644 index 00000000..896153e9 --- /dev/null +++ b/0D_cache_performance/link.ld @@ -0,0 +1,57 @@ +/* + * MIT License + * + * Copyright (c) 2018 Andre Richter + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * 
furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +ENTRY(_boot_cores); + +SECTIONS +{ + . = 0x80000; /* This is already 4KiB aligned */ + __ro_start = .; + .text : + { + KEEP(*(.text.boot)) *(.text .text.*) + } + + .rodata : + { + *(.rodata .rodata.*) + } + . = ALIGN(4096); /* Fill up to 4KiB */ + __ro_end = .; + + .data : + { + *(.data .data.*) + } + + .bss ALIGN(8): + { + __bss_start = .; + *(.bss .bss.*) + *(COMMON) + __bss_end = .; + } + + /DISCARD/ : { *(.comment) *(.gnu*) *(.note*) *(.eh_frame*) } +} diff --git a/0D_cache_performance/raspi3_boot/Cargo.toml b/0D_cache_performance/raspi3_boot/Cargo.toml new file mode 100644 index 00000000..73cd453b --- /dev/null +++ b/0D_cache_performance/raspi3_boot/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "raspi3_boot" +version = "0.1.0" +authors = ["Andre Richter "] + +[dependencies] +cortex-a = "2.2.1" +panic-abort = "0.2.0" +r0 = "0.2.2" diff --git a/0D_cache_performance/raspi3_boot/src/lib.rs b/0D_cache_performance/raspi3_boot/src/lib.rs new file mode 100644 index 00000000..8a081b53 --- /dev/null +++ b/0D_cache_performance/raspi3_boot/src/lib.rs @@ -0,0 +1,131 @@ +/* + * MIT License + * + * Copyright (c) 2018 Jorge Aparicio + * Copyright (c) 2018 Andre Richter + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated 
documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#![deny(missing_docs)] +#![deny(warnings)] +#![no_std] + +//! Low-level boot of the Raspberry's processor + +extern crate cortex_a; +extern crate panic_abort; +extern crate r0; + +#[macro_export] +macro_rules! entry { + ($path:path) => { + #[export_name = "main"] + pub unsafe fn __main() -> ! { + // type check the given path + let f: fn() -> ! = $path; + + f() + } + }; +} + +/// Reset function. +/// +/// Initializes the bss section before calling into the user's `main()`. +unsafe fn reset() -> ! { + extern "C" { + // Boundaries of the .bss section, provided by the linker script + static mut __bss_start: u64; + static mut __bss_end: u64; + } + + // Zeroes the .bss section + r0::zero_bss(&mut __bss_start, &mut __bss_end); + + extern "Rust" { + fn main() -> !; + } + + main() +} + +/// Prepare and execute transition from EL2 to EL1. +#[inline] +fn setup_and_enter_el1_from_el2() -> ! 
{ + use cortex_a::{asm, regs::*}; + + // Enable timer counter registers for EL1 + CNTHCTL_EL2.write(CNTHCTL_EL2::EL1PCEN::SET + CNTHCTL_EL2::EL1PCTEN::SET); + + // No offset for reading the counters + CNTVOFF_EL2.set(0); + + // Set EL1 execution state to AArch64 + // TODO: Explain the SWIO bit + HCR_EL2.write(HCR_EL2::RW::EL1IsAarch64 + HCR_EL2::SWIO::SET); + + // Set up a simulated exception return. + // + // First, fake a saved program status, where all interrupts were + // masked and SP_EL0 was used as a stack pointer. + SPSR_EL2.write( + SPSR_EL2::D::Masked + + SPSR_EL2::A::Masked + + SPSR_EL2::I::Masked + + SPSR_EL2::F::Masked + + SPSR_EL2::M::EL1t, + ); + + // Second, let the link register point to reset(). + ELR_EL2.set(reset as *const () as u64); + + // Set up SP_EL0 (stack pointer), which will be used by EL1 once + // we "return" to it. + SP_EL0.set(0x80_000); + + // Use `eret` to "return" to EL1. This will result in execution of + // `reset()` in EL1. + asm::eret() +} + +/// Entrypoint of the processor. +/// +/// Parks all cores except core0 and checks if we started in EL2. If +/// so, proceeds with setting up EL1. +#[link_section = ".text.boot"] +#[no_mangle] +pub unsafe extern "C" fn _boot_cores() -> ! 
{ + use cortex_a::{asm, regs::*}; + + const CORE_0: u64 = 0; + const CORE_MASK: u64 = 0x3; + const EL2: u32 = CurrentEL::EL::EL2.value; + + if let CORE_0 = MPIDR_EL1.get() & CORE_MASK { + if let EL2 = CurrentEL.get() { + setup_and_enter_el1_from_el2() + } + } + + // if not core0 or EL != 2, infinitely wait for events + loop { + asm::wfe(); + } +} diff --git a/0D_cache_performance/src/benchmark.rs b/0D_cache_performance/src/benchmark.rs new file mode 100644 index 00000000..e4f7ce1d --- /dev/null +++ b/0D_cache_performance/src/benchmark.rs @@ -0,0 +1,40 @@ +use core::sync::atomic::{compiler_fence, Ordering}; +use cortex_a::{barrier, regs::*}; + +/// We assume that addr is cacheline aligned +pub fn batch_modify(addr: u64) -> u32 { + const CACHELINE_SIZE_BYTES: u64 = 64; // TODO: retrieve this from a system register + const NUM_CACHELINES_TOUCHED: u64 = 5; + const BYTES_PER_U64_REG: usize = 8; + const NUM_BENCH_ITERATIONS: u64 = 100_000; + + const NUM_BYTES_TOUCHED: u64 = CACHELINE_SIZE_BYTES * NUM_CACHELINES_TOUCHED; + + let t1 = CNTPCT_EL0.get(); + + compiler_fence(Ordering::SeqCst); + + let mut data_ptr: *mut u64; + let mut temp: u64; + for _ in 0..NUM_BENCH_ITERATIONS { + for i in (addr..(addr + NUM_BYTES_TOUCHED)).step_by(BYTES_PER_U64_REG) { + data_ptr = i as *mut u64; + // This memory is not initialized by the benchmark, so wrap on overflow. + unsafe { + temp = core::ptr::read_volatile(data_ptr); + core::ptr::write_volatile(data_ptr, temp.wrapping_add(1)); + } + } + } + + // Insert a barrier to ensure that the last memory operation has finished + // before we retrieve the elapsed time with the subsequent counter read. Not + // needed at all given the sample size, but let's be a bit pedantic here for + // education purposes. For measuring single-instructions, this would be + // needed.
+ unsafe { barrier::dsb(barrier::SY) }; + + let t2 = CNTPCT_EL0.get(); + + ((t2 - t1) * 1000 / u64::from(CNTFRQ_EL0.get())) as u32 +} diff --git a/0D_cache_performance/src/gpio.rs b/0D_cache_performance/src/gpio.rs new file mode 100644 index 00000000..3ff0c1e9 --- /dev/null +++ b/0D_cache_performance/src/gpio.rs @@ -0,0 +1,75 @@ +/* + * MIT License + * + * Copyright (c) 2018 Andre Richter + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +use super::MMIO_BASE; +use register::mmio::ReadWrite; + +// Descriptions taken from +// https://github.com/raspberrypi/documentation/files/1888662/BCM2837-ARM-Peripherals.-.Revised.-.V2-1.pdf +register_bitfields! 
{ + u32, + + /// GPIO Function Select 1 + GPFSEL1 [ + /// Pin 15 + FSEL15 OFFSET(15) NUMBITS(3) [ + Input = 0b000, + Output = 0b001, + RXD0 = 0b100, // UART0 - Alternate function 0 + RXD1 = 0b010 // Mini UART - Alternate function 5 + + ], + + /// Pin 14 + FSEL14 OFFSET(12) NUMBITS(3) [ + Input = 0b000, + Output = 0b001, + TXD0 = 0b100, // UART0 - Alternate function 0 + TXD1 = 0b010 // Mini UART - Alternate function 5 + ] + ], + + /// GPIO Pull-up/down Clock Register 0 + GPPUDCLK0 [ + /// Pin 15 + PUDCLK15 OFFSET(15) NUMBITS(1) [ + NoEffect = 0, + AssertClock = 1 + ], + + /// Pin 14 + PUDCLK14 OFFSET(14) NUMBITS(1) [ + NoEffect = 0, + AssertClock = 1 + ] + ] +} + +pub const GPFSEL1: *const ReadWrite<u32, GPFSEL1::Register> = + (MMIO_BASE + 0x0020_0004) as *const ReadWrite<u32, GPFSEL1::Register>; + +pub const GPPUD: *const ReadWrite<u32> = (MMIO_BASE + 0x0020_0094) as *const ReadWrite<u32>; + +pub const GPPUDCLK0: *const ReadWrite<u32, GPPUDCLK0::Register> = + (MMIO_BASE + 0x0020_0098) as *const ReadWrite<u32, GPPUDCLK0::Register>; diff --git a/0D_cache_performance/src/main.rs b/0D_cache_performance/src/main.rs new file mode 100644 index 00000000..41b9d0fb --- /dev/null +++ b/0D_cache_performance/src/main.rs @@ -0,0 +1,111 @@ +/* + * MIT License + * + * Copyright (c) 2018 Andre Richter + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#![no_std] +#![no_main] +#![feature(asm)] +#![feature(const_fn)] + +extern crate cortex_a; + +#[macro_use] +extern crate raspi3_boot; + +#[macro_use] +extern crate register; + +const MMIO_BASE: u32 = 0x3F00_0000; + +mod gpio; +mod mbox; +mod mmu; +mod uart; +mod benchmark; + +fn do_benchmarks(uart: &uart::Uart) { + const SIZE_2MIB: u64 = 2 * 1024 * 1024; + + // Start of the __SECOND__ virtual 2 MiB block (counting starts at zero). + // NON-cacheable DRAM memory. + let non_cacheable_addr: u64 = SIZE_2MIB; + + // Start of the __THIRD__ virtual 2 MiB block. + // Cacheable DRAM memory + let cacheable_addr: u64 = 2 * SIZE_2MIB; + + uart.puts("Benchmarking non-cacheable DRAM modifications at virtual 0x"); + uart.hex(non_cacheable_addr as u32); + uart.puts(", physical 0x"); + uart.hex(2 * SIZE_2MIB as u32); + uart.puts(":\n"); + + let result_nc = benchmark::batch_modify(non_cacheable_addr); + uart.dec(result_nc); + uart.puts(" miliseconds.\n\n"); + + uart.puts("Benchmarking cacheable DRAM modifications at virtual 0x"); + uart.hex(cacheable_addr as u32); + uart.puts(", physical 0x"); + uart.hex(2 * SIZE_2MIB as u32); + uart.puts(":\n"); + let result_c = benchmark::batch_modify(cacheable_addr); + uart.dec(result_c); + uart.puts(" miliseconds.\n\n"); + // Guard against underflow (cached run slower) and division by zero (0 ms). + let percent_diff = result_nc.saturating_sub(result_c) * 100 / result_c.max(1); + + uart.puts("With caching, the function is "); + uart.dec(percent_diff); + uart.puts("% faster!\n"); +} + +entry!(kernel_entry); + +fn kernel_entry() -> !
{ + let mut mbox = mbox::Mbox::new(); + let uart = uart::Uart::new(uart::UART_PHYS_BASE); + + // set up serial console + if uart.init(&mut mbox).is_err() { + loop { + cortex_a::asm::wfe() // If UART fails, abort early + } + } + + uart.getc(); // Press a key first before being greeted + uart.puts("Hello Rustacean!\n\n"); + + uart.puts("\nSwitching MMU on now..."); + + unsafe { mmu::init() }; + + uart.puts("MMU is live \\o/\n\n"); + + do_benchmarks(&uart); + + // echo everything back + loop { + uart.send(uart.getc()); + } +} diff --git a/0D_cache_performance/src/mbox.rs b/0D_cache_performance/src/mbox.rs new file mode 100644 index 00000000..18fe5787 --- /dev/null +++ b/0D_cache_performance/src/mbox.rs @@ -0,0 +1,159 @@ +/* + * MIT License + * + * Copyright (c) 2018 Andre Richter + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +use super::MMIO_BASE; +use core::ops; +use cortex_a::asm; +use register::mmio::{ReadOnly, WriteOnly}; + +register_bitfields! { + u32, + + STATUS [ + FULL OFFSET(31) NUMBITS(1) [], + EMPTY OFFSET(30) NUMBITS(1) [] + ] +} + +const VIDEOCORE_MBOX: u32 = MMIO_BASE + 0xB880; + +#[allow(non_snake_case)] +#[repr(C)] +pub struct RegisterBlock { + READ: ReadOnly, // 0x00 + __reserved_0: [u32; 5], // 0x04 + STATUS: ReadOnly, // 0x18 + __reserved_1: u32, // 0x1C + WRITE: WriteOnly, // 0x20 +} + +// Custom errors +pub enum MboxError { + ResponseError, + UnknownError, +} +pub type Result = ::core::result::Result; + +// Channels +pub mod channel { + pub const PROP: u32 = 8; +} + +// Tags +pub mod tag { + pub const SETCLKRATE: u32 = 0x38002; + pub const LAST: u32 = 0; +} + +// Clocks +pub mod clock { + pub const UART: u32 = 0x0_0000_0002; +} + +// Responses +mod response { + pub const SUCCESS: u32 = 0x8000_0000; + pub const ERROR: u32 = 0x8000_0001; // error parsing request buffer (partial response) +} + +pub const REQUEST: u32 = 0; + +// Public interface to the mailbox +#[repr(C)] +#[repr(align(16))] +pub struct Mbox { + // The address for buffer needs to be 16-byte aligned so that the + // Videcore can handle it properly. + pub buffer: [u32; 36], +} + +/// Deref to RegisterBlock +/// +/// Allows writing +/// ``` +/// self.STATUS.read() +/// ``` +/// instead of something along the lines of +/// ``` +/// unsafe { (*Mbox::ptr()).STATUS.read() } +/// ``` +impl ops::Deref for Mbox { + type Target = RegisterBlock; + + fn deref(&self) -> &Self::Target { + unsafe { &*Self::ptr() } + } +} + +impl Mbox { + pub fn new() -> Mbox { + Mbox { buffer: [0; 36] } + } + + /// Returns a pointer to the register block + fn ptr() -> *const RegisterBlock { + VIDEOCORE_MBOX as *const _ + } + + /// Make a mailbox call. 
Returns Err(MboxError) on failure, Ok(()) success + pub fn call(&self, channel: u32) -> Result<()> { + // wait until we can write to the mailbox + loop { + if !self.STATUS.is_set(STATUS::FULL) { + break; + } + + asm::nop(); + } + + let buf_ptr = self.buffer.as_ptr() as u32; + + // write the address of our message to the mailbox with channel identifier + self.WRITE.set((buf_ptr & !0xF) | (channel & 0xF)); + + // now wait for the response + loop { + // is there a response? + loop { + if !self.STATUS.is_set(STATUS::EMPTY) { + break; + } + + asm::nop(); + } + + let resp: u32 = self.READ.get(); + + // is it a response to our message? + if ((resp & 0xF) == channel) && ((resp & !0xF) == buf_ptr) { + // is it a valid successful response? + return match self.buffer[1] { + response::SUCCESS => Ok(()), + response::ERROR => Err(MboxError::ResponseError), + _ => Err(MboxError::UnknownError), + }; + } + } + } +} diff --git a/0D_cache_performance/src/mmu.rs b/0D_cache_performance/src/mmu.rs new file mode 100644 index 00000000..a379c769 --- /dev/null +++ b/0D_cache_performance/src/mmu.rs @@ -0,0 +1,219 @@ +/* + * MIT License + * + * Copyright (c) 2018 Andre Richter + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +use cortex_a::{barrier, regs::*}; + +register_bitfields! {u64, + // AArch64 Reference Manual page 2150 + STAGE1_DESCRIPTOR [ + /// Execute-never + XN OFFSET(54) NUMBITS(1) [ + False = 0, + True = 1 + ], + + /// Various address fields, depending on use case + LVL2_OUTPUT_ADDR_4KiB OFFSET(21) NUMBITS(27) [], // [47:21] + NEXT_LVL_TABLE_ADDR_4KiB OFFSET(12) NUMBITS(36) [], // [47:12] + + /// Access flag + AF OFFSET(10) NUMBITS(1) [ + False = 0, + True = 1 + ], + + /// Shareability field + SH OFFSET(8) NUMBITS(2) [ + OuterShareable = 0b10, + InnerShareable = 0b11 + ], + + /// Access Permissions + AP OFFSET(6) NUMBITS(2) [ + RW_EL1 = 0b00, + RW_EL1_EL0 = 0b01, + RO_EL1 = 0b10, + RO_EL1_EL0 = 0b11 + ], + + /// Memory attributes index into the MAIR_EL1 register + AttrIndx OFFSET(2) NUMBITS(3) [], + + TYPE OFFSET(1) NUMBITS(1) [ + Block = 0, + Table = 1 + ], + + VALID OFFSET(0) NUMBITS(1) [ + False = 0, + True = 1 + ] + ] +} + +trait BaseAddr { + fn base_addr(&self) -> u64; +} + +impl BaseAddr for [u64; 512] { + fn base_addr(&self) -> u64 { + self as *const u64 as u64 + } +} + +const NUM_ENTRIES_4KIB: usize = 512; + +static mut LVL2_TABLE: [u64; NUM_ENTRIES_4KIB] = [0; NUM_ENTRIES_4KIB]; +static mut SINGLE_LVL3_TABLE: [u64; NUM_ENTRIES_4KIB] = [0; NUM_ENTRIES_4KIB]; + +/// Set up identity mapped page tables for the first 1 gigabyte of address +/// space. +pub unsafe fn init() { + // First, define the three memory types that we will map. Cacheable and + // non-cacheable normal DRAM, and device. 
+ MAIR_EL1.write( + // Attribute 2 + MAIR_EL1::Attr2_HIGH::Memory_OuterNonCacheable + + MAIR_EL1::Attr2_LOW_MEMORY::InnerNonCacheable + + // Attribute 1 + + MAIR_EL1::Attr1_HIGH::Memory_OuterWriteBack_NonTransient_ReadAlloc_WriteAlloc + + MAIR_EL1::Attr1_LOW_MEMORY::InnerWriteBack_NonTransient_ReadAlloc_WriteAlloc + + // Attribute 0 + + MAIR_EL1::Attr0_HIGH::Device + + MAIR_EL1::Attr0_LOW_DEVICE::Device_nGnRE, + ); + + // Descriptive consts for indexing into the correct MAIR_EL1 attributes. + mod mair { + pub const DEVICE: u64 = 0; + pub const NORMAL: u64 = 1; + pub const NORMAL_NON_CACHEABLE: u64 = 2; + } + + // Set up the first LVL2 entry, pointing to a 4KiB table base address. + let lvl3_base: u64 = SINGLE_LVL3_TABLE.base_addr() >> 12; + LVL2_TABLE[0] = (STAGE1_DESCRIPTOR::VALID::True + + STAGE1_DESCRIPTOR::TYPE::Table + + STAGE1_DESCRIPTOR::NEXT_LVL_TABLE_ADDR_4KiB.val(lvl3_base)) + .value; + + // The second 2 MiB block. + LVL2_TABLE[1] = (STAGE1_DESCRIPTOR::VALID::True + + STAGE1_DESCRIPTOR::TYPE::Block + + STAGE1_DESCRIPTOR::AttrIndx.val(mair::NORMAL_NON_CACHEABLE) + + STAGE1_DESCRIPTOR::AP::RW_EL1 + + STAGE1_DESCRIPTOR::SH::OuterShareable + + STAGE1_DESCRIPTOR::AF::True + // This translation is accessed for virtual 0x200000. Point to physical + // 0x400000, aka the third physical 2 MiB DRAM block (third block == 2, + // because we start counting at 0). + // + // Here, we configure it non-cacheable. + + STAGE1_DESCRIPTOR::LVL2_OUTPUT_ADDR_4KiB.val(2) + + STAGE1_DESCRIPTOR::XN::True) + .value; + + // Fill the rest of the LVL2 (2MiB) entries as block + // descriptors. Differentiate between normal and device mem. + let mmio_base: u64 = (super::MMIO_BASE >> 21).into(); + let common = STAGE1_DESCRIPTOR::VALID::True + + STAGE1_DESCRIPTOR::TYPE::Block + + STAGE1_DESCRIPTOR::AP::RW_EL1 + + STAGE1_DESCRIPTOR::AF::True + + STAGE1_DESCRIPTOR::XN::True; + + // Notice the skip(2).
Start at the third 2 MiB DRAM block, which will point + // virtual 0x400000 to physical 0x400000, configured as cacheable memory. + for (i, entry) in LVL2_TABLE.iter_mut().enumerate().skip(2) { + let j: u64 = i as u64; + + let mem_attr = if j >= mmio_base { + STAGE1_DESCRIPTOR::SH::OuterShareable + STAGE1_DESCRIPTOR::AttrIndx.val(mair::DEVICE) + } else { + STAGE1_DESCRIPTOR::SH::InnerShareable + STAGE1_DESCRIPTOR::AttrIndx.val(mair::NORMAL) + }; + + *entry = (common + mem_attr + STAGE1_DESCRIPTOR::LVL2_OUTPUT_ADDR_4KiB.val(j)).value; + } + + // Finally, fill the single LVL3 table (4KiB granule). Differentiate between + // code/RO and RW sections. + // + // Using the linker script, we ensure that the RO sections are 4KiB aligned, + // and we export their boundaries via symbols. + extern "C" { + static mut __ro_start: u64; + static mut __ro_end: u64; + } + + const PAGESIZE: u64 = 4096; + let ro_start: u64 = &__ro_start as *const _ as u64 / PAGESIZE; + let ro_end: u64 = &__ro_end as *const _ as u64 / PAGESIZE; + let common = STAGE1_DESCRIPTOR::VALID::True + + STAGE1_DESCRIPTOR::TYPE::Table + + STAGE1_DESCRIPTOR::AttrIndx.val(mair::NORMAL) + + STAGE1_DESCRIPTOR::SH::InnerShareable + + STAGE1_DESCRIPTOR::AF::True; + + for (i, entry) in SINGLE_LVL3_TABLE.iter_mut().enumerate() { + let j: u64 = i as u64; + // __ro_end marks the first page after the RO sections; it must be RW. + let mem_attr = if j < ro_start || j >= ro_end { + STAGE1_DESCRIPTOR::AP::RW_EL1 + STAGE1_DESCRIPTOR::XN::True + } else { + STAGE1_DESCRIPTOR::AP::RO_EL1 + STAGE1_DESCRIPTOR::XN::False + }; + + *entry = (common + mem_attr + STAGE1_DESCRIPTOR::NEXT_LVL_TABLE_ADDR_4KiB.val(j)).value; + } + + // Point to the LVL2 table base address in TTBR0. + TTBR0_EL1.set_baddr(LVL2_TABLE.base_addr()); + + // Configure various settings of stage 1 of the EL1 translation regime.
+ let ips = ID_AA64MMFR0_EL1.read(ID_AA64MMFR0_EL1::PARange); + TCR_EL1.write( + TCR_EL1::TBI0::Ignored + + TCR_EL1::IPS.val(ips) + + TCR_EL1::TG0::KiB_4 // 4 KiB granule + + TCR_EL1::SH0::Inner + + TCR_EL1::ORGN0::WriteBack_ReadAlloc_WriteAlloc_Cacheable + + TCR_EL1::IRGN0::WriteBack_ReadAlloc_WriteAlloc_Cacheable + + TCR_EL1::EPD0::EnableTTBR0Walks + + TCR_EL1::T0SZ.val(34), // Start walks at level 2 + ); + + // Switch the MMU on. + // + // First, force all previous changes to be seen before the MMU is enabled. + barrier::isb(barrier::SY); + + // Enable the MMU and turn on caching + SCTLR_EL1.modify(SCTLR_EL1::M::Enable + SCTLR_EL1::C::Cacheable); + + // Force MMU init to complete before next instruction + barrier::isb(barrier::SY); +} diff --git a/0D_cache_performance/src/uart.rs b/0D_cache_performance/src/uart.rs new file mode 100644 index 00000000..809b8546 --- /dev/null +++ b/0D_cache_performance/src/uart.rs @@ -0,0 +1,310 @@ +/* + * MIT License + * + * Copyright (c) 2018 Andre Richter + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +use super::MMIO_BASE; +use core::{ + ops, + sync::atomic::{compiler_fence, Ordering}, +}; +use cortex_a::asm; +use gpio; +use mbox; +use register::mmio::*; + +// PL011 UART registers. +// +// Descriptions taken from +// https://github.com/raspberrypi/documentation/files/1888662/BCM2837-ARM-Peripherals.-.Revised.-.V2-1.pdf +register_bitfields! { + u32, + + /// Flag Register + FR [ + /// Transmit FIFO full. The meaning of this bit depends on the + /// state of the FEN bit in the UARTLCR_ LCRH Register. If the + /// FIFO is disabled, this bit is set when the transmit + /// holding register is full. If the FIFO is enabled, the TXFF + /// bit is set when the transmit FIFO is full. + TXFF OFFSET(5) NUMBITS(1) [], + + /// Receive FIFO empty. The meaning of this bit depends on the + /// state of the FEN bit in the UARTLCR_H Register. If the + /// FIFO is disabled, this bit is set when the receive holding + /// register is empty. If the FIFO is enabled, the RXFE bit is + /// set when the receive FIFO is empty. + RXFE OFFSET(4) NUMBITS(1) [] + ], + + /// Integer Baud rate divisor + IBRD [ + /// Integer Baud rate divisor + IBRD OFFSET(0) NUMBITS(16) [] + ], + + /// Fractional Baud rate divisor + FBRD [ + /// Fractional Baud rate divisor + FBRD OFFSET(0) NUMBITS(6) [] + ], + + /// Line Control register + LCRH [ + /// Word length. These bits indicate the number of data bits + /// transmitted or received in a frame. + WLEN OFFSET(5) NUMBITS(2) [ + FiveBit = 0b00, + SixBit = 0b01, + SevenBit = 0b10, + EightBit = 0b11 + ] + ], + + /// Control Register + CR [ + /// Receive enable. If this bit is set to 1, the receive + /// section of the UART is enabled. Data reception occurs for + /// UART signals. 
When the UART is disabled in the middle of + /// reception, it completes the current character before + /// stopping. + RXE OFFSET(9) NUMBITS(1) [ + Disabled = 0, + Enabled = 1 + ], + + /// Transmit enable. If this bit is set to 1, the transmit + /// section of the UART is enabled. Data transmission occurs + /// for UART signals. When the UART is disabled in the middle + /// of transmission, it completes the current character before + /// stopping. + TXE OFFSET(8) NUMBITS(1) [ + Disabled = 0, + Enabled = 1 + ], + + /// UART enable + UARTEN OFFSET(0) NUMBITS(1) [ + /// If the UART is disabled in the middle of transmission + /// or reception, it completes the current character + /// before stopping. + Disabled = 0, + Enabled = 1 + ] + ], + + /// Interupt Clear Register + ICR [ + /// Meta field for all pending interrupts + ALL OFFSET(0) NUMBITS(11) [] + ] +} + +pub const UART_PHYS_BASE: u32 = MMIO_BASE + 0x20_1000; + +#[allow(non_snake_case)] +#[repr(C)] +pub struct RegisterBlock { + DR: ReadWrite, // 0x00 + __reserved_0: [u32; 5], // 0x04 + FR: ReadOnly, // 0x18 + __reserved_1: [u32; 2], // 0x1c + IBRD: WriteOnly, // 0x24 + FBRD: WriteOnly, // 0x28 + LCRH: WriteOnly, // 0x2C + CR: WriteOnly, // 0x30 + __reserved_2: [u32; 4], // 0x34 + ICR: WriteOnly, // 0x44 +} + +pub enum UartError { + MailboxError, +} +pub type Result = ::core::result::Result; + +pub struct Uart { + uart_base: u32, +} + +impl ops::Deref for Uart { + type Target = RegisterBlock; + + fn deref(&self) -> &Self::Target { + unsafe { &*self.ptr() } + } +} + +impl Uart { + pub fn new(uart_base: u32) -> Uart { + Uart { uart_base } + } + + /// Returns a pointer to the register block + fn ptr(&self) -> *const RegisterBlock { + self.uart_base as *const _ + } + + ///Set baud rate and characteristics (115200 8N1) and map to GPIO + pub fn init(&self, mbox: &mut mbox::Mbox) -> Result<()> { + // turn off UART0 + self.CR.set(0); + + // set up clock for consistent divisor values + mbox.buffer[0] = 9 * 4; + 
mbox.buffer[1] = mbox::REQUEST; + mbox.buffer[2] = mbox::tag::SETCLKRATE; + mbox.buffer[3] = 12; + mbox.buffer[4] = 8; + mbox.buffer[5] = mbox::clock::UART; // UART clock + mbox.buffer[6] = 4_000_000; // 4Mhz + mbox.buffer[7] = 0; // skip turbo setting + mbox.buffer[8] = mbox::tag::LAST; + + // Insert a compiler fence that ensures that all stores to the + // mbox buffer are finished before the GPU is signaled (which + // is done by a store operation as well). + compiler_fence(Ordering::Release); + + if mbox.call(mbox::channel::PROP).is_err() { + return Err(UartError::MailboxError); // Abort if UART clocks couldn't be set + }; + + // map UART0 to GPIO pins + unsafe { + (*gpio::GPFSEL1).modify(gpio::GPFSEL1::FSEL14::TXD0 + gpio::GPFSEL1::FSEL15::RXD0); + + (*gpio::GPPUD).set(0); // enable pins 14 and 15 + for _ in 0..150 { + asm::nop(); + } + + (*gpio::GPPUDCLK0).modify( + gpio::GPPUDCLK0::PUDCLK14::AssertClock + gpio::GPPUDCLK0::PUDCLK15::AssertClock, + ); + for _ in 0..150 { + asm::nop(); + } + + (*gpio::GPPUDCLK0).set(0); + } + + self.ICR.write(ICR::ALL::CLEAR); + self.IBRD.write(IBRD::IBRD.val(2)); // Results in 115200 baud + self.FBRD.write(FBRD::FBRD.val(0xB)); + self.LCRH.write(LCRH::WLEN::EightBit); // 8N1 + self.CR + .write(CR::UARTEN::Enabled + CR::TXE::Enabled + CR::RXE::Enabled); + + Ok(()) + } + + /// Send a character + pub fn send(&self, c: char) { + // wait until we can send + loop { + if !self.FR.is_set(FR::TXFF) { + break; + } + + asm::nop(); + } + + // write the character to the buffer + self.DR.set(c as u32); + } + + /// Receive a character + pub fn getc(&self) -> char { + // wait until something is in the buffer + loop { + if !self.FR.is_set(FR::RXFE) { + break; + } + + asm::nop(); + } + + // read it and return + let mut ret = self.DR.get() as u8 as char; + + // convert carrige return to newline + if ret == '\r' { + ret = '\n' + } + + ret + } + + /// Display a string + pub fn puts(&self, string: &str) { + for c in string.chars() { + // convert 
newline to carrige return + newline + if c == '\n' { + self.send('\r') + } + + self.send(c); + } + } + + /// Display a binary value in hexadecimal + pub fn hex(&self, d: u32) { + let mut n; + + for i in 0..8 { + // get highest tetrad + n = d.wrapping_shr(28 - i * 4) & 0xF; + + // 0-9 => '0'-'9', 10-15 => 'A'-'F' + // Add proper offset for ASCII table + if n > 9 { + n += 0x37; + } else { + n += 0x30; + } + + self.send(n as u8 as char); + } + } + + /// Display a binary value in decimal + pub fn dec(&self, d: u32) { + let mut digits: [char; 10] = ['\0'; 10]; + let mut d = d; + let mut i: usize = 0; + + loop { + digits[i] = ((d % 10) + 0x30) as u8 as char; + + i += 1; + d /= 10; + + if d == 0 { + break; + } + } + + for c in digits.iter().rev() { + self.send(*c); + } + } +}