Add tutorial 0D_cache_performance
parent
bf2a1fff7e
commit
c65e2e56cd
Binary file not shown.
Binary file not shown.
@ -0,0 +1,6 @@
|
|||||||
|
[target.aarch64-unknown-none]
|
||||||
|
rustflags = [
|
||||||
|
"-C", "link-arg=-Tlink.ld",
|
||||||
|
"-C", "target-feature=-fp-armv8",
|
||||||
|
"-C", "target-cpu=cortex-a53",
|
||||||
|
]
|
@ -0,0 +1,55 @@
|
|||||||
|
[[package]]
|
||||||
|
name = "cortex-a"
|
||||||
|
version = "2.2.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
dependencies = [
|
||||||
|
"register 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "kernel8"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"cortex-a 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"raspi3_boot 0.1.0",
|
||||||
|
"register 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "panic-abort"
|
||||||
|
version = "0.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "r0"
|
||||||
|
version = "0.2.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "raspi3_boot"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"cortex-a 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"panic-abort 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"r0 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "register"
|
||||||
|
version = "0.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
dependencies = [
|
||||||
|
"tock-registers 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tock-registers"
|
||||||
|
version = "0.2.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
|
||||||
|
[metadata]
|
||||||
|
"checksum cortex-a 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fe913628f34718bc9e7d0d07e13ca1374638f64f0edc6eb063ec8abe581d395d"
|
||||||
|
"checksum panic-abort 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6bc796c620f27056d4ffe7c558533fd67ae5af0fd8e919fbe38de803368af73e"
|
||||||
|
"checksum r0 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e2a38df5b15c8d5c7e8654189744d8e396bddc18ad48041a500ce52d6948941f"
|
||||||
|
"checksum register 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "157a11ac0b1882ff4a527a92f911dd288df17367faaaa0c36f188cd61ec36fc1"
|
||||||
|
"checksum tock-registers 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3a385d94f3f62e60445a0adb9ff8d9621faa272234530d4c0f848ec98f88e316"
|
@ -0,0 +1,12 @@
|
|||||||
|
[package]
|
||||||
|
name = "kernel8"
|
||||||
|
version = "0.1.0"
|
||||||
|
authors = ["Andre Richter <andre.o.richter@gmail.com>"]
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
raspi3_boot = { path = "raspi3_boot" }
|
||||||
|
cortex-a = "2.2.1"
|
||||||
|
register = "0.2.0"
|
||||||
|
|
||||||
|
[package.metadata.cargo-xbuild]
|
||||||
|
sysroot_path = "../xbuild_sysroot"
|
@ -0,0 +1,66 @@
|
|||||||
|
#
|
||||||
|
# MIT License
|
||||||
|
#
|
||||||
|
# Copyright (c) 2018 Andre Richter <andre.o.richter@gmail.com>
|
||||||
|
#
|
||||||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
# of this software and associated documentation files (the "Software"), to deal
|
||||||
|
# in the Software without restriction, including without limitation the rights
|
||||||
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
# copies of the Software, and to permit persons to whom the Software is
|
||||||
|
# furnished to do so, subject to the following conditions:
|
||||||
|
#
|
||||||
|
# The above copyright notice and this permission notice shall be included in all
|
||||||
|
# copies or substantial portions of the Software.
|
||||||
|
#
|
||||||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
# SOFTWARE.
|
||||||
|
#
|
||||||
|
|
||||||
|
TARGET = aarch64-unknown-none

OBJCOPY = cargo objcopy --
OBJCOPY_PARAMS = --strip-all -O binary

UTILS_CONTAINER = andrerichter/raspi3-utils
DOCKER_CMD = docker run -it --rm -v $(shell pwd):/work -w /work
DOCKER_TTY = --privileged -v /dev:/dev
QEMU_CMD = qemu-system-aarch64 -M raspi3 -kernel kernel8.img
RASPBOOT_CMD = raspbootcom /dev/ttyUSB0 kernel8.img

# These targets name actions, not files. Declaring them phony keeps them
# working even if a file with the same name ever appears in this directory.
.PHONY: all qemu raspboot clippy clean

all: clean kernel8.img

# Debug build of the kernel ELF; the result is copied next to the Makefile.
target/$(TARGET)/debug/kernel8: src/main.rs
	cargo xbuild --target=$(TARGET)
	cp $@ .

# Release build of the kernel ELF; the result is copied next to the Makefile.
target/$(TARGET)/release/kernel8: src/main.rs
	cargo xbuild --target=$(TARGET) --release
	cp $@ .

# DEBUG=1 selects the debug build, anything else the release build.
ifeq ($(DEBUG),1)
kernel8: target/$(TARGET)/debug/kernel8
else
kernel8: target/$(TARGET)/release/kernel8
endif

# Strip the ELF down to the raw binary image.
kernel8.img: kernel8
	$(OBJCOPY) $(OBJCOPY_PARAMS) $< kernel8.img

qemu: all
	$(DOCKER_CMD) $(UTILS_CONTAINER) $(QEMU_CMD) -serial stdio

raspboot: all
	$(DOCKER_CMD) $(DOCKER_TTY) $(UTILS_CONTAINER) $(RASPBOOT_CMD)

clippy:
	cargo xclippy --target=$(TARGET)

clean:
	cargo clean
	rm -f kernel8
|
@ -0,0 +1,57 @@
|
|||||||
|
# Tutorial 0D - Cache Performance
|
||||||
|
|
||||||
|
Now that we finally have virtual memory capabilities available, we also have
|
||||||
|
fine-grained control over `cacheability`. You already caught a glimpse of this in the
|
||||||
|
last tutorial, where we used page table entries to reference the `MAIR_EL1`
|
||||||
|
register to indicate the cacheability of a page or block.
|
||||||
|
|
||||||
|
Unfortunately, for the user it is often hard to grasp the advantage of caching
|
||||||
|
in early stages of OS or bare-metal software development. This tutorial is a
|
||||||
|
short interlude that tries to give you a feeling of what caching can do for
|
||||||
|
performance.
|
||||||
|
|
||||||
|
## Benchmark
|
||||||
|
|
||||||
|
Let's write a tiny, arbitrary micro-benchmark to showcase the performance of
|
||||||
|
operating on the same DRAM with caching enabled and disabled.
|
||||||
|
|
||||||
|
### mmu.rs
|
||||||
|
|
||||||
|
Therefore, we will map the same physical memory via two different virtual
|
||||||
|
addresses. We set up our page tables such that the virtual address `0x200000`
|
||||||
|
points to the physical DRAM at `0x400000`, and we configure it as
|
||||||
|
`non-cacheable` in the page tables.
|
||||||
|
|
||||||
|
We are still using a `2 MiB` granule, and set up the next block, which starts at
|
||||||
|
virtual `0x400000`, to point at physical `0x400000` (this is an identity mapped
|
||||||
|
block). This time, the block is configured as cacheable.
|
||||||
|
|
||||||
|
### benchmark.rs
|
||||||
|
|
||||||
|
We write a little function that iteratively reads memory of five times the size
|
||||||
|
of a `cacheline`, in steps of 8 bytes, aka one processor register at a time. We
|
||||||
|
read the value, add 1, and write it back. This whole process is repeated
|
||||||
|
`100_000` times.
|
||||||
|
|
||||||
|
### main.rs
|
||||||
|
|
||||||
|
The benchmark function is called twice. Once for the cacheable and once for the
|
||||||
|
non-cacheable virtual addresses. Remember that both virtual addresses point to
|
||||||
|
the _same_ physical DRAM, so the difference in time that we will see will
|
||||||
|
showcase how much faster it is to operate on DRAM with caching enabled.
|
||||||
|
|
||||||
|
## Results
|
||||||
|
|
||||||
|
On my Raspberry, I get the following results:
|
||||||
|
|
||||||
|
```text
|
||||||
|
Benchmarking non-cacheable DRAM modifications at virtual 0x00200000, physical 0x00400000:
|
||||||
|
664 milliseconds.
|
||||||
|
|
||||||
|
Benchmarking cacheable DRAM modifications at virtual 0x00400000, physical 0x00400000:
|
||||||
|
148 milliseconds.
|
||||||
|
|
||||||
|
With caching, the function is 348% faster!
|
||||||
|
```
|
||||||
|
|
||||||
|
Impressive, isn't it?
|
Binary file not shown.
Binary file not shown.
@ -0,0 +1,57 @@
|
|||||||
|
/*
|
||||||
|
* MIT License
|
||||||
|
*
|
||||||
|
* Copyright (c) 2018 Andre Richter <andre.o.richter@gmail.com>
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all
|
||||||
|
* copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
ENTRY(_boot_cores);
|
||||||
|
|
||||||
|
SECTIONS
|
||||||
|
{
|
||||||
|
. = 0x80000; /* This is already 4KiB aligned */
|
||||||
|
__ro_start = .;
|
||||||
|
.text :
|
||||||
|
{
|
||||||
|
KEEP(*(.text.boot)) *(.text .text.*)
|
||||||
|
}
|
||||||
|
|
||||||
|
.rodata :
|
||||||
|
{
|
||||||
|
*(.rodata .rodata.*)
|
||||||
|
}
|
||||||
|
. = ALIGN(4096); /* Fill up to 4KiB */
|
||||||
|
__ro_end = .;
|
||||||
|
|
||||||
|
.data :
|
||||||
|
{
|
||||||
|
*(.data .data.*)
|
||||||
|
}
|
||||||
|
|
||||||
|
.bss ALIGN(8):
|
||||||
|
{
|
||||||
|
__bss_start = .;
|
||||||
|
*(.bss .bss.*)
|
||||||
|
*(COMMON)
|
||||||
|
__bss_end = .;
|
||||||
|
}
|
||||||
|
|
||||||
|
/DISCARD/ : { *(.comment) *(.gnu*) *(.note*) *(.eh_frame*) }
|
||||||
|
}
|
@ -0,0 +1,9 @@
|
|||||||
|
[package]
|
||||||
|
name = "raspi3_boot"
|
||||||
|
version = "0.1.0"
|
||||||
|
authors = ["Andre Richter <andre.o.richter@gmail.com>"]
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
cortex-a = "2.2.1"
|
||||||
|
panic-abort = "0.2.0"
|
||||||
|
r0 = "0.2.2"
|
@ -0,0 +1,131 @@
|
|||||||
|
/*
|
||||||
|
* MIT License
|
||||||
|
*
|
||||||
|
* Copyright (c) 2018 Jorge Aparicio
|
||||||
|
* Copyright (c) 2018 Andre Richter <andre.o.richter@gmail.com>
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all
|
||||||
|
* copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#![deny(missing_docs)]
|
||||||
|
#![deny(warnings)]
|
||||||
|
#![no_std]
|
||||||
|
|
||||||
|
//! Low-level boot of the Raspberry's processor
|
||||||
|
|
||||||
|
extern crate cortex_a;
|
||||||
|
extern crate panic_abort;
|
||||||
|
extern crate r0;
|
||||||
|
|
||||||
|
/// Declares the kernel's entry point.
///
/// Expands to a function exported under the symbol name `main`, which is the
/// symbol `reset()` calls after zeroing `.bss`. The supplied path must coerce
/// to `fn() -> !`; anything else is rejected at compile time.
#[macro_export]
macro_rules! entry {
    ($path:path) => {
        #[export_name = "main"]
        pub unsafe fn __main() -> ! {
            // Binding to a fn pointer of the expected signature type-checks
            // the user-supplied path.
            let kernel_main: fn() -> ! = $path;

            kernel_main()
        }
    };
}
|
||||||
|
|
||||||
|
/// Reset function.
|
||||||
|
///
|
||||||
|
/// Initializes the bss section before calling into the user's `main()`.
|
||||||
|
unsafe fn reset() -> ! {
|
||||||
|
extern "C" {
|
||||||
|
// Boundaries of the .bss section, provided by the linker script
|
||||||
|
static mut __bss_start: u64;
|
||||||
|
static mut __bss_end: u64;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Zeroes the .bss section
|
||||||
|
r0::zero_bss(&mut __bss_start, &mut __bss_end);
|
||||||
|
|
||||||
|
extern "Rust" {
|
||||||
|
fn main() -> !;
|
||||||
|
}
|
||||||
|
|
||||||
|
main()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Prepare and execute transition from EL2 to EL1.
|
||||||
|
#[inline]
|
||||||
|
fn setup_and_enter_el1_from_el2() -> ! {
|
||||||
|
use cortex_a::{asm, regs::*};
|
||||||
|
|
||||||
|
// Enable timer counter registers for EL1
|
||||||
|
CNTHCTL_EL2.write(CNTHCTL_EL2::EL1PCEN::SET + CNTHCTL_EL2::EL1PCTEN::SET);
|
||||||
|
|
||||||
|
// No offset for reading the counters
|
||||||
|
CNTVOFF_EL2.set(0);
|
||||||
|
|
||||||
|
// Set EL1 execution state to AArch64
|
||||||
|
// TODO: Explain the SWIO bit
|
||||||
|
HCR_EL2.write(HCR_EL2::RW::EL1IsAarch64 + HCR_EL2::SWIO::SET);
|
||||||
|
|
||||||
|
// Set up a simulated exception return.
|
||||||
|
//
|
||||||
|
// First, fake a saved program status, where all interrupts were
|
||||||
|
// masked and SP_EL0 was used as a stack pointer.
|
||||||
|
SPSR_EL2.write(
|
||||||
|
SPSR_EL2::D::Masked
|
||||||
|
+ SPSR_EL2::A::Masked
|
||||||
|
+ SPSR_EL2::I::Masked
|
||||||
|
+ SPSR_EL2::F::Masked
|
||||||
|
+ SPSR_EL2::M::EL1t,
|
||||||
|
);
|
||||||
|
|
||||||
|
// Second, let the link register point to reset().
|
||||||
|
ELR_EL2.set(reset as *const () as u64);
|
||||||
|
|
||||||
|
// Set up SP_EL0 (stack pointer), which will be used by EL1 once
|
||||||
|
// we "return" to it.
|
||||||
|
SP_EL0.set(0x80_000);
|
||||||
|
|
||||||
|
// Use `eret` to "return" to EL1. This will result in execution of
|
||||||
|
// `reset()` in EL1.
|
||||||
|
asm::eret()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Entrypoint of the processor.
|
||||||
|
///
|
||||||
|
/// Parks all cores except core0 and checks if we started in EL2. If
|
||||||
|
/// so, proceeds with setting up EL1.
|
||||||
|
#[link_section = ".text.boot"]
|
||||||
|
#[no_mangle]
|
||||||
|
pub unsafe extern "C" fn _boot_cores() -> ! {
|
||||||
|
use cortex_a::{asm, regs::*};
|
||||||
|
|
||||||
|
const CORE_0: u64 = 0;
|
||||||
|
const CORE_MASK: u64 = 0x3;
|
||||||
|
const EL2: u32 = CurrentEL::EL::EL2.value;
|
||||||
|
|
||||||
|
if let CORE_0 = MPIDR_EL1.get() & CORE_MASK {
|
||||||
|
if let EL2 = CurrentEL.get() {
|
||||||
|
setup_and_enter_el1_from_el2()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// if not core0 or EL != 2, infinitely wait for events
|
||||||
|
loop {
|
||||||
|
asm::wfe();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,40 @@
|
|||||||
|
use core::sync::atomic::{compiler_fence, Ordering};
|
||||||
|
use cortex_a::{barrier, regs::*};
|
||||||
|
|
||||||
|
/// We assume that addr is cacheline aligned
|
||||||
|
pub fn batch_modify(addr: u64) -> u32 {
|
||||||
|
const CACHELINE_SIZE_BYTES: u64 = 64; // TODO: retrieve this from a system register
|
||||||
|
const NUM_CACHELINES_TOUCHED: u64 = 5;
|
||||||
|
const BYTES_PER_U64_REG: usize = 8;
|
||||||
|
const NUM_BENCH_ITERATIONS: u64 = 100_000;
|
||||||
|
|
||||||
|
const NUM_BYTES_TOUCHED: u64 = CACHELINE_SIZE_BYTES * NUM_CACHELINES_TOUCHED;
|
||||||
|
|
||||||
|
let t1 = CNTPCT_EL0.get();
|
||||||
|
|
||||||
|
compiler_fence(Ordering::SeqCst);
|
||||||
|
|
||||||
|
let mut data_ptr: *mut u64;
|
||||||
|
let mut temp: u64;
|
||||||
|
for _ in 0..NUM_BENCH_ITERATIONS {
|
||||||
|
for i in (addr..(addr + NUM_BYTES_TOUCHED)).step_by(BYTES_PER_U64_REG) {
|
||||||
|
data_ptr = i as *mut u64;
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
temp = core::ptr::read_volatile(data_ptr);
|
||||||
|
core::ptr::write_volatile(data_ptr, temp + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Insert a barrier to ensure that the last memory operation has finished
|
||||||
|
// before we retrieve the elapsed time with the subsequent counter read. Not
|
||||||
|
// needed at all given the sample size, but let's be a bit pedantic here for
|
||||||
|
// education purposes. For measuring single-instructions, this would be
|
||||||
|
// needed.
|
||||||
|
unsafe { barrier::dsb(barrier::SY) };
|
||||||
|
|
||||||
|
let t2 = CNTPCT_EL0.get();
|
||||||
|
|
||||||
|
((t2 - t1) * 1000 / u64::from(CNTFRQ_EL0.get())) as u32
|
||||||
|
}
|
@ -0,0 +1,75 @@
|
|||||||
|
/*
|
||||||
|
* MIT License
|
||||||
|
*
|
||||||
|
* Copyright (c) 2018 Andre Richter <andre.o.richter@gmail.com>
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all
|
||||||
|
* copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
use super::MMIO_BASE;
|
||||||
|
use register::mmio::ReadWrite;
|
||||||
|
|
||||||
|
// Descriptions taken from
|
||||||
|
// https://github.com/raspberrypi/documentation/files/1888662/BCM2837-ARM-Peripherals.-.Revised.-.V2-1.pdf
|
||||||
|
register_bitfields! {
|
||||||
|
u32,
|
||||||
|
|
||||||
|
/// GPIO Function Select 1
|
||||||
|
GPFSEL1 [
|
||||||
|
/// Pin 15
|
||||||
|
FSEL15 OFFSET(15) NUMBITS(3) [
|
||||||
|
Input = 0b000,
|
||||||
|
Output = 0b001,
|
||||||
|
RXD0 = 0b100, // UART0 - Alternate function 0
|
||||||
|
RXD1 = 0b010 // Mini UART - Alternate function 5
|
||||||
|
|
||||||
|
],
|
||||||
|
|
||||||
|
/// Pin 14
|
||||||
|
FSEL14 OFFSET(12) NUMBITS(3) [
|
||||||
|
Input = 0b000,
|
||||||
|
Output = 0b001,
|
||||||
|
TXD0 = 0b100, // UART0 - Alternate function 0
|
||||||
|
TXD1 = 0b010 // Mini UART - Alternate function 5
|
||||||
|
]
|
||||||
|
],
|
||||||
|
|
||||||
|
/// GPIO Pull-up/down Clock Register 0
|
||||||
|
GPPUDCLK0 [
|
||||||
|
/// Pin 15
|
||||||
|
PUDCLK15 OFFSET(15) NUMBITS(1) [
|
||||||
|
NoEffect = 0,
|
||||||
|
AssertClock = 1
|
||||||
|
],
|
||||||
|
|
||||||
|
/// Pin 14
|
||||||
|
PUDCLK14 OFFSET(14) NUMBITS(1) [
|
||||||
|
NoEffect = 0,
|
||||||
|
AssertClock = 1
|
||||||
|
]
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
pub const GPFSEL1: *const ReadWrite<u32, GPFSEL1::Register> =
|
||||||
|
(MMIO_BASE + 0x0020_0004) as *const ReadWrite<u32, GPFSEL1::Register>;
|
||||||
|
|
||||||
|
pub const GPPUD: *const ReadWrite<u32> = (MMIO_BASE + 0x0020_0094) as *const ReadWrite<u32>;
|
||||||
|
|
||||||
|
pub const GPPUDCLK0: *const ReadWrite<u32, GPPUDCLK0::Register> =
|
||||||
|
(MMIO_BASE + 0x0020_0098) as *const ReadWrite<u32, GPPUDCLK0::Register>;
|
@ -0,0 +1,111 @@
|
|||||||
|
/*
|
||||||
|
* MIT License
|
||||||
|
*
|
||||||
|
* Copyright (c) 2018 Andre Richter <andre.o.richter@gmail.com>
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all
|
||||||
|
* copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#![no_std]
|
||||||
|
#![no_main]
|
||||||
|
#![feature(asm)]
|
||||||
|
#![feature(const_fn)]
|
||||||
|
|
||||||
|
extern crate cortex_a;
|
||||||
|
|
||||||
|
#[macro_use]
|
||||||
|
extern crate raspi3_boot;
|
||||||
|
|
||||||
|
#[macro_use]
|
||||||
|
extern crate register;
|
||||||
|
|
||||||
|
const MMIO_BASE: u32 = 0x3F00_0000;
|
||||||
|
|
||||||
|
mod gpio;
|
||||||
|
mod mbox;
|
||||||
|
mod mmu;
|
||||||
|
mod uart;
|
||||||
|
mod benchmark;
|
||||||
|
|
||||||
|
fn do_benchmarks(uart: &uart::Uart) {
|
||||||
|
const SIZE_2MIB: u64 = 2 * 1024 * 1024;
|
||||||
|
|
||||||
|
// Start of the __SECOND__ virtual 2 MiB block (counting starts at zero).
|
||||||
|
// NON-cacheable DRAM memory.
|
||||||
|
let non_cacheable_addr: u64 = SIZE_2MIB;
|
||||||
|
|
||||||
|
// Start of the __THIRD__ virtual 2 MiB block.
|
||||||
|
// Cacheable DRAM memory
|
||||||
|
let cacheable_addr: u64 = 2 * SIZE_2MIB;
|
||||||
|
|
||||||
|
uart.puts("Benchmarking non-cacheable DRAM modifications at virtual 0x");
|
||||||
|
uart.hex(non_cacheable_addr as u32);
|
||||||
|
uart.puts(", physical 0x");
|
||||||
|
uart.hex(2 * SIZE_2MIB as u32);
|
||||||
|
uart.puts(":\n");
|
||||||
|
|
||||||
|
let result_nc = benchmark::batch_modify(non_cacheable_addr);
|
||||||
|
uart.dec(result_nc);
|
||||||
|
uart.puts(" miliseconds.\n\n");
|
||||||
|
|
||||||
|
uart.puts("Benchmarking cacheable DRAM modifications at virtual 0x");
|
||||||
|
uart.hex(cacheable_addr as u32);
|
||||||
|
uart.puts(", physical 0x");
|
||||||
|
uart.hex(2 * SIZE_2MIB as u32);
|
||||||
|
uart.puts(":\n");
|
||||||
|
let result_c = benchmark::batch_modify(cacheable_addr);
|
||||||
|
uart.dec(result_c);
|
||||||
|
uart.puts(" miliseconds.\n\n");
|
||||||
|
|
||||||
|
let percent_diff = (result_nc - result_c) * 100 / result_c;
|
||||||
|
|
||||||
|
uart.puts("With caching, the function is ");
|
||||||
|
uart.dec(percent_diff);
|
||||||
|
uart.puts("% faster!\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
entry!(kernel_entry);
|
||||||
|
|
||||||
|
fn kernel_entry() -> ! {
|
||||||
|
let mut mbox = mbox::Mbox::new();
|
||||||
|
let uart = uart::Uart::new(uart::UART_PHYS_BASE);
|
||||||
|
|
||||||
|
// set up serial console
|
||||||
|
if uart.init(&mut mbox).is_err() {
|
||||||
|
loop {
|
||||||
|
cortex_a::asm::wfe() // If UART fails, abort early
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uart.getc(); // Press a key first before being greeted
|
||||||
|
uart.puts("Hello Rustacean!\n\n");
|
||||||
|
|
||||||
|
uart.puts("\nSwitching MMU on now...");
|
||||||
|
|
||||||
|
unsafe { mmu::init() };
|
||||||
|
|
||||||
|
uart.puts("MMU is live \\o/\n\n");
|
||||||
|
|
||||||
|
do_benchmarks(&uart);
|
||||||
|
|
||||||
|
// echo everything back
|
||||||
|
loop {
|
||||||
|
uart.send(uart.getc());
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,159 @@
|
|||||||
|
/*
|
||||||
|
* MIT License
|
||||||
|
*
|
||||||
|
* Copyright (c) 2018 Andre Richter <andre.o.richter@gmail.com>
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all
|
||||||
|
* copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
use super::MMIO_BASE;
|
||||||
|
use core::ops;
|
||||||
|
use cortex_a::asm;
|
||||||
|
use register::mmio::{ReadOnly, WriteOnly};
|
||||||
|
|
||||||
|
register_bitfields! {
    u32,

    // Mailbox status register. `Mbox::call()` waits while FULL is set before
    // writing a request and while EMPTY is set before reading a response.
    STATUS [
        FULL  OFFSET(31) NUMBITS(1) [],
        EMPTY OFFSET(30) NUMBITS(1) []
    ]
}
|
||||||
|
|
||||||
|
const VIDEOCORE_MBOX: u32 = MMIO_BASE + 0xB880;
|
||||||
|
|
||||||
|
#[allow(non_snake_case)]
|
||||||
|
#[repr(C)]
|
||||||
|
pub struct RegisterBlock {
|
||||||
|
READ: ReadOnly<u32>, // 0x00
|
||||||
|
__reserved_0: [u32; 5], // 0x04
|
||||||
|
STATUS: ReadOnly<u32, STATUS::Register>, // 0x18
|
||||||
|
__reserved_1: u32, // 0x1C
|
||||||
|
WRITE: WriteOnly<u32>, // 0x20
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Errors reported by `Mbox::call()`.
///
/// The derives allow callers to log (`Debug`), copy and compare the error.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MboxError {
    /// The response code in the buffer was `response::ERROR`.
    ResponseError,
    /// The response code in the buffer was neither success nor error.
    UnknownError,
}

/// Result type of mailbox operations, with `MboxError` as the error type.
pub type Result<T> = ::core::result::Result<T, MboxError>;
|
||||||
|
|
||||||
|
/// Mailbox channels.
pub mod channel {
    /// Channel number passed to `Mbox::call()` for property requests.
    pub const PROP: u32 = 8;
}

/// Tag identifiers placed into the message buffer.
pub mod tag {
    /// Tag requesting a clock rate change.
    pub const SETCLKRATE: u32 = 0x38002;
    /// Terminates the tag list.
    pub const LAST: u32 = 0;
}

/// Clock identifiers used together with clock-related tags.
pub mod clock {
    /// Identifier of the UART clock.
    pub const UART: u32 = 0x0000_0002;
}

// Response codes that `Mbox::call()` looks for in `buffer[1]`.
mod response {
    pub const SUCCESS: u32 = 0x8000_0000;
    pub const ERROR: u32 = 0x8000_0001; // error parsing request buffer (partial response)
}

/// Code marking a buffer as a request.
pub const REQUEST: u32 = 0;
|
||||||
|
|
||||||
|
/// Public interface to the mailbox.
///
/// Owns the message buffer that is exchanged with the VideoCore.
#[repr(C, align(16))]
pub struct Mbox {
    // The address for buffer needs to be 16-byte aligned so that the
    // VideoCore can handle it properly.
    pub buffer: [u32; 36],
}
|
||||||
|
|
||||||
|
/// Deref to RegisterBlock
|
||||||
|
///
|
||||||
|
/// Allows writing
|
||||||
|
/// ```
|
||||||
|
/// self.STATUS.read()
|
||||||
|
/// ```
|
||||||
|
/// instead of something along the lines of
|
||||||
|
/// ```
|
||||||
|
/// unsafe { (*Mbox::ptr()).STATUS.read() }
|
||||||
|
/// ```
|
||||||
|
impl ops::Deref for Mbox {
|
||||||
|
type Target = RegisterBlock;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
unsafe { &*Self::ptr() }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Mbox {
|
||||||
|
pub fn new() -> Mbox {
|
||||||
|
Mbox { buffer: [0; 36] }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a pointer to the register block
|
||||||
|
fn ptr() -> *const RegisterBlock {
|
||||||
|
VIDEOCORE_MBOX as *const _
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Make a mailbox call. Returns Err(MboxError) on failure, Ok(()) success
|
||||||
|
pub fn call(&self, channel: u32) -> Result<()> {
|
||||||
|
// wait until we can write to the mailbox
|
||||||
|
loop {
|
||||||
|
if !self.STATUS.is_set(STATUS::FULL) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
asm::nop();
|
||||||
|
}
|
||||||
|
|
||||||
|
let buf_ptr = self.buffer.as_ptr() as u32;
|
||||||
|
|
||||||
|
// write the address of our message to the mailbox with channel identifier
|
||||||
|
self.WRITE.set((buf_ptr & !0xF) | (channel & 0xF));
|
||||||
|
|
||||||
|
// now wait for the response
|
||||||
|
loop {
|
||||||
|
// is there a response?
|
||||||
|
loop {
|
||||||
|
if !self.STATUS.is_set(STATUS::EMPTY) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
asm::nop();
|
||||||
|
}
|
||||||
|
|
||||||
|
let resp: u32 = self.READ.get();
|
||||||
|
|
||||||
|
// is it a response to our message?
|
||||||
|
if ((resp & 0xF) == channel) && ((resp & !0xF) == buf_ptr) {
|
||||||
|
// is it a valid successful response?
|
||||||
|
return match self.buffer[1] {
|
||||||
|
response::SUCCESS => Ok(()),
|
||||||
|
response::ERROR => Err(MboxError::ResponseError),
|
||||||
|
_ => Err(MboxError::UnknownError),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,219 @@
|
|||||||
|
/*
|
||||||
|
* MIT License
|
||||||
|
*
|
||||||
|
* Copyright (c) 2018 Andre Richter <andre.o.richter@gmail.com>
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all
|
||||||
|
* copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
use cortex_a::{barrier, regs::*};
|
||||||
|
|
||||||
|
register_bitfields! {u64,
    // Bit layout of a stage 1 translation table descriptor, listed from the
    // least significant field upwards.
    //
    // AArch64 Reference Manual page 2150
    STAGE1_DESCRIPTOR [
        /// Marks the descriptor as valid
        VALID OFFSET(0) NUMBITS(1) [
            False = 0,
            True = 1
        ],

        /// Selects between a block descriptor and a table descriptor
        TYPE OFFSET(1) NUMBITS(1) [
            Block = 0,
            Table = 1
        ],

        /// Index of the memory attributes to use, as configured in MAIR_EL1
        AttrIndx OFFSET(2) NUMBITS(3) [],

        /// Access Permissions
        AP OFFSET(6) NUMBITS(2) [
            RW_EL1 = 0b00,
            RW_EL1_EL0 = 0b01,
            RO_EL1 = 0b10,
            RO_EL1_EL0 = 0b11
        ],

        /// Shareability field
        SH OFFSET(8) NUMBITS(2) [
            OuterShareable = 0b10,
            InnerShareable = 0b11
        ],

        /// Access flag
        AF OFFSET(10) NUMBITS(1) [
            False = 0,
            True = 1
        ],

        /// Address of the next-level table, bits [47:12] (4 KiB granule)
        NEXT_LVL_TABLE_ADDR_4KiB OFFSET(12) NUMBITS(36) [],

        /// Output address of a level 2 block, bits [47:21] (4 KiB granule)
        LVL2_OUTPUT_ADDR_4KiB OFFSET(21) NUMBITS(27) [],

        /// Execute-never
        XN OFFSET(54) NUMBITS(1) [
            False = 0,
            True = 1
        ]
    ]
}
|
||||||
|
|
||||||
|
/// Helper trait for obtaining the memory address of a value as a plain
/// 64-bit integer, so that it can be placed into translation table
/// descriptors and translation table base registers.
trait BaseAddr {
|
||||||
|
/// Returns the address at which `self` is stored.
fn base_addr(&self) -> u64;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl BaseAddr for [u64; 512] {
|
||||||
|
fn base_addr(&self) -> u64 {
|
||||||
|
self as *const u64 as u64
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const NUM_ENTRIES_4KIB: usize = 512;
|
||||||
|
|
||||||
|
static mut LVL2_TABLE: [u64; NUM_ENTRIES_4KIB] = [0; NUM_ENTRIES_4KIB];
|
||||||
|
static mut SINGLE_LVL3_TABLE: [u64; NUM_ENTRIES_4KIB] = [0; NUM_ENTRIES_4KIB];
|
||||||
|
|
||||||
|
/// Set up identity mapped page tables for the first 1 gigabyte of address
|
||||||
|
/// space.
|
||||||
|
pub unsafe fn init() {
|
||||||
|
// First, define the three memory types that we will map. Cacheable and
|
||||||
|
// non-cacheable normal DRAM, and device.
|
||||||
|
MAIR_EL1.write(
|
||||||
|
// Attribute 2
|
||||||
|
MAIR_EL1::Attr2_HIGH::Memory_OuterNonCacheable
|
||||||
|
+ MAIR_EL1::Attr2_LOW_MEMORY::InnerNonCacheable
|
||||||
|
|
||||||
|
// Attribute 1
|
||||||
|
+ MAIR_EL1::Attr1_HIGH::Memory_OuterWriteBack_NonTransient_ReadAlloc_WriteAlloc
|
||||||
|
+ MAIR_EL1::Attr1_LOW_MEMORY::InnerWriteBack_NonTransient_ReadAlloc_WriteAlloc
|
||||||
|
|
||||||
|
// Attribute 0
|
||||||
|
+ MAIR_EL1::Attr0_HIGH::Device
|
||||||
|
+ MAIR_EL1::Attr0_LOW_DEVICE::Device_nGnRE,
|
||||||
|
);
|
||||||
|
|
||||||
|
// Descriptive consts for indexing into the correct MAIR_EL1 attributes.
|
||||||
|
mod mair {
|
||||||
|
pub const DEVICE: u64 = 0;
|
||||||
|
pub const NORMAL: u64 = 1;
|
||||||
|
pub const NORMAL_NON_CACHEABLE: u64 = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set up the first LVL2 entry, pointing to a 4KiB table base address.
|
||||||
|
let lvl3_base: u64 = SINGLE_LVL3_TABLE.base_addr() >> 12;
|
||||||
|
LVL2_TABLE[0] = (STAGE1_DESCRIPTOR::VALID::True
|
||||||
|
+ STAGE1_DESCRIPTOR::TYPE::Table
|
||||||
|
+ STAGE1_DESCRIPTOR::NEXT_LVL_TABLE_ADDR_4KiB.val(lvl3_base))
|
||||||
|
.value;
|
||||||
|
|
||||||
|
// The second 2 MiB block.
|
||||||
|
LVL2_TABLE[1] = (STAGE1_DESCRIPTOR::VALID::True
|
||||||
|
+ STAGE1_DESCRIPTOR::TYPE::Block
|
||||||
|
+ STAGE1_DESCRIPTOR::AttrIndx.val(mair::NORMAL_NON_CACHEABLE)
|
||||||
|
+ STAGE1_DESCRIPTOR::AP::RW_EL1
|
||||||
|
+ STAGE1_DESCRIPTOR::SH::OuterShareable
|
||||||
|
+ STAGE1_DESCRIPTOR::AF::True
|
||||||
|
// This translation is accessed for virtual 0x200000. Point to physical
|
||||||
|
// 0x400000, aka the third phyiscal 2 MiB DRAM block (third block == 2,
|
||||||
|
// because we start counting at 0).
|
||||||
|
//
|
||||||
|
// Here, we configure it non-cacheable.
|
||||||
|
+ STAGE1_DESCRIPTOR::LVL2_OUTPUT_ADDR_4KiB.val(2)
|
||||||
|
+ STAGE1_DESCRIPTOR::XN::True)
|
||||||
|
.value;
|
||||||
|
|
||||||
|
// Fill the rest of the LVL2 (2MiB) entries as block
|
||||||
|
// descriptors. Differentiate between normal and device mem.
|
||||||
|
let mmio_base: u64 = (super::MMIO_BASE >> 21).into();
|
||||||
|
let common = STAGE1_DESCRIPTOR::VALID::True
|
||||||
|
+ STAGE1_DESCRIPTOR::TYPE::Block
|
||||||
|
+ STAGE1_DESCRIPTOR::AP::RW_EL1
|
||||||
|
+ STAGE1_DESCRIPTOR::AF::True
|
||||||
|
+ STAGE1_DESCRIPTOR::XN::True;
|
||||||
|
|
||||||
|
// Notice the skip(2). Start at the third 2 MiB DRAM block, which will point
|
||||||
|
// virtual 0x400000 to physical 0x400000, configured as cacheable memory.
|
||||||
|
for (i, entry) in LVL2_TABLE.iter_mut().enumerate().skip(2) {
|
||||||
|
let j: u64 = i as u64;
|
||||||
|
|
||||||
|
let mem_attr = if j >= mmio_base {
|
||||||
|
STAGE1_DESCRIPTOR::SH::OuterShareable + STAGE1_DESCRIPTOR::AttrIndx.val(mair::DEVICE)
|
||||||
|
} else {
|
||||||
|
STAGE1_DESCRIPTOR::SH::InnerShareable + STAGE1_DESCRIPTOR::AttrIndx.val(mair::NORMAL)
|
||||||
|
};
|
||||||
|
|
||||||
|
*entry = (common + mem_attr + STAGE1_DESCRIPTOR::LVL2_OUTPUT_ADDR_4KiB.val(j)).value;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Finally, fill the single LVL3 table (4KiB granule). Differentiate between
|
||||||
|
// code/RO and RW sections.
|
||||||
|
//
|
||||||
|
// Using the linker script, we ensure that the RO sections are 4KiB aligned,
|
||||||
|
// and we export their boundaries via symbols.
|
||||||
|
extern "C" {
|
||||||
|
static mut __ro_start: u64;
|
||||||
|
static mut __ro_end: u64;
|
||||||
|
}
|
||||||
|
|
||||||
|
const PAGESIZE: u64 = 4096;
|
||||||
|
let ro_start: u64 = &__ro_start as *const _ as u64 / PAGESIZE;
|
||||||
|
let ro_end: u64 = &__ro_end as *const _ as u64 / PAGESIZE;
|
||||||
|
let common = STAGE1_DESCRIPTOR::VALID::True
|
||||||
|
+ STAGE1_DESCRIPTOR::TYPE::Table
|
||||||
|
+ STAGE1_DESCRIPTOR::AttrIndx.val(mair::NORMAL)
|
||||||
|
+ STAGE1_DESCRIPTOR::SH::InnerShareable
|
||||||
|
+ STAGE1_DESCRIPTOR::AF::True;
|
||||||
|
|
||||||
|
for (i, entry) in SINGLE_LVL3_TABLE.iter_mut().enumerate() {
|
||||||
|
let j: u64 = i as u64;
|
||||||
|
|
||||||
|
let mem_attr = if j < ro_start || j > ro_end {
|
||||||
|
STAGE1_DESCRIPTOR::AP::RW_EL1 + STAGE1_DESCRIPTOR::XN::True
|
||||||
|
} else {
|
||||||
|
STAGE1_DESCRIPTOR::AP::RO_EL1 + STAGE1_DESCRIPTOR::XN::False
|
||||||
|
};
|
||||||
|
|
||||||
|
*entry = (common + mem_attr + STAGE1_DESCRIPTOR::NEXT_LVL_TABLE_ADDR_4KiB.val(j)).value;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Point to the LVL2 table base address in TTBR0.
|
||||||
|
TTBR0_EL1.set_baddr(LVL2_TABLE.base_addr());
|
||||||
|
|
||||||
|
// Configure various settings of stage 1 of the EL1 translation regime.
|
||||||
|
let ips = ID_AA64MMFR0_EL1.read(ID_AA64MMFR0_EL1::PARange);
|
||||||
|
TCR_EL1.write(
|
||||||
|
TCR_EL1::TBI0::Ignored
|
||||||
|
+ TCR_EL1::IPS.val(ips)
|
||||||
|
+ TCR_EL1::TG0::KiB_4 // 4 KiB granule
|
||||||
|
+ TCR_EL1::SH0::Inner
|
||||||
|
+ TCR_EL1::ORGN0::WriteBack_ReadAlloc_WriteAlloc_Cacheable
|
||||||
|
+ TCR_EL1::IRGN0::WriteBack_ReadAlloc_WriteAlloc_Cacheable
|
||||||
|
+ TCR_EL1::EPD0::EnableTTBR0Walks
|
||||||
|
+ TCR_EL1::T0SZ.val(34), // Start walks at level 2
|
||||||
|
);
|
||||||
|
|
||||||
|
// Switch the MMU on.
|
||||||
|
//
|
||||||
|
// First, force all previous changes to be seen before the MMU is enabled.
|
||||||
|
barrier::isb(barrier::SY);
|
||||||
|
|
||||||
|
// Enable the MMU and turn on caching
|
||||||
|
SCTLR_EL1.modify(SCTLR_EL1::M::Enable + SCTLR_EL1::C::Cacheable);
|
||||||
|
|
||||||
|
// Force MMU init to complete before next instruction
|
||||||
|
barrier::isb(barrier::SY);
|
||||||
|
}
|
@ -0,0 +1,310 @@
|
|||||||
|
/*
|
||||||
|
* MIT License
|
||||||
|
*
|
||||||
|
* Copyright (c) 2018 Andre Richter <andre.o.richter@gmail.com>
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all
|
||||||
|
* copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
use super::MMIO_BASE;
|
||||||
|
use core::{
|
||||||
|
ops,
|
||||||
|
sync::atomic::{compiler_fence, Ordering},
|
||||||
|
};
|
||||||
|
use cortex_a::asm;
|
||||||
|
use gpio;
|
||||||
|
use mbox;
|
||||||
|
use register::mmio::*;
|
||||||
|
|
||||||
|
// PL011 UART registers.
|
||||||
|
//
|
||||||
|
// Descriptions taken from
|
||||||
|
// https://github.com/raspberrypi/documentation/files/1888662/BCM2837-ARM-Peripherals.-.Revised.-.V2-1.pdf
|
||||||
|
register_bitfields! {
    u32,

    /// Flag Register
    FR [
        /// Receive FIFO empty. How this bit behaves depends on the FEN
        /// bit of the UARTLCR_H Register: with the FIFO disabled it is
        /// set while the receive holding register is empty, with the
        /// FIFO enabled it is set while the receive FIFO is empty.
        RXFE OFFSET(4) NUMBITS(1) [],

        /// Transmit FIFO full. How this bit behaves depends on the FEN
        /// bit of the UARTLCR_H Register: with the FIFO disabled it is
        /// set while the transmit holding register is full, with the
        /// FIFO enabled it is set while the transmit FIFO is full.
        TXFF OFFSET(5) NUMBITS(1) []
    ],

    /// Integer Baud rate divisor
    IBRD [
        /// Integer part of the baud rate divisor
        IBRD OFFSET(0) NUMBITS(16) []
    ],

    /// Fractional Baud rate divisor
    FBRD [
        /// Fractional part of the baud rate divisor
        FBRD OFFSET(0) NUMBITS(6) []
    ],

    /// Line Control register
    LCRH [
        /// Word length: the number of data bits transmitted or
        /// received in a frame.
        WLEN OFFSET(5) NUMBITS(2) [
            FiveBit = 0b00,
            SixBit = 0b01,
            SevenBit = 0b10,
            EightBit = 0b11
        ]
    ],

    /// Control Register
    CR [
        /// UART enable. If the UART is disabled in the middle of
        /// transmission or reception, it completes the current
        /// character before stopping.
        UARTEN OFFSET(0) NUMBITS(1) [
            Disabled = 0,
            Enabled = 1
        ],

        /// Transmit enable. Setting this bit to 1 enables the transmit
        /// section of the UART, and data transmission occurs for UART
        /// signals. When the UART is disabled in the middle of
        /// transmission, it completes the current character before
        /// stopping.
        TXE OFFSET(8) NUMBITS(1) [
            Disabled = 0,
            Enabled = 1
        ],

        /// Receive enable. Setting this bit to 1 enables the receive
        /// section of the UART, and data reception occurs for UART
        /// signals. When the UART is disabled in the middle of
        /// reception, it completes the current character before
        /// stopping.
        RXE OFFSET(9) NUMBITS(1) [
            Disabled = 0,
            Enabled = 1
        ]
    ],

    /// Interrupt Clear Register
    ICR [
        /// Meta field covering all pending interrupts
        ALL OFFSET(0) NUMBITS(11) []
    ]
}
|
||||||
|
|
||||||
|
/// Physical base address of the PL011 UART register block, located at
/// offset 0x20_1000 from the start of the MMIO region.
pub const UART_PHYS_BASE: u32 = MMIO_BASE + 0x20_1000;
|
||||||
|
|
||||||
|
/// Memory layout of the PL011 UART registers used by this driver.
///
/// The struct is `repr(C)`, so field order and the reserved padding arrays
/// determine each register's byte offset (noted at the end of every line).
/// Fields keep the upper-case register names, hence the lint exception.
#[allow(non_snake_case)]
|
||||||
|
#[repr(C)]
|
||||||
|
pub struct RegisterBlock {
|
||||||
|
// Data register: written by `send`, read by `getc`.
DR: ReadWrite<u32>, // 0x00
|
||||||
|
__reserved_0: [u32; 5], // 0x04
|
||||||
|
// Flag register: polled for FIFO full/empty state.
FR: ReadOnly<u32, FR::Register>, // 0x18
|
||||||
|
__reserved_1: [u32; 2], // 0x1c
|
||||||
|
// Integer and fractional baud rate divisors.
IBRD: WriteOnly<u32, IBRD::Register>, // 0x24
|
||||||
|
FBRD: WriteOnly<u32, FBRD::Register>, // 0x28
|
||||||
|
// Line control (word length).
LCRH: WriteOnly<u32, LCRH::Register>, // 0x2C
|
||||||
|
// Control register (UART / TX / RX enable).
CR: WriteOnly<u32, CR::Register>, // 0x30
|
||||||
|
__reserved_2: [u32; 4], // 0x34
|
||||||
|
// Interrupt clear register.
ICR: WriteOnly<u32, ICR::Register>, // 0x44
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Errors that can occur while bringing up the UART.
///
/// The common traits are derived so that callers can compare, copy and
/// debug-print results carrying this error (for example via `unwrap()` or
/// `assert_eq!`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UartError {
    /// The mailbox call that configures the UART clock reported a failure.
    MailboxError,
}

/// Result type used by the UART driver, with `UartError` as the error type.
pub type Result<T> = ::core::result::Result<T, UartError>;
|
||||||
|
|
||||||
|
/// Handle to a PL011 UART. It stores only the base address of the register
/// block; the registers themselves are reached by dereferencing the handle.
pub struct Uart {
|
||||||
|
// Address of the `RegisterBlock`, as passed to `Uart::new`.
uart_base: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ops::Deref for Uart {
|
||||||
|
type Target = RegisterBlock;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
unsafe { &*self.ptr() }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Uart {
|
||||||
|
pub fn new(uart_base: u32) -> Uart {
|
||||||
|
Uart { uart_base }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a pointer to the register block
|
||||||
|
fn ptr(&self) -> *const RegisterBlock {
|
||||||
|
self.uart_base as *const _
|
||||||
|
}
|
||||||
|
|
||||||
|
///Set baud rate and characteristics (115200 8N1) and map to GPIO
|
||||||
|
pub fn init(&self, mbox: &mut mbox::Mbox) -> Result<()> {
|
||||||
|
// turn off UART0
|
||||||
|
self.CR.set(0);
|
||||||
|
|
||||||
|
// set up clock for consistent divisor values
|
||||||
|
mbox.buffer[0] = 9 * 4;
|
||||||
|
mbox.buffer[1] = mbox::REQUEST;
|
||||||
|
mbox.buffer[2] = mbox::tag::SETCLKRATE;
|
||||||
|
mbox.buffer[3] = 12;
|
||||||
|
mbox.buffer[4] = 8;
|
||||||
|
mbox.buffer[5] = mbox::clock::UART; // UART clock
|
||||||
|
mbox.buffer[6] = 4_000_000; // 4Mhz
|
||||||
|
mbox.buffer[7] = 0; // skip turbo setting
|
||||||
|
mbox.buffer[8] = mbox::tag::LAST;
|
||||||
|
|
||||||
|
// Insert a compiler fence that ensures that all stores to the
|
||||||
|
// mbox buffer are finished before the GPU is signaled (which
|
||||||
|
// is done by a store operation as well).
|
||||||
|
compiler_fence(Ordering::Release);
|
||||||
|
|
||||||
|
if mbox.call(mbox::channel::PROP).is_err() {
|
||||||
|
return Err(UartError::MailboxError); // Abort if UART clocks couldn't be set
|
||||||
|
};
|
||||||
|
|
||||||
|
// map UART0 to GPIO pins
|
||||||
|
unsafe {
|
||||||
|
(*gpio::GPFSEL1).modify(gpio::GPFSEL1::FSEL14::TXD0 + gpio::GPFSEL1::FSEL15::RXD0);
|
||||||
|
|
||||||
|
(*gpio::GPPUD).set(0); // enable pins 14 and 15
|
||||||
|
for _ in 0..150 {
|
||||||
|
asm::nop();
|
||||||
|
}
|
||||||
|
|
||||||
|
(*gpio::GPPUDCLK0).modify(
|
||||||
|
gpio::GPPUDCLK0::PUDCLK14::AssertClock + gpio::GPPUDCLK0::PUDCLK15::AssertClock,
|
||||||
|
);
|
||||||
|
for _ in 0..150 {
|
||||||
|
asm::nop();
|
||||||
|
}
|
||||||
|
|
||||||
|
(*gpio::GPPUDCLK0).set(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
self.ICR.write(ICR::ALL::CLEAR);
|
||||||
|
self.IBRD.write(IBRD::IBRD.val(2)); // Results in 115200 baud
|
||||||
|
self.FBRD.write(FBRD::FBRD.val(0xB));
|
||||||
|
self.LCRH.write(LCRH::WLEN::EightBit); // 8N1
|
||||||
|
self.CR
|
||||||
|
.write(CR::UARTEN::Enabled + CR::TXE::Enabled + CR::RXE::Enabled);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Send a character
|
||||||
|
pub fn send(&self, c: char) {
|
||||||
|
// wait until we can send
|
||||||
|
loop {
|
||||||
|
if !self.FR.is_set(FR::TXFF) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
asm::nop();
|
||||||
|
}
|
||||||
|
|
||||||
|
// write the character to the buffer
|
||||||
|
self.DR.set(c as u32);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Receive a character
|
||||||
|
pub fn getc(&self) -> char {
|
||||||
|
// wait until something is in the buffer
|
||||||
|
loop {
|
||||||
|
if !self.FR.is_set(FR::RXFE) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
asm::nop();
|
||||||
|
}
|
||||||
|
|
||||||
|
// read it and return
|
||||||
|
let mut ret = self.DR.get() as u8 as char;
|
||||||
|
|
||||||
|
// convert carrige return to newline
|
||||||
|
if ret == '\r' {
|
||||||
|
ret = '\n'
|
||||||
|
}
|
||||||
|
|
||||||
|
ret
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Display a string
|
||||||
|
pub fn puts(&self, string: &str) {
|
||||||
|
for c in string.chars() {
|
||||||
|
// convert newline to carrige return + newline
|
||||||
|
if c == '\n' {
|
||||||
|
self.send('\r')
|
||||||
|
}
|
||||||
|
|
||||||
|
self.send(c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Display a binary value in hexadecimal
|
||||||
|
pub fn hex(&self, d: u32) {
|
||||||
|
let mut n;
|
||||||
|
|
||||||
|
for i in 0..8 {
|
||||||
|
// get highest tetrad
|
||||||
|
n = d.wrapping_shr(28 - i * 4) & 0xF;
|
||||||
|
|
||||||
|
// 0-9 => '0'-'9', 10-15 => 'A'-'F'
|
||||||
|
// Add proper offset for ASCII table
|
||||||
|
if n > 9 {
|
||||||
|
n += 0x37;
|
||||||
|
} else {
|
||||||
|
n += 0x30;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.send(n as u8 as char);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Display a binary value in decimal
|
||||||
|
pub fn dec(&self, d: u32) {
|
||||||
|
let mut digits: [char; 10] = ['\0'; 10];
|
||||||
|
let mut d = d;
|
||||||
|
let mut i: usize = 0;
|
||||||
|
|
||||||
|
loop {
|
||||||
|
digits[i] = ((d % 10) + 0x30) as u8 as char;
|
||||||
|
|
||||||
|
i += 1;
|
||||||
|
d /= 10;
|
||||||
|
|
||||||
|
if d == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for c in digits.iter().rev() {
|
||||||
|
self.send(*c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue