mirror of
https://github.com/rust-embedded/rust-raspberrypi-OS-tutorials.git
synced 2024-11-17 21:25:52 +00:00
474 lines
15 KiB
Markdown
474 lines
15 KiB
Markdown
# Tutorial 07 - UART Chainloader
|
|
|
|
## tl;dr
|
|
|
|
Running from an SD card was a nice experience, but it would be extremely tedious
|
|
to do it for every new binary. Let's write a [chainloader] using [position
|
|
independent code]. This will be the last binary you need to put on the SD card
|
|
for quite some time. Each following tutorial will provide a `chainboot` target in
|
|
the `Makefile` that lets you conveniently load the kernel over `UART`.
|
|
|
|
Our chainloader is called `MiniLoad` and is inspired by [raspbootin].
|
|
|
|
[chainloader]: https://en.wikipedia.org/wiki/Chain_loading
|
|
[position independent code]: https://en.wikipedia.org/wiki/Position-independent_code
|
|
[raspbootin]: https://github.com/mrvn/raspbootin
|
|
|
|
You can try it with this tutorial already:
|
|
1. Depending on your target hardware: `make` or `BSP=rpi4 make`.
|
|
2. Copy `kernel8.img` to the SD card.
|
|
3. Execute `make chainboot` or `BSP=rpi4 make chainboot`.
|
|
4. Now plug in the USB Serial.
|
|
5. Observe the loader fetching a kernel over `UART`:
|
|
|
|
```console
|
|
make chainboot
|
|
[...]
|
|
### Listening on /dev/ttyUSB0
|
|
__ __ _ _ _ _
|
|
| \/ (_)_ _ (_) | ___ __ _ __| |
|
|
| |\/| | | ' \| | |__/ _ \/ _` / _` |
|
|
|_| |_|_|_||_|_|____\___/\__,_\__,_|
|
|
|
|
Raspberry Pi 3
|
|
|
|
[ML] Requesting binary
|
|
### sending kernel demo_payload_rpi3.img [7840 byte]
|
|
### finished sending
|
|
[ML] Loaded! Executing the payload now
|
|
|
|
[0] Booting on: Raspberry Pi 3
|
|
[1] Drivers loaded:
|
|
1. GPIO
|
|
2. PL011Uart
|
|
[2] Chars written: 84
|
|
[3] Echoing input now
|
|
```
|
|
|
|
In this tutorial, a version of the kernel from the previous tutorial is loaded
|
|
for demo purposes. In subsequent tutorials, it will be the working directory's
|
|
kernel.
|
|
|
|
### Observing the jump
|
|
|
|
The `Makefile` in this tutorial has an additional target, `qemuasm`, that lets
|
|
you nicely observe the jump from the loaded address (`0x80_XXX`) to the
|
|
relocated code at (`0x3EFF_0XXX`):
|
|
|
|
```console
|
|
make qemuasm
|
|
[...]
|
|
IN:
|
|
0x000809fc: d0000008 adrp x8, #0x82000
|
|
0x00080a00: 52800020 movz w0, #0x1
|
|
0x00080a04: f9408908 ldr x8, [x8, #0x110]
|
|
0x00080a08: d63f0100 blr x8
|
|
|
|
----------------
|
|
IN:
|
|
0x3eff0528: d0000008 adrp x8, #0x3eff2000
|
|
0x3eff052c: d0000009 adrp x9, #0x3eff2000
|
|
0x3eff0530: f9411508 ldr x8, [x8, #0x228]
|
|
0x3eff0534: f9411929 ldr x9, [x9, #0x230]
|
|
0x3eff0538: eb08013f cmp x9, x8
|
|
0x3eff053c: 540000c2 b.hs #0x3eff0554
|
|
[...]
|
|
```
|
|
|
|
## Diff to previous
|
|
```diff
|
|
Binary files 06_drivers_gpio_uart/demo_payload_rpi3.img and 07_uart_chainloader/demo_payload_rpi3.img differ
|
|
Binary files 06_drivers_gpio_uart/demo_payload_rpi4.img and 07_uart_chainloader/demo_payload_rpi4.img differ
|
|
|
|
diff -uNr 06_drivers_gpio_uart/Makefile 07_uart_chainloader/Makefile
|
|
--- 06_drivers_gpio_uart/Makefile
|
|
+++ 07_uart_chainloader/Makefile
|
|
@@ -15,7 +15,8 @@
|
|
QEMU_MACHINE_TYPE = raspi3
|
|
QEMU_MISC_ARGS = -serial stdio
|
|
LINKER_FILE = src/bsp/rpi/link.ld
|
|
- RUSTC_MISC_ARGS = -C target-cpu=cortex-a53
|
|
+ RUSTC_MISC_ARGS = -C target-cpu=cortex-a53 -C relocation-model=pic
|
|
+ CHAINBOOT_DEMO_PAYLOAD = demo_payload_rpi3.img
|
|
else ifeq ($(BSP),rpi4)
|
|
TARGET = aarch64-unknown-none-softfloat
|
|
OUTPUT = kernel8.img
|
|
@@ -23,7 +24,8 @@
|
|
# QEMU_MACHINE_TYPE =
|
|
# QEMU_MISC_ARGS = -serial stdio
|
|
LINKER_FILE = src/bsp/rpi/link.ld
|
|
- RUSTC_MISC_ARGS = -C target-cpu=cortex-a72
|
|
+ RUSTC_MISC_ARGS = -C target-cpu=cortex-a72 -C relocation-model=pic
|
|
+ CHAINBOOT_DEMO_PAYLOAD = demo_payload_rpi4.img
|
|
endif
|
|
|
|
SOURCES = $(wildcard **/*.rs) $(wildcard **/*.S) $(wildcard **/*.ld)
|
|
@@ -47,9 +49,14 @@
|
|
|
|
DOCKER_CMD = docker run -it --rm
|
|
DOCKER_ARG_CURDIR = -v $(shell pwd):/work -w /work
|
|
-DOCKER_EXEC_QEMU = $(QEMU_BINARY) -M $(QEMU_MACHINE_TYPE) -kernel $(OUTPUT)
|
|
+DOCKER_ARG_TTY = --privileged -v /dev:/dev
|
|
|
|
-.PHONY: all doc qemu clippy clean readelf objdump nm
|
|
+DOCKER_EXEC_QEMU = $(QEMU_BINARY) -M $(QEMU_MACHINE_TYPE) -kernel $(OUTPUT)
|
|
+DOCKER_EXEC_RASPBOOT = raspbootcom
|
|
+DOCKER_EXEC_RASPBOOT_DEV = /dev/ttyUSB0
|
|
+# DOCKER_EXEC_RASPBOOT_DEV = /dev/ttyACM0
|
|
+
|
|
+.PHONY: all doc qemu qemuasm chainboot clippy clean readelf objdump nm
|
|
|
|
all: clean $(OUTPUT)
|
|
|
|
@@ -67,12 +74,22 @@
|
|
ifeq ($(QEMU_MACHINE_TYPE),)
|
|
$(info This board is not yet supported for QEMU.)
|
|
qemu:
|
|
+qemuasm:
|
|
else
|
|
qemu: all
|
|
$(DOCKER_CMD) $(DOCKER_ARG_CURDIR) $(CONTAINER_UTILS) \
|
|
$(DOCKER_EXEC_QEMU) $(QEMU_MISC_ARGS)
|
|
+
|
|
+qemuasm: all
|
|
+ $(DOCKER_CMD) $(DOCKER_ARG_CURDIR) $(CONTAINER_UTILS) \
|
|
+ $(DOCKER_EXEC_QEMU) -d in_asm
|
|
endif
|
|
|
|
+chainboot:
|
|
+ $(DOCKER_CMD) $(DOCKER_ARG_CURDIR) $(DOCKER_ARG_TTY) \
|
|
+ $(CONTAINER_UTILS) $(DOCKER_EXEC_RASPBOOT) $(DOCKER_EXEC_RASPBOOT_DEV) \
|
|
+ $(CHAINBOOT_DEMO_PAYLOAD)
|
|
+
|
|
clippy:
|
|
cargo xclippy --target=$(TARGET) --features bsp_$(BSP)
|
|
|
|
|
|
diff -uNr 06_drivers_gpio_uart/src/arch/aarch64.rs 07_uart_chainloader/src/arch/aarch64.rs
|
|
--- 06_drivers_gpio_uart/src/arch/aarch64.rs
|
|
+++ 07_uart_chainloader/src/arch/aarch64.rs
|
|
@@ -22,7 +22,7 @@
|
|
|
|
if bsp::BOOT_CORE_ID == MPIDR_EL1.get() & CORE_MASK {
|
|
SP.set(bsp::BOOT_CORE_STACK_START);
|
|
- crate::runtime_init::init()
|
|
+ crate::relocate::relocate_self::<u64>()
|
|
} else {
|
|
// if not core0, infinitely wait for events
|
|
wait_forever()
|
|
|
|
diff -uNr 06_drivers_gpio_uart/src/bsp/driver/bcm/bcm2xxx_pl011_uart.rs 07_uart_chainloader/src/bsp/driver/bcm/bcm2xxx_pl011_uart.rs
|
|
--- 06_drivers_gpio_uart/src/bsp/driver/bcm/bcm2xxx_pl011_uart.rs
|
|
+++ 07_uart_chainloader/src/bsp/driver/bcm/bcm2xxx_pl011_uart.rs
|
|
@@ -272,6 +272,18 @@
|
|
let mut r = &self.inner;
|
|
r.lock(|inner| fmt::Write::write_fmt(inner, args))
|
|
}
|
|
+
|
|
+ fn flush(&self) {
|
|
+ let mut r = &self.inner;
|
|
+ // Spin until the TX FIFO empty flag is set.
|
|
+ r.lock(|inner| loop {
|
|
+ if inner.FR.is_set(FR::TXFE) {
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ arch::nop();
|
|
+ });
|
|
+ }
|
|
}
|
|
|
|
impl interface::console::Read for PL011Uart {
|
|
@@ -288,14 +300,19 @@
|
|
}
|
|
|
|
// Read one character.
|
|
- let mut ret = inner.DR.get() as u8 as char;
|
|
+ inner.DR.get() as u8 as char
|
|
+ })
|
|
+ }
|
|
|
|
- // Convert carrige return to newline.
|
|
- if ret == '\r' {
|
|
- ret = '\n'
|
|
+ fn clear(&self) {
|
|
+ let mut r = &self.inner;
|
|
+ r.lock(|inner| loop {
|
|
+ // Read from the RX FIFO until the empty bit is '1'.
|
|
+ if !inner.FR.is_set(FR::RXFE) {
|
|
+ inner.DR.get();
|
|
+ } else {
|
|
+ break;
|
|
}
|
|
-
|
|
- ret
|
|
})
|
|
}
|
|
}
|
|
|
|
diff -uNr 06_drivers_gpio_uart/src/bsp/rpi/link.ld 07_uart_chainloader/src/bsp/rpi/link.ld
|
|
--- 06_drivers_gpio_uart/src/bsp/rpi/link.ld
|
|
+++ 07_uart_chainloader/src/bsp/rpi/link.ld
|
|
@@ -5,9 +5,10 @@
|
|
|
|
SECTIONS
|
|
{
|
|
- /* Set current address to the value from which the RPi starts execution */
|
|
- . = 0x80000;
|
|
+ /* Set the link address to the top-most 40 KiB of DRAM (assuming 1GiB) */
|
|
+ . = 0x3F000000 - 0x10000;
|
|
|
|
+ __binary_start = .;
|
|
.text :
|
|
{
|
|
*(.text._start) *(.text*)
|
|
@@ -31,5 +32,14 @@
|
|
__bss_end = .;
|
|
}
|
|
|
|
+ .got :
|
|
+ {
|
|
+ *(.got*)
|
|
+ }
|
|
+
|
|
+ /* Fill up to 8 byte, b/c relocating the binary is done in u64 chunks */
|
|
+ . = ALIGN(8);
|
|
+ __binary_end = .;
|
|
+
|
|
/DISCARD/ : { *(.comment*) }
|
|
}
|
|
|
|
diff -uNr 06_drivers_gpio_uart/src/bsp/rpi.rs 07_uart_chainloader/src/bsp/rpi.rs
|
|
--- 06_drivers_gpio_uart/src/bsp/rpi.rs
|
|
+++ 07_uart_chainloader/src/bsp/rpi.rs
|
|
@@ -12,6 +12,9 @@
|
|
pub const BOOT_CORE_ID: u64 = 0;
|
|
pub const BOOT_CORE_STACK_START: u64 = 0x80_000;
|
|
|
|
+/// The address on which the RPi3 firmware loads every binary by default.
|
|
+pub const BOARD_DEFAULT_LOAD_ADDRESS: usize = 0x80_000;
|
|
+
|
|
//--------------------------------------------------------------------------------------------------
|
|
// Global BSP driver instances
|
|
//--------------------------------------------------------------------------------------------------
|
|
|
|
diff -uNr 06_drivers_gpio_uart/src/interface.rs 07_uart_chainloader/src/interface.rs
|
|
--- 06_drivers_gpio_uart/src/interface.rs
|
|
+++ 07_uart_chainloader/src/interface.rs
|
|
@@ -26,6 +26,10 @@
|
|
pub trait Write {
|
|
fn write_char(&self, c: char);
|
|
fn write_fmt(&self, args: fmt::Arguments) -> fmt::Result;
|
|
+
|
|
+ /// Block execution until the last character has been physically put on the TX wire
|
|
+ /// (draining TX buffers/FIFOs, if any).
|
|
+ fn flush(&self);
|
|
}
|
|
|
|
/// Console read functions.
|
|
@@ -33,6 +37,9 @@
|
|
fn read_char(&self) -> char {
|
|
' '
|
|
}
|
|
+
|
|
+ /// Clear RX buffers, if any.
|
|
+ fn clear(&self);
|
|
}
|
|
|
|
/// Console statistics.
|
|
|
|
diff -uNr 06_drivers_gpio_uart/src/main.rs 07_uart_chainloader/src/main.rs
|
|
--- 06_drivers_gpio_uart/src/main.rs
|
|
+++ 07_uart_chainloader/src/main.rs
|
|
@@ -29,7 +29,11 @@
|
|
// the first function to run.
|
|
mod arch;
|
|
|
|
-// `_start()` then calls `runtime_init::init()`, which on completion, jumps to `kernel_init()`.
|
|
+// `_start()` then calls `relocate::relocate_self()`.
|
|
+mod relocate;
|
|
+
|
|
+// `relocate::relocate_self()` calls `runtime_init::init()`, which on completion, jumps to
|
|
+// `kernel_init()`.
|
|
mod runtime_init;
|
|
|
|
// Conditionally includes the selected `BSP` code.
|
|
@@ -66,25 +70,48 @@
|
|
fn kernel_main() -> ! {
|
|
use interface::console::All;
|
|
|
|
- // UART should be functional now. Wait for user to hit Enter.
|
|
- loop {
|
|
- if bsp::console().read_char() == '\n' {
|
|
- break;
|
|
- }
|
|
+ println!(" __ __ _ _ _ _ ");
|
|
+ println!("| \/ (_)_ _ (_) | ___ __ _ __| |");
|
|
+ println!("| |\/| | | ' \| | |__/ _ \/ _` / _` |");
|
|
+ println!("|_| |_|_|_||_|_|____\___/\__,_\__,_|");
|
|
+ println!();
|
|
+ println!("{:^37}", bsp::board_name());
|
|
+ println!();
|
|
+ println!("[ML] Requesting binary");
|
|
+ bsp::console().flush();
|
|
+
|
|
+ // Clear the RX FIFOs, if any, of spurious received characters before starting with the loader
|
|
+ // protocol.
|
|
+ bsp::console().clear();
|
|
+
|
|
+ // Notify raspbootcom to send the binary.
|
|
+ for _ in 0..3 {
|
|
+ bsp::console().write_char(3 as char);
|
|
}
|
|
|
|
- println!("[0] Booting on: {}", bsp::board_name());
|
|
-
|
|
- println!("[1] Drivers loaded:");
|
|
- for (i, driver) in bsp::device_drivers().iter().enumerate() {
|
|
- println!(" {}. {}", i + 1, driver.compatible());
|
|
+ // Read the binary's size.
|
|
+ let mut size: u32 = u32::from(bsp::console().read_char() as u8);
|
|
+ size |= u32::from(bsp::console().read_char() as u8) << 8;
|
|
+ size |= u32::from(bsp::console().read_char() as u8) << 16;
|
|
+ size |= u32::from(bsp::console().read_char() as u8) << 24;
|
|
+
|
|
+ // Trust it's not too big.
|
|
+ print!("OK");
|
|
+
|
|
+ let kernel_addr: *mut u8 = bsp::BOARD_DEFAULT_LOAD_ADDRESS as *mut u8;
|
|
+ unsafe {
|
|
+ // Read the kernel byte by byte.
|
|
+ for i in 0..size {
|
|
+ *kernel_addr.offset(i as isize) = bsp::console().read_char() as u8;
|
|
+ }
|
|
}
|
|
|
|
- println!("[2] Chars written: {}", bsp::console().chars_written());
|
|
- println!("[3] Echoing input now");
|
|
+ println!("[ML] Loaded! Executing the payload now\n");
|
|
+ bsp::console().flush();
|
|
|
|
- loop {
|
|
- let c = bsp::console().read_char();
|
|
- bsp::console().write_char(c);
|
|
- }
|
|
+ // Use black magic to get a function pointer.
|
|
+ let kernel: extern "C" fn() -> ! = unsafe { core::mem::transmute(kernel_addr as *const ()) };
|
|
+
|
|
+ // Jump to loaded kernel!
|
|
+ kernel()
|
|
}
|
|
|
|
diff -uNr 06_drivers_gpio_uart/src/relocate.rs 07_uart_chainloader/src/relocate.rs
|
|
--- 06_drivers_gpio_uart/src/relocate.rs
|
|
+++ 07_uart_chainloader/src/relocate.rs
|
|
@@ -0,0 +1,46 @@
|
|
+// SPDX-License-Identifier: MIT
|
|
+//
|
|
+// Copyright (c) 2018-2019 Andre Richter <andre.o.richter@gmail.com>
|
|
+
|
|
+//! Relocation code.
|
|
+
|
|
+/// Relocates the own binary from `bsp::BOARD_DEFAULT_LOAD_ADDRESS` to the `__binary_start` address
|
|
+/// from the linker script.
|
|
+///
|
|
+/// # Safety
|
|
+///
|
|
+/// - Only a single core must be active and running this function.
|
|
+/// - Function must not use the `bss` section.
|
|
+pub unsafe fn relocate_self<T>() -> ! {
|
|
+ extern "C" {
|
|
+ static __binary_start: usize;
|
|
+ static __binary_end: usize;
|
|
+ }
|
|
+
|
|
+ let binary_start_addr: usize = &__binary_start as *const _ as _;
|
|
+ let binary_end_addr: usize = &__binary_end as *const _ as _;
|
|
+ let binary_size_in_byte: usize = binary_end_addr - binary_start_addr;
|
|
+
|
|
+ // Get the relocation destination address from the linker symbol.
|
|
+ let mut reloc_dst_addr: *mut T = binary_start_addr as *mut T;
|
|
+
|
|
+ // The address of where the previous firmware loaded us.
|
|
+ let mut src_addr: *const T = crate::bsp::BOARD_DEFAULT_LOAD_ADDRESS as *const _;
|
|
+
|
|
+ // Copy the whole binary.
|
|
+ //
|
|
+ // This is essentially a `memcpy()` optimized for throughput by transferring in chunks of T.
|
|
+ let n = binary_size_in_byte / core::mem::size_of::<T>();
|
|
+ for _ in 0..n {
|
|
+ use core::ptr;
|
|
+
|
|
+ ptr::write_volatile::<T>(reloc_dst_addr, ptr::read_volatile::<T>(src_addr));
|
|
+ reloc_dst_addr = reloc_dst_addr.offset(1);
|
|
+ src_addr = src_addr.offset(1);
|
|
+ }
|
|
+
|
|
+ // Call `init()` through a trait object, causing the jump to use an absolute address to reach
|
|
+ // the relocated binary. An elaborate explanation can be found in the runtime_init.rs source
|
|
+ // comments.
|
|
+ crate::runtime_init::get().init()
|
|
+}
|
|
|
|
diff -uNr 06_drivers_gpio_uart/src/runtime_init.rs 07_uart_chainloader/src/runtime_init.rs
|
|
--- 06_drivers_gpio_uart/src/runtime_init.rs
|
|
+++ 07_uart_chainloader/src/runtime_init.rs
|
|
@@ -4,21 +4,39 @@
|
|
|
|
//! Rust runtime initialization code.
|
|
|
|
-/// Equivalent to `crt0` or `c0` code in C/C++ world. Clears the `bss` section, then jumps to kernel
|
|
-/// init code.
|
|
+/// We are outsmarting the compiler here by using a trait as a layer of indirection. Because we are
|
|
+/// generating PIC code, a static dispatch to `init()` would generate a relative jump from the
|
|
+/// callee to `init()`. However, when calling `init()`, code just finished copying the binary to the
|
|
+/// actual link-time address, and hence is still running at whatever location the previous loader
|
|
+/// has put it. So we do not want a relative jump, because it would not jump to the relocated code.
|
|
///
|
|
-/// # Safety
|
|
-///
|
|
-/// - Only a single core must be active and running this function.
|
|
-pub unsafe fn init() -> ! {
|
|
- extern "C" {
|
|
- // Boundaries of the .bss section, provided by the linker script.
|
|
- static mut __bss_start: u64;
|
|
- static mut __bss_end: u64;
|
|
+/// By indirecting through a trait object, we can make use of the property that vtables store
|
|
+/// absolute addresses. So calling `init()` this way will kick execution to the relocated binary.
|
|
+pub trait RunTimeInit {
|
|
+ /// Equivalent to `crt0` or `c0` code in C/C++ world. Clears the `bss` section, then jumps to
|
|
+ /// kernel init code.
|
|
+ ///
|
|
+ /// # Safety
|
|
+ ///
|
|
+ /// - Only a single core must be active and running this function.
|
|
+ unsafe fn init(&self) -> ! {
|
|
+ extern "C" {
|
|
+ // Boundaries of the .bss section, provided by the linker script.
|
|
+ static mut __bss_start: u64;
|
|
+ static mut __bss_end: u64;
|
|
+ }
|
|
+
|
|
+ // Zero out the .bss section.
|
|
+ r0::zero_bss(&mut __bss_start, &mut __bss_end);
|
|
+
|
|
+ crate::kernel_init()
|
|
}
|
|
+}
|
|
|
|
- // Zero out the .bss section.
|
|
- r0::zero_bss(&mut __bss_start, &mut __bss_end);
|
|
+struct Traitor;
|
|
+impl RunTimeInit for Traitor {}
|
|
|
|
- crate::kernel_init()
|
|
+/// Give the callee a `RunTimeInit` trait object.
|
|
+pub fn get() -> &'static dyn RunTimeInit {
|
|
+ &Traitor {}
|
|
}
|
|
```
|