|
|
@ -10,6 +10,15 @@
|
|
|
|
[chainloader]: https://en.wikipedia.org/wiki/Chain_loading
|
|
|
|
[chainloader]: https://en.wikipedia.org/wiki/Chain_loading
|
|
|
|
[position independent code]: https://en.wikipedia.org/wiki/Position-independent_code
|
|
|
|
[position independent code]: https://en.wikipedia.org/wiki/Position-independent_code
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
## Note
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Please note that there is a lot of stuff going on in this tutorial that is very hard to grasp by
|
|
|
|
|
|
|
|
only looking at the source code changes.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Please bear with me until I find the time to write it all down here elaborately. For the time being,
|
|
|
|
|
|
|
|
please see this tutorial as an enabler for a convenience feature that allows booting the following
|
|
|
|
|
|
|
|
tutorials in a quick manner.
|
|
|
|
|
|
|
|
|
|
|
|
## Install and test it
|
|
|
|
## Install and test it
|
|
|
|
|
|
|
|
|
|
|
|
Our chainloader is called `MiniLoad` and is inspired by [raspbootin].
|
|
|
|
Our chainloader is called `MiniLoad` and is inspired by [raspbootin].
|
|
|
@ -189,7 +198,7 @@ diff -uNr 06_drivers_gpio_uart/src/_arch/aarch64/cpu.rs 07_uart_chainloader/src/
|
|
|
|
if bsp::cpu::BOOT_CORE_ID == cpu::smp::core_id() {
|
|
|
|
if bsp::cpu::BOOT_CORE_ID == cpu::smp::core_id() {
|
|
|
|
SP.set(bsp::memory::boot_core_stack_end() as u64);
|
|
|
|
SP.set(bsp::memory::boot_core_stack_end() as u64);
|
|
|
|
- runtime_init::runtime_init()
|
|
|
|
- runtime_init::runtime_init()
|
|
|
|
+ relocate::relocate_self::<u64>()
|
|
|
|
+ relocate::relocate_self()
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
// If not core0, infinitely wait for events.
|
|
|
|
// If not core0, infinitely wait for events.
|
|
|
|
wait_forever()
|
|
|
|
wait_forever()
|
|
|
@ -273,7 +282,7 @@ diff -uNr 06_drivers_gpio_uart/src/bsp/raspberrypi/link.ld 07_uart_chainloader/s
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ /* Fill up to 8 byte, b/c relocating the binary is done in u64 chunks */
|
|
|
|
+ /* Fill up to 8 byte, b/c relocating the binary is done in u64 chunks */
|
|
|
|
+ . = ALIGN(8);
|
|
|
|
+ . = ALIGN(8);
|
|
|
|
+ __binary_end = .;
|
|
|
|
+ __binary_end_inclusive = . - 8;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
/DISCARD/ : { *(.comment*) }
|
|
|
|
/DISCARD/ : { *(.comment*) }
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -281,7 +290,16 @@ diff -uNr 06_drivers_gpio_uart/src/bsp/raspberrypi/link.ld 07_uart_chainloader/s
|
|
|
|
diff -uNr 06_drivers_gpio_uart/src/bsp/raspberrypi/memory.rs 07_uart_chainloader/src/bsp/raspberrypi/memory.rs
|
|
|
|
diff -uNr 06_drivers_gpio_uart/src/bsp/raspberrypi/memory.rs 07_uart_chainloader/src/bsp/raspberrypi/memory.rs
|
|
|
|
--- 06_drivers_gpio_uart/src/bsp/raspberrypi/memory.rs
|
|
|
|
--- 06_drivers_gpio_uart/src/bsp/raspberrypi/memory.rs
|
|
|
|
+++ 07_uart_chainloader/src/bsp/raspberrypi/memory.rs
|
|
|
|
+++ 07_uart_chainloader/src/bsp/raspberrypi/memory.rs
|
|
|
|
@@ -23,10 +23,12 @@
|
|
|
|
@@ -12,6 +12,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Symbols from the linker script.
|
|
|
|
|
|
|
|
extern "Rust" {
|
|
|
|
|
|
|
|
+ static __binary_start: UnsafeCell<u64>;
|
|
|
|
|
|
|
|
+ static __binary_end_inclusive: UnsafeCell<u64>;
|
|
|
|
|
|
|
|
static __bss_start: UnsafeCell<u64>;
|
|
|
|
|
|
|
|
static __bss_end_inclusive: UnsafeCell<u64>;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
@@ -23,10 +25,12 @@
|
|
|
|
/// The board's memory map.
|
|
|
|
/// The board's memory map.
|
|
|
|
#[rustfmt::skip]
|
|
|
|
#[rustfmt::skip]
|
|
|
|
pub(super) mod map {
|
|
|
|
pub(super) mod map {
|
|
|
@ -297,14 +315,24 @@ diff -uNr 06_drivers_gpio_uart/src/bsp/raspberrypi/memory.rs 07_uart_chainloader
|
|
|
|
|
|
|
|
|
|
|
|
/// Physical devices.
|
|
|
|
/// Physical devices.
|
|
|
|
#[cfg(feature = "bsp_rpi3")]
|
|
|
|
#[cfg(feature = "bsp_rpi3")]
|
|
|
|
@@ -59,6 +61,12 @@
|
|
|
|
@@ -59,6 +63,22 @@
|
|
|
|
map::BOOT_CORE_STACK_END
|
|
|
|
map::BOOT_CORE_STACK_END
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
+/// The address on which the Raspberry firmware loads every binary by default.
|
|
|
|
+/// The address on which the Raspberry firmware loads every binary by default.
|
|
|
|
+#[inline(always)]
|
|
|
|
+#[inline(always)]
|
|
|
|
+pub fn board_default_load_addr() -> usize {
|
|
|
|
+pub fn board_default_load_addr() -> *const u64 {
|
|
|
|
+ map::BOARD_DEFAULT_LOAD_ADDRESS
|
|
|
|
+ map::BOARD_DEFAULT_LOAD_ADDRESS as _
|
|
|
|
|
|
|
|
+}
|
|
|
|
|
|
|
|
+
|
|
|
|
|
|
|
|
+/// Return the inclusive range spanning the whole binary.
|
|
|
|
|
|
|
|
+///
|
|
|
|
|
|
|
|
+/// # Safety
|
|
|
|
|
|
|
|
+///
|
|
|
|
|
|
|
|
+/// - Values are provided by the linker script and must be trusted as-is.
|
|
|
|
|
|
|
|
+/// - The linker-provided addresses must be u64 aligned.
|
|
|
|
|
|
|
|
+pub fn binary_range_inclusive() -> RangeInclusive<*mut u64> {
|
|
|
|
|
|
|
|
+ unsafe { RangeInclusive::new(__binary_start.get(), __binary_end_inclusive.get()) }
|
|
|
|
+}
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+
|
|
|
|
/// Return the inclusive range spanning the .bss section.
|
|
|
|
/// Return the inclusive range spanning the .bss section.
|
|
|
@ -402,7 +430,7 @@ diff -uNr 06_drivers_gpio_uart/src/main.rs 07_uart_chainloader/src/main.rs
|
|
|
|
+ unsafe {
|
|
|
|
+ unsafe {
|
|
|
|
+ // Read the kernel byte by byte.
|
|
|
|
+ // Read the kernel byte by byte.
|
|
|
|
+ for i in 0..size {
|
|
|
|
+ for i in 0..size {
|
|
|
|
+ *kernel_addr.offset(i as isize) = console().read_char() as u8;
|
|
|
|
+ core::ptr::write_volatile(kernel_addr.offset(i as isize), console().read_char() as u8)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
@ -419,7 +447,7 @@ diff -uNr 06_drivers_gpio_uart/src/main.rs 07_uart_chainloader/src/main.rs
|
|
|
|
- println!(" {}. {}", i + 1, driver.compatible());
|
|
|
|
- println!(" {}. {}", i + 1, driver.compatible());
|
|
|
|
- }
|
|
|
|
- }
|
|
|
|
+ // Use black magic to get a function pointer.
|
|
|
|
+ // Use black magic to get a function pointer.
|
|
|
|
+ let kernel: extern "C" fn() -> ! = unsafe { core::mem::transmute(kernel_addr as *const ()) };
|
|
|
|
+ let kernel: fn() -> ! = unsafe { core::mem::transmute(kernel_addr as *const ()) };
|
|
|
|
|
|
|
|
|
|
|
|
- println!(
|
|
|
|
- println!(
|
|
|
|
- "[2] Chars written: {}",
|
|
|
|
- "[2] Chars written: {}",
|
|
|
@ -438,7 +466,7 @@ diff -uNr 06_drivers_gpio_uart/src/main.rs 07_uart_chainloader/src/main.rs
|
|
|
|
diff -uNr 06_drivers_gpio_uart/src/relocate.rs 07_uart_chainloader/src/relocate.rs
|
|
|
|
diff -uNr 06_drivers_gpio_uart/src/relocate.rs 07_uart_chainloader/src/relocate.rs
|
|
|
|
--- 06_drivers_gpio_uart/src/relocate.rs
|
|
|
|
--- 06_drivers_gpio_uart/src/relocate.rs
|
|
|
|
+++ 07_uart_chainloader/src/relocate.rs
|
|
|
|
+++ 07_uart_chainloader/src/relocate.rs
|
|
|
|
@@ -0,0 +1,52 @@
|
|
|
|
@@ -0,0 +1,49 @@
|
|
|
|
+// SPDX-License-Identifier: MIT OR Apache-2.0
|
|
|
|
+// SPDX-License-Identifier: MIT OR Apache-2.0
|
|
|
|
+//
|
|
|
|
+//
|
|
|
|
+// Copyright (c) 2018-2020 Andre Richter <andre.o.richter@gmail.com>
|
|
|
|
+// Copyright (c) 2018-2020 Andre Richter <andre.o.richter@gmail.com>
|
|
|
@ -458,104 +486,35 @@ diff -uNr 06_drivers_gpio_uart/src/relocate.rs 07_uart_chainloader/src/relocate.
|
|
|
|
+///
|
|
|
|
+///
|
|
|
|
+/// - Only a single core must be active and running this function.
|
|
|
|
+/// - Only a single core must be active and running this function.
|
|
|
|
+/// - Function must not use the `bss` section.
|
|
|
|
+/// - Function must not use the `bss` section.
|
|
|
|
+pub unsafe fn relocate_self<T>() -> ! {
|
|
|
|
+pub unsafe fn relocate_self() -> ! {
|
|
|
|
+ extern "C" {
|
|
|
|
+ let range = bsp::memory::binary_range_inclusive();
|
|
|
|
+ static __binary_start: usize;
|
|
|
|
+ let mut reloc_destination_addr = *range.start();
|
|
|
|
+ static __binary_end: usize;
|
|
|
|
+ let reloc_end_addr_inclusive = *range.end();
|
|
|
|
+ }
|
|
|
|
|
|
|
|
+
|
|
|
|
|
|
|
|
+ let binary_start_addr: usize = &__binary_start as *const _ as _;
|
|
|
|
|
|
|
|
+ let binary_end_addr: usize = &__binary_end as *const _ as _;
|
|
|
|
|
|
|
|
+ let binary_size_in_byte: usize = binary_end_addr - binary_start_addr;
|
|
|
|
|
|
|
|
+
|
|
|
|
|
|
|
|
+ // Get the relocation destination address from the linker symbol.
|
|
|
|
|
|
|
|
+ let mut reloc_dst_addr: *mut T = binary_start_addr as *mut T;
|
|
|
|
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ // The address of where the previous firmware loaded us.
|
|
|
|
+ // The address of where the previous firmware loaded us.
|
|
|
|
+ let mut src_addr: *const T = bsp::memory::board_default_load_addr() as *const _;
|
|
|
|
+ let mut src_addr = bsp::memory::board_default_load_addr();
|
|
|
|
|
|
|
|
+
|
|
|
|
|
|
|
|
+ // TODO Make it work for the case src_addr > reloc_addr as well.
|
|
|
|
|
|
|
|
+ let diff = reloc_destination_addr as usize - src_addr as usize;
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ // Copy the whole binary.
|
|
|
|
+ // Copy the whole binary.
|
|
|
|
+ //
|
|
|
|
+ //
|
|
|
|
+ // This is essentially a `memcpy()` optimized for throughput by transferring in chunks of T.
|
|
|
|
+ // This is essentially a `memcpy()` optimized for throughput by transferring in chunks of T.
|
|
|
|
+ let n = binary_size_in_byte / core::mem::size_of::<T>();
|
|
|
|
+ loop {
|
|
|
|
+ for _ in 0..n {
|
|
|
|
+ core::ptr::write_volatile(reloc_destination_addr, core::ptr::read_volatile(src_addr));
|
|
|
|
+ use core::ptr;
|
|
|
|
+ reloc_destination_addr = reloc_destination_addr.offset(1);
|
|
|
|
+
|
|
|
|
|
|
|
|
+ ptr::write_volatile::<T>(reloc_dst_addr, ptr::read_volatile::<T>(src_addr));
|
|
|
|
|
|
|
|
+ reloc_dst_addr = reloc_dst_addr.offset(1);
|
|
|
|
|
|
|
|
+ src_addr = src_addr.offset(1);
|
|
|
|
+ src_addr = src_addr.offset(1);
|
|
|
|
+ }
|
|
|
|
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ // Call `runtime_init()` through a trait object, causing the jump to use an absolute address to
|
|
|
|
+ if reloc_destination_addr > reloc_end_addr_inclusive {
|
|
|
|
+ // reach the relocated binary. An elaborate explanation can be found in the `runtime_init.rs`
|
|
|
|
+ break;
|
|
|
|
+ // source comments.
|
|
|
|
|
|
|
|
+ runtime_init::get().runtime_init()
|
|
|
|
|
|
|
|
+}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
diff -uNr 06_drivers_gpio_uart/src/runtime_init.rs 07_uart_chainloader/src/runtime_init.rs
|
|
|
|
|
|
|
|
--- 06_drivers_gpio_uart/src/runtime_init.rs
|
|
|
|
|
|
|
|
+++ 07_uart_chainloader/src/runtime_init.rs
|
|
|
|
|
|
|
|
@@ -7,9 +7,43 @@
|
|
|
|
|
|
|
|
use crate::{bsp, memory};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//--------------------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
+// Private Definitions
|
|
|
|
|
|
|
|
+//--------------------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
+
|
|
|
|
|
|
|
|
+struct Traitor;
|
|
|
|
|
|
|
|
+
|
|
|
|
|
|
|
|
+//--------------------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
+// Public Definitions
|
|
|
|
|
|
|
|
+//--------------------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
+
|
|
|
|
|
|
|
|
+/// We are outsmarting the compiler here by using a trait as a layer of indirection. Because we are
|
|
|
|
|
|
|
|
+/// generating PIC code, a static dispatch to `init()` would generate a relative jump from the
|
|
|
|
|
|
|
|
+/// caller to `init()`. However, when calling `init()`, code just finished copying the binary to the
|
|
|
|
|
|
|
|
+/// actual link-time address, and hence is still running at whatever location the previous loader
|
|
|
|
|
|
|
|
+/// has put it. So we do not want a relative jump, because it would not jump to the relocated code.
|
|
|
|
|
|
|
|
+///
|
|
|
|
|
|
|
|
+/// By indirecting through a trait object, we can make use of the property that vtables store
|
|
|
|
|
|
|
|
+/// absolute addresses. So calling `init()` this way will kick execution to the relocated binary.
|
|
|
|
|
|
|
|
+pub trait RunTimeInit {
|
|
|
|
|
|
|
|
+ /// Equivalent to `crt0` or `c0` code in C/C++ world. Clears the `bss` section, then jumps to
|
|
|
|
|
|
|
|
+ /// kernel init code.
|
|
|
|
|
|
|
|
+ ///
|
|
|
|
|
|
|
|
+ /// # Safety
|
|
|
|
|
|
|
|
+ ///
|
|
|
|
|
|
|
|
+ /// - Only a single core must be active and running this function.
|
|
|
|
|
|
|
|
+ unsafe fn runtime_init(&self) -> ! {
|
|
|
|
|
|
|
|
+ zero_bss();
|
|
|
|
|
|
|
|
+
|
|
|
|
|
|
|
|
+ crate::kernel_init()
|
|
|
|
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+//--------------------------------------------------------------------------------------------------
|
|
|
|
+ let relocated_runtime_init_addr = runtime_init::runtime_init as *const () as usize + diff;
|
|
|
|
// Private Code
|
|
|
|
+ let relocated_runtime_init: fn() -> ! =
|
|
|
|
//--------------------------------------------------------------------------------------------------
|
|
|
|
+ core::mem::transmute(relocated_runtime_init_addr as *const ());
|
|
|
|
|
|
|
|
|
|
|
|
+impl RunTimeInit for Traitor {}
|
|
|
|
|
|
|
|
+
|
|
|
|
+
|
|
|
|
/// Zero out the .bss section.
|
|
|
|
+ relocated_runtime_init()
|
|
|
|
///
|
|
|
|
+}
|
|
|
|
/// # Safety
|
|
|
|
|
|
|
|
@@ -24,14 +58,7 @@
|
|
|
|
|
|
|
|
// Public Code
|
|
|
|
|
|
|
|
//--------------------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
-/// Equivalent to `crt0` or `c0` code in C/C++ world. Clears the `bss` section, then jumps to kernel
|
|
|
|
|
|
|
|
-/// init code.
|
|
|
|
|
|
|
|
-///
|
|
|
|
|
|
|
|
-/// # Safety
|
|
|
|
|
|
|
|
-///
|
|
|
|
|
|
|
|
-/// - Only a single core must be active and running this function.
|
|
|
|
|
|
|
|
-pub unsafe fn runtime_init() -> ! {
|
|
|
|
|
|
|
|
- zero_bss();
|
|
|
|
|
|
|
|
-
|
|
|
|
|
|
|
|
- crate::kernel_init()
|
|
|
|
|
|
|
|
+/// Give the caller a `RunTimeInit` trait object.
|
|
|
|
|
|
|
|
+pub fn get() -> &'static dyn RunTimeInit {
|
|
|
|
|
|
|
|
+ &Traitor {}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
```
|
|
|
|
```
|
|
|
|