From c65e2e56cd721d2e7e889d3bfaf48b55752d6bfb Mon Sep 17 00:00:00 2001 From: Andre Richter Date: Mon, 1 Oct 2018 21:55:20 +0200 Subject: [PATCH] Add tutorial 0D_cache_performance --- 0C_virtual_memory/kernel8 | Bin 0 -> 73056 bytes 0C_virtual_memory/kernel8.img | Bin 2564 -> 2572 bytes 0C_virtual_memory/src/mmu.rs | 4 +- 0D_cache_performance/.cargo/config | 6 + 0D_cache_performance/Cargo.lock | 55 ++++ 0D_cache_performance/Cargo.toml | 12 + 0D_cache_performance/Makefile | 66 +++++ 0D_cache_performance/README.md | 57 ++++ 0D_cache_performance/kernel8 | Bin 0 -> 79448 bytes 0D_cache_performance/kernel8.img | Bin 0 -> 6912 bytes 0D_cache_performance/link.ld | 57 ++++ 0D_cache_performance/raspi3_boot/Cargo.toml | 9 + 0D_cache_performance/raspi3_boot/src/lib.rs | 131 +++++++++ 0D_cache_performance/src/benchmark.rs | 40 +++ 0D_cache_performance/src/gpio.rs | 75 +++++ 0D_cache_performance/src/main.rs | 111 +++++++ 0D_cache_performance/src/mbox.rs | 159 ++++++++++ 0D_cache_performance/src/mmu.rs | 219 ++++++++++++++ 0D_cache_performance/src/uart.rs | 310 ++++++++++++++++++++ 19 files changed, 1309 insertions(+), 2 deletions(-) create mode 100755 0C_virtual_memory/kernel8 create mode 100644 0D_cache_performance/.cargo/config create mode 100644 0D_cache_performance/Cargo.lock create mode 100644 0D_cache_performance/Cargo.toml create mode 100644 0D_cache_performance/Makefile create mode 100644 0D_cache_performance/README.md create mode 100755 0D_cache_performance/kernel8 create mode 100755 0D_cache_performance/kernel8.img create mode 100644 0D_cache_performance/link.ld create mode 100644 0D_cache_performance/raspi3_boot/Cargo.toml create mode 100644 0D_cache_performance/raspi3_boot/src/lib.rs create mode 100644 0D_cache_performance/src/benchmark.rs create mode 100644 0D_cache_performance/src/gpio.rs create mode 100644 0D_cache_performance/src/main.rs create mode 100644 0D_cache_performance/src/mbox.rs create mode 100644 0D_cache_performance/src/mmu.rs create mode 100644 
0D_cache_performance/src/uart.rs diff --git a/0C_virtual_memory/kernel8 b/0C_virtual_memory/kernel8 new file mode 100755 index 0000000000000000000000000000000000000000..5fab5fd377f738ebfc4fd9004d40dee6b64494ea GIT binary patch literal 73056 zcmeI1dvH|M9mmhzn~>e*m4pOn2<%3KhFI3!yYD_=0|W>SphdGO%AU^^;MN0$Ro41vg>>%*R9)U`4D@ zi7ixO^m?@&V`@3WtahN6OzWLTPvcd^TDxMconETV7_;gRYTTWFNrjbmx{MM8KmY_l z00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck) z1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`; zKmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l z00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck) z1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`; zKmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l z00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck)1V8`;KmY_l00ck) z1V8`;KmY_l;J-rP2=k3v3Imr08EaX)f}X2b?>2f4TE)>!CNuBr26oWOtbZ^u_U00S z4b^@pb>L^m2TpG+47_M&>}bh*&tLl6p@F7#R99jgyVO25aC+ts=Z_lcd1F0G&!%=4 zM9c8UqD7i8C>9T&BYh@g`^n2u?&SlqOnQOF=N{r84>Y~-S>u_H=-x$bw+*eo z-b-WKXHMT&>UjK_<%Zc8tOwR#A1Vx-y3opFP5xh>8j5& zEo(=19cXy&(bJ}aB8HJP^kyn&wISuTzmi+!VMHqc}gOtr>hRXZ*Q-^z&M zDIcnM_eSlbJ?$A>9RZi*y_47U*W_jL5E? 
zJH$rrV(cL0&ZXyW{7_)TKzuEf%l3-ENLfii^RESV@Q{IZ$eB!wKumTS0wY!5yJcvF zVW5eR`6KFM4*A~p(AG0)inUa-#0lcbW2^31Jj7$(oo$i6&&Qe-IPfgR7%N$+#l(6i z;!Z9edU%@J-xCz4v>m%eZCkWZ+DlqzCiCV4GIsD02{+Ckc7nqRXpKuboQ<8GQ`In8m9JiVgLaW~D6#~By0(mhmfakJh& z!|n=Cd13Q*ihGCR#%O+=5pO@2^j=NpjGi;M^zBInmkTwegr{~Ujy2;{TBS!0B(`8W@UT$S0<02b=vS6TTk8u3bI(Gchk)p$i zxns{K+|?{yYZcPE|C8i(-h-CrJ8Ark?mu2xY7`bqC(UdkmrC4@Ma^p_#hrZko@1t& ztkl?5S-PC|@L5`q7NM-;St@r=3yho>1<4|qI!+X@7ULXRgS5`xC>>}rpV)kCcy5`~ zd}8h~%L&V|C55b)vp7L>Hwl4}+QL#vi4)k!Avdk-pMgm;Utb*N*T#FeWl7Tf07c!PRRafnp34E2hVH z7NlRD&R*hNIa4wOp~H|CNG z=T*^M%e|hso@wcyeS| za$8bHIl1&Vu@);ITOG|g(RSz6(oFQ}fXKFPGaO?BftS5XdDQZ81cuZ8N;*Nf_} z^1SiA!1qAS1C`BXv?q*zW+i+toG+@BZhK&1^R5!syO;LWxG2l;petIeimo@nZ-&c@->Soew>uA zO~kl@VqeM1c{%CyJz(OU{awm=_PgNRY$jR0#x5EQcUM>#yErE9uHftMI;~C1e0Fgy zy~CCF$aE&D?-NTcFO7_SY@~CZzP}vh-(Qvs#tW}f?8N79v8!Brjn>-k&(-()Ir@%p zzKBWd=&UxgqY2a5%F*_*%2DcT|Gs|GN-9S`5NArp!JCJs2P#Kv=$=tGe{@Vd^cFuK z*01066|2kojo1onTSAWabw;eozMh`$L@E-lGAp&4daS8#tIKX(5ldNn+IJ*lA-U5k zhr@|TGHInoZByE;jdXT)TbuflDLE98<5gy}xp{jm6>5*g+pN@~wL5N&cW<}ZZ2Utk zY3+>N7qM>bzTIrzl8B{vRH{AE-PhJmWyE@4ERpJ?@w()m9$qJ>toD8zeVxMv0w4ea zAn^ZAfPR;;0(LQz&0)SG(3f8moraUHg_@qo(r;39fz`07@wbvLFp;rhO)pkE+oJ2% zsrsKN)L^*L&sJ`buGIz9kKu-&MXJoct?9ZSm!>~XWv+SsIFJ6DJo=eDy3Xe(SsZje z$FlVIsUMmddoxS_NYl;csoX9ry1>3EOx1PHUlyn8*OcCG#2?YP-AZLH4$JcBoAT(L zdGtr}=(p09`>6ioGhNYew;rs^W)m#+ss{y8O%v8qB<*W=p_3NW@&)Ae!N zwRqkx7YCj55#!Wxb^ph+^q2odKd)!SkLHOVS9}TVk6H0o^Tg{u$FkyepJiF=O7EX# z?Z-L9$w2(|_<)LyZPs+Xe|;Yfl3q&jItNOJaxJBO%5{CdWtuKgKrxLlvEMcQtBTGi zJ(crG(s@5R2fg+o$|KkHb*}4=Xu94{jn@AYny%+j-JguzK8^b!vv$1)wSiyP_4kt2 z53{8r{V8Tkbcf}X%xta6B(o)Vbfx50D%+E(g!-hGtRs?$M>>5rUuQ?#y}ils(okEU z&CaUBHi`dnvg&@Dg9Y!};MF3XU0r<+d*kLtDcDlCqOsoLZFkE)$rJL(zHl_;aYaM* z{}k_SZrHG9NwtI+vJ` zq)sguiKh}fXg-oxcK9T}&u{m8hy%|G#$S;Ecv`1zgP3>{kOf|QP;$OF8@!=`pKl^ z@kb+0m&>o`VDgrA#o{rZfmW~E7p19reGzZO>vc@otFAsdk#cR~e|q{-NeXxS?ICy6 z?Q!_*e!nE~gas4bv<|}R1O3iVq?jDx``*kN}_cCSZvwmQAu zu*>GKb#~s@WplgyZimz5_Bics$?26Om;VcPAm#6X7OmT-WTww4eZh6m5KlG6IwCyp 
zuKOd2Zknzm9CAvnQK#P#_DlYVJr5mcd!(PDBW}s>>FAHBdPy(cHJ)X>X5 zujCB1hI~GkCnP&*L3PQoI8!!ZkQRR=sq8(;m2-trtg`dyUry>8n$*=_mrBvrSlh5> z?f+y^MZ=QpbGo9mTI?<@`E;CU+2>Dc`t_6NKPvgj%5OT>v`atlxT^j`m{y?_`Z=y^ zMfAWU>Eu=}{d+(C=YaD0n5OH6s_LH@qyKYLPIfwJ)k{BDi?v9rUZIw{J6+}9AM~+x zO)0hZHFUo7i_dQ<-FkcdoUbHpVr+WWubOgoe48$4Ww!rYNYB-Nv(~=;23Vzs?Dqcv Dm7&wd literal 0 HcmV?d00001 diff --git a/0C_virtual_memory/kernel8.img b/0C_virtual_memory/kernel8.img index d8e7e007f2e1749cc8fccbdf843481a76177de97..d82eb38d00af783af65562896abbd59e69392a12 100755 GIT binary patch delta 430 zcmZ|Kze@sP7zgn8x$}N4MQI(HaUP;Zw}e8u6ju$^WLcww1x?kU1~sKgiA0cZk6=wD zPPU1YQ9)49QV`)T{tq<-alIO}xjxV5`99BcgyzWvu}rg*0b2^b0?o<1*1!ilY@migqd7j32~4s?2I`l6)kjYxQLx?U;7I7QAFG z-e;bdnUUROP*X_a-@W)~R{YC5W3IDDZ}~g3BeX4z22WUV+pMTN;-Ov;QLlh-k+F*Z!T%B0R%Hu*1; zIAg(NQRaHafXP#t)fwG3?`3voWc)grk##ZSnaN98l^G9DKFPY7aoXfsHgU!Ylc%z2 zGqz4X2xQk!zR70HSOgZ&nXJmL&6o%lkDi>#Zp`Qh7WbIE6)0{C7B`>#6DY0)7FU_< j%Aw6DIk}KSoKbjkCx`juBOD%#os)Sv4S=TDah?MJYdm1m diff --git a/0C_virtual_memory/src/mmu.rs b/0C_virtual_memory/src/mmu.rs index 375e685d..c2bf1500 100644 --- a/0C_virtual_memory/src/mmu.rs +++ b/0C_virtual_memory/src/mmu.rs @@ -219,8 +219,8 @@ pub unsafe fn init() { // First, force all previous changes to be seen before the MMU is enabled. 
barrier::isb(barrier::SY); - // Enable the MMU - SCTLR_EL1.modify(SCTLR_EL1::M::Enable); + // Enable the MMU and turn on caching + SCTLR_EL1.modify(SCTLR_EL1::M::Enable + SCTLR_EL1::C::Cacheable); // Force MMU init to complete before next instruction barrier::isb(barrier::SY); diff --git a/0D_cache_performance/.cargo/config b/0D_cache_performance/.cargo/config new file mode 100644 index 00000000..d7fb2ba3 --- /dev/null +++ b/0D_cache_performance/.cargo/config @@ -0,0 +1,6 @@ +[target.aarch64-unknown-none] +rustflags = [ + "-C", "link-arg=-Tlink.ld", + "-C", "target-feature=-fp-armv8", + "-C", "target-cpu=cortex-a53", +] diff --git a/0D_cache_performance/Cargo.lock b/0D_cache_performance/Cargo.lock new file mode 100644 index 00000000..c30853d5 --- /dev/null +++ b/0D_cache_performance/Cargo.lock @@ -0,0 +1,55 @@ +[[package]] +name = "cortex-a" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "register 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "kernel8" +version = "0.1.0" +dependencies = [ + "cortex-a 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "raspi3_boot 0.1.0", + "register 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "panic-abort" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "r0" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "raspi3_boot" +version = "0.1.0" +dependencies = [ + "cortex-a 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "panic-abort 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "r0 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "register" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "tock-registers 0.2.0 
(registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "tock-registers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[metadata] +"checksum cortex-a 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fe913628f34718bc9e7d0d07e13ca1374638f64f0edc6eb063ec8abe581d395d" +"checksum panic-abort 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6bc796c620f27056d4ffe7c558533fd67ae5af0fd8e919fbe38de803368af73e" +"checksum r0 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e2a38df5b15c8d5c7e8654189744d8e396bddc18ad48041a500ce52d6948941f" +"checksum register 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "157a11ac0b1882ff4a527a92f911dd288df17367faaaa0c36f188cd61ec36fc1" +"checksum tock-registers 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3a385d94f3f62e60445a0adb9ff8d9621faa272234530d4c0f848ec98f88e316" diff --git a/0D_cache_performance/Cargo.toml b/0D_cache_performance/Cargo.toml new file mode 100644 index 00000000..b7f40114 --- /dev/null +++ b/0D_cache_performance/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "kernel8" +version = "0.1.0" +authors = ["Andre Richter "] + +[dependencies] +raspi3_boot = { path = "raspi3_boot" } +cortex-a = "2.2.1" +register = "0.2.0" + +[package.metadata.cargo-xbuild] +sysroot_path = "../xbuild_sysroot" diff --git a/0D_cache_performance/Makefile b/0D_cache_performance/Makefile new file mode 100644 index 00000000..26e15973 --- /dev/null +++ b/0D_cache_performance/Makefile @@ -0,0 +1,66 @@ +# +# MIT License +# +# Copyright (c) 2018 Andre Richter +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, 
and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +# + +TARGET = aarch64-unknown-none + +OBJCOPY = cargo objcopy -- +OBJCOPY_PARAMS = --strip-all -O binary + +UTILS_CONTAINER = andrerichter/raspi3-utils +DOCKER_CMD = docker run -it --rm -v $(shell pwd):/work -w /work +DOCKER_TTY = --privileged -v /dev:/dev +QEMU_CMD = qemu-system-aarch64 -M raspi3 -kernel kernel8.img +RASPBOOT_CMD = raspbootcom /dev/ttyUSB0 kernel8.img + +all: clean kernel8.img + +target/$(TARGET)/debug/kernel8: src/main.rs + cargo xbuild --target=$(TARGET) + cp $@ . + +target/$(TARGET)/release/kernel8: src/main.rs + cargo xbuild --target=$(TARGET) --release + cp $@ . 
+ +ifeq ($(DEBUG),1) +kernel8: target/$(TARGET)/debug/kernel8 +else +kernel8: target/$(TARGET)/release/kernel8 +endif + +kernel8.img: kernel8 + $(OBJCOPY) $(OBJCOPY_PARAMS) $< kernel8.img + +qemu: all + $(DOCKER_CMD) $(UTILS_CONTAINER) $(QEMU_CMD) -serial stdio + +raspboot: all + $(DOCKER_CMD) $(DOCKER_TTY) $(UTILS_CONTAINER) $(RASPBOOT_CMD) + +clippy: + cargo xclippy --target=$(TARGET) + +clean: + cargo clean + rm -f kernel8 diff --git a/0D_cache_performance/README.md b/0D_cache_performance/README.md new file mode 100644 index 00000000..1f1dee0a --- /dev/null +++ b/0D_cache_performance/README.md @@ -0,0 +1,57 @@ +# Tutorial 0D - Cache Performance + +Now that we finally have virtual memory capabilities available, we also have +fine-grained control over `cacheability`. You've caught a glimpse already in the +last tutorial, where we used page table entries to reference the `MAIR_EL1` +register to indicate the cacheability of a page or block. + +Unfortunately, for the user it is often hard to grasp the advantage of caching +in early stages of OS or bare-metal software development. This tutorial is a +short interlude that tries to give you a feeling of what caching can do for +performance. + +## Benchmark + +Let's write a tiny, arbitrary micro-benchmark to showcase the performance of +operating on the same DRAM with caching enabled and disabled. + +### mmu.rs + +To that end, we will map the same physical memory via two different virtual +addresses. We set up our page tables such that the virtual address `0x200000` +points to the physical DRAM at `0x400000`, and we configure it as +`non-cacheable` in the page tables. + +We are still using a `2 MiB` granule, and set up the next block, which starts at +virtual `0x400000`, to point at physical `0x400000` (this is an identity-mapped +block). This time, the block is configured as cacheable. 
+ +### benchmark.rs + +We write a little function that iteratively reads memory of five times the size +of a `cacheline`, in steps of 8 bytes, aka one processor register at a time. We +read the value, add 1, and write it back. This whole process is repeated +`100_000` times. + +### main.rs + +The benchmark function is called twice. Once for the cacheable and once for the +non-cacheable virtual addresses. Remember that both virtual addresses point to +the _same_ physical DRAM, so the difference in time that we will see will +showcase how much faster it is to operate on DRAM with caching enabled. + +## Results + +On my Raspberry, I get the following results: + +```text +Benchmarking non-cacheable DRAM modifications at virtual 0x00200000, physical 0x00400000: +664 miliseconds. + +Benchmarking cacheable DRAM modifications at virtual 0x00400000, physical 0x00400000: +148 miliseconds. + +With caching, the function is 348% faster! +``` + +Impressive, isn't it? diff --git a/0D_cache_performance/kernel8 b/0D_cache_performance/kernel8 new file mode 100755 index 0000000000000000000000000000000000000000..7c8f3aedef6c3d1cd33ebf8c75b657890fe450ea GIT binary patch literal 79448 zcmeI3dvp`moyV^vKP1_JF}5LwV3`21okWplB#lHju`rN=OK595v?uMx=!u2jvP=ww z;yjuY0%Rjfk_NhkwA~XVlCwE&HpSg-3+--CQuYx-$@XlwjZNDEJr>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3; zAOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3; zAOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3; zAOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3; zAOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3; zAOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3; zAOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0z`la5CI}U1c(3; zAOb{y2oM1xKm>>Y5g-CYfCvx)B0vO)01+SpM1Tko0U|&IhyW2F0{<@rjxzVzid^sb z!;Cept%I&)-J77BErzqHRBHZ@bJ+n4v;4}$*oA5xi`VRn?cXOoM*s_y1K_yr)24u;)F*`;KnT*b&74W+jVXG|W!EXDCVz!#<0# zrxDAWJeKo@66s}>&trJ+LVv@{*VdnTAD>-FyD5I}N8KpfHmhJyvHhuowfZ?@mi_mB 
z6wmdZ{-S)>3wT83CBKO$@>u)gXOf|-$wC#kU`R$a!aN5WoX-NIXm@xj~#psnrb7HVZ zAHTGtATeZM@l+~5kxJ#^lZ#IdJ_daB_~`IqiIjm&3?F6#M|A7}+RmZ1-Q~mH0X_Iu zpe>&$_YTaPo7327UTS*tIVYSJu)Ie)zc00a*jkD z>`3&F$dmh38kfZR{d4&@OM|dIluA9P+FO>ymx#3gq51aT9QWs^^E(S0Pd8*Jkgnjf zLND@bTO9um%HsF_dD6!$q0Q6f9a)?%5B=sJaeu^6lB^p1%-N*gJ5im>F4m)slIR~> zaLx4=uJeS>gEk9ohMq0-Z0KU4i=oSeE`u%?x*U43(2Jp$2)zV)xzNj@YlN9 zmV}m|T|&E{R|vfV`fj1`hOQI34*DLU?}4rtx*mFi&>Nr|gl>S|DD+0?`-Q$A`T?OI zfc6XRhYkxJhTbCd7U&M4JD{UNN1?Y0y%l=9(A%NECiK^!9})Ty=*NYA9QsM2pM-u& z=%=9f3%wuuS)reWJ}C4-HZgP|)pQu+r3US7;-5v#u~uh1W!~A}Fk-;i*=at+`%NkO zPbub}d8RiG4jJav+@)F9=rWL)Qph18o)B3M~mOLA!)@L9Y;c1vL7jnm_tuqE6^K=zD~| z2fAM9dgu*8Z-8zPx&eBl&>Nxe7y5o^^hY&+^v8r>Xg_pV=rHsap|?PH2;Bi46*>yN zRp_nI+lAf^jsB?SkN%i=MCeDL9~b&@=qH7K68b5jpMu^m^nU1Pg?<+LpwI_z?2o(i zXYP+BYCkNgIi&TMDgE#pj2&poV~!Sm$>(+#_BZgk)40>Lu+mbQp6BLNNb^j;D&Gyi z^n5w5X1BD%(=Z3bNf~J|5VKFPhei>KE|6_@ovoh6;9UOtAC>0 zRhrxQ9{k>c-yqgWZEYxneOTD}+VG5?sp&_sb~41u*wXm#ai8x6)a!mr{2g)qg-=q? zar=i~`EkO?yaVgl({EjA)FmS9$oQol`h;26|5n+&=Nc;PY`|!FxZ$!vm#nq0flCIK zd?u&AVYlwYcs)BYel-6`v~2LjsBf^Jnz7WK_^w{Osyw# zSd(!c)@H1~Zxr`8m``pzo-8Yo%_qx_SDdUkj{eonS)BwMlg>L(lUppQemXWV^&aL0 zYTId}pM_&kmVY4Bn=^5weIbddSb1C@FW5pGZ8oF1(&V4P&e|MSJU5$I|O3USnpBuVwfsNneWj~2~ z^|)7q{6@j)vY|Ao<)KI0nCF(uR}JPG9_z`EQcZnRVl*Pgn-Jq)AjVn|V;N$c$w!{z zGhfuh_hG8380Dg!)N-}>mVAu1om+x(O=`J1_?<)l=kXa$vm2}KD3=%L(lJ$jkZSr3 z?p@lk8f~x&ZBdt)2&_zel)vUGZ#O;`_!y{qXi;Mc#)R>=Y%U)QBl(M@+a9{Pao1ed z{UpZaC4;W!3T)L6Et1O~-d+AB)GJT)->KtsKI}}``S^SmK_kz@guHt@7RVLL zShw@xXUaFi_X_5Sna240!mf?+mIq22_lP`35eLtM$Ft>u64~|eljRR0uX9Ml$6aPT z|0(Q~@oXr%9?yoNY4M!S63?m3cJkH>197SGFB;(0MM zp2ayc$Fn$RGM=qWUUCJ(2&0pH@UB2x` zpZhX>ZoScGeWuSXH~Or~^r^VfXGNwD<_B#)(_*$~`pm!4rz+ED-iJBWTY!pFh+!c5q3&4sC}8SY3DddcyDVPwG+KX;YOqG5mWLFR-ml z4BeuOj~p{3MzCI(9jmJc(bmH+GOWkgXS}c!&yc#r*bj0Nqe?;bFrFRDMV>sL5%>-j zu*9eVX|R6adcuU~0hc+UhgS|75!OU98cq)qCD*KmP5vP8Gd4KI`kp#!vm|*W>Sg@z>)QD!$b) z7x`nZP9EF$U*m=69&31c@gK*Z!d~lr)bEv_9EsY`RmH~*reyWbQ-{v!%zLV@eeR6q 
zjlx6qbJ^);yj#%ysqReWL;rH95@$>Q>_e5?eSMbgoBCGT^nKnh==-WLChGqhV<@h> zn8e;`9$&k#f8o~!Soh9<_vwb)?qbDlcO|3sc+b(g^QA+3QU1Fqzx7(^OyqA07ZxJM zy+c*;>bt!A_il^yjTj1&FGXtlYL~H#=aDC#Wl!^Q8szbvWOFeuc)+lv0O?whj^}$G z_ajLA7<_Pl+X!L+@28jP%%|!GtI%Fd8bq7@9xPDafln5CCsJq|-f!^2U_Rfg@sGFq zxwraH|NZ!hq2G=VMu*1Lezi(}`buMd;<6zpd1;3+amkRIyn;UQacfSZN`ItbB9M!< zE-!KJuUKMuAsaZUW3HnOUhHw0GJ?Hns)u==$SZQ8FG-`&0X=(lR}Q=A<9ra$s6n5+ z1@+-+lDYj2kvtY3MLs-T%O~@dWw^H`!+!MIdpL$lXX9MP| z_rdYai_PxS%e-!TUU_j5J{5X>_ukUIc!#6!ez|nt!S_wx2NJ=MCxQ^)k)iO~RypYLID-j=)Y{$W(sC*DQh zd>Hjg=6NT+tTT;OL$A$Qpd9tD^i<4Z-Nw?x<;GkV_2yx(Wn$ffI8PwvqZ_>gsaiHr zaZT5~HlH5`Wyp<+*qTMzz=HmOmJlKycsY@7Vc_)UjpF0Q6OjrB% zr5$Ur$M`JT;~wli)+9b|U7bj+(yQ$}fi)asS<>L`Im@7(5`7CZs ztvquT{myf3<(a8Pw3PRrAMkk6zsq#ga3d1muCb{02Q&ssF;e;#R6 zKf_|FU`?T1DAMvY=aFVK#XLXUQkPf=7CTUfU4{i?dc?VQPMX7@VLo_Y=kP7Wr7^|3 z%LD9EzH%RYxu4cHNA$ICd&l#YdC0Q|YZbsxV&hq1?!{8^)RAE-s2NzK9D4>9sK8j&$S@O~k)IPVi)eY($F z+OaZmIloTbi`IgD3(}8*J!e=0_P2rkIj|o^`Y~mC9_*8Rr3HQ3sPlTNba-Ypdnfvr z*Cli&Z=woirTgCX{CWIS%d0`Vnf1&$igx4W@iwSIyWI}1A6! 
z%2$TKq6Y0)k9O43zl%K7bZE!h(T;bBym%h(B9F1!>G=$Pn6LaB^19Y!A?L45UiS8HakL`_4B&bACLI?LCxJ$&W;|7FJ{>qiN<<;Z5CVK?Ut^VZ9TZhFIJc>?UA-f zPbkpY5$v&=&C@depJ&<=4cyu8i*#6{J!*`;SS-}u6|=-TEy2jvNHApaZ?kLERi0|GE3Ok6AMKv z&1*w#ZJm~e-kz8*5b||Yn$70M2O}{xo#ozp@3SC5N9Ti9tCe3MXz0GE`+a#N8bJ(0Vmrb#`Z63SLZnsNz+3v7A z?Jm1wciTOZO|napBufs-DY+y?a!VfBCfj96mSum zakw2Gr_E`1N>177a5|kXr{Z)wJuaKe?vh-x%i(gmTrS1sc6k(=Vpk+ZRvd~`aVd)8 zRy=N-+wPX!vfJTyx?OI??RI-S2+@P$J;>UFWFD9V7CI3DB0vO)fFQuv1w6K;GJ){5 zo?-Q?V$;{Xn^h}T4U4dg1@vCcZkujjhecJ7jkcE0SH(i~82o1{_fQu553<;QnZ-Vo z#eNlbM8wvL^v~m{;`=JcG zMc9vK*p~@=GQ(aY>;oD0JB3}_&uQg4gq>x?=@E8IhF$9qkLjlOhr3mO9b0XjZeInv z9{or2KY@o^essgG!yPRiZD04Guxs&X`_P@R&(u$}dTIN^bnf`02>A1ok7vZM#koEs zPX0#-Jfc@k)8l_$*uPvb-TrN1KQU{%U8|QCk5;cj)SLHz&Hucbp0O3euH}#SD(cZ9 z>{^_7{!@?Vgk6jCd%}+Q3QVWJUf*;(G4@^DpNZR#ve-{&u@9@|=$W=(PDjI-)}OU~ zwPx4)GZXt*&93p$^3nF)TDxocXzl(B)R)&w%cn|Y^lp|obKp8teV3?Qb?l{#ef_Oj z{QpAr*R!gO`0IpS+plZ&S}W|@zFo6#5cY3oHV--*f(X^gRt}QsO6s{0zClxOx%vD`RJLue0u(WEAqdP@jUdJuph`McQcancFfF& zzp$Qae_h1$4_V^T_*~8?_usSlYuqxQ+s05XACHmHZN&+-w}3gyss7c7%|Ute{G%B{AUTfHVX?bwqv&KSwF=mZ+27NIfv-*2_n6+nHd(7v@xuqu-O{?^o zwT7Y{p*FYGzpX3gk3HCPM__ZW)y9_g;j8)n4T`ugx3~A&ZRx45&i2V+!c%36UV#IRMj9QCzH?&hVvihrs46=>`1>5YbU~BJ+w1uM0{@zGiEYg9F5pi#EZtd|+GX7Kt zJ4g1Z9-QB&I&hYs>fm8fUr$#=ZuWO}#;i_BcDDs%b~QeUcRRa%%6-Y*6N>eAHTyb( ze0-v2p*DMSINI3`A%Q7oF=xo_aE6@xMX5cYPH5Aza7IJO1Fm*k*p0WlA!pbZ4m*R& z4RfC((h=c~vR}5@U14V^gqOb_J^?g0M>|n|FfH(wKN{nqI?>|mo0j&<($ZeZw$#_7 zo-&4}_4+8auIq@ckFnK}{%|1d^9Mo!#Tg9P?Ag3qLVfTKyZw^i z8Fu>uZl4ki1haTgLUM+Kc!ezmrLfcI31th#(bXI4;jx5+l57v#9Ex9Z`XzY{^Wk?R zYanb7yB&VJtYA!d!gj?b`8{qaqy%KU;<4MUHfvkk)^;l{6pxJM$A$OpHoQHTWr^`$ zd?qKV>$L3d^dyu1U7{I3Oa)pILD1$=d`>x_1VT<{$cI){BXBFItS{v81Ve70>~p%M zupDyP96s6M!)te^RZ2%7G%13 zE_cuw_Jl(on=cfQ!;XN@YImmjxbV{h_69CRmL$cE_OyA@fd70BuIxErkjXB!UKnQA zhgce&uVb6ly0T*%$7Hu}4#k>d+qy!{k)Ya}1NM+J81yOlT?YTI4=Y~ZY*RQA$_~^} z;e)`X;HRDRgpx5H(|xTu-7+@G?LfB;+iZ@YU9yExQZx2%V!GfeJzr>Bz5mOZ@2vR; zuM5PX?Jv_0Ku!Fwnx`Dv{#7$!+pDJ6es9y>&uyBYPJcw~Yc+q1Cd3*4*GS#jPr7OQ 
zSW7xR%SfO_r)9&RKk?LX;~~<&f2d`fcA1%H1o@WtklVw9|$1CXcRj3 literal 0 HcmV?d00001 diff --git a/0D_cache_performance/kernel8.img b/0D_cache_performance/kernel8.img new file mode 100755 index 0000000000000000000000000000000000000000..ec0a16b0869abc221cdd3eaa4386dc7c504a0dd8 GIT binary patch literal 6912 zcma)=3vg5CmB+t(uWdd2ejD3Z*X9vRfRV1=bctp2Sa1qWWryt09l$oG76!YP4dLZl zK+6CI7P*8p&@N89JNSw)lS#8#v^#BZv%3?9ePA4CI@|3=cG?|3c-iJ*kr?g&lRgn6 z$n0w7&i&o*JLi1oo^!s(RWGp(YT_M}2MMWJQwc66ty{pGMPV=$3g!Mfo}3Yh_^FDJ zds>cobtl`qPX;bHzZ36xUror32|@k-No9|_3Vnt?&*VtJb2IrDd4o!2VDdYwh@1+& zaY5s|CupRkIdT2TD$;*PQ2Xve{#B0rT^1o(okNNVx2tKO$8h4DlP6Tx*A!2@5>JAs zIr6~-mvY_kWW_)^pEiDtBQ^cWX_q{>XH0esIQfqE|DDU|O5_u}?!e#AxzzC<_%H-h$LLQcb%8`PIuLY8zH<5M5*-19gu&wP8}F0M}KZt<>r+={XF$%$PV zhEwOt`P6Z-d);GiyrciSe7cXeIr${t36;a!q$1uXq>w|Q1YGgB;&2JLcw8JVB8LPL9JxUJJsdfM+?k@eyMMvq z=Mi5Ga(Oi0;m=Hut8Nj$u{4>3LjKEn72_&DR^;8ToGfx8)ZgU>QP3qHsA z90?BH3)Nh}e9IMFirtyFwOwyFwOzzGtLL29>X?Kk8%m)67WLC3&A?ZI&c}| zGO)y00$Uhcz{?mf1EU_p_EC>=CF4r)YR0Rlv>HI~hB{8yIf@Z)Cg?jCu^) zM?K1QjO)PL7;gh_XS^NU%(xlsX6y#M?8|^(Y@=dc^@ z7@q=nGwueTWqcNVj`6wY>+zW4#r2pKuEQ+d`Dne&s>7cUa;7GMm>T)4SB@oloOJC} zcBl%9#p1|1m#UF6RiEY`gIr|2%+wuYcB)b5#Xn_oLZ;6Hnb@^G8*BSCUHs8}OLlzq9mstSxd8Ub=-!Zn_7T=j_l7rl64oEZ-bvsqA@jXo zpr4)v!p|Pj`#F1l^-1V0+WzQ!zm}E6;a^Km-+WNb$u80}Iklgc)tu*MPUc%qje+=; z;z8$qf%BD%#6Kkv--G_AnHZ@Wsd=3a2on{sBQ&ue?ei!l>W-VkC1z{Q066kMX?TF9R{z~~lb%p%s zdms1Gvq3;0Qf%Af)Pm_?JqH;cvVuIdKTiH_BKd&EmB!Re_^uBz*6X-Nuy3leui9*= zgSGs$^}e9=X%>-IPdswJVr45)lbXHoFBk70}SrPV+TIZpS_R+DA zqpu2mI@lXSobC(RzNifzxlyO*8>a=ej{4g7I8?J|mXAvKI0rud8GI~fKIXv37s(iB z$>g&4sC^Ww$-ubCQ+V8{eMYC&_a_|yLl{#=js=jEgq`}1F6Q zx8HWsbzlUwMAw2LeB*>VdHLAees`#oldG@}gcKw=K96|I=aY%jWKxsVA)lwYJ~2~2 zaPl2>!HAHgyBuFA?c7^@r8tgwKfxT#MBUzB`t{n-h&rJ@ZJOm%JAAso=y{pfVr5=> zUgq1eGV`97SrIFvd0u8&tPIwN=z13Q*$^v}`@BqPtW4(fGMZSK)aPY#Vr3Gam%;bw z)4iI;FuIN)pI*9tg!kA$iMJBD!v-AQcj0Gv1kxZ%9ra|5cnkW=+(N;M*~u*`#(xAv=^vQrL3=Zp|@&ip?5Tq1nGV^dWnNRe9++e5>NN;u+JF( zG#!5=b?!9PuL76OcgSOhiHyC?t6N;D9k~eIrSaO0QJ)078*v0t&%P%GB&eHNIk5Sk 
zRTIC&9^Itvy1DbpU*Ei*c5yQK$CoCr|HJPlZ-4i9llL?~bEd;S)@t9Slm9cBH2fpy z<)S}Lp2At{F5d6G-}JZ*!=>IyLFLnSTt7d|sk^i@uMCLSlFnD9lm5;4Z-M)&a-jIY zznw3}UG$`!FWy(XN8Gn%PlcY}<9LfT2FE`=C*(sCmT{yqc=S1v#w|;rr zxoA1b*sh5^^#CKV*X#}h-PUhQ&6_ldo(J)=UR?_HN} zPx%5eaSJx_E!$7$=~n7rA4$i$U`GtI5~0@wJ!g$z( zQsj$B0p#op!~)};c~amAhL9UtH~7OKm!8$=<gUm zS|Q&T;(Sy4mGYDj?|Xon__8TZF6Dci!TNaYbqVtDUlDnvfcX13V(E1{aK<6lQJhUf zZNz>wA_r4 zcP7`0d6z}?3vh3m?(C*~8&?%c>{^fY>Mr7VV`8(de}ThhNXXAf!=>T*))Uz$@E;D} zdO7>#xw|SdLC=5ha!FmCoG_04wB{8ebgj#F*e=KCXMDs(Iyfl}c`2S(d$ji4;n+Ne;z63|`FF4h>7Q7}d&)Qp8Vb>&+R%P~ud}Taw zI}&i#QjyjG?m_t6yTRcPl@q^ahHG7;ATSy$4bB*Uj(n`f*<+RbWz$MIw1N-kIfy+Rb6JYo^KT&j z-a5|yeQI|`Vf}DJrJPGV-9h;A2ssGxdE+IRU%e?9V~Ti(sjiR{9*2ZNrvFCW1cf}` zbd3L6oXxOSWT_Oys|-~POr!4XGZh1~-%%CBnR`aldKB>DZO)%ozXnaW5 z4!rx=BVsRt9XdW8_g^rcsw1r$-$3Tk#DcKSIMrL8WK3KV(WQD16+{|`?%0>ao*P$VU;N-`!fU8-{nI%6SX|NHHgasgtoAMem9WKG3=_ z5-+U@2Op`Ksr?o5eMM#XELx7(H$s05v8OTAA@++9`(ea>4Ep2NXKj!t3Tq>3TFE)= zr5wIl)sCQNVWrHe9C9hfiqziM_No7~$I~HiYMz+KkT*IW&4CViTa4J#ap{=D$lEQ9 zJN~U_#}*$etV4)}4tcCX9;5oVVI!=EJT68amoQt@#%%WS8$Bv-JN8w);{;O4!_uQ(X!okKs;UL}1 zy7r(gy>RfBkSr+=7I+gKg@dKIk}C2B1Hy&R=(>u%`{}Q;p}D?sXRW)*)%<<2xutnY zeQkYXLv7uT263gcVx73NWvgqOtG>3~)zaK1*0zhgUGDZ>wL3)pp2gx18uzxLk3K9@ zi#uIATx|{YEzMio%GBy-4FCU)wz=z^E;*s-HUbna?vudQ#WZ7x=;)zxpf+QWL{x^=IKP-t#>qpYlqK0tMc zYj=bAmn}=xYC?VARqJl2zQ4=-iu!>%B{)-sW$L0ahv5J9eu0?xyTP8*!f!{&OSs45 z`XDv(-mDp0N96Za`m^oP{^;G+ytQGE*s`l#Y}qE(wd_LjmW9K!qahp_u?}4{ToFOM za71&uIW}$zU{?3}%DHU^Unbc1bT8BuO$#Cdn*WB&%eT>_)xOV3dqTqseGCT8vhs z&1g62O$L)>GMY>#v&mwznrtS!S#LI&C9~0NGMmj7v(;=f+bw#F!6I3V7L&znu~@7Y zo5gO`TMbspYP6cHW~;?&wc4z9o8D%yNj9U+WHZ|=Hml8Mv)kdK9mCsU+72~4n*JAh CTN>a1 literal 0 HcmV?d00001 diff --git a/0D_cache_performance/link.ld b/0D_cache_performance/link.ld new file mode 100644 index 00000000..896153e9 --- /dev/null +++ b/0D_cache_performance/link.ld @@ -0,0 +1,57 @@ +/* + * MIT License + * + * Copyright (c) 2018 Andre Richter + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + 
* of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +ENTRY(_boot_cores); + +SECTIONS +{ + . = 0x80000; /* This is already 4KiB aligned */ + __ro_start = .; + .text : + { + KEEP(*(.text.boot)) *(.text .text.*) + } + + .rodata : + { + *(.rodata .rodata.*) + } + . 
= ALIGN(4096); /* Fill up to 4KiB */ + __ro_end = .; + + .data : + { + *(.data .data.*) + } + + .bss ALIGN(8): + { + __bss_start = .; + *(.bss .bss.*) + *(COMMON) + __bss_end = .; + } + + /DISCARD/ : { *(.comment) *(.gnu*) *(.note*) *(.eh_frame*) } +} diff --git a/0D_cache_performance/raspi3_boot/Cargo.toml b/0D_cache_performance/raspi3_boot/Cargo.toml new file mode 100644 index 00000000..73cd453b --- /dev/null +++ b/0D_cache_performance/raspi3_boot/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "raspi3_boot" +version = "0.1.0" +authors = ["Andre Richter "] + +[dependencies] +cortex-a = "2.2.1" +panic-abort = "0.2.0" +r0 = "0.2.2" diff --git a/0D_cache_performance/raspi3_boot/src/lib.rs b/0D_cache_performance/raspi3_boot/src/lib.rs new file mode 100644 index 00000000..8a081b53 --- /dev/null +++ b/0D_cache_performance/raspi3_boot/src/lib.rs @@ -0,0 +1,131 @@ +/* + * MIT License + * + * Copyright (c) 2018 Jorge Aparicio + * Copyright (c) 2018 Andre Richter + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#![deny(missing_docs)] +#![deny(warnings)] +#![no_std] + +//! Low-level boot of the Raspberry's processor + +extern crate cortex_a; +extern crate panic_abort; +extern crate r0; + +#[macro_export] +macro_rules! entry { + ($path:path) => { + #[export_name = "main"] + pub unsafe fn __main() -> ! { + // type check the given path + let f: fn() -> ! = $path; + + f() + } + }; +} + +/// Reset function. +/// +/// Initializes the bss section before calling into the user's `main()`. +unsafe fn reset() -> ! { + extern "C" { + // Boundaries of the .bss section, provided by the linker script + static mut __bss_start: u64; + static mut __bss_end: u64; + } + + // Zeroes the .bss section + r0::zero_bss(&mut __bss_start, &mut __bss_end); + + extern "Rust" { + fn main() -> !; + } + + main() +} + +/// Prepare and execute transition from EL2 to EL1. +#[inline] +fn setup_and_enter_el1_from_el2() -> ! { + use cortex_a::{asm, regs::*}; + + // Enable timer counter registers for EL1 + CNTHCTL_EL2.write(CNTHCTL_EL2::EL1PCEN::SET + CNTHCTL_EL2::EL1PCTEN::SET); + + // No offset for reading the counters + CNTVOFF_EL2.set(0); + + // Set EL1 execution state to AArch64 + // TODO: Explain the SWIO bit + HCR_EL2.write(HCR_EL2::RW::EL1IsAarch64 + HCR_EL2::SWIO::SET); + + // Set up a simulated exception return. + // + // First, fake a saved program status, where all interrupts were + // masked and SP_EL0 was used as a stack pointer. + SPSR_EL2.write( + SPSR_EL2::D::Masked + + SPSR_EL2::A::Masked + + SPSR_EL2::I::Masked + + SPSR_EL2::F::Masked + + SPSR_EL2::M::EL1t, + ); + + // Second, let the link register point to reset(). 
+ ELR_EL2.set(reset as *const () as u64); + + // Set up SP_EL0 (stack pointer), which will be used by EL1 once + // we "return" to it. + SP_EL0.set(0x80_000); + + // Use `eret` to "return" to EL1. This will result in execution of + // `reset()` in EL1. + asm::eret() +} + +/// Entrypoint of the processor. +/// +/// Parks all cores except core0 and checks if we started in EL2. If +/// so, proceeds with setting up EL1. +#[link_section = ".text.boot"] +#[no_mangle] +pub unsafe extern "C" fn _boot_cores() -> ! { + use cortex_a::{asm, regs::*}; + + const CORE_0: u64 = 0; + const CORE_MASK: u64 = 0x3; + const EL2: u32 = CurrentEL::EL::EL2.value; + + if let CORE_0 = MPIDR_EL1.get() & CORE_MASK { + if let EL2 = CurrentEL.get() { + setup_and_enter_el1_from_el2() + } + } + + // if not core0 or EL != 2, infinitely wait for events + loop { + asm::wfe(); + } +} diff --git a/0D_cache_performance/src/benchmark.rs b/0D_cache_performance/src/benchmark.rs new file mode 100644 index 00000000..e4f7ce1d --- /dev/null +++ b/0D_cache_performance/src/benchmark.rs @@ -0,0 +1,40 @@ +use core::sync::atomic::{compiler_fence, Ordering}; +use cortex_a::{barrier, regs::*}; + +/// We assume that addr is cacheline aligned +pub fn batch_modify(addr: u64) -> u32 { + const CACHELINE_SIZE_BYTES: u64 = 64; // TODO: retrieve this from a system register + const NUM_CACHELINES_TOUCHED: u64 = 5; + const BYTES_PER_U64_REG: usize = 8; + const NUM_BENCH_ITERATIONS: u64 = 100_000; + + const NUM_BYTES_TOUCHED: u64 = CACHELINE_SIZE_BYTES * NUM_CACHELINES_TOUCHED; + + let t1 = CNTPCT_EL0.get(); + + compiler_fence(Ordering::SeqCst); + + let mut data_ptr: *mut u64; + let mut temp: u64; + for _ in 0..NUM_BENCH_ITERATIONS { + for i in (addr..(addr + NUM_BYTES_TOUCHED)).step_by(BYTES_PER_U64_REG) { + data_ptr = i as *mut u64; + + unsafe { + temp = core::ptr::read_volatile(data_ptr); + core::ptr::write_volatile(data_ptr, temp + 1); + } + } + } + + // Insert a barrier to ensure that the last memory operation has finished 
+ // before we retrieve the elapsed time with the subsequent counter read. Not + // needed at all given the sample size, but let's be a bit pedantic here for + // education purposes. For measuring single-instructions, this would be + // needed. + unsafe { barrier::dsb(barrier::SY) }; + + let t2 = CNTPCT_EL0.get(); + + ((t2 - t1) * 1000 / u64::from(CNTFRQ_EL0.get())) as u32 +} diff --git a/0D_cache_performance/src/gpio.rs b/0D_cache_performance/src/gpio.rs new file mode 100644 index 00000000..3ff0c1e9 --- /dev/null +++ b/0D_cache_performance/src/gpio.rs @@ -0,0 +1,75 @@ +/* + * MIT License + * + * Copyright (c) 2018 Andre Richter + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +use super::MMIO_BASE; +use register::mmio::ReadWrite; + +// Descriptions taken from +// https://github.com/raspberrypi/documentation/files/1888662/BCM2837-ARM-Peripherals.-.Revised.-.V2-1.pdf +register_bitfields! 
{ + u32, + + /// GPIO Function Select 1 + GPFSEL1 [ + /// Pin 15 + FSEL15 OFFSET(15) NUMBITS(3) [ + Input = 0b000, + Output = 0b001, + RXD0 = 0b100, // UART0 - Alternate function 0 + RXD1 = 0b010 // Mini UART - Alternate function 5 + + ], + + /// Pin 14 + FSEL14 OFFSET(12) NUMBITS(3) [ + Input = 0b000, + Output = 0b001, + TXD0 = 0b100, // UART0 - Alternate function 0 + TXD1 = 0b010 // Mini UART - Alternate function 5 + ] + ], + + /// GPIO Pull-up/down Clock Register 0 + GPPUDCLK0 [ + /// Pin 15 + PUDCLK15 OFFSET(15) NUMBITS(1) [ + NoEffect = 0, + AssertClock = 1 + ], + + /// Pin 14 + PUDCLK14 OFFSET(14) NUMBITS(1) [ + NoEffect = 0, + AssertClock = 1 + ] + ] +} + +pub const GPFSEL1: *const ReadWrite = + (MMIO_BASE + 0x0020_0004) as *const ReadWrite; + +pub const GPPUD: *const ReadWrite = (MMIO_BASE + 0x0020_0094) as *const ReadWrite; + +pub const GPPUDCLK0: *const ReadWrite = + (MMIO_BASE + 0x0020_0098) as *const ReadWrite; diff --git a/0D_cache_performance/src/main.rs b/0D_cache_performance/src/main.rs new file mode 100644 index 00000000..41b9d0fb --- /dev/null +++ b/0D_cache_performance/src/main.rs @@ -0,0 +1,111 @@ +/* + * MIT License + * + * Copyright (c) 2018 Andre Richter + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#![no_std] +#![no_main] +#![feature(asm)] +#![feature(const_fn)] + +extern crate cortex_a; + +#[macro_use] +extern crate raspi3_boot; + +#[macro_use] +extern crate register; + +const MMIO_BASE: u32 = 0x3F00_0000; + +mod gpio; +mod mbox; +mod mmu; +mod uart; +mod benchmark; + +fn do_benchmarks(uart: &uart::Uart) { + const SIZE_2MIB: u64 = 2 * 1024 * 1024; + + // Start of the __SECOND__ virtual 2 MiB block (counting starts at zero). + // NON-cacheable DRAM memory. + let non_cacheable_addr: u64 = SIZE_2MIB; + + // Start of the __THIRD__ virtual 2 MiB block. + // Cacheable DRAM memory + let cacheable_addr: u64 = 2 * SIZE_2MIB; + + uart.puts("Benchmarking non-cacheable DRAM modifications at virtual 0x"); + uart.hex(non_cacheable_addr as u32); + uart.puts(", physical 0x"); + uart.hex(2 * SIZE_2MIB as u32); + uart.puts(":\n"); + + let result_nc = benchmark::batch_modify(non_cacheable_addr); + uart.dec(result_nc); + uart.puts(" miliseconds.\n\n"); + + uart.puts("Benchmarking cacheable DRAM modifications at virtual 0x"); + uart.hex(cacheable_addr as u32); + uart.puts(", physical 0x"); + uart.hex(2 * SIZE_2MIB as u32); + uart.puts(":\n"); + let result_c = benchmark::batch_modify(cacheable_addr); + uart.dec(result_c); + uart.puts(" miliseconds.\n\n"); + + let percent_diff = (result_nc - result_c) * 100 / result_c; + + uart.puts("With caching, the function is "); + uart.dec(percent_diff); + uart.puts("% faster!\n"); +} + +entry!(kernel_entry); + +fn kernel_entry() -> ! 
{ + let mut mbox = mbox::Mbox::new(); + let uart = uart::Uart::new(uart::UART_PHYS_BASE); + + // set up serial console + if uart.init(&mut mbox).is_err() { + loop { + cortex_a::asm::wfe() // If UART fails, abort early + } + } + + uart.getc(); // Press a key first before being greeted + uart.puts("Hello Rustacean!\n\n"); + + uart.puts("\nSwitching MMU on now..."); + + unsafe { mmu::init() }; + + uart.puts("MMU is live \\o/\n\n"); + + do_benchmarks(&uart); + + // echo everything back + loop { + uart.send(uart.getc()); + } +} diff --git a/0D_cache_performance/src/mbox.rs b/0D_cache_performance/src/mbox.rs new file mode 100644 index 00000000..18fe5787 --- /dev/null +++ b/0D_cache_performance/src/mbox.rs @@ -0,0 +1,159 @@ +/* + * MIT License + * + * Copyright (c) 2018 Andre Richter + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +use super::MMIO_BASE; +use core::ops; +use cortex_a::asm; +use register::mmio::{ReadOnly, WriteOnly}; + +register_bitfields! { + u32, + + STATUS [ + FULL OFFSET(31) NUMBITS(1) [], + EMPTY OFFSET(30) NUMBITS(1) [] + ] +} + +const VIDEOCORE_MBOX: u32 = MMIO_BASE + 0xB880; + +#[allow(non_snake_case)] +#[repr(C)] +pub struct RegisterBlock { + READ: ReadOnly, // 0x00 + __reserved_0: [u32; 5], // 0x04 + STATUS: ReadOnly, // 0x18 + __reserved_1: u32, // 0x1C + WRITE: WriteOnly, // 0x20 +} + +// Custom errors +pub enum MboxError { + ResponseError, + UnknownError, +} +pub type Result = ::core::result::Result; + +// Channels +pub mod channel { + pub const PROP: u32 = 8; +} + +// Tags +pub mod tag { + pub const SETCLKRATE: u32 = 0x38002; + pub const LAST: u32 = 0; +} + +// Clocks +pub mod clock { + pub const UART: u32 = 0x0_0000_0002; +} + +// Responses +mod response { + pub const SUCCESS: u32 = 0x8000_0000; + pub const ERROR: u32 = 0x8000_0001; // error parsing request buffer (partial response) +} + +pub const REQUEST: u32 = 0; + +// Public interface to the mailbox +#[repr(C)] +#[repr(align(16))] +pub struct Mbox { + // The address for buffer needs to be 16-byte aligned so that the + // Videcore can handle it properly. + pub buffer: [u32; 36], +} + +/// Deref to RegisterBlock +/// +/// Allows writing +/// ``` +/// self.STATUS.read() +/// ``` +/// instead of something along the lines of +/// ``` +/// unsafe { (*Mbox::ptr()).STATUS.read() } +/// ``` +impl ops::Deref for Mbox { + type Target = RegisterBlock; + + fn deref(&self) -> &Self::Target { + unsafe { &*Self::ptr() } + } +} + +impl Mbox { + pub fn new() -> Mbox { + Mbox { buffer: [0; 36] } + } + + /// Returns a pointer to the register block + fn ptr() -> *const RegisterBlock { + VIDEOCORE_MBOX as *const _ + } + + /// Make a mailbox call. 
Returns Err(MboxError) on failure, Ok(()) success + pub fn call(&self, channel: u32) -> Result<()> { + // wait until we can write to the mailbox + loop { + if !self.STATUS.is_set(STATUS::FULL) { + break; + } + + asm::nop(); + } + + let buf_ptr = self.buffer.as_ptr() as u32; + + // write the address of our message to the mailbox with channel identifier + self.WRITE.set((buf_ptr & !0xF) | (channel & 0xF)); + + // now wait for the response + loop { + // is there a response? + loop { + if !self.STATUS.is_set(STATUS::EMPTY) { + break; + } + + asm::nop(); + } + + let resp: u32 = self.READ.get(); + + // is it a response to our message? + if ((resp & 0xF) == channel) && ((resp & !0xF) == buf_ptr) { + // is it a valid successful response? + return match self.buffer[1] { + response::SUCCESS => Ok(()), + response::ERROR => Err(MboxError::ResponseError), + _ => Err(MboxError::UnknownError), + }; + } + } + } +} diff --git a/0D_cache_performance/src/mmu.rs b/0D_cache_performance/src/mmu.rs new file mode 100644 index 00000000..a379c769 --- /dev/null +++ b/0D_cache_performance/src/mmu.rs @@ -0,0 +1,219 @@ +/* + * MIT License + * + * Copyright (c) 2018 Andre Richter + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +use cortex_a::{barrier, regs::*}; + +register_bitfields! {u64, + // AArch64 Reference Manual page 2150 + STAGE1_DESCRIPTOR [ + /// Execute-never + XN OFFSET(54) NUMBITS(1) [ + False = 0, + True = 1 + ], + + /// Various address fields, depending on use case + LVL2_OUTPUT_ADDR_4KiB OFFSET(21) NUMBITS(27) [], // [47:21] + NEXT_LVL_TABLE_ADDR_4KiB OFFSET(12) NUMBITS(36) [], // [47:12] + + /// Access flag + AF OFFSET(10) NUMBITS(1) [ + False = 0, + True = 1 + ], + + /// Shareability field + SH OFFSET(8) NUMBITS(2) [ + OuterShareable = 0b10, + InnerShareable = 0b11 + ], + + /// Access Permissions + AP OFFSET(6) NUMBITS(2) [ + RW_EL1 = 0b00, + RW_EL1_EL0 = 0b01, + RO_EL1 = 0b10, + RO_EL1_EL0 = 0b11 + ], + + /// Memory attributes index into the MAIR_EL1 register + AttrIndx OFFSET(2) NUMBITS(3) [], + + TYPE OFFSET(1) NUMBITS(1) [ + Block = 0, + Table = 1 + ], + + VALID OFFSET(0) NUMBITS(1) [ + False = 0, + True = 1 + ] + ] +} + +trait BaseAddr { + fn base_addr(&self) -> u64; +} + +impl BaseAddr for [u64; 512] { + fn base_addr(&self) -> u64 { + self as *const u64 as u64 + } +} + +const NUM_ENTRIES_4KIB: usize = 512; + +static mut LVL2_TABLE: [u64; NUM_ENTRIES_4KIB] = [0; NUM_ENTRIES_4KIB]; +static mut SINGLE_LVL3_TABLE: [u64; NUM_ENTRIES_4KIB] = [0; NUM_ENTRIES_4KIB]; + +/// Set up identity mapped page tables for the first 1 gigabyte of address +/// space. +pub unsafe fn init() { + // First, define the three memory types that we will map. Cacheable and + // non-cacheable normal DRAM, and device. 
+ MAIR_EL1.write( + // Attribute 2 + MAIR_EL1::Attr2_HIGH::Memory_OuterNonCacheable + + MAIR_EL1::Attr2_LOW_MEMORY::InnerNonCacheable + + // Attribute 1 + + MAIR_EL1::Attr1_HIGH::Memory_OuterWriteBack_NonTransient_ReadAlloc_WriteAlloc + + MAIR_EL1::Attr1_LOW_MEMORY::InnerWriteBack_NonTransient_ReadAlloc_WriteAlloc + + // Attribute 0 + + MAIR_EL1::Attr0_HIGH::Device + + MAIR_EL1::Attr0_LOW_DEVICE::Device_nGnRE, + ); + + // Descriptive consts for indexing into the correct MAIR_EL1 attributes. + mod mair { + pub const DEVICE: u64 = 0; + pub const NORMAL: u64 = 1; + pub const NORMAL_NON_CACHEABLE: u64 = 2; + } + + // Set up the first LVL2 entry, pointing to a 4KiB table base address. + let lvl3_base: u64 = SINGLE_LVL3_TABLE.base_addr() >> 12; + LVL2_TABLE[0] = (STAGE1_DESCRIPTOR::VALID::True + + STAGE1_DESCRIPTOR::TYPE::Table + + STAGE1_DESCRIPTOR::NEXT_LVL_TABLE_ADDR_4KiB.val(lvl3_base)) + .value; + + // The second 2 MiB block. + LVL2_TABLE[1] = (STAGE1_DESCRIPTOR::VALID::True + + STAGE1_DESCRIPTOR::TYPE::Block + + STAGE1_DESCRIPTOR::AttrIndx.val(mair::NORMAL_NON_CACHEABLE) + + STAGE1_DESCRIPTOR::AP::RW_EL1 + + STAGE1_DESCRIPTOR::SH::OuterShareable + + STAGE1_DESCRIPTOR::AF::True + // This translation is accessed for virtual 0x200000. Point to physical + // 0x400000, aka the third phyiscal 2 MiB DRAM block (third block == 2, + // because we start counting at 0). + // + // Here, we configure it non-cacheable. + + STAGE1_DESCRIPTOR::LVL2_OUTPUT_ADDR_4KiB.val(2) + + STAGE1_DESCRIPTOR::XN::True) + .value; + + // Fill the rest of the LVL2 (2MiB) entries as block + // descriptors. Differentiate between normal and device mem. + let mmio_base: u64 = (super::MMIO_BASE >> 21).into(); + let common = STAGE1_DESCRIPTOR::VALID::True + + STAGE1_DESCRIPTOR::TYPE::Block + + STAGE1_DESCRIPTOR::AP::RW_EL1 + + STAGE1_DESCRIPTOR::AF::True + + STAGE1_DESCRIPTOR::XN::True; + + // Notice the skip(2). 
Start at the third 2 MiB DRAM block, which will point + // virtual 0x400000 to physical 0x400000, configured as cacheable memory. + for (i, entry) in LVL2_TABLE.iter_mut().enumerate().skip(2) { + let j: u64 = i as u64; + + let mem_attr = if j >= mmio_base { + STAGE1_DESCRIPTOR::SH::OuterShareable + STAGE1_DESCRIPTOR::AttrIndx.val(mair::DEVICE) + } else { + STAGE1_DESCRIPTOR::SH::InnerShareable + STAGE1_DESCRIPTOR::AttrIndx.val(mair::NORMAL) + }; + + *entry = (common + mem_attr + STAGE1_DESCRIPTOR::LVL2_OUTPUT_ADDR_4KiB.val(j)).value; + } + + // Finally, fill the single LVL3 table (4KiB granule). Differentiate between + // code/RO and RW sections. + // + // Using the linker script, we ensure that the RO sections are 4KiB aligned, + // and we export their boundaries via symbols. + extern "C" { + static mut __ro_start: u64; + static mut __ro_end: u64; + } + + const PAGESIZE: u64 = 4096; + let ro_start: u64 = &__ro_start as *const _ as u64 / PAGESIZE; + let ro_end: u64 = &__ro_end as *const _ as u64 / PAGESIZE; + let common = STAGE1_DESCRIPTOR::VALID::True + + STAGE1_DESCRIPTOR::TYPE::Table + + STAGE1_DESCRIPTOR::AttrIndx.val(mair::NORMAL) + + STAGE1_DESCRIPTOR::SH::InnerShareable + + STAGE1_DESCRIPTOR::AF::True; + + for (i, entry) in SINGLE_LVL3_TABLE.iter_mut().enumerate() { + let j: u64 = i as u64; + + let mem_attr = if j < ro_start || j > ro_end { + STAGE1_DESCRIPTOR::AP::RW_EL1 + STAGE1_DESCRIPTOR::XN::True + } else { + STAGE1_DESCRIPTOR::AP::RO_EL1 + STAGE1_DESCRIPTOR::XN::False + }; + + *entry = (common + mem_attr + STAGE1_DESCRIPTOR::NEXT_LVL_TABLE_ADDR_4KiB.val(j)).value; + } + + // Point to the LVL2 table base address in TTBR0. + TTBR0_EL1.set_baddr(LVL2_TABLE.base_addr()); + + // Configure various settings of stage 1 of the EL1 translation regime. 
+ let ips = ID_AA64MMFR0_EL1.read(ID_AA64MMFR0_EL1::PARange); + TCR_EL1.write( + TCR_EL1::TBI0::Ignored + + TCR_EL1::IPS.val(ips) + + TCR_EL1::TG0::KiB_4 // 4 KiB granule + + TCR_EL1::SH0::Inner + + TCR_EL1::ORGN0::WriteBack_ReadAlloc_WriteAlloc_Cacheable + + TCR_EL1::IRGN0::WriteBack_ReadAlloc_WriteAlloc_Cacheable + + TCR_EL1::EPD0::EnableTTBR0Walks + + TCR_EL1::T0SZ.val(34), // Start walks at level 2 + ); + + // Switch the MMU on. + // + // First, force all previous changes to be seen before the MMU is enabled. + barrier::isb(barrier::SY); + + // Enable the MMU and turn on caching + SCTLR_EL1.modify(SCTLR_EL1::M::Enable + SCTLR_EL1::C::Cacheable); + + // Force MMU init to complete before next instruction + barrier::isb(barrier::SY); +} diff --git a/0D_cache_performance/src/uart.rs b/0D_cache_performance/src/uart.rs new file mode 100644 index 00000000..809b8546 --- /dev/null +++ b/0D_cache_performance/src/uart.rs @@ -0,0 +1,310 @@ +/* + * MIT License + * + * Copyright (c) 2018 Andre Richter + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +use super::MMIO_BASE; +use core::{ + ops, + sync::atomic::{compiler_fence, Ordering}, +}; +use cortex_a::asm; +use gpio; +use mbox; +use register::mmio::*; + +// PL011 UART registers. +// +// Descriptions taken from +// https://github.com/raspberrypi/documentation/files/1888662/BCM2837-ARM-Peripherals.-.Revised.-.V2-1.pdf +register_bitfields! { + u32, + + /// Flag Register + FR [ + /// Transmit FIFO full. The meaning of this bit depends on the + /// state of the FEN bit in the UARTLCR_ LCRH Register. If the + /// FIFO is disabled, this bit is set when the transmit + /// holding register is full. If the FIFO is enabled, the TXFF + /// bit is set when the transmit FIFO is full. + TXFF OFFSET(5) NUMBITS(1) [], + + /// Receive FIFO empty. The meaning of this bit depends on the + /// state of the FEN bit in the UARTLCR_H Register. If the + /// FIFO is disabled, this bit is set when the receive holding + /// register is empty. If the FIFO is enabled, the RXFE bit is + /// set when the receive FIFO is empty. + RXFE OFFSET(4) NUMBITS(1) [] + ], + + /// Integer Baud rate divisor + IBRD [ + /// Integer Baud rate divisor + IBRD OFFSET(0) NUMBITS(16) [] + ], + + /// Fractional Baud rate divisor + FBRD [ + /// Fractional Baud rate divisor + FBRD OFFSET(0) NUMBITS(6) [] + ], + + /// Line Control register + LCRH [ + /// Word length. These bits indicate the number of data bits + /// transmitted or received in a frame. + WLEN OFFSET(5) NUMBITS(2) [ + FiveBit = 0b00, + SixBit = 0b01, + SevenBit = 0b10, + EightBit = 0b11 + ] + ], + + /// Control Register + CR [ + /// Receive enable. If this bit is set to 1, the receive + /// section of the UART is enabled. Data reception occurs for + /// UART signals. 
When the UART is disabled in the middle of + /// reception, it completes the current character before + /// stopping. + RXE OFFSET(9) NUMBITS(1) [ + Disabled = 0, + Enabled = 1 + ], + + /// Transmit enable. If this bit is set to 1, the transmit + /// section of the UART is enabled. Data transmission occurs + /// for UART signals. When the UART is disabled in the middle + /// of transmission, it completes the current character before + /// stopping. + TXE OFFSET(8) NUMBITS(1) [ + Disabled = 0, + Enabled = 1 + ], + + /// UART enable + UARTEN OFFSET(0) NUMBITS(1) [ + /// If the UART is disabled in the middle of transmission + /// or reception, it completes the current character + /// before stopping. + Disabled = 0, + Enabled = 1 + ] + ], + + /// Interupt Clear Register + ICR [ + /// Meta field for all pending interrupts + ALL OFFSET(0) NUMBITS(11) [] + ] +} + +pub const UART_PHYS_BASE: u32 = MMIO_BASE + 0x20_1000; + +#[allow(non_snake_case)] +#[repr(C)] +pub struct RegisterBlock { + DR: ReadWrite, // 0x00 + __reserved_0: [u32; 5], // 0x04 + FR: ReadOnly, // 0x18 + __reserved_1: [u32; 2], // 0x1c + IBRD: WriteOnly, // 0x24 + FBRD: WriteOnly, // 0x28 + LCRH: WriteOnly, // 0x2C + CR: WriteOnly, // 0x30 + __reserved_2: [u32; 4], // 0x34 + ICR: WriteOnly, // 0x44 +} + +pub enum UartError { + MailboxError, +} +pub type Result = ::core::result::Result; + +pub struct Uart { + uart_base: u32, +} + +impl ops::Deref for Uart { + type Target = RegisterBlock; + + fn deref(&self) -> &Self::Target { + unsafe { &*self.ptr() } + } +} + +impl Uart { + pub fn new(uart_base: u32) -> Uart { + Uart { uart_base } + } + + /// Returns a pointer to the register block + fn ptr(&self) -> *const RegisterBlock { + self.uart_base as *const _ + } + + ///Set baud rate and characteristics (115200 8N1) and map to GPIO + pub fn init(&self, mbox: &mut mbox::Mbox) -> Result<()> { + // turn off UART0 + self.CR.set(0); + + // set up clock for consistent divisor values + mbox.buffer[0] = 9 * 4; + 
mbox.buffer[1] = mbox::REQUEST; + mbox.buffer[2] = mbox::tag::SETCLKRATE; + mbox.buffer[3] = 12; + mbox.buffer[4] = 8; + mbox.buffer[5] = mbox::clock::UART; // UART clock + mbox.buffer[6] = 4_000_000; // 4Mhz + mbox.buffer[7] = 0; // skip turbo setting + mbox.buffer[8] = mbox::tag::LAST; + + // Insert a compiler fence that ensures that all stores to the + // mbox buffer are finished before the GPU is signaled (which + // is done by a store operation as well). + compiler_fence(Ordering::Release); + + if mbox.call(mbox::channel::PROP).is_err() { + return Err(UartError::MailboxError); // Abort if UART clocks couldn't be set + }; + + // map UART0 to GPIO pins + unsafe { + (*gpio::GPFSEL1).modify(gpio::GPFSEL1::FSEL14::TXD0 + gpio::GPFSEL1::FSEL15::RXD0); + + (*gpio::GPPUD).set(0); // enable pins 14 and 15 + for _ in 0..150 { + asm::nop(); + } + + (*gpio::GPPUDCLK0).modify( + gpio::GPPUDCLK0::PUDCLK14::AssertClock + gpio::GPPUDCLK0::PUDCLK15::AssertClock, + ); + for _ in 0..150 { + asm::nop(); + } + + (*gpio::GPPUDCLK0).set(0); + } + + self.ICR.write(ICR::ALL::CLEAR); + self.IBRD.write(IBRD::IBRD.val(2)); // Results in 115200 baud + self.FBRD.write(FBRD::FBRD.val(0xB)); + self.LCRH.write(LCRH::WLEN::EightBit); // 8N1 + self.CR + .write(CR::UARTEN::Enabled + CR::TXE::Enabled + CR::RXE::Enabled); + + Ok(()) + } + + /// Send a character + pub fn send(&self, c: char) { + // wait until we can send + loop { + if !self.FR.is_set(FR::TXFF) { + break; + } + + asm::nop(); + } + + // write the character to the buffer + self.DR.set(c as u32); + } + + /// Receive a character + pub fn getc(&self) -> char { + // wait until something is in the buffer + loop { + if !self.FR.is_set(FR::RXFE) { + break; + } + + asm::nop(); + } + + // read it and return + let mut ret = self.DR.get() as u8 as char; + + // convert carrige return to newline + if ret == '\r' { + ret = '\n' + } + + ret + } + + /// Display a string + pub fn puts(&self, string: &str) { + for c in string.chars() { + // convert 
newline to carrige return + newline + if c == '\n' { + self.send('\r') + } + + self.send(c); + } + } + + /// Display a binary value in hexadecimal + pub fn hex(&self, d: u32) { + let mut n; + + for i in 0..8 { + // get highest tetrad + n = d.wrapping_shr(28 - i * 4) & 0xF; + + // 0-9 => '0'-'9', 10-15 => 'A'-'F' + // Add proper offset for ASCII table + if n > 9 { + n += 0x37; + } else { + n += 0x30; + } + + self.send(n as u8 as char); + } + } + + /// Display a binary value in decimal + pub fn dec(&self, d: u32) { + let mut digits: [char; 10] = ['\0'; 10]; + let mut d = d; + let mut i: usize = 0; + + loop { + digits[i] = ((d % 10) + 0x30) as u8 as char; + + i += 1; + d /= 10; + + if d == 0 { + break; + } + } + + for c in digits.iter().rev() { + self.send(*c); + } + } +}