; Copyright 2009 Castle Technology Ltd
; Copyright 2021 RISC OS Open Ltd
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;     http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;

; VMSAv6+ "long descriptor" page table support

        ; Convert given page flags to the equivalent temp uncacheable L3PT flags
        ;
        ; $out       = register which receives the byte read from the PCBTrans
        ;              table for the temporarily-uncacheable variant of the
        ;              page's cache policy (i.e. the AttrIndx bits)
        ; $pageflags = register holding the page flags; only the NotCacheable,
        ;              NotBufferable and CPBits fields are examined
        ; $pcbtrans  = register pointing at the PCBTrans byte table (preserved)
        ; $temp      = scratch register (corrupted)
        ;
        ; $pageflags is not read again once $temp has been written, and there
        ; is deliberately no ASSERT forbidding $temp = $pageflags; the order of
        ; the two ANDs must therefore not be swapped.
        MACRO
        GetTempUncache_LongDesc $out, $pageflags, $pcbtrans, $temp
        ASSERT  $out <> $pageflags
        ASSERT  $out <> $pcbtrans
        ASSERT  $out <> $temp
        ASSERT  $temp <> $pcbtrans
        ; The shifts below rely on this exact layout of the page flags
        ASSERT  DynAreaFlags_CPBits = 7*XCB_P :SHL: 10
        ASSERT  DynAreaFlags_NotCacheable = XCB_NC :SHL: 4
        ASSERT  DynAreaFlags_NotBufferable = XCB_NB :SHL: 4
        AND     $out, $pageflags, #DynAreaFlags_NotCacheable + DynAreaFlags_NotBufferable
        AND     $temp, $pageflags, #DynAreaFlags_CPBits
        ORR     $out, $out, #XCB_TU<<4                      ; treat as temp uncacheable
        ORR     $out, $out, $temp, LSR #10-4                ; $out = XCB value, still shifted left by 4
        LDRB    $out, [$pcbtrans, $out, LSR #4]             ; convert to AttrIndx
        MEND

; Mask covering the three AttrIndx bits in the low word of an L3PT entry. These
; are the bits BangL3PT replaces when making a page temporarily uncacheable.
TempUncache_L3PTMask * 7*LL_Page_LowAttr_AttrIndx0

; **************** CAM manipulation utility routines ***********************************

; **************************************************************************************
;
;       BangCamUpdate - Update CAM, MMU for page move, coping with page currently mapped in
;
; mjs Oct 2000
; reworked to use generic ARM ops (vectored to appropriate routines during boot)
;
; First look in the CamEntries table to find the logical address L this physical page is
; currently allocated to. Then check in the Level 3 page tables to see if page L is currently
; at page R2. If it is, then map page L to be inaccessible, otherwise leave page L alone.
; Then map logical page R3 to physical page R2.
;
; in:   r2 = physical page number
;       r3 = logical address (2nd copy if doubly mapped area)
;       r9 = offset from 1st to 2nd copy of doubly mapped area (either source or dest, but not both)
;       r11 = PPL + CB bits
;
; out:  r0, r1, r4, r6 corrupted
;       r2, r3, r5, r7-r12 preserved
;

BangCamUpdate_LongDesc ROUT
        Entry   "r7,r8"                         ; r7 is overwritten by the LDRDs/GetPTE below, r8 is the L3PT pointer
        TST     r11, #DynAreaFlags_DoublyMapped ; if moving page to doubly mapped area
        SUBNE   r3, r3, r9                      ; then CAM soft copy holds ptr to 1st copy

        LDR     r8, =LL3PT                      ; point to level 3 page tables

        ; Update the CAM entry for this physical page, remembering the old contents
        LDR     r1, =ZeroPage
        LDR     r1, [r1, #CamEntriesPointer]
        ADD     r1, r1, r2, LSL #CAM_EntrySizeLog2 ; point at cam entry (logaddr, PPL)
        ASSERT  CAM_LogAddr=0
        ASSERT  CAM_PageFlags=4
        LDMIA   r1, {r0, r6}                    ; r0 = current logaddress, r6 = current PPL
        Push    "r0, r6"                        ; save old logical address, PPL
        BIC     r4, r11, #PageFlags_Unsafe      ; the Unsafe flag is a request flag only, never stored in the CAM
        BIC     r4, r4, #StickyPageFlags        ; sticky flags come from the old entry, not the caller
        AND     r6, r6, #StickyPageFlags
        ORR     r4, r4, r6
        STMIA   r1, {r3, r4}                    ; store new address, PPL

        ; Convert the physical page number into a physical address
        LDR     r1, =ZeroPage+PhysRamTable      ; go through phys RAM table
        MOV     r6, r2                          ; make copy of r2 (since that must be preserved)
10
        LDMIA   r1!, {r0, r4}                   ; load next address, size
        SUBS    r6, r6, r4, LSR #12             ; subtract off that many pages
        BCS     %BT10                           ; if more than that, go onto next bank

        ADD     r6, r6, r4, LSR #12             ; put back the ones which were too many
        ADD     r0, r0, r6                      ; move on address by the number of pages left
                                                ; (no shift: r0 is used in units of pages here, see ROR below)
        LDR     r6, [sp]                        ; reload old logical address
        MOV     r0, r0, ROR #20                 ; High address bits packed into low, ready for Get4PTE
                                                ; (rotate left by 12: page number bits 0-19 -> bits 12-31,
                                                ;  page number bits 20 upwards -> bits 0 upwards)

; now we have r6 = old logical address, r2 = physical page number, r0 = physical address

        TEQ     r6, r3                          ; TMD 19-Jan-94: if old logaddr = new logaddr, then
        BEQ     %FT20                           ; don't remove page from where it is, to avoid window
                                                ; where page is nowhere.
        ADD     r1, r8, r6, LSR #9              ; r1 -> L3PT entry for old log.addr (8 bytes per 4K page)
        MOV     r4, r1, LSR #12                 ; r4 = page index of the address held in r1
        ADD     r6, r8, r4, LSL #3              ; r6 -> L3PT entry which maps that part of the L3PT
        LDRD    r6, [r6]                        ; r6,r7 = L3PT entry for L3PT entry for old log.addr
        TST     r6, #LL_TypeMask                ; if page not there
        BEQ     %FT20                           ; then no point in trying to remove it

        LDRD    r6, [r1]                        ; r6,r7 = L3PT entry for old log.addr
        ASSERT  LL_HighAddr_Start = 0
        ASSERT  LL_HighAddr_Start+LL_HighAddr_Size <= LL_LowAddr_Start
        BFI     r6, r7, #0, #LL_HighAddr_Size   ; fold the high address bits into the bottom of the low word
        BIC     r6, r6, #&FFFFFFFF - (LL_LowAddr+LL_HighAddr) ; r6 = packed phys addr
        TEQ     r6, r0                          ; if equal to physical address of page being moved
        BNE     %FT20                           ; if not there, then just put in new page

        ; The page is currently mapped at its old logical address: map it out first
        AND     r4, r11, #PageFlags_Unsafe      ; carry the caller's Unsafe request over to the map-out
        Push    "r0, r3, r11"                   ; save phys.addr, new log.addr, new PPL
        ADD     r3, sp, #3*4                    ; skip the three words just pushed
        LDMIA   r3, {r3, r11}                   ; reload old logical address, old PPL
        LDR     lr, =DuffEntry                  ; Nothing to do if wasn't mapped in
        TEQ     r3, lr
        BEQ     %FT19
        ORR     r11, r11, r4
        MOV     r6, #0                          ; cause translation fault
        MOV     r7, #0
        BL      BangL3PT                        ; map page out
        LDR     r8, =LL3PT                      ; restore, ready for 2nd BangL3PT
19
        Pull    "r0, r3, r11"
20
        ADD     sp, sp, #8                      ; junk old logical address, PPL
        LDR     r4, =DuffEntry          ; check for requests to map a page to nowhere
        TEQ     r4, r3                  ; don't actually map anything to nowhere
        EXIT    EQ
        GetPTE  r6, 4K, r0, r11, LongDesc

        BL      BangL3PT                ; write the new entry (r6,r7) for logical address r3
        EXIT

;internal entry point for updating L3PT entry
;
; entry: r3 = logical address (4k aligned)
;             (address of the 1st copy if the page is doubly-mapped)
;        r6,r7 = new L3PT entry
;        r8 -> L3PT
;        r9 = offset if doubly-mapped
;        r11 = PPL
;              (PageFlags_Unsafe set => just write the entries, with no
;               cache/TLB maintenance)
;
; exit: r0,r1,r4,r8 corrupted
;       r3 advanced by r9 (i.e. pointing at the 2nd copy) if doubly-mapped
;
BangL3PT                                        ; internal entry point used only by BangCamUpdate
        Push    "lr"

        TST     r11, #PageFlags_Unsafe
        ADD     r8, r8, r3, LSR #9              ; point to L3PT entry
        BNE     BangL3PT_unsafe

        ;In order to safely map out a cacheable page and remove it from the
        ;cache, we need to perform the following process:
        ;* Make the page uncacheable
        ;* Flush TLB
        ;* Clean+invalidate cache
        ;* Write new mapping (r6)
        ;* Flush TLB
        ;For uncacheable pages we can just do the last two steps
        ;
        TEQ     r6, #0                          ;EQ if mapping out
        TSTEQ   r11, #DynAreaFlags_NotCacheable ;EQ if also cacheable (overcautious for temp uncache+illegal PCB combos)
        LDR     r4, =ZeroPage
        BNE     %FT20
        ; Potentially we could just map as strongly-ordered + XN here
        ; But for safety just go for temp uncacheable (will retain memory type + shareability)
        LDR     lr, [r4, #MMU_PCBTrans]
        GetTempUncache_LongDesc r4, r11, lr, r0 ;r4 = temp uncacheable AttrIndx bits (r0 used as scratch)
        LDRD    r0, [r8]                        ;get current L3PT entry
        BIC     r0, r0, #TempUncache_L3PTMask   ;remove current attributes
        ORR     r0, r0, r4
        STRD    r0, [r8]                        ;Make uncacheable
        TST     r11, #DynAreaFlags_DoublyMapped
        LDR     r4, =ZeroPage                   ;r4 was overwritten by the macro above
        BEQ     %FT19
        ADD     lr, r8, r9, LSR #9              ;lr -> L3PT entry of 2nd copy
        STRD    r0, [lr]                        ;Update 2nd mapping too if required
        ADD     r0, r3, r9
        ARMop   MMU_ChangingEntry,,, r4         ;maintenance for 2nd mapping
19
        MOV     r0, r3
        ARMop   MMU_ChangingEntry,,, r4         ;maintenance for 1st (or only) mapping
20
        STRD    r6, [r8]                        ;update L3PT entry
        TST     r11, #DynAreaFlags_DoublyMapped
        BEQ     %FT21
        ADD     lr, r8, r9, LSR #9
        STRD    r6, [lr]                        ;Update 2nd mapping
        MOV     r0, r3
        ARMop   MMU_ChangingUncachedEntry,,, r4 ; TLB flush for 1st mapping
        ADD     r3, r3, r9                      ;restore r3 back to 2nd copy
21
        Pull    "lr"
        MOV     r0, r3
        ARMop   MMU_ChangingUncachedEntry,,tailcall,r4 ; flush for the remaining mapping, then return to our caller

; PageFlags_Unsafe variant: arrives here with lr stacked and r8 -> L3PT entry.
; Only the page table entries are written.
BangL3PT_unsafe
        STRD    r6, [r8]                        ; update level 3 page table entry
        TST     r11, #DynAreaFlags_DoublyMapped ; if area doubly mapped
        ADDNE   lr, r8, r9, LSR #9
        STRNED  r6, [lr]                        ; then store entry for 2nd copy as well
        ADDNE   r3, r3, r9                      ; and point logical address back at 2nd copy
        Pull    "pc"

; The description of the MAIR "Attr" fields in the ARMv7 ARM isn't very easy to
; read, so here's an easier description:
;
; * For Normal memory, bits 0-3 indicate the inner cache policy, and 4-7 indicate the outer cache policy
;   * 0100 = non-cacheable
;   * 10RW = write-through, non-transient
;   * 11RW = write-back, non-transient
;   * R = read-allocate, W = write-allocate (00=no allocate)
;   * There's also implementation-defined support for 00RW and 01RW as write-through transient and write-back transient, respectively
; * 0000 0000 is used for strongly-ordered memory
; * 0000 0100 is used for device memory
;
; We can only have eight mappings active at once, which gives us the following
; set of Attr values:
;
; 0000 0000   Strongly-ordered (RISC OS NCNB)
; 0000 0100   Device memory (RISC OS NCB non-merging write buffer)
; 0100 0100   Normal, inner + outer non-cacheable (e.g. NCB default/idempotent, temporary uncacheable)
; 1111 1111   Write-back read+write-allocate (C+B default)
; 1010 1010   Write-through read-allocate (C+B alt 1)
; 1110 1110   Write-back read-allocate (C+B alt 2)

; MAIR "Attr" byte values for each memory type we use
; (outer policy in bits 4-7, inner policy in bits 0-3, as described above)
MAIRAttr_Nrm_NC   * 2_01000100
MAIRAttr_Nrm_WBWA * 2_11111111
MAIRAttr_Nrm_WBRA * 2_11101110
MAIRAttr_Nrm_WT   * 2_10101010
MAIRAttr_SO       * 2_00000000
MAIRAttr_Dev      * 2_00000100

; Table of the eight Attr bytes, in AttrIndx order.
; SetTTBR_LongDesc uses it in two ways:
; * as two words, which are written directly to MAIR0 and MAIR1
; * as a byte table indexed by AttrIndx, to find the hardware attributes of
;   the page tables themselves
; The ASSERTs tie each LLAttr_* AttrIndx encoding to its position in the table.
        ALIGN
MAIRValues
        ; MAIR 0
        ASSERT  LLAttr_Nrm_NC = 0:SHL:LL_Page_LowAttr_AttrIndx0_bit
        DCB     MAIRAttr_Nrm_NC
        ASSERT  LLAttr_Nrm_WBWA = 1:SHL:LL_Page_LowAttr_AttrIndx0_bit
        DCB     MAIRAttr_Nrm_WBWA
        ASSERT  LLAttr_Nrm_WBRA = 2:SHL:LL_Page_LowAttr_AttrIndx0_bit
        DCB     MAIRAttr_Nrm_WBRA
        ASSERT  LLAttr_Nrm_WT = 3:SHL:LL_Page_LowAttr_AttrIndx0_bit
        DCB     MAIRAttr_Nrm_WT
        ; MAIR 1
        ASSERT  LLAttr_SO = 4:SHL:LL_Page_LowAttr_AttrIndx0_bit
        DCB     MAIRAttr_SO
        ASSERT  LLAttr_Dev = 5:SHL:LL_Page_LowAttr_AttrIndx0_bit
        DCB     MAIRAttr_Dev
        DCB     0                               ; AttrIndx 6: unused
        DCB     0                               ; AttrIndx 7: unused

; Map 4 bits of a MAIR Attr value to the corresponding 2 bit TTBCR IRGN/ORGN
; value. Note: This will need updating if we add support for transient
; cacheable (00xx, 01xx)
;
; SetTTBR_LongDesc loads the four bytes below as a single word and shifts it
; right by 2*(4 bit Attr value), so the word is effectively sixteen 2 bit
; fields, with field N (bits 2N, 2N+1) giving the result for Attr value N.
; Within each byte the fields therefore read right-to-left, i.e. the byte is
; 2_(xx11)(xx10)(xx01)(xx00).
;
; TTBCR IRGN/ORGN encoding (from the ARMv7 ARM): 00 = non-cacheable,
; 01 = write-back write-allocate, 10 = write-through,
; 11 = write-back no write-allocate
        ALIGN
TTBRCacheMunge
        DCB     2_00000000 ; 00xx: Strongly-ordered -> non-cacheable
        DCB     2_00000000 ; 01xx: Device memory, or non-cacheable -> non-cacheable
        DCB     2_10101010 ; 10xx: Write-through
        DCB     2_01110111 ; 11xx: Write-back

; XN, PXN, AP2, AP1 mean we theoretically have 16 permission levels. However
; some are redundant (e.g. setting both XN+PXN)

 [ EmulateAP1
; Because the long descriptor format doesn't support RISC OS AP 1 (user RX,
; priv RWX), we instead map the memory as user None, priv RWX, (equivalent to
; RISC OS AP 2). User mode read access is provided by the AbortTrap code; when
; AbortTrap examines the page it will see the "correct" permissions, and so will
; do a direct memcpy from the page, without needing any special code or needing
; to invoke any AbortTrap handlers.
;
; Some parts of the OS (including AbortTrap) need to be able to decode page
; table entries and map the flags back to the corresponding AP value. So for
; the OS to correctly differentiate between AP 1 & AP 2, we now consider the
; SW0 flag bit to be one of the permission bits.
;
; Note that we currently don't provide any support for executing usermode code
; from AP 1 memory, and the advertised permissions reflect this.
 ]

        ; Encode both high and low attributes in one word
        ; (possible because the two attribute fields occupy non-overlapping bit
        ; positions within their respective words - see the ASSERT)
        ;
        ; LongPPL emits one word containing the given XN, PXN, AP2, AP1 and SW0
        ; settings (each argument 0 or 1). Both shareability bits (SH1+SH0) are
        ; always set.
        ASSERT  LL_HighAttr_Start >= LL_LowAttr_Start+LL_LowAttr_Size
        MACRO
        LongPPL  $XN, $PXN, $AP2, $AP1, $SW0
        DCD     ($XN * LL_Page_HighAttr_XN) + ($PXN * LL_Page_HighAttr_PXN) + ($AP2 * LL_Page_LowAttr_AP2) + ($AP1 * LL_Page_LowAttr_AP1) + LL_Page_LowAttr_SH1 + LL_Page_LowAttr_SH0 + ($SW0 * LL_Page_HighAttr_SW0)
        MEND

; Table of one word per RISC OS access privilege (AP) number, giving the
; permission + shareability bits to use in a page table entry.
; Get4KPTE_LongDesc indexes a table of this format with the AP bits of the page
; flags; LoadAndDecodeL2Entry_LongDesc searches it linearly to convert a page
; table entry back to an AP number. The first entry is also used by
; SetTTBR_LongDesc as the source of the shareability setting.
; The right-hand columns show the resulting access for privileged (EL1) and
; unprivileged (EL0) code.
;
; AP2 = read-only
; AP1 = enable unprivileged access
PPLTrans_LongDesc
        ;       XN  PXN AP2 AP1 SW0   EL1 EL0
        LongPPL 0,  0,  0,  1,  0   ; RWX RWX
      [ EmulateAP1
        LongPPL 0,  0,  0,  0,  1   ; RWX R
      |
        LongPPL 0,  0,  0,  1,  0   ; RWX RWX
      ]
        LongPPL 0,  0,  0,  0,  0   ; RWX
        LongPPL 0,  0,  1,  1,  0   ; R X R X
        LongPPL 0,  0,  1,  0,  0   ; R X
                                    
        LongPPL 1,  0,  0,  0,  0   ; RW 
        LongPPL 1,  0,  0,  1,  0   ; RW  RW 
        LongPPL 1,  0,  1,  0,  0   ; R  
        LongPPL 1,  0,  1,  1,  0   ; R   R  
                                    
        LongPPL 0,  1,  0,  1,  0   ; RW  RWX 
        LongPPL 0,  1,  1,  1,  0   ; R   R X 

; Access permissions granted by each entry of PPLTrans_LongDesc, in the same
; order (entry N here describes entry N there). Each argument is a six bit
; mask: privileged R,W,X followed by unprivileged R,W,X.
; The list is terminated by a word of -1.
PPLAccess_LongDesc   ; EL1EL0
                     ; RWXRWX
        GenPPLAccess 2_111111
      [ EmulateAP1
        GenPPLAccess 2_111100
      |
        GenPPLAccess 2_111111
      ]
        GenPPLAccess 2_111000
        GenPPLAccess 2_101101
        GenPPLAccess 2_101000

        GenPPLAccess 2_110000
        GenPPLAccess 2_110110
        GenPPLAccess 2_100000
        GenPPLAccess 2_100100

        GenPPLAccess 2_110111
        GenPPLAccess 2_100101
        DCD     -1

        LTORG

; PPLTrans should contain XN + PXN + AP2 + AP1 + SH1 + SH0 + SW0
; PCBTrans should contain AttrIndx

; Masks selecting the PPLTrans-controlled bits from the high and low words of a
; page table entry. LoadAndDecodeL2Entry_LongDesc combines the two masked words
; to rebuild a value which can be compared against the PPLTrans entries.
LL_HighPPLBits * LL_Page_HighAttr_XN + LL_Page_HighAttr_PXN + LL_Page_HighAttr_SW0
LL_LowPPLBits * LL_Page_LowAttr_SH1 + LL_Page_LowAttr_SH0 + LL_Page_LowAttr_AP2 + LL_Page_LowAttr_AP1

; Get4KPTE_LongDesc - build the long-descriptor L3 page table entry for a 4K page
;
; In:
; r0 = phys addr (aligned), with bits 0-7 containing bits 32-39 of PA
; r1 = page flags; the following fields are examined:
;      APBits
;      NotBufferable
;      NotCacheable
;      CPBits
;      PageFlags_TempUncacheableBits
; r2 -> PPLTrans (one word per AP number)
; r3 -> PCBTrans (one byte per XCB value)
; Out:
; r0,r1 = PTE for 4K page ("small page"); r0 = low word, r1 = high word
; r2,r3 preserved
Get4KPTE_LongDesc ROUT
        Entry   "r4"
        ; The shifts used for the PCBTrans lookup depend on this flag layout
        ASSERT  DynAreaFlags_CPBits = 7*XCB_P :SHL: 10
        ASSERT  DynAreaFlags_NotCacheable = XCB_NC :SHL: 4
        ASSERT  DynAreaFlags_NotBufferable = XCB_NB :SHL: 4
        ; Assemble the XCB value (held shifted left by 4) in lr
        AND     lr, r1, #DynAreaFlags_NotCacheable + DynAreaFlags_NotBufferable
        AND     r4, r1, #DynAreaFlags_CPBits
        TST     r1, #PageFlags_TempUncacheableBits
        ORR     lr, lr, r4, LSR #10-4                   ; merge in the cache policy
        ORRNE   lr, lr, #XCB_TU<<4                      ; temp uncacheable requested
        LDRB    lr, [r3, lr, LSR #4]                    ; lr = AttrIndx bits
        ; Fetch the permission word for this AP
        AND     r4, r1, #DynAreaFlags_APBits
        LDR     r4, [r2, r4, LSL #2]                    ; r4 = low + high permission attributes
        ORR     lr, lr, r4                              ; lr now holds every low attribute (AttrIndx + permissions)
        ; High word: high attributes from r4, plus the high address bits from r0
        BFI     r4, r0, #0, #12                         ; replaces r4's low attribute bits with r0's bottom 12 bits
        BIC     r1, r4, #&FFFFFFFF - (LL_HighAttr + &FFF) ; keep only high attributes + bottom 12 bits
        ; Low word: low address bits from r0, plus the low attributes from lr
        BFI     r0, lr, #0, #12                         ; overwrites the high address bits that were parked in r0
        ORR     r0, r0, #LL3_Page                       ; descriptor type = L3 page
        ORR     r0, r0, #LL_Page_LowAttr_AF             ; Access flag must be set to avoid an Access Fault
        EXIT

; In:
; As per Get4KPTE
; Out:
; r0,r1 = PTE for 64K page ("large page")
;
; This is the 4K page entry with the "contiguous" hint set in the high word.
; The caller is expected to write the entry to all 16 L3PT slots of the 64K
; region - nothing here replicates it.
Get64KPTE_LongDesc ROUT
        Entry
        BL      Get4KPTE_LongDesc
        ORR     r1, r1, #LL_Page_HighAttr_Contig ; 16 contiguous pages
        EXIT 

; In:
; As per Get4KPTE
; Out:
; r0,r1 = PTE for 2M page ("block")
;
; Builds the entry as if for a 4K page and then changes the descriptor type
; bits from "L3 page" to "L1/L2 block"; all other bits are shared by the two
; formats.
Get2MPTE_LongDesc
        Entry
        BL      Get4KPTE_LongDesc
        EOR     r0, r0, #LL12_Block :EOR: LL3_Page      ; Convert from page to block (because in ARM's infinite wisdom, they decided to make their new orthogonal page table format non-orthogonal in one key area)
        EXIT

; In:
; r0 = page-aligned logical addr
; Out:
; r0,r1 = phys addr
; r2 = page flags
;      or -1 if fault
; r3 = entry size/alignment (bytes)
;
; Note: despite the "L2" in the name (kept in common with the short descriptor
; code), this reads the level 3 page table (LL3PT).
; On the fault path r0,r1 hold the raw page table entry.
LoadAndDecodeL2Entry_LongDesc ROUT
        LDR     r1, =LL3PT
        ADD     r0, r1, r0, LSR #9              ; 8 byte entry per 4K page
        LDRD    r0, [r0]                        ; r0,r1 = low,high words of entry
        ASSERT  LL_Fault = 0
        TST     r0, #LL_TypeMask
        MOV     r3, #4096
        MOVEQ   r2, #-1
        MOVEQ   pc, lr                          ; fault: return before anything is stacked
05      ; Arrive here from LoadAndDecodeL1Entry
        ; with r0,r1 = page table entry, r3 = size mapped by one such entry
        Entry   "r4-r5"
        LDR     lr, =ZeroPage
        ; Extract AttrIndx so we can look up the PCB bits
        AND     r2, r0, #LL_Page_LowAttr_AttrIndx0+LL_Page_LowAttr_AttrIndx1+LL_Page_LowAttr_AttrIndx2
        LDR     r5, [lr, #MMU_PCBTrans]
        SUB     r5, r5, #8*4                    ; step back to an 8 word table located just before PCBTrans
                                                ; (presumably AttrIndx -> page flags; TODO confirm against the table definitions)
        ASSERT  LL_Page_LowAttr_AttrIndx0_bit = 2 ; i.e. r2 is word aligned offset
        LDR     r2, [r5, r2]                    ; r2 = cacheability page flags
        ; Find PPL
        LDR     r5, [lr, #MMU_PPLTrans]
        LDR     lr, =LL_LowPPLBits
        AND     r4, r1, #LL_HighPPLBits
        AND     lr, lr, r0
        ORR     r4, r4, lr                      ; r4 = permission bits, in the same format as a PPLTrans word
        ; Linear search: r2 is incremented once for each non-matching entry, so
        ; the index of the matching entry is added to the page flags.
        ; NOTE(review): there is no end-of-table check, so this relies on the
        ; entry always matching something in the table.
10
        LDR     lr, [r5], #4
        CMP     lr, r4
        ADDNE   r2, r2, #1
        BNE     %BT10
        ; Determine physical address
        TST     r1, #LL_Page_HighAttr_Contig
        AND     r1, r1, #LL_HighAddr            ; r1 = high phys addr bits (flags from TST are unaffected)
        MOVNE   r3, r3, LSL #4                  ; contiguous hint: entry is one of 16, so size is 16x larger
        SUB     r4, r3, #1
        BIC     r0, r0, r4                      ; drop attribute bits + offset, leaving the size-aligned low phys addr
        EXIT

; In:
; r0 = MB-aligned logical addr
; Out:
; r0,r1 = phys addr of start of block, or of the next level page table
; r2 = page flags if block mapping
;      or -1 if fault
;      or -2 if page table ptr
; r3 = entry size/alignment (bytes)
;      (always 2MB here, or 32MB for a block with the contiguous hint set)
;
; Note: despite the "L1" in the name (kept in common with the short descriptor
; code), this reads the level 2 page table (LL2PT), in which each entry covers
; 2MB. Shares label 05 (and the stack frame set up there) with
; LoadAndDecodeL2Entry_LongDesc, so the two routines must stay within the same
; ROUT scope.
LoadAndDecodeL1Entry_LongDesc
        LDR     r1, =LL2PT
        ADD     r0, r1, r0, LSR #18             ; 8 byte entry per 2MB
        BIC     r0, r0, #7              ; Input is only 1MB aligned
        LDRD    r0, [r0]                ; r0,r1 = low,high words of entry
        ANDS    r3, r0, #LL_TypeMask
        MOVEQ   r2, #-1
        MOVEQ   r3, #1:SHL:21
        MOVEQ   pc, lr                  ; fault
        CMP     r3, #LL12_Block
        MOV     r3, #1:SHL:21           ; (MOV leaves the flags from CMP intact)
        BEQ     %BT05                   ; Branch to common decode code
        ; Table pointer
        MOV     r2, #-2
        BFC     r0, #0, #LL_LowAddr_Start ; strip type + attribute bits, leaving low phys addr
        AND     r1, r1, #LL_HighAddr
        MOV     pc, lr

; SetTTBR_LongDesc - select long descriptor translation and install the page
; tables at the given address
;
; In:
; r0 = phys addr (aligned)
; r1 -> ZeroPage
; Out:
; TTBR0 and any other related registers updated
; (TTBCR, MAIR0, MAIR1, plus VBAR if the security extensions are present)
; All registers preserved, flags corrupted
; If MMU is currently on, it's assumed the mapping of ROM+stack will not be
; affected by this change
SetTTBR_LongDesc ROUT
        Entry   "r2-r3"
        ; Update TTBCR
        ; First we need to determine the IRGN+ORGN settings
        ; These are derived from the cacheability of the page tables themselves:
        ; page flags -> AttrIndx (via PCBTrans) -> MAIR Attr byte (via
        ; MAIRValues) -> 2 bit region attributes (via TTBRCacheMunge)
        LDR     lr, [r1, #PageTable_PageFlags]
        ASSERT  DynAreaFlags_CPBits = 7*XCB_P :SHL: 10
        ASSERT  DynAreaFlags_NotCacheable = XCB_NC :SHL: 4
        ASSERT  DynAreaFlags_NotBufferable = XCB_NB :SHL: 4
        AND     r2, lr, #DynAreaFlags_NotCacheable + DynAreaFlags_NotBufferable
        AND     lr, lr, #DynAreaFlags_CPBits
        LDR     r3, [r1, #MMU_PCBTrans]
        ORR     r2, r2, lr, LSR #10-4
        LDRB    r2, [r3, r2, LSR #4]           ; Get AttrIndx value
        ADR     r3, MAIRValues
        LDRB    r2, [r3, r2, LSR #LL_Page_LowAttr_AttrIndx0_bit] ; Get corresponding hardware flags
        LDR     r3, TTBRCacheMunge             ; All sixteen 2 bit fields, as one word
        ; lr is used as the second scratch register: it has already been
        ; stacked by Entry, so no caller registers are corrupted.
        MOV     lr, r2, LSR #4                 ; Outer attributes
        AND     r2, r2, #&F                    ; Inner attributes
        MOV     lr, lr, LSL #1                 ; 2 bits per table field
        MOV     r2, r2, LSL #1
        MOV     lr, r3, LSR lr                 ; Outer result in bits 0-1 (higher bits are junk, ignored by BFI)
        MOV     r2, r3, LSR r2                 ; Inner result in bits 0-1
        MOV     r3, #&80000000                 ; TTBCR: Long-descriptor format enabled, using TTBR0 only
        BFI     r3, lr, #10, #2                ; ORGN
        BFI     r3, r2, #8, #2                 ; IRGN
        ; Determine shareability (assume same for all PPLs)
        LDR     r2, PPLTrans_LongDesc
        AND     r2, r2, #LL_Page_LowAttr_SH0+LL_Page_LowAttr_SH1
        ORR     r3, r3, r2, LSL #12-LL_Page_LowAttr_SH0_bit ; TTBCR.SH0 lives at bits 12-13
        MCR     p15, 0, r3, c2, c0, 2          ; Write TTBCR

        ; Check if security extensions are supported
        ARM_read_ID r2
        AND     r2, r2, #&F<<16
        CMP     r2, #ARMvF<<16
        BNE     %FT01                           ; No ID_PFR registers to consult
        MRC     p15, 0, r2, c0, c1, 1           ; ID_PFR1
        TST     r2, #15<<4
        BEQ     %FT01
        MOV     r2, #0                          ; Must be an explicit zero - no other register is known to be 0 here
        MCR     p15, 0, r2, c12, c0, 0          ; VBAR: Ensure exception vector base is 0 (security extensions)
01

        ; Set MAIRs
        LDR     r2, MAIRValues
        MCR     p15, 0, r2, c10, c2, 0          ; MAIR0
        LDR     r2, MAIRValues+4
        MCR     p15, 0, r2, c10, c2, 1          ; MAIR1

        ; Now update TTBR0
        MOV     r2, #0
        MCRR    p15, 0, r0, r2, c2              ; Write full 64bit value (page tables assumed to always lie in low RAM)
        EXIT

        END