; Copyright 1996 Acorn Computers Ltd
; Copyright 2016 Castle Technology Ltd
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;     http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;

; Page table interaction - "long descriptor" format (ARMv7+ 8 bytes per entry)

; Note that to ensure page table reads & writes are seen as atomic by both the
; MMU and other CPU cores, we only use LDRD & STRD when reading/writing entries.

;----------------------------------------------------------------------------------------
; logical_to_physical
;
;       In:     r4 = logical address
;
;       Out:    r5 corrupt
;               CC => r8,r9 = physical address (r8 = low word with the page
;                     offset from r4 inserted, r9 = high word masked with
;                     LL_HighAddr)
;               CS => invalid logical address, r8,r9 corrupted
;
;       Convert logical address to physical address.
;
;       The translation is read from the logical copy of the L3 page table at
;       LL3PT. Before the L3PT entry for r4 is read, the L3PT entry which maps
;       that part of L3PT itself is tested, and the lookup fails if it is not
;       a valid page. Both failure cases branch to meminfo_returncs (defined
;       elsewhere).
;
logical_to_physical
        LDR     r9, =LL3PT
        MOV     r5, r4, LSR #12         ; r5 = logical page number
        ADD     r5, r9, r5, LSL #3      ; r5 -> L3PT entry for logical address (8 bytes per entry)
        MOV     r8, r5, LSR #12         ; r8 = logical page number of that L3PT entry
        ADD     r8, r9, r8, LSL #3      ; r8 -> L3PT entry for L3PT entry for logical address
        LDRD    r8, [r8]                ; r8,r9 = entry mapping the page of L3PT that r5 points into
        ASSERT  LL3_Page != 0
        ASSERT  LL_Fault = 0
        TST     r8, #LL3_Page           ; Check for valid (4K) page
        BEQ     meminfo_returncs
        LDRD    r8, [r5]                ; r8,r9 = entry for the logical address itself
        TST     r8, #LL3_Page           ; Check for valid (4K) page
        BEQ     meminfo_returncs
        BFI     r8, r4, #0, #12         ; Valid, so apply offset within the page
        AND     r9, r9, #LL_HighAddr    ; And high address mask
        CLC
        MOV     pc, lr

 [ CacheablePageTables
;----------------------------------------------------------------------------------------
; MakePageTablesCacheable
;
;       Switch the page tables over to the page flags returned by
;       GetPageFlagsForCacheablePageTables.
;
;       Sequence: record the new flags in PageTable_PageFlags, re-flag the
;       logical mappings of LL1PT, LL2PT and LL3PT, reprogram the TTBR with the
;       physical address of LL1PT, then invalidate the whole TLB.
;
;       NOTE(review): r1 and r2 are loaded below but do not appear in the Entry
;       register list, so they are presumably corrupted on exit - confirm that
;       callers do not rely on them.
;
MakePageTablesCacheable ROUT
        Entry   "r0,r4-r5,r8-r9"
        BL      GetPageFlagsForCacheablePageTables ; result is taken from r0 below
        ; Update PageTable_PageFlags
        LDR     r1, =ZeroPage
        STR     r0, [r1, #PageTable_PageFlags]
        ; Adjust the logical mapping of the page tables to use the specified page flags
        ; (r0 = flags, r1 = base address, r2 = size for each call; r0 is reused
        ; across all three calls, so AdjustMemoryPageFlags must preserve it)
        LDR     r1, =LL1PT
        LDR     r2, =4096 ; Round up to page multiple
        BL      AdjustMemoryPageFlags
        LDR     r1, =LL2PT
        LDR     r2, =16*1024
        BL      AdjustMemoryPageFlags
        LDR     r1, =LL3PT
        LDR     r2, =8*1024*1024
        BL      AdjustMemoryPageFlags
        ; Update the TTBR
        ; (the carry result of logical_to_physical is not checked, and only the
        ; low word r8 of the physical address is used)
        LDR     r4, =LL1PT
        BL      logical_to_physical
        MOV     r0, r8 ; Assume only 32bit address
        LDR     r1, =ZeroPage
        BL      SetTTBR
        ; Perform a full TLB flush to make sure the new mappings are visible
        ARMop   TLB_InvalidateAll,,,r1
        EXIT

;----------------------------------------------------------------------------------------
; MakePageTablesNonCacheable
;
;       Switch the page tables to non-cacheable, non-bufferable page flags.
;
;       Sequence: clean the three page table regions from the cache, record the
;       new flags in PageTable_PageFlags, reprogram the TTBR, invalidate the
;       whole TLB, and only then re-flag the logical mappings of LL1PT, LL2PT
;       and LL3PT.
;
;       NOTE(review): r2 is loaded below but does not appear in the Entry
;       register list, so it is presumably corrupted on exit - confirm that
;       callers do not rely on it.
;
MakePageTablesNonCacheable ROUT
        Entry   "r0-r1,r4-r5,r8-r9"
        ; Flush the page tables from the cache, so that when we update the TTBR
        ; below we can be sure that the MMU will be seeing the current page
        ; tables
        ; (each call is given r0 = base of the table, r1 = base + size)
        LDR     r0, =LL1PT
        ADD     r1, r0, #4096
        LDR     r4, =ZeroPage
        ARMop   Cache_CleanRange,,,r4
        LDR     r0, =LL2PT
        ADD     r1, r0, #16*1024
        ARMop   Cache_CleanRange,,,r4
        LDR     r0, =LL3PT
        ADD     r1, r0, #8*1024*1024
        ARMop   Cache_CleanRange,,,r4
        ; Update the TTBR so the MMU performs non-cacheable accesses
        LDR     r0, =AreaFlags_PageTablesAccess :OR: DynAreaFlags_NotCacheable :OR: DynAreaFlags_NotBufferable
        STR     r0, [r4, #PageTable_PageFlags]
        ; (the carry result of logical_to_physical is not checked, and only the
        ; low word r8 of the physical address is used)
        LDR     r4, =LL1PT
        BL      logical_to_physical
        MOV     r0, r8 ; Assume only 32bit address
        LDR     r1, =ZeroPage
        BL      SetTTBR
        ; Perform a full TLB flush just in case
        ARMop   TLB_InvalidateAll,,,r1
        ; Now we can adjust the logical mapping of the page tables to be non-cacheable
        ; (r1 is expected to still hold ZeroPage here, i.e. this relies on SetTTBR
        ; and the ARMop above preserving it - TODO confirm)
        LDR     r0, [r1, #PageTable_PageFlags]
        LDR     r1, =LL1PT
        LDR     r2, =4096
        BL      AdjustMemoryPageFlags
        LDR     r1, =LL2PT
        LDR     r2, =16*1024
        BL      AdjustMemoryPageFlags
        LDR     r1, =LL3PT
        LDR     r2, =8*1024*1024
        BL      AdjustMemoryPageFlags
        EXIT
 ]

;**************************************************************************
;
;       AllocateBackingLevel2 - Allocate L3 pages for an area
;
;       Internal routine called by DynArea_Create
;
;       Makes sure that every 4K page of L3PT needed to describe the area
;       exists. Each such page covers 2M of logical address space and is
;       referenced from one 8 byte L2PT entry. Missing pages are taken from
;       the end of the free pool's page list, zeroed, and then linked into
;       L2PT.
;
; in:   r3 = base address (will be page aligned)
;       r4 = area flags (NB if doubly mapped, then have to allocate for both halves)
;       r5 = size (of each half in doubly mapped areas)
;
; out:  If successfully allocated pages, then
;         All registers preserved
;         V=0
;       else
;         r0 -> error
;         V=1
;       endif

AllocateBackingLevel2 Entry "r0-r8,r11"
        TST     r4, #DynAreaFlags_DoublyMapped          ; if doubly mapped
        SUBNE   r3, r3, r5                              ; then area starts further back
        MOVNE   r5, r5, LSL #1                          ; and is twice the size

; NB no need to do sanity checks on addresses here, they've already been checked

; now round address range to 2M boundaries

        ADD     r5, r5, r3                              ; r5 -> end
        MOV     r0, #1 :SHL: 21
        SUB     r0, r0, #1                              ; r0 = 2M-1 mask
        BIC     r8, r3, r0                              ; round start address down (+ save for later)
        ADD     r5, r5, r0
        BIC     r5, r5, r0                              ; but round end address up

; first go through existing L3PT working out how much we need
; (address LSR #9 = (address LSR #12) * 8, i.e. the byte offset of a page's entry in L3PT)

        LDR     r7, =LL3PT
        ADD     r3, r7, r8, LSR #9                      ; r3 -> start of L3PT for area
        ADD     r5, r7, r5, LSR #9                      ; r5 -> end of L3PT for area +1

        ADD     r1, r7, r3, LSR #9                      ; r1 -> L3PT for r3
        ADD     r2, r7, r5, LSR #9                      ; r2 -> L3PT for r5

        TEQ     r1, r2                                  ; if no pages needed
        BEQ     %FT30

        MOV     r4, #0                                  ; number of backing pages needed
10
        LDRD    r6, [r1], #8                            ; get L3PT entry for L3PT (r6,r7; r7 is reloaded with LL3PT later)
        TST     r6, #LL_TypeMask                        ; EQ if translation fault
        ADDEQ   r4, r4, #1                              ; if not there then 1 more page needed
        TEQ     r1, r2
        BNE     %BT10

; if no pages needed, then exit

        TEQ     r4, #0
        BEQ     %FT30

; now we need to claim r4 pages from the free pool, if possible; return error if not

        LDR     r1, =ZeroPage
        LDR     r6, [r1, #FreePoolDANode + DANode_PMPSize]
        SUBS    r6, r6, r4                              ; reduce free pool size by that many pages
        BCS     %FT14                                   ; if enough, skip next bit

; not enough pages in free pool currently, so try to grow it by the required amount
; (r6 is negative here: minus the number of pages we are short by)

        Push    "r0, r1"
        MOV     r0, #ChangeDyn_FreePool
        RSB     r1, r6, #0                              ; size change we want (+ve)
        MOV     r1, r1, LSL #12                         ; pages -> bytes
        SWI     XOS_ChangeDynamicArea
        Pull    "r0, r1"                                ; discards any error pointer from the SWI; V is tested next
        BVS     %FT90                                   ; didn't manage change, so report error

        MOV     r6, #0                                  ; will be no pages left in free pool after this
14
        STR     r6, [r1, #FreePoolDANode + DANode_PMPSize] ; if possible then update size

        LDR     r0, [r1, #FreePoolDANode + DANode_PMP]  ; r0 -> free pool page list
        ADD     r0, r0, r6, LSL #2                      ; r0 -> first page we're taking out of free pool

; Register use in the allocation loop below:
;   r0  -> next free pool page list entry to consume
;   r1  -> next L3PT entry describing a page of L3PT, r2 = end value for r1
;   r3  =  logical address of the L3PT page that r1's entry describes
;   r8  -> L2PT entry for the 2M region that this L3PT page covers
;   r11 =  page flags to map new page table pages with

        LDR     lr, =LL2PT
        ADD     r8, lr, r8, LSR #18                     ; point r8 at start of L2 we may be updating
        LDR     r7, =LL3PT
        ADD     r1, r7, r3, LSR #9                      ; point r1 at L3PT for r3 again
        LDR     r11, =ZeroPage
        LDR     r11, [r11, #PageTable_PageFlags]        ; access privs (+CB bits)
20
        LDRD    r6, [r1], #8                            ; get L3PT entry for L3PT again
        TST     r6, #LL_TypeMask                        ; NE if this page of L3PT already exists
        BNE     %FT25                                   ; in which case skip it

        Push    "r1-r2, r4"
        MOV     lr, #-1
        LDR     r2, [r0]                                ; get page number to use
        STR     lr, [r0], #4                            ; remove from PMP
        Push    "r0"
        BL      BangCamUpdate                           ; Map in to L3PT access window
                                                        ; (presumably r2 = page, r3 = logical address, r11 = flags - confirm against BangCamUpdate)

; now that the page is mapped in we can zero its contents (=> cause translation fault for area initially)
; L2PT won't know about the page yet, so mapping it in with garbage initially shouldn't cause any issues

        ADD     r0, r3, #4096                           ; start at the end of the page and work down
        MOV     r1, #0
        MOV     r2, #0
        MOV     r4, #0
        MOV     r6, #0
15
        STMDB   r0!, {r1,r2,r4,r6}                      ; store data (16 bytes of zeros per iteration)
        TEQ     r0, r3
        BNE     %BT15

        ; Make sure the page is seen to be clear before we update L2PT to make
        ; it visible to the MMU
        PageTableSync

        Pull    "r0-r2, r4"                             ; undoes both pushes above in one go

        LDR     lr, =ZeroPage
        LDR     r6, [lr, #LxPTUsed]
        ADD     r6, r6, #4096                           ; account for one more 4K page of page tables
        STR     r6, [lr, #LxPTUsed]

; now update 1 entry in L2PT (corresponding to 2M of address space which is covered by the 4K of L3)
; and point it at the physical page we've just allocated (r1!-8 will already hold physical address+bits now!)

        LDRD    r6, [r1, #-8]                           ; r6,r7 = L3PT entry describing our logical mapping of the new page
        BFC     r6, #0, #LL_LowAddr_Start               ; Low phys addr
        AND     r7, r7, #LL_HighAddr                    ; High phys addr
        ORR     r6, r6, #LL12_Table                     ; It's a table ptr
        STRD    r6, [r8]                                ; Store as L2PT entry
25
        ADD     r3, r3, #4096                           ; advance L3PT logical address
        ADD     r8, r8, #8                              ; move onto L2 for next 2M

        TEQ     r1, r2
        BNE     %BT20
        PageTableSync
30
        CLRV
        EXIT

; Come here if not enough space in free pool to allocate level3

90
        ADRL    r0, ErrorBlock_CantAllocateLevel2
  [ International
        BL      TranslateError                          ; presumably returns with V set - confirm
  |
        SETV
  ]
        STR     r0, [sp]                                ; overwrite stacked r0 so the error pointer is returned
                                                        ; (assumes Entry leaves r0 at the lowest stack address)
        EXIT

;**************************************************************************
;
;       UpdateL1PTForPageReplacement
;
; Updates L2PT to point to the right place, if a physical L3PT page has been
; replaced with a substitute.
;
; (With this page table format the entry rewritten is in L2PT; the routine
; name is unchanged.)
;
; Returns immediately, doing nothing, if r0 is not within the 8MB logical
; region starting at LL3PT.
;
; In: r0 = log addr of page being replaced
;     r1 = phys addr of replacement page
;
; Out: r0-r4, r7-r12 can be corrupted
;
UpdateL1PTForPageReplacement ROUT
        MOV     r2, #0                          ; XXX high phys addr
        LDR     r3, =LL3PT
        SUBS    r0, r0, r3                      ; r0 = byte offset into L3PT
        MOVCC   pc, lr                          ; address is below L3PT
        CMP     r0, #8*1024*1024
        MOVCS   pc, lr                          ; address is above L3PT

        LDR     r3, =LL2PT
        ADD     r0, r3, r0, LSR #(12-3)         ; address in L2 of entry to update (8 bytes per 4K page of L3PT)
        LDRD    r8, [r0]                        ; load L2PT entry
        MOV     r1, r1, LSR #LL_LowAddr_Start
        BFI     r8, r1, #LL_LowAddr_Start, #LL_LowAddr_Size ; Update low phys addr
        ASSERT  LL_HighAddr_Start = 0
        BFI     r9, r2, #0, #LL_HighAddr_Size   ; Update high phys addr
        STRD    r8, [r0]

        ; In order to guarantee that the result of a page table write is
        ; visible, the ARMv6+ memory order model requires us to perform TLB
        ; maintenance (equivalent to the MMU_ChangingUncached ARMop) after we've
        ; performed the write. Performing the maintenance beforehand (as we've
        ; done traditionally) will work most of the time, but not always.
        ;
        ; No return instruction follows the ARMop: the "tailcall" option is
        ; presumably what returns control to our caller - confirm against the
        ; ARMop macro definition.
        LDR     r3, =ZeroPage
        ARMop   MMU_ChangingUncached,,tailcall,r3

;
; ----------------------------------------------------------------------------------
;
;convert page number in $pnum to L3PT entry (physical address+protection bits),
;using cached PhysRamTable entries for speed
;
;entry: $pnum = page number
;       $ptable -> PhysRamTable, $pbits,$pbits2 = protection bits
;       $cache0, $cache1, $cache2 = PhysRamTable cache
;         $cache0 = page number of the first page in the cached block
;         $cache1 = physical address of the cached block
;         $cache2 = number of pages in the cached block
;exit:  $pnum,$pnum2 = L3PT entry ($pnum2 is simply a copy of $pbits2)
;       $cache0, $cache1, $cache2 updated
;       $pbits,$pbits2 preserved ($pbits2 is used as scratch and restored
;       from $pnum2 at the end)
;       flags corrupt; lr corrupt if the cache missed
;
;On a cache miss the macro calls a routine whose name is built from the
;register names passed in, so a matching PageNumToL3PTCache_... routine must
;exist for each combination of registers used.
;

        MACRO
        PageNumToL3PT $pnum,$pnum2,$ptable,$cache0,$cache1,$cache2,$pbits,$pbits2
        MOV     $pnum2,$pbits2
        SUB     $pbits2,$pnum,$cache0 ; no. pages into block
        CMP     $pbits2,$cache2
        BLHS    PageNumToL3PTCache_$ptable._$cache0._$cache1._$cache2._$pbits2
        ADD     $pnum,$cache1,$pbits2,LSL #Log2PageSize ; physical address of page
        ORR     $pnum,$pbits,$pnum ; munge in protection bits
        MOV     $pbits2,$pnum2
        MEND

;
;set up the PhysRamTable cache registers for use with PageNumToL3PT
;
;exit:  $ptable -> PhysRamTable
;       $cache0 = 0, $cache1,$cache2 = first PhysRamTable entry, with the
;       second word shifted right by 12 bits
;
;$cache2 must be a higher numbered register than $cache1, as LDMIA loads the
;lowest numbered register from the lowest address
;

        MACRO
        PageNumToL3PTInit $ptable,$cache0,$cache1,$cache2
        ASSERT  $cache2 > $cache1
        LDR     $ptable,=ZeroPage+PhysRamTable
        MOV     $cache0,#0
        LDMIA   $ptable,{$cache1,$cache2}
        MOV     $cache2,$cache2,LSR #12
        MEND

;
;cache miss handler for PageNumToL3PT when used with ptable=r4, cache=r5,r6,r7
;and pbits2=r12: searches PhysRamTable from its start for the block containing
;the requested page
;
;entry: r4  -> PhysRamTable
;       r5  =  page number of the first page in the currently cached block
;       r12 =  requested page number minus r5
;exit:  r5  =  page number of the first page in the block found
;       r6  =  first word of that block's PhysRamTable entry
;       r7  =  second word of that entry, shifted right by 12 bits (page count)
;       r12 =  number of pages into the block
;       r4 is advanced during the search but is listed in Entry, so is
;       presumably restored by EXIT
;
;NOTE(review): there is no end-of-table check visible here, so the page number
;is assumed to lie within PhysRamTable - confirm callers guarantee this
;
PageNumToL3PTCache_r4_r5_r6_r7_r12 ROUT
        Entry   "r4"
        ADD     r12,r12,r5 ; Restore page number
        MOV     r5,#0      ; Restart from the first block
10
        LDMIA   r4!,{r6,r7} ; Get PhysRamTable entry
        MOV     r7,r7,LSR #12
        CMP     r12,r7     ; HS if the page lies beyond this block
        SUBHS   r12,r12,r7 ; so step over it
        ADDHS   r5,r5,r7
        BHS     %BT10
        EXIT    ; r5-r7 = cache entry, r12 = offset into entry

; ----------------------------------------------------------------------------------
;
;AMB_movepagesin_L2PT
;
;updates L3PT for new logical page positions, does not update CAM
;
;Writes one 8 byte L3PT entry per page in the page list, starting at the
;entry for the logical address in r3. Pages are handled two at a time, with a
;single trailing page dealt with separately. Each entry is written with one
;STRD.
;
; entry:
;       r3  =  new logical address of 1st page
;       r8  =  number of pages
;       r9  =  page flags
;       r10 -> page list
;
AMB_movepagesin_L2PT ROUT
        Entry   "r0-r12"

        ; r11,r12 are used below as the protection bits for every entry
        ; (presumably GetPTE builds a 4K page entry for address r0 = 0 with
        ; page flags r9 into r11,r12 - confirm against the macro)
        MOV     r0, #0
        GetPTE  r11, 4K, r0, r9

        PageNumToL3PTInit r4,r5,r6,r7

        ; r9 (page flags) is finished with, so reuse it as the L3PT pointer
        LDR     r9,=LL3PT
        ADD     r9,r9,r3,LSR #(Log2PageSize-3) ;r9 -> L3PT for 1st new logical page

        CMP     r8,#2
        BLT     %FT20
10
        LDMIA   r10!,{r0,r2}         ;next 2 page numbers
        PageNumToL3PT r0,r1,r4,r5,r6,r7,r11,r12 ;r0,r1 = entry for 1st page
        PageNumToL3PT r2,r3,r4,r5,r6,r7,r11,r12 ;r2,r3 = entry for 2nd page (r3 no longer needed as address)
        STRD    r0,[r9],#16          ;write L3PT entries
        STRD    r2,[r9,#-8]
        SUB     r8,r8,#2
        CMP     r8,#2
        BGE     %BT10
20
        CMP     r8,#0                ;0 or 1 pages left at this point
        BEQ     %FT35
        LDR     r0,[r10],#4
        PageNumToL3PT r0,r1,r4,r5,r6,r7,r11,r12
        STRD    r0,[r9]
35
        PageTableSync
        EXIT

; ----------------------------------------------------------------------------------
;
;AMB_movecacheablepagesout_L2PT
;
;updates L3PT for old logical page positions, does not update CAM
;
;Two stages: first the L3PT entries are rewritten with temporarily uncacheable
;attributes and MMU_ChangingEntries is called for the range; then control
;passes to label 55 in AMB_moveuncacheablepagesout_L2PT, which clears the
;entries and returns using the stack frame created here.
;
; entry:
;       r3  =  old page flags
;       r4  =  old logical address of 1st page
;       r8  =  number of pages
;
AMB_movecacheablepagesout_L2PT
        Entry   "r0-r8"

        ; Calculate L3PT flags needed to make the pages uncacheable
        ; Assume all pages will have identical flags (or at least close enough)
        ; r0 = bits to set, r1 = mask of bits to clear first
        LDR     lr,=ZeroPage
        LDR     lr,[lr, #MMU_PCBTrans]
        GetTempUncache r0, r3, lr, r1
        LDR     r1, =TempUncache_L3PTMask

        LDR     lr,=LL3PT
        ADD     lr,lr,r4,LSR #(Log2PageSize-3)    ;lr -> L3PT 1st entry

        ; Only the low word of each entry (r2, r4) is modified; the high word
        ; (r3, r5) is written back as read. r4 and r8 are overwritten in this
        ; loop and are reloaded from the stack frame afterwards.
        CMP     r8,#2
        BLT     %FT20
10
        LDRD    r2,[lr]
        LDRD    r4,[lr,#8]
        BIC     r2,r2,r1
        BIC     r4,r4,r1
        ORR     r2,r2,r0
        ORR     r4,r4,r0
        STRD    r2,[lr],#16
        STRD    r4,[lr,#-8]
        SUB     r8,r8,#2
        CMP     r8,#2
        BGE     %BT10
20
        CMP     r8,#0                            ;0 or 1 pages left at this point
        BEQ     %FT35
        LDRD    r2,[lr]
        BIC     r2,r2,r1
        ORR     r2,r2,r0
        STRD    r2,[lr]
35
        FRAMLDR r0,,r4                           ;address of 1st page
        FRAMLDR r1,,r8                           ;number of pages
        LDR     r3,=ZeroPage
        ARMop   MMU_ChangingEntries,,,r3
        FRAMLDR r4                               ;recover entry values for the code at 55
        FRAMLDR r8
        B       %FT55 ; -> moveuncacheablepagesout_L2PT (avoid pop+push of large stack frame)

; ----------------------------------------------------------------------------------
;
;AMB_moveuncacheablepagesout_L2PT
;
;updates L3PT for old logical page positions, does not update CAM
;
;Overwrites the L3PT entry of each page with zero (translation fault), eight
;entries per iteration while at least eight remain and then one at a time,
;and finally calls MMU_ChangingUncachedEntries for the range.
;
;Shares its stack frame layout with AMB_movecacheablepagesout_L2PT (via
;ALTENTRY), which branches in at label 55 with r4 and r8 reloaded. No ROUT may
;be placed between the two routines, as that branch uses a local label.
;
; entry:
;       r4  =  old logical address of 1st page
;       r8  =  number of pages
;
AMB_moveuncacheablepagesout_L2PT
        ALTENTRY
55      ; Enter here from movecacheablepagesout
        LDR     lr,=LL3PT
        ADD     lr,lr,r4,LSR #(Log2PageSize-3)    ;lr -> L3PT 1st entry

        MOV     r0,#0                             ;0 means translation fault
        MOV     r1,#0                             ;high word of the entry is zero too

        CMP     r8,#8
        BLT     %FT70
60
        STRD    r0,[lr],#8*8                      ;clear 8 entries, lr already advanced past them
        STRD    r0,[lr,#-7*8]
        STRD    r0,[lr,#-6*8]
        STRD    r0,[lr,#-5*8]
        STRD    r0,[lr,#-4*8]
        STRD    r0,[lr,#-3*8]
        STRD    r0,[lr,#-2*8]
        STRD    r0,[lr,#-1*8]
        SUB     r8,r8,#8
        CMP     r8,#8
        BGE     %BT60
70
        CMP     r8,#0                             ;0 to 7 pages left at this point
        BEQ     %FT85
80
        STRD    r0,[lr],#8
        SUBS    r8,r8,#1
        BNE     %BT80
85
        FRAMLDR r0,,r4                           ;address of 1st page
        FRAMLDR r1,,r8                           ;number of pages
        LDR     r3,=ZeroPage
        ARMop   MMU_ChangingUncachedEntries,,,r3 ;no cache worries, hoorah
        EXIT

        LTORG

        END