; Copyright 2016 Castle Technology Ltd
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;     http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;

; in:   r0 = logical address where page is now

GetPageFlagsForR0IntoR6 Entry "r3-r5,r8-r11"
        MOV     r4, r0
        PTOp    logical_to_physical
        BCS     %FT91
        LDR     r4, =ZeroPage
        LDR     r7, [r4, #MaxCamEntry]
        BL      physical_to_ppn
        BCS     %FT92
        ; r3 = page number
        LDR     r4, [r4, #CamEntriesPointer]
        ADD     r4, r4, r3, LSL #CAM_EntrySizeLog2
        LDR     r6, [r4, #CAM_PageFlags]
        EXIT

90
        ADR     r11, NoL2ForPageBeingRemovedError ; NB don't corrupt r0 yet - we need that in block as evidence
95
        Push    "r1"
        BL      StoreDebugRegs
        Pull    "r1"
        MOV     r0, r11
        PullEnv                                 ; seriously broken memory
        SETV
        MOV     pc, lr

91
        ADR     r11, PageBeingRemovedNotPresentError
        B       %BT95

92
        ADR     r11, PhysicalAddressNotFoundError
        B       %BT95

; ***********************************************************************************
;
;       MovePageAtR0ToR1WithAccessR6
;
;       Internal routine, called by OS_ChangeDynamicArea
;
; in:   r0 = logical address where page is now
;       r1 = logical address it should be moved to
;       r6 = area flags (which contain access privileges, and cacheable/bufferable bits)
;
; out:  All registers preserved
;

MovePageAtR0ToR1WithAccessR6 Entry "r2-r5,r11"
        MOV     r3, r1
        MOV     r11, r6
        BL      MoveCAMatR0toR3         ; use old internal routine for now
        EXIT

; Same as above, but returns with r2 = page number of page that moved

MovePageAtR0ToR1WithAccessR6ReturnPageNumber Entry "r3-r5,r11"
        MOV     r3, r1
        MOV     r11, r6
        BL      MoveCAMatR0toR3         ; use old internal routine for now
        EXIT

; MoveCAMatR0toR3
; in:   r0 = old logaddr
;       r3 = new logaddr
;       r9 = offset from 1st to 2nd copy of doubly mapped area (either source or dest, but not both)
;       r11 = page protection level
;
; out:  r2 = physical page number of page moved, unless there was a serious error
;       r0,r1,r3,r6-r12 preserved
;       r4,r5 corrupted

MoveCAMatR0toR3
        Entry   "r0,r1,r6,r7"
        Push    "r3,r8-r11"
        ; Get page number of R0, so we know what we're dealing with
        MOV     r4, r0
        PTOp    logical_to_physical
        BCS     %FT91
        LDR     r4, =ZeroPage
        LDR     r7, [r4, #MaxCamEntry]
        BL      physical_to_ppn
        ; r3 = page number
        MOVCC   r2, r3
        Pull    "r3,r8-r11"
        BCS     %FT92
        ; r2, r3, r9, r11 are ready for BangCamUpdate
        PTOp    BangCamUpdate
        CLRV
        EXIT

90
        ADR     lr, NoL2ForPageBeingRemovedError ; NB don't corrupt r0 yet - we need that in block as evidence
95
        STR     lr, [sp]                        ; update returned r0
        BL      StoreDebugRegs
        PullEnv                                 ; seriously broken memory
        SETV
        MOV     pc, lr

91
        Pull    "r3,r8-r11"
        ADR     lr, PageBeingRemovedNotPresentError
        B       %BT95

92
        ADR     lr, PhysicalAddressNotFoundError
        B       %BT95

StoreDebugRegs ; Note: Corrupts R0,R1
        Push    "lr"
        LDR     lr, =ZeroPage+CamMapCorruptDebugBlock
        STMIA   lr, {r0-r12}
        STR     sp, [lr, #13*4]!
        LDMIA   sp, {r0,r1}                     ; reload stacked LR & return R0 (error pointer)
        STMIB   lr, {r0,r1}                     ; LR -> LR, error -> PC
        Pull    "pc"

NoL2ForPageBeingRemovedError
        &       0
        =       "Memory Corrupt: No L2PT for page being removed", 0
        ALIGN

PageBeingRemovedNotPresentError
        &       0
        =       "Memory Corrupt: Page being removed was not present", 0
        ALIGN

PhysicalAddressNotFoundError
        &       0
        =       "Memory Corrupt: Physical address not found", 0
        ALIGN

CamMapBroke
        &       0
        =       "!!!! CAM Map Corrupt !!!!", 0
        ALIGN

  [ FastCDA_Bulk
; RemoveCacheabilityR0ByMinusR2
; Make a range of pages (temporarily) uncacheable prior to (re)moving them
; in:   r0 = end of area
;       r2 = size of area (must be nonzero)
; out:  r6 has DynAreaFlags_NotCacheable set if entire region noncacheable
;              Flag clear if at least one page was cacheable
;       r0 points to start of area
;
;       4K page size assumed!
RemoveCacheabilityR0ByMinusR2 ROUT
        Entry   "r0-r2,r4-r5,r8-r9"
        MOV     r5, #DynAreaFlags_NotCacheable
        MOV     r9, #-1
10
        SUB     r0, r0, #4096
        BL      GetPageFlagsForR0IntoR6
        BVS     %FT99
        TST     r6, #DynAreaFlags_NotCacheable
        BNE     %FT90
        PTOp    MakeTempUncache         ; Update the page table entry to make it uncacheable
        ; Clear R5 to indicate that at least one page was cacheable
        MOV     r5, #0
90
        SUBS    r2, r2, #4096
        BNE     %BT10
        MOV     r6, r5
        FRAMSTR r0
        ; Perform the required cache/TLB maintenance
        LDR     r4, =ZeroPage
        FRAMLDR r1,,r2
        MOV     r1,r1,LSR #Log2PageSize
        TST     r6, #DynAreaFlags_NotCacheable
        ADR     lr, %FT91
        ARMop   MMU_ChangingEntries,EQ,tailcall,r4
        ARMop   MMU_ChangingUncachedEntries,NE,tailcall,r4
91
        EXIT
99
        FRAMSTR r0
        EXIT
  ]

; Move pages at [R0-R4,R0) to [R1-R4,R1)
 [ FastCDA_Bulk
; Pages must be uncacheable
 ]
; R0 < R1
MoveUncacheableR0ToR1ByMinusR4 ROUT
        Entry   "r0,r1,r4,r6,r9"
        MOV     r9, #0                          ; no funny stuff while moving these pages
25
        SUB     r0, r0, #PageSize
        SUB     r1, r1, #PageSize
        BL      GetPageFlagsForR0IntoR6
 [ FastCDA_Bulk
        ORR     r6, r6, #PageFlags_Unsafe
 ]
        BL      MovePageAtR0ToR1WithAccessR6
        SUBS    r4, r4, #PageSize
        BNE     %BT25
      [ FastCDA_Bulk
        ; Flush the TLB for the shuffled pages
        ; R0 will be the lowest address we modified, and original R1 the highest (+1)
        FRAMLDR r1
        SUB     r1, r1, r0
        MOV     r1, r1, LSR #Log2PageSize
        LDR     r4, =ZeroPage
        ARMop   MMU_ChangingUncachedEntries,,,r4
      ]
        EXIT

; Move pages at [R0,R0+R4) to [R1,R1+R4)
 [ FastCDA_Bulk
; Pages must currently be uncacheable
 ]
; R0 > R1
MoveUncacheableR0ToR1ByR4WithAccessR6 ROUT
        Entry  "r0,r1,r4,r6,r9"
      [ FastCDA_Bulk
        ORR     r6, r6, #PageFlags_Unsafe
      ]
        MOV     r9, #0                          ; no funny business while moving these pages
15
        BL      MovePageAtR0ToR1WithAccessR6    ; move page
        ADD     r0, r0, #PageSize               ; advance src ptr
        ADD     r1, r1, #PageSize               ; advance dst ptr
        SUBS    r4, r4, #PageSize               ; one less page to move
        BNE     %BT15                           ; loop if more
      [ FastCDA_Bulk
        ; Flush the shuffled pages from the TLB
        ; Note we only flush the area containing pages which have been
        ; moved/removed; for pages which have been mapped in the only
        ; requirement is that (for VMSAv6) we issue a DSB+ISB, which we can
        ; assume the MMU_ChangingUnachedEntries op will do for us.
        MOV     r1, r0                          ; High end
        FRAMLDR r0,,r1                          ; Low end
        SUB     r1, r1, r0
        MOV     r1, r1, LSR #Log2PageSize
        LDR     r4, =ZeroPage
        ARMop   MMU_ChangingUncachedEntries,,,r4
      ]
        EXIT

; R1 = base of dynamic area (i.e. end of 1st copy)
; R2 = DA grow amount (how far to move pages)
; R4 = current DA size
; R6 = DA flags
ShuffleDoublyMappedRegionForGrow ROUT
        Entry   "r0,r1"
      [ FastCDA_Bulk
        ; Perform cache maintenance upfront
        MOV     r0, r1
        Push    "r2,r6"
        MOV     r2, r4
        BL      RemoveCacheabilityR0ByMinusR2
        Pull    "r2,r6"
      ]
        SUB     r0, r1, r4                      ; src starts at start of 1st copy = start of 2nd - old size
        SUB     r1, r0, r2                      ; dst start = src start - amount of room needed
        BL      MoveUncacheableR0ToR1ByR4WithAccessR6
        EXIT

; R2 = grow amount, bytes
; R3 = 64bit page list to add page numbers to, or null if not needed
; R11 = src DANode (free pool)
; R12 = dest DANode
DoTheGrowCommon ROUT
        Entry   "r0-r10"
; move pages starting from end of area

        LDR     r0, [r11, #DANode_PMP]
        LDR     r3, [r11, #DANode_PMPSize]
        ADD     r0, r0, r3, LSL #2              ; move r0 to point to after end of area
        SUB     r3, r3, r2, LSR #Log2PageSize   ; reduce by amount moving from area
        STR     r3, [r11, #DANode_PMPSize]      ; store reduced source size

        LDR     r1, [r12, #DANode_Base]
        LDR     r3, [r12, #DANode_Size]

        LDR     r6, [r12, #DANode_Flags]        ; r6 = dst flags
        LDR     lr, =DynAreaFlags_AccessMask
        AND     r6, r6, lr
        TST     r6, #DynAreaFlags_DoublyMapped  ; check if dst is doubly mapped
        BEQ     %FT25                           ; [it's not, so skip all this]

; we must shunt all existing pages in dest area down

        MOVS    r4, r3                          ; amount to do
        BLNE    ShuffleDoublyMappedRegionForGrow
        ADD     r9, r3, r2                      ; set up offset from 1st copy to 2nd copy (= new size)
25
        ADD     r1, r1, r3                      ; r1 -> address of 1st extra page
   [ FastCDA_Bulk
        ORR     r6, r6, #PageFlags_Unsafe
   ]
        MOV     r4, #0                          ; amount done so far
        MOV     r10, r2                         ; move amount to do into r10
        FRAMLDR r3                              ; recover page list pointer
30
        LDR     r2, [r0, #-4]!                  ; pre-decrement source pointer
 [ DebugCDA2
        DREG    r2, "Moving page number ", cc
        DREG    r1, " to ", cc
        DREG    r6, " with PPL "
 ]
        Push    "r0-r1,r3-r4,r6,r11"
        MOV     r11, #-1
        MOV     r3, r1
        STR     r11, [r0]                       ; Remove from free pool PMP
        MOV     r11, r6
        PTOp    BangCamUpdate
        Pull    "r0-r1,r3-r4,r6,r11"
        ADD     r1, r1, #PageSize
        TEQ     r3, #0
        ADD     r4, r4, #PageSize
        ASSERT  MemPageBlock64_PageNum = 0
        STRNE   r2, [r3], #MemPageBlock64_Size  ; store page number and move on
        CMP     r4, r10                         ; have we done all of it?
        BNE     %BT30                           ; [no, so loop]
      [ FastCDA_Bulk
        PageTableSync
      ]
35
        LDR     r3, [r12, #DANode_Size]
        ADD     r3, r3, r10
        STR     r3, [r12, #DANode_Size]         ; store increased destination size
      [ ZeroPage = 0
        TEQ     r12, #AppSpaceDANode            ; check if dest = appspace
      |
        LDR     lr, =ZeroPage+AppSpaceDANode
        TEQ     r12, lr                         ; check if dest = appspace
      ]
        STREQ   r3, [r12, #MemLimit-AppSpaceDANode] ; update memlimit if so
        EXIT

; r2 = shrink amount
; r11 = src DANode
; r12 = dest DANode (free pool)
DoTheShrink ROUT
        Entry   "r0-r10"
; Move pages starting from end of area

        LDR     r0, [r11, #DANode_Base]
        LDR     r3, [r11, #DANode_Size]
        LDR     r6, [r11, #DANode_Flags]        ; r6 = src flags
        Push    "r3, r6"                        ; save src old size, src flags for later
        TST     r6, #DynAreaFlags_DoublyMapped  ; if src is doubly mapped
        MOVNE   r9, r3                          ; then set up offset from 1st copy to 2nd copy = old src size
        ADD     r0, r0, r3                      ; move r0 to point to after end of area (2nd copy)
        SUB     r3, r3, r2
        STR     r3, [r11, #DANode_Size]         ; store reduced source size
      [ ZeroPage = 0
        TEQ     r11, #AppSpaceDANode            ; check if src = appspace
      |
        LDR     lr, =ZeroPage+AppSpaceDANode
        TEQ     r11, lr                         ; check if src = appspace
      ]
        STREQ   r3, [r11, #MemLimit-AppSpaceDANode] ; update memlimit if so
      [ PMPDebug
        DebugReg r3, "< src size"
      ]

        LDR     r3, [r12, #DANode_PMPSize]      ; r3 -> index of 1st extra page

      [ FastCDA_Bulk
        Push    "r0"
        BL      RemoveCacheabilityR0ByMinusR2
        LDR     r6, [r11, #DANode_Flags]
        TST     r6, #DynAreaFlags_DoublyMapped
        BEQ     %FT19
        ; Interacting with doubly-mapped region - make entireity of lower mapping uncacheable too
        LDR     r0, [r11, #DANode_Base]
        MOV     r1, r2
        LDR     r2, [sp, #4]                    ; Grab old source size (pushed r3)
        BL      RemoveCacheabilityR0ByMinusR2
        MOV     r2, r1
19
        Pull    "r0"
      ]

        LDR     lr, =DynAreaFlags_AccessMask
        MOV     r4, r2
        LDR     r6, [r12, #DANode_Flags]        ; r6 = dst flags
        AND     r6, r6, lr
      [ FastCDA_Bulk
        ORR     r6, r6, #PageFlags_Unsafe
      ]
20
        SUB     r0, r0, #PageSize               ; pre-decrement source pointer
 [ DebugCDA2
        DREG    r0, "Moving page at ", cc
        DREG    r3, " to free pool index ", cc
        DREG    r6, " with PPL "
 ]
        Push    "r2"
        LDR     r1, =Nowhere
        BL      MovePageAtR0ToR1WithAccessR6ReturnPageNumber
        ; Update PMP association
        LDR     r1, [r12, #DANode_PMP]
        STR     r2, [r1, r3, LSL #2]            ; Store in free pool PMP
        LDR     lr, =ZeroPage
        LDR     lr, [lr, #CamEntriesPointer]
        ADD     lr, lr, r2, LSL #CAM_EntrySizeLog2
        STR     r12, [lr, #CAM_PMP]              ; Store PMP in CAM
        STR     r3, [lr, #CAM_PMPIndex]
        ADD     r3, r3, #1
        Pull    "r2"

        SUBS    r4, r4, #PageSize
        BNE     %BT20
      [ FastCDA_Bulk
        ; Flush the TLB for the removed pages
        Push    "r0-r1,r3"
        MOV     r1, r2, LSR #Log2PageSize
        LDR     r3, =ZeroPage
        ARMop   MMU_ChangingUncachedEntries,,,r3 ; src region
        LDR     r6, [r11, #DANode_Flags]        ; r6 = src flags
        TST     r6, #DynAreaFlags_DoublyMapped
        LDRNE   r0, [sp]
        SUBNE   r0, r0, r9
        ARMop   MMU_ChangingUncachedEntries,NE,,r3 ; doubly mapped src region
        Pull    "r0-r1,r3"
      ]

        STR     r3, [r12, #DANode_PMPSize]      ; store increased destination size

        Pull    "r3, r6"                        ; restore src old size, src flags
        TST     r6, #DynAreaFlags_DoublyMapped  ; if src doubly mapped
        SUBNES  r4, r3, r2                      ; then set r4 = number of pages to shuffle up
        BEQ     %FT30                           ; [not doubly mapped, or no pages left, so skip]

        SUB     r0, r0, r3                      ; move r0 back to end of 1st copy of pages remaining
        ADD     r1, r0, r2                      ; r1 is end of where they're moving to (should be src base address!)
        BL      MoveUncacheableR0ToR1ByMinusR4

30
        EXIT

 [ CacheablePageTables
; In:
; R0 = new page flags
; R1 = base of area (page aligned)
; R2 = size of area
AdjustMemoryPageFlags ROUT
        Entry   "r0-r12"
        LDR     r12, =ZeroPage
        LDR     r7, [r12, #MaxCamEntry]
        MOV     r4, r1
10
        PTOp    logical_to_physical ; CC if page exists, r8,r9 = phys addr
        BLCC    physical_to_ppn
        BCS     %FT90
        ; r5,r10-r11 corrupt, r3 = page number, r8,r9 = phys addr
        Push    "r0,r2,r4"
        MOV     r2, r3
        MOV     r3, r4
        ; Get the current page flags, so that we can retain the Unavailable flag
        ; (L2PT is a mix of locked and unlocked pages - potentially this is
        ; redundant and just a side-effect of ConstructCAMFromPageTables, but
        ; for now I'll play it safe and keep the current flag setting.
        ; N.B. If this is changed, be sure to make sure the flag is retained for
        ; L1PT)
        LDR     r11, [r12, #CamEntriesPointer]
        ADD     r11, r11, #CAM_PageFlags
        LDR     r11, [r11, r2, LSL #CAM_EntrySizeLog2]
        AND     r11, r11, #PageFlags_Unavailable
        ORR     r11, r0, r11
        PTOp    BangCamUpdate ; r0, r1, r4, r6 corrupted
        ; When making the pages non-cacheable, we need to be extra vigorous in
        ; order to make sure the cache and TLB aren't left in inconsistent
        ; states.
        TST     r11, #DynAreaFlags_NotCacheable
        BEQ     %FT50
        ; The MMU will be performing non-cacheable accesses, but the CPU will
        ; be performing cacheable accesses, so the maintenance performed by
        ; BangCamUpdate won't have done anything useful.
        ; So start by flushing the L2PT word from the cache.
      [ LongDesc :LAND: ShortDesc
        PTWhich r0
        LDRNE   r0, =LL3PT
        ADDNE   r0, r0, r3, LSR #9 ; -> L3PT entry we modified
        LDREQ   r0, =L2PT
        ADDEQ   r0, r0, r3, LSR #10 ; -> L2PT entry we modified
      ELIF LongDesc
        LDR     r0, =LL3PT
        ADD     r0, r0, r3, LSR #9 ; -> L3PT entry we modified
      |
        LDR     r0, =L2PT
        ADD     r0, r0, r3, LSR #10 ; -> L2PT entry we modified
      ]
        LDRB    r2, [r12, #DCache_LineLen]
        SUB     r2, r2, #1
        BIC     r0, r0, r2
        ADD     r1, r0, r2
        ADD     r1, r1, #1
        ARMop   Cache_CleanRange,,,r12
        ; Now that we know the page table write has gone to main memory, we can
        ; do regular TLB+cache maintenance for the area and have it take effect
        MOV     r0, r3
        ARMop   MMU_ChangingEntry,,,r12
50
        Pull    "r0,r2,r4"
90
        SUBS    r2, r2, #PageSize
        ADD     r4, r4, #PageSize
        BNE     %BT10
        EXIT
 ]

        LTORG

        END