; Copyright 1996 Acorn Computers Ltd
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;     http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;
; > s.memmap

; low level memory mapping

; **************************************************************************************
;

;convert page number in $reg to L2PT entry (physical address+protection bits),
;using PhysBin table for speed
;
;entry: lr -> PhysBin table, r11 = protection bits
;exit:  r12 corrupted
;
        MACRO
        PageNumToL2PT $reg
        BIC     r12,$reg,#(3:SHL:(AMBPhysBinShift-2)) ;word alignment for PhysBin lookup
        LDR     r12,[lr,r12,LSR #(AMBPhysBinShift-2)] ;start physical address of bin
        AND     $reg,$reg,#AMBPhysBinMask             ;no. pages into bin
        ADD     $reg,r12,$reg,LSL #Log2PageSize    ;physical address of page
        ORR     $reg,$reg,r11                      ;munge in protection bits
        MEND

;AMB_movepagesin_L2PT
;
;updates L2PT for new logical page positions, does not update CAM
;
; entry:
;       r3  =  new logical address of 1st page
;       r8  =  number of pages
;       r10 -> page list
;       r11 =  protection/control bits for L2PT
;
AMB_movepagesin_L2PT ROUT
        Push    "r0-r10,r12,lr"

        LDR     lr,AMBPhysBin                  ;lr -> PhysBin
        LDR     r9,=L2PT
        ADD     r9,r9,r3,LSR #(Log2PageSize-2) ;r9 -> L2PT for 1st new logical page

        CMP     r8,#8
        BLT     %FT20
10
        LDMIA   r10!,{r0-r7}   ;next 8 page numbers
        PageNumToL2PT r0
        PageNumToL2PT r1
        PageNumToL2PT r2
        PageNumToL2PT r3
        PageNumToL2PT r4
        PageNumToL2PT r5
        PageNumToL2PT r6
        PageNumToL2PT r7
        STMIA   r9!,{r0-r7}    ;write 8 L2PT entries
        SUB     r8,r8,#8
        CMP     r8,#8
        BGE     %BT10
20
        CMP     r8,#0
        BEQ     %FT35
30
        LDR     r0,[r10],#4
        PageNumToL2PT r0
        STR     r0,[r9],#4
        SUBS    r8,r8,#1
        BNE     %BT30
35
        ARM_read_ID r0
        AND     r0,r0,#&F000
        CMP     r0,#&A000
        ARMA_drain_WB EQ         ;because L2PT area for AppSpace will be bufferable
        Pull    "r0-r10,r12,pc"

;update CAM entry for page number in $reg
;
;entry: r11 -> CAM, r9 = logical addr of page, lr = PPL of page
;exit: $reg = addr of CAM entry
;
        MACRO
        UpdateCAM $reg
        ADD     $reg,r11,$reg,LSL #3    ;r0 -> CAM entry for 1st page
        STMIA   $reg,{r9,lr}            ;store logical addr,PPL
        MEND

;AMB_movepagesin_CAM
;
;updates CAM, does not update L2PT
;
; entry:
;       r3  =  new logical address of 1st page
;       r8  =  number of pages
;       r9  =  PPL for CAM
;       r10 -> page list
;
AMB_movepagesin_CAM ROUT
        Push    "r0-r11,lr"


        MOV     lr,r9
        MOV     r9,r3
        MOV     r11,#0
        LDR     r11,[r11,#CamEntriesPointer]   ;r11 -> CAM

        CMP     r8,#8
        BLT     %FT20
10
        LDMIA   r10!,{r0-r7}                   ;next 8 page numbers
        UpdateCAM r0
        ADD     r9,r9,#PageSize                ;next logical addr
        UpdateCAM r1
        ADD     r9,r9,#PageSize
        UpdateCAM r2
        ADD     r9,r9,#PageSize
        UpdateCAM r3
        ADD     r9,r9,#PageSize
        UpdateCAM r4
        ADD     r9,r9,#PageSize
        UpdateCAM r5
        ADD     r9,r9,#PageSize
        UpdateCAM r6
        ADD     r9,r9,#PageSize
        UpdateCAM r7
        ADD     r9,r9,#PageSize
        SUB     r8,r8,#8
        CMP     r8,#8
        BGE     %BT10
20
        CMP     r8,#0
        Pull    "r0-r11,pc",EQ
30
        LDR     r0,[r10],#4
        UpdateCAM r0
        ADD     r9,r9,#PageSize
        SUBS    r8,r8,#1
        BNE     %BT30
        Pull    "r0-r11,pc"


;AMB_movepagesout_CAM
;
;updates CAM, does not update L2PT
;
; entry:
;       r8  =  number of pages
;       r9  =  PPL for CAM
;       r10 -> page list
;
AMB_movepagesout_CAM ROUT
        Push    "r0-r11,lr"

        MOV     lr,r9
        LDR     r9,=DuffEntry
        MOV     r11,#0
        LDR     r11,[r11,#CamEntriesPointer]   ;r11 -> CAM

        CMP     r8,#8
        BLT     %FT20
10
        LDMIA   r10!,{r0-r7}                   ;next 8 page numbers
        UpdateCAM r0
        UpdateCAM r1
        UpdateCAM r2
        UpdateCAM r3
        UpdateCAM r4
        UpdateCAM r5
        UpdateCAM r6
        UpdateCAM r7
        SUB     r8,r8,#8
        CMP     r8,#8
        BGE     %BT10
20
        CMP     r8,#0
        Pull    "r0-r11,pc",EQ
30
        LDR     r0,[r10],#4
        UpdateCAM r0
        SUBS    r8,r8,#1
        BNE     %BT30
        Pull    "r0-r11,pc"


;AMB_movepagesout_L2PT
;
;updates L2PT for old logical page positions, does not update CAM
;
; entry:
;       r4  =  old logical address of 1st page
;       r8  =  number of pages
;
AMB_movepagesout_L2PT ROUT
        Push    "r0-r8,lr"

        LDR     lr,=L2PT
        ADD     lr,lr,r4,LSR #(Log2PageSize-2)    ;lr -> L2PT 1st entry

        MOV     r0,#0                             ;0 means translation fault
        MOV     r1,#0
        MOV     r2,#0
        MOV     r3,#0
        MOV     r4,#0
        MOV     r5,#0
        MOV     r6,#0
        MOV     r7,#0

        CMP     r8,#8
        BLT     %FT20
10
        STMIA   lr!,{r0-r7}                       ;blam! (8 entries)
        SUB     r8,r8,#8
        CMP     r8,#8
        BGE     %BT10
20
        CMP     r8,#0
        BEQ     %FT35
30
        STR     r0,[lr],#4
        SUBS    r8,r8,#1
        BNE     %BT30
35
        ARM_read_ID r0
        AND     r0,r0,#&F000
        CMP     r0,#&A000
        ARMA_drain_WB EQ         ;because L2PT area for AppSpace will be bufferable
        Pull    "r0-r8,pc"

  [ ARM810support
    ;Previously supported ARMs all tolerate cache (clean and) flush _after_
    ;remapping - ARMs 6,7 because there is no clean, StrongARM because the cache
    ;writebacks use physical address.
    ;ARM810 does not support clean of writeback cache after remapping, since
    ;writebacks use virtual address. Rather than completely restructure code,
    ;this routine is called before remapping where necessary, and cleans/flushes
    ;if it finds we are running on ARM 810.
    ;
    ;corrupts r3
    ;
AMB_cachecleanflush_ifARM810
        ARM_read_ID r3
        AND     r3,r3,#&F000
        CMP     r3,#&8000
        MOVNE   pc,lr           ;not ARM8
    [ ARM810cleanflushbroken
        Push    "lr"
        ARM8_cleanflush_IDC r3,lr
        Pull    "pc"
    |
        ARM8_cleanflush_IDC r3
        MOV     pc,lr
    ]

  ] ;ARM810support

;**************************************************************************
; AMB_SetMemMapEntries:
;
; entry:
;   R3 =  no. of pages
;   R4 -> list of page entries (1 word per entry, giving page no.)
;   R5 =  start logical address of mapping (-1 means 'out of the way')
;   R6 =  PPL ('page protection level') for mapping
;
; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

AMB_SetMemMapEntries ROUT
        Push    "r0-r4,r7-r11,lr"

        MOVS    r8,r3
        BEQ     AMB_smme_exit

        CMP     r5,#-1
        MOVEQ   r9,#AP_Duff  ;PPL for mapped out pages
        MOVNE   r9,r6        ;PPL for mapped in pages

;get L2PT protection etc. bits, appropriate to PPL in R9, into R11
        ADRL    r1,PPLTrans
        AND     lr,r9,#3
        LDR     r11,[r1,lr,LSL #2]
        TST     r9,#DynAreaFlags_NotCacheable
        TSTEQ   r9,#PageFlags_TempUncacheableBits
        ORREQ   r11,r11,#L2_C         ;if cacheable (area bit CLEAR + temp count zero), then OR in C bit
        TST     r9,#DynAreaFlags_NotBufferable
        ORREQ   r11,r11,#L2_B         ;if bufferable (area bit CLEAR), then OR in B bit

        MOV     r10,r4                      ;ptr to next page number

        LDR     r2,[r10]                    ;page number of 1st page
        MOV     r7,#0
        LDR     r7,[r7,#CamEntriesPointer]  ;r7 -> CAM
        ADD     r1,r7,r2,LSL #3
        LDR     r4,[r1]                     ;fetch old logical addr. of 1st page from CAM

        CMP     r5,#-1
        BEQ     AMB_smme_mapout
;map or mapin
        LDR     r1,=DuffEntry
        CMP     r4,r1
        BEQ     AMB_smme_mapin

;map from somewhere to somewhere (should be App Space <-> Free Pool)
;could be an optimise here if source is FreePool and we know that FreePool
;has not been used - ie. no need to clean/flush cache(s) - not done yet (requires
;sorting of Wimp_ClaimFreeMemory)
  [ ARM810support
        BL      AMB_cachecleanflush_ifARM810
  ]
        MOV     r3,r5
        BL      AMB_movepagesout_L2PT
        BL      AMB_movepagesin_L2PT
        BL      AMB_movepagesin_CAM
        B       AMB_smme_cachecleanflush ;needed because of the map out from source

;all pages sourced from same old logical page 'nowhere'
AMB_smme_mapin
        MOV     r3,r5
        BL      AMB_movepagesin_L2PT
        BL      AMB_movepagesin_CAM
;don't need to flush cache at end of mapin (already coherent, since
;nothing mapped in before), but do need to flush TLB (eg. TLB will cache
;access denial for app space after mapout)
        B       AMB_smme_TLBflush

;all pages destined for same new logical page 'nowhere'
AMB_smme_mapout
  [ ARM810support
        BL      AMB_cachecleanflush_ifARM810
  ]
        LDR     r3,=DuffEntry
        BL      AMB_movepagesout_L2PT
        BL      AMB_movepagesout_CAM

;(clean and) flush cache(s) appropriately, then flush TLB(s)
AMB_smme_cachecleanflush
        ARM_read_ID r0
        AND     r0,r0,#&F000
  [ ARM810support
        CMP     r0,#&8000      ;cache clean/flush done before remapping if ARM810
        ARM8_flush_TLB EQ
        Pull    "r0-r4,r7-r11, pc",EQ
  ]
        CMP     r0,#&A000
        BEQ     AMB_smme_cachecleanflush_strongarm

        ARMop   MMU_Changing
        Pull    "r0-r4,r7-r11, pc"

AMB_smme_cachecleanflush_strongarm

;we have a StrongARM then
;
;here, r4 = old logical addr. of 1st page, r8 = no. of pages
;
;StrongARM lets us clean data cache (DC) after remapping, because it writes back by
;physical address.
;
        MOV     r0,r4                      ;r0 := start address for clean/flush
        ADD     r1,r0,r8,LSL #Log2PageSize ;r1 := end address for clean/flush (exclusive)

;Cleaning a sufficiently small space by range will be quicker, because of the fixed
;memory reading cost for a full DC clean. A sufficiently large space will be better handled
;by full clean, because of the huge number of clean/flush line instructions for the range
;case. We use a threshold to switch between the two schemes. The value of the threshold
;depends on memory speed, core speed etc. but is not particularly critical.

        SUB     r2,r1,r0
        CMP     r2,#AMB_ARMA_CleanRange_thresh
        BLO     AMB_smme_StrongARM_flushrange

        MOV     r2,#ARMA_Cleaner_flipflop
        LDR     r1,[r2]
        EOR     r1,r1,#16*1024
        STR     r1,[r2]
        ARMA_clean_DC r1,r2,r3     ;effectively, fully clean/flush wrt non-interrupt stuff
        ARMA_drain_WB
        ARMA_flush_IC WithoutNOPs  ;do *not* flush DC - may be interrupt stuff in it
        MOV     r0,r0              ;NOPs to ensure 4 instructions after IC flush before return
        MOV     r0,r0
        ARMA_flush_TLBs
        Pull    "r0-r4,r7-r11, pc"

AMB_smme_StrongARM_flushrange

  [ SAcleanflushbroken                  ; ARMA_cleanflush_DCentry instruction seems to be ineffective.
01
        ARMA_clean_DCentry r0
        ARMA_flush_DCentry r0
        ADD     r0,r0,#32
        ARMA_clean_DCentry r0
        ARMA_flush_DCentry r0
        ADD     r0,r0,#32
        ARMA_clean_DCentry r0
        ARMA_flush_DCentry r0
        ADD     r0,r0,#32
        ARMA_clean_DCentry r0
        ARMA_flush_DCentry r0
        ADD     r0,r0,#32
        CMP     r0,r1
        BLO     %BT01                   ;loop to clean DC over logical range
  |
01
        ARMA_cleanflush_DCentry r0
        ADD     r0,r0,#32
        ARMA_cleanflush_DCentry r0
        ADD     r0,r0,#32
        ARMA_cleanflush_DCentry r0
        ADD     r0,r0,#32
        ARMA_cleanflush_DCentry r0
        ADD     r0,r0,#32
        CMP     r0,r1
        BLO     %BT01                   ;loop to clean DC over logical range
  ]

        ARMA_drain_WB
        ARMA_flush_IC WithoutNOPs
        MOV     r0,r0                   ;NOPs to ensure 4 instructions after IC flush before return
        MOV     r0,r0
        ARMA_flush_TLBs

        Pull    "r0-r4,r7-r11, pc"

AMB_smme_TLBflush
        ARMop   TLB_InvalidateAll
AMB_smme_exit
        Pull    "r0-r4,r7-r11, pc"


; ************************************************************************
; AMB_FindMemMapEntries
;
; finds page numbers for pages currently at given logical start address,
; and fills in buffer; pages must exist
;
; (does not have any page number guesses)
;
; entry:
;   R3 =  no. of pages
;   R4 -> buffer for page entries
;   R5 =  logical address of 1st page
; exit:
;   buffer at R4 filled in with page numbers
;
; ************************************************************************

AMB_FindMemMapEntries ROUT

        Push    "r0-r11,lr"

;initialise r0,r1,r2 as physical RAM chunk cache for AMB_r11topagenum routine
        MOV     r9,#PhysRamTable
        LDMIA   r9,{r0,r1}        ;r0,r1 := phys addr,size of chunk
        ADD     r1,r1,r0          ;r0,r1 := lowest addr,highest addr + 1 of chunk
        MOV     r2,#0             ;r2    := first page number of chunk

        LDR     r10,=L2PT
        ADD     r10,r10,r5,LSR #(Log2PageSize-2) ;r10 -> L2 entry for 1st page
        CMP     r3,#4                            ;handle pages in chunks of 4
        BLT     %FT20
10
        LDMIA   r10!,{r5-r8}                     ;next 4 L2PT entries
        MOV     r11,r5,LSR #Log2PageSize         ;r11 := phys_addr/page_size
        BL      AMB_r11topagenum
        MOV     r5,r11
        MOV     r11,r6,LSR #Log2PageSize
        BL      AMB_r11topagenum
        MOV     r6,r11
        MOV     r11,r7,LSR #Log2PageSize
        BL      AMB_r11topagenum
        MOV     r7,r11
        MOV     r11,r8,LSR #Log2PageSize
        BL      AMB_r11topagenum
        STMIA   r4!,{r5-r7,r11}                  ;fill in next 4 page numbers
        SUB     r3,r3,#4
        CMP     r3,#4
        BGE     %BT10
20
        CMP     r3,#0
        Pull    "r0-r11,pc",EQ
30
        LDR     r11,[r10],#4
        MOV     r11,r11,LSR #Log2PageSize
        BL      AMB_r11topagenum
        STR     r11,[r4],#4
        SUBS    r3,r3,#1
        BNE     %BT30
        Pull    "r0-r11,pc"

;AMB_r11topagenum
;entry:
;     r0,r1,r2 = lowest addr,highest addr +1,first page no.
;                (cached physical RAM chunk)
;     r11      = physical_addr/page_size for page
;
;exit:
;     r11      = page number of page
;     r0,r1,r2 cache updated if necessary
;     r9       corrupted
;
AMB_r11topagenum ROUT
        CMP     r11,r0,LSR #Log2PageSize
        BLO     %FT10
        CMP     r11,r1,LSR #Log2PageSize
        BHS     %FT10
;cache hit (phys address in range of cached chunk)
        SUB     r11,r11,r0,LSR #Log2PageSize    ;pages into chunk
        ADD     r11,r11,r2                      ;page number
        MOV     pc,lr
10
        MOV     r9,#PhysRamTable
        MOV     r2,#0                        ;start at page number 0
20
        LDMIA   r9!,{r0,r1}                  ;r0,r1 := phys addr,size of chunk
        SUB     r11,r11,r0,LSR #Log2PageSize
        CMP     r11,r1,LSR #Log2PageSize
        ADDHS   r11,r11,r0,LSR #Log2PageSize
        ADDHS   r2,r2,r1,LSR #Log2PageSize
        BHS     %BT20
        ADD     r1,r1,r0
        ADD     r11,r11,r2
        MOV     pc,lr

        LTORG

    END