; Copyright 1996 Acorn Computers Ltd
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;     http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;
; > MemInfo

        LTORG

;----------------------------------------------------------------------------------------
; MemorySWI
;
;       In:     r0 = reason code and flags
;                       bits 0-7  = reason code
;                       bits 8-31 = reason specific flags
;       Out:    specific to reason codes
;
;       Perform miscellaneous operations for memory management.
;
;       Reason codes below OSMemReason_Convert64 are dispatched through the
;       branch table at label 30, the others through the table at label 80.
;       Handlers are entered with lr -> MemReturn and the real return address
;       on the stack, so a plain return from a handler lands in MemReturn.
;
MemorySWI       ROUT
        Push    lr                              ; Save real return address.
        AND     lr, r0, #&FF                    ; Get reason code.
        CMP     lr, #OSMemReason_Convert64      ; Reason codes from Convert64 upwards
        BHS     %FT50                           ;   use the second branch table.
        CMP     lr, #(%40-%30):SHR:2            ; If valid reason code then
        ADDCC   lr, lr, #(%30-%10):SHR:2        ;   determine where to jump to in branch table,
        ADDCC   lr, pc, lr, LSL #2              ;   (pc reads as label 10 here, so lr -> table entry)
        Push    lr, CC                          ;   save address so we can
10
        ADRCC   lr, MemReturn                   ;   set up default return address for handler routines
        Pull    pc, CC                          ;   and jump into branch table.
20
        ADRL    r0, ErrorBlock_HeapBadReason    ; Otherwise, unknown reason code.
        SETV
        ; Drop through to...

; Common exit for all OS_Memory handlers: translate any error (when
; International is enabled), recover the real return address and leave the SWI.
MemReturn
 [ International
        BLVS    TranslateError
 ]
        Pull    lr                              ; Get back real return address.
        BVS     SLVK_SetV
        ExitSWIHandler

; Branch table for reason codes 0-24. One instruction per reason code; the
; table length (%40-%30) is what the range check in MemorySWI uses.
30
        B       MemoryConvertFIQCheck           ; 0
        B       %BT20                           ; Reason codes 1-5 are reserved.
        B       %BT20
        B       %BT20
        B       %BT20
        B       %BT20
        B       MemoryPhysSize                  ; 6
        B       MemoryReadPhys                  ; 7
        B       MemoryAmounts                   ; 8
        B       MemoryIOSpace                   ; 9
        B       %BT20                           ; Reason code 10 reserved (for free pool locking)
        B       %BT20                           ; Reason code 11 reserved (for PCImapping).
        B       RecommendPage                   ; 12
        B       MapIOpermanent                  ; 13
        B       AccessPhysAddr                  ; 14
        B       ReleasePhysAddr                 ; 15
        B       MemoryAreaInfo                  ; 16
        B       MemoryAccessPrivileges          ; 17
        B       FindAccessPrivilege             ; 18
        B       DMAPrep                         ; 19
        B       ChangeCompatibility             ; 20
        B       MapIO64permanent                ; 21
        B       AccessPhysAddr64                ; 22
        B       ReservePages                    ; 23
        B       CheckMemoryAccess               ; 24
                                                ; 25+ reserved for ROL
40      ; End of list

; Dispatch for reason codes >= OSMemReason_Convert64, using the table at 80.
50
        SUB     lr, lr, #OSMemReason_Convert64  ; Make reason code relative to start of second table.
        CMP     lr, #(%90-%80):SHR:2            ; If valid reason code then
        ADDCC   lr, lr, #(%80-%60):SHR:2        ;   determine where to jump to in branch table,
        ADDCC   lr, pc, lr, LSL #2              ;   (pc reads as label 60 here, so lr -> table entry)
        Push    lr, CC                          ;   save address so we can
60
        ADRCC   lr, MemReturn                   ;   set up default return address for handler routines
        Pull    pc, CC                          ;   and jump into branch table.
        B       %BT20                           ; Otherwise, unknown reason code.

80
        B       MemoryConvert64                 ; 64
        B       MemoryLogToPhys                 ; 65
90      ; End of list


;----------------------------------------------------------------------------------------
; MemoryConvert
;
;       In:     r0 = flags
;                       bit     meaning
;                       0-7     0 (reason code, page list uses 32bit addrs)
;                               or 64 (reason code, list uses 64bit addrs)
;                       8       page number provided when set
;                       9       logical address provided when set
;                       10      physical address provided when set
;                       11      fill in page number when set
;                       12      fill in logical address when set
;                       13      fill in physical address when set
;                       14-15   0,1=don't change cacheability
;                               2=disable caching on these pages
;                               3=enable caching on these pages
;                       16-31   reserved (set to 0)
;               r1 -> page block. 3 words per entry (OS_Memory 0) or 5 words
;                     per entry (OS_Memory 64).
;               r2 = number of entries in page block
;
;       Out:    r1 -> updated page block
;
;       Converts between representations of memory addresses. Can also set the
;       cacheability of the specified pages.
;

; Declare symbols used for decoding flags (given and wanted are used
; so that C can be cleared by rotates of the form a,b). We have to munge
; the flags a bit to make the rotates even.
;
; n.b. cacheable_bit, alter_cacheable and the 'wanted' rotate describe bit
; positions in the munged flags word built by MemoryConvertNoFIQCheck (flag
; bits 11 upwards moved up by one place), not in the flags passed by the caller.
;
ppn             *       1:SHL:0         ; Bits for address formats.
logical         *       1:SHL:1
physical        *       1:SHL:2
all             *       ppn :OR: logical :OR: physical
given           *       24              ; Rotate for given fields.
wanted          *       20              ; Rotate for wanted fields.
ppn_bits        *       ((ppn :SHL: 4) :OR: ppn)                ; With the 'given' rotate: PN given or PN wanted.
logical_bits    *       ((logical :SHL: 4) :OR: logical)        ; With the 'given' rotate: LA given or LA wanted.
physical_bits   *       ((physical :SHL: 4) :OR: physical)      ; With the 'given' rotate: PA given or PA wanted.
cacheable_bit   *       1:SHL:15        ; Munged flags: set => enable caching, clear => disable.
alter_cacheable *       1:SHL:16        ; Munged flags: set => cacheability is to be changed.
mem0_64         *       OSMemReason_Convert64   ; Set in r0 when the 64bit page block format is in use.

; MemoryConvert64
;
;       In:     r0 = flags (reason code 64), r1 -> page block, r2 = entry count
;
;       Entry point for the 64bit page block form of MemoryConvert. Rejects
;       any flags word with bits 16-31 set, otherwise continues in
;       MemoryConvertFIQCheck.
;
MemoryConvert64 ROUT
        ; Wrapper which checks for unsupported flags in R0
        CMP     r0, #1:SHL:16                   ; Any of the reserved bits 16-31 set?
        BLO     MemoryConvertFIQCheck           ; No - carry on with the conversion (lr unchanged).
        ADRL    r0, ErrorBlock_BadParameters    ; Yes - return an error.
        SETV
        MOV     pc, lr

; Small wrapper to make sure FIQs are disabled if we're making pages uncacheable
; (Modern ARMs ignore unexpected cache hits, so big coherency issues if we make
; a page uncacheable which is being used by FIQ).
;
;       In:     r0 = flags, r1 -> page block, r2 = entry count (as MemoryConvert)
;       Out:    as MemoryConvertNoFIQCheck; r11 corrupted
;
;       When flag bits 14-15 = 2 (disable caching) the conversion is bracketed
;       by Service_ClaimFIQ / Service_ReleaseFIQ service calls. Any other
;       request goes straight to MemoryConvertNoFIQCheck.
;
MemoryConvertFIQCheck ROUT
        AND     r11, r0, #3:SHL:14              ; Extract the cacheability request.
        TEQ     r11, #2:SHL:14                  ; Disabling caching?
        BNE     MemoryConvertNoFIQCheck         ; No - nothing extra to do.
        Entry   "r0-r1"
        MOV     r1, #Service_ClaimFIQ
        SWI     XOS_ServiceCall
        LDMIA   sp, {r0-r1}                     ; Reload the caller's r0,r1 after the service call.
        BL      MemoryConvertNoFIQCheck
        FRAMSTR r0                              ; Presumably stores r0 (possible error pointer) into the stacked r0 for EXIT - TODO confirm macro.
        MRS     r11, CPSR                       ; Keep the flags (V = error state) over the service call.
        MOV     r1, #Service_ReleaseFIQ
        SWI     XOS_ServiceCall
        MSR     CPSR_f, r11                     ; Put back the flags from the conversion.
        EXIT

; MemoryConvertNoFIQCheck
;
;       In:     r0 = flags, r1 -> page block, r2 = entry count (as MemoryConvert)
;
;       Out:    VC => page block updated, all registers preserved
;               VS => r0 -> error block. If cacheability was being altered,
;                     the entries processed before the failing one have
;                     their cacheability change reversed.
;
;       Main worker for MemoryConvert. Register use inside the entry loop:
;               r0      munged flags (bits 11+ moved up one place)
;               r1      -> current page block entry
;               r2      entries still to process
;               r3      page number
;               r4      logical address
;               r8,r9   physical address (low, high)
;               r6      CamEntriesPointer
;               r7      MaxCamEntry
;
MemoryConvertNoFIQCheck   ROUT
        Entry   "r0-r11"                ; Need lots of registers!!

;        MRS     lr, CPSR
;        Push    "lr"
;        ORR     lr, lr, #I32_bit+F32_bit
;        MSR     CPSR_c, lr

        MOV     lr, r0, LSR #11         ; Need to munge r0 to get rotates to work (must be even)
        BIC     r0, r0, lr, LSL #11
        ORR     r0, r0, lr, LSL #12     ; Move bits 11-30 to 12-31. Bits 7 & 11 are clear, so the rotate will always clear C

        TST     r0, #all,given          ; Check for invalid argument (no fields provided)
        TEQNE   r2, #0                  ;   (no entries in table).
        ADREQL  r0, ErrorBlock_BadParameters
        BEQ     %FT95

        EOR     lr, r0, r0, LSL #given-wanted   ; If flag bits 8-10 and 12-14 contain common bits then
        AND     lr, lr, #all,wanted             ;   clear bits in 12-14 (ie. don't fill in fields already given).
        EOR     lr, lr, #all,wanted
        BIC     r0, r0, lr

        LDR     r6, =ZeroPage
        LDR     r7, [r6, #MaxCamEntry]
        LDR     r6, [r6, #CamEntriesPointer]

        TST     r0, #mem0_64                 ; Step back one entry (main loop
        SUBEQ   r1, r1, #MemPageBlock32_Size ; increments ptr at the start of
        SUBNE   r1, r1, #MemPageBlock64_Size ; the loop)
10
        SUBS    r2, r2, #1
        BCC     %FT70

        TST     r0, #mem0_64
        ADDEQ   r1, r1, #MemPageBlock32_Size
        ADDNE   r1, r1, #MemPageBlock64_Size

        ASSERT  MemPageBlock32_PageNum=0
        ASSERT  MemPageBlock32_LogAddr=4
        ASSERT  MemPageBlock32_PhysAddr=8
        LDMIA   r1, {r3-r4,r8}          ; Get next three words (PN,LA,PA)
        MOVEQ   r9, #0                  ; High word of phys addr
        LDRNE   r9, [r1, #MemPageBlock64_PhysHigh]

        ; If we're using the 64bit API, the LDM will have actually loaded these
        ASSERT  MemPageBlock64_PageNum=0
        ASSERT  MemPageBlock64_LogLow=4
        ASSERT  MemPageBlock64_LogHigh=8
        ; Load the correct low phys addr, and if log addr given, check high word is zero
        MOVNE   lr, r8
        LDRNE   r8, [r1, #MemPageBlock64_PhysLow]
        TSTNE   r0, #logical,given
        CMPNE   lr, #0
        BNE     %FT80

   [ AMB_LazyMapIn
        BL      handle_AMBHonesty       ; may need to make page honest (as if not lazily mapped)
   ]

        TST     r0, #physical,wanted    ; If PA not wanted
        BEQ     %FT20                   ;   then skip.
        TST     r0, #logical,given      ; If LA given (rotate clears C) then
        BEQ     %FT11
        PTOp    logical_to_physical     ; Get PA from LA
        B       %FT15
11
        BL      ppn_to_logical          ; Else get LA from PN (PA wanted (not given) & LA not given => PN given).
        BLCC    ppn_to_physical         ; And get PA from PN (more accurate than getting PA from LA - page may be mapped out)
15
        BCS     %FT80
        MVN     lr, r0
        CMP     r9, #0                  ; If high phys addr non-zero
        TSTNE   lr, #mem0_64            ; And not using 64bit API
        BNE     %FT80                   ; Throw error

        ; Store phys addr
        TST     r0, #mem0_64
        STREQ   r8, [r1, #MemPageBlock32_PhysAddr]
        STRNE   r8, [r1, #MemPageBlock64_PhysLow]
        STRNE   r9, [r1, #MemPageBlock64_PhysHigh]

        ; Store log addr, if wanted
        TST     r0, #logical,wanted
        ASSERT  MemPageBlock64_LogLow = MemPageBlock32_LogAddr
        STRNE   r4, [r1, #MemPageBlock32_LogAddr]
        TSTNE   r0, #mem0_64
        MOVNE   lr, #0
        STRNE   lr, [r1, #MemPageBlock64_LogHigh]
20
        TST     r0, #alter_cacheable    ; If altering cacheability
        EORNE   lr, r0, #ppn,given      ;   and PN not given
        TSTNE   lr, #ppn,given
        TSTEQ   r0, #ppn,wanted         ;   OR PN wanted then don't skip
        BEQ     %FT30                   ; else skip.
        TST     r0, #physical_bits,given        ; If PA not given and PA not wanted (rotate clears C) then
        PTOpEQ  logical_to_physical             ;   get it from LA (PN wanted/not given & PA not given => LA given).
        BLCC    physical_to_ppn         ; Get PN from PA.
        BCS     %FT80
        TST     r0, #ppn,wanted
        ASSERT  MemPageBlock64_PageNum = MemPageBlock32_PageNum
        STRNE   r3, [r1, #MemPageBlock32_PageNum] ; Store back PN if wanted.
30
        TST     r0, #logical,wanted     ; If LA wanted
        EORNE   lr, r0, #physical,wanted
        TSTNE   lr, #physical,wanted    ;   and PA not wanted then don't skip
        BEQ     %FT40                   ; else skip.
        TST     r0, #alter_cacheable    ; If not changing cacheability (already have PN)
        TSTEQ   r0, #ppn_bits,given     ;   and PN not given and PN not wanted (rotate clears C) then
        BLEQ    physical_to_ppn         ;   get it from PA (LA wanted (not given) & PN not given => PA given).
        BLCC    ppn_to_logical          ; Get LA from PN.
        BCS     %FT80
        ; Store back log addr
        ASSERT  MemPageBlock64_LogLow = MemPageBlock32_LogAddr
        STR     r4, [r1, #MemPageBlock32_LogAddr]
        TST     r0, #mem0_64
        MOVNE   lr, #0
        STRNE   lr, [r1, #MemPageBlock64_LogHigh]
40
        TST     r0, #alter_cacheable
        BEQ     %BT10

        CMP     r7, r3                  ; Make sure page number is valid (might not have done any conversion).
        BCC     %FT80

        ADD     r3, r6, r3, LSL #CAM_EntrySizeLog2 ; Point to CAM entry for this page.
        ASSERT  CAM_LogAddr=0
        ASSERT  CAM_PageFlags=4
        LDMIA   r3, {r4,r5}             ; Get logical address and PPL.

        AND     lr, r5, #PageFlags_TempUncacheableBits
        TST     r0, #cacheable_bit
        BNE     %FT50

        TEQ     lr, #PageFlags_TempUncacheableBits      ; Make uncacheable (increment count).
        BEQ     %BT10                                   ; If count has reached max then go no further (should not happen).
        TEQ     lr, #0                                  ; EQ => we have to change L2.
        ADD     r5, r5, #1:SHL:TempUncacheableShift
        B       %FT60
50
        TEQ     lr, #0                                  ; Make cacheable (decrement count).
        BEQ     %BT10                                   ; If count is already 0 then go no further (page already cacheable).
        SUB     r5, r5, #1:SHL:TempUncacheableShift
        TST     r5, #PageFlags_TempUncacheableBits      ; EQ => we have to change L2.
60
        STR     r5, [r3, #CAM_PageFlags] ; Write back new PPL.
        BNE     %BT10                   ; Do next entry if we don't have to change L2.

        MOV     r4, r4, LSR #12
 [ LongDesc :LAND: ShortDesc
        PTWhich r8
        BEQ     %FT65
 ]
 [ LongDesc
        LDR     r8, =LL3PT
        LDR     r3, =ZeroPage
        ADD     r4, r8, r4, LSL #3      ; Address of L3 entry for logical address.
        ; VMSAv6 is hard, use XCBTable/PCBTrans
        ASSERT  DynAreaFlags_CPBits = 7*XCB_P :SHL: 10
        ASSERT  DynAreaFlags_NotCacheable = XCB_NC :SHL: 4
        ASSERT  DynAreaFlags_NotBufferable = XCB_NB :SHL: 4
        TST     r0, #cacheable_bit      ; n.b. must match EQ/NE used by ARMop calls
        AND     lr, r5, #DynAreaFlags_NotCacheable + DynAreaFlags_NotBufferable
        AND     r5, r5, #DynAreaFlags_CPBits
        ORR     lr, lr, r5, LSR #10-4
        LDR     r5, [r3, #MMU_PCBTrans]
        ORREQ   lr, lr, #XCB_TU<<4      ; if temp uncache, set TU bit
        LDRB    lr, [r5, lr, LSR #4]    ; get AttrIndx value
        LDRD    r8, [r4]                ; Get L3 entry (safe as we know address is valid).
        BIC     r8, r8, #TempUncache_L3PTMask ; Knock out existing attributes (n.b. assumed to not be large page!)
        ORR     r8, r8, lr              ; Set new attributes
        BNE     %FT63
        ; Making page non-cacheable
        ; There's a potential interrupt hole here - many ARMs ignore cache hits
        ; for pages which are marked as non-cacheable (seen on XScale,
        ; Cortex-A53, Cortex-A15 to name but a few, and documented in many TRMs)
        ; We can't be certain that this page isn't being used by an interrupt
        ; handler, so if we're making it non-cacheable we have to take the safe
        ; route of disabling interrupts around the operation.
        ; Note - currently no consideration is given to FIQ handlers.
        ; Note - we clean the cache as the last step (as opposed to doing it at
        ; the start) to make sure prefetching doesn't pull data back into the
        ; cache.
        MRS     r11, CPSR
        ORR     lr, r11, #I32_bit       ; IRQs off
        ; Yuck, we also need to deal with the case where we're making the
        ; current SVC stack page uncacheable (coherency issue when calling the
        ; ARMops if cache hits to uncacheable pages are ignored). Deal with this
        ; by temporarily dropping into IRQ mode (and thus a different stack) if
        ; we think this is going to happen.
        MOV     r5, r4, LSL #9          ; R5 = original logical address
      [ (LL3PT:SHL:9) <> 0
        SUB     r5, r5, #LL3PT:SHL:9
      ]
        SUB     r10, sp, r5
        CMP     r10, #8192              ; Be extra cautious
        EORLO   lr, lr, #SVC32_mode :EOR: IRQ32_mode
        MSR     CPSR_c, lr              ; Switch mode
        Push    "r0, lr"                ; Preserve OS_Memory flags and (potential) IRQ lr
        STRD    r8, [r4]                ; Write back new L3 entry.
        MOV     r0, r5
        ARMop   MMU_ChangingEntry,,,r3  ; Clean TLB+cache
        Pull    "r0, lr"                ; Restore OS_Memory flags + IRQ lr
        MSR     CPSR_c, r11             ; Back to original mode + IRQ state
        B       %BT10
63
        ; Making page cacheable again
        ; Shouldn't be any cache maintenance worries
        STRD    r8, [r4]                ; Write back new L3 entry.
        ; r5 holds the MMU_PCBTrans pointer at this point, not the page
        ; address, so rebuild the logical address from the L3 entry address
        ; (same calculation as the non-cacheable path above).
        MOV     r5, r0                  ; Preserve OS_Memory flags over the ARMop
        MOV     r0, r4, LSL #9          ; R0 = original logical address
      [ (LL3PT:SHL:9) <> 0
        SUB     r0, r0, #LL3PT:SHL:9
      ]
        ARMop   MMU_ChangingUncachedEntry,,,r3   ; Clean TLB
        MOV     r0, r5
        B       %BT10
 ]
 [ ShortDesc
65
        LDR     r8, =L2PT
        LDR     r3, =ZeroPage
        ADD     r4, r8, r4, LSL #2      ; Address of L2 entry for logical address.
   [ MEMM_Type = "VMSAv6"
        ; VMSAv6 is hard, use XCBTable/PCBTrans
        ASSERT  DynAreaFlags_CPBits = 7*XCB_P :SHL: 10
        ASSERT  DynAreaFlags_NotCacheable = XCB_NC :SHL: 4
        ASSERT  DynAreaFlags_NotBufferable = XCB_NB :SHL: 4
        TST     r0, #cacheable_bit      ; n.b. must match EQ/NE used by ARMop calls
        AND     lr, r5, #DynAreaFlags_NotCacheable + DynAreaFlags_NotBufferable
        AND     r5, r5, #DynAreaFlags_CPBits
        ORR     lr, lr, r5, LSR #10-4
        LDR     r5, [r3, #MMU_PCBTrans]
        ORREQ   lr, lr, #XCB_TU<<4      ; if temp uncache, set TU bit
        MOV     lr, lr, LSR #3
        LDRH    lr, [r5, lr]            ; convert to C, B and TEX bits for this CPU
        LDR     r5, [r4]                ; Get L2 entry (safe as we know address is valid).
        BIC     r5, r5, #TempUncache_L2PTMask ; Knock out existing attributes (n.b. assumed to not be large page!)
        ORR     r5, r5, lr              ; Set new attributes
   |
        LDR     r5, [r4]                ; Get L2 entry (safe as we know address is valid).
        TST     r0, #cacheable_bit
        BICEQ   r5, r5, #L2_C           ; Disable/enable cacheability.
        ORRNE   r5, r5, #L2_C
   ]
        BNE     %FT67
        ; Making page non-cacheable
        ; There's a potential interrupt hole here - many ARMs ignore cache hits
        ; for pages which are marked as non-cacheable (seen on XScale,
        ; Cortex-A53, Cortex-A15 to name but a few, and documented in many TRMs)
        ; We can't be certain that this page isn't being used by an interrupt
        ; handler, so if we're making it non-cacheable we have to take the safe
        ; route of disabling interrupts around the operation.
        ; Note - currently no consideration is given to FIQ handlers.
        ; Note - we clean the cache as the last step (as opposed to doing it at
        ; the start) to make sure prefetching doesn't pull data back into the
        ; cache.
        MRS     r11, CPSR
        ORR     lr, r11, #I32_bit       ; IRQs off
        ; Yuck, we also need to deal with the case where we're making the
        ; current SVC stack page uncacheable (coherency issue when calling the
        ; ARMops if cache hits to uncacheable pages are ignored). Deal with this
        ; by temporarily dropping into IRQ mode (and thus a different stack) if
        ; we think this is going to happen.
        MOV     r10, r4, LSL #10
        SUB     r10, sp, r10
        CMP     r10, #8192              ; Be extra cautious
        EORLO   lr, lr, #SVC32_mode :EOR: IRQ32_mode
        MSR     CPSR_c, lr              ; Switch mode
        Push    "r0, lr"                ; Preserve OS_Memory flags and (potential) IRQ lr
        STR     r5, [r4]                ; Write back new L2 entry.
        ASSERT  (L2PT :SHL: 10) = 0     ; Ensure we can convert r4 back to the page log addr
        MOV     r0, r4, LSL #10
        ARMop   MMU_ChangingEntry,,,r3  ; Clean TLB+cache
        Pull    "r0, lr"                ; Restore OS_Memory flags + IRQ lr
        MSR     CPSR_c, r11             ; Back to original mode + IRQ state
        B       %BT10
67
        ; Making page cacheable again
        ; Shouldn't be any cache maintenance worries
        STR     r5, [r4]                ; Write back new L2 entry.
        MOV     r5, r0
        ASSERT  (L2PT :SHL: 10) = 0     ; Ensure we can convert r4 back to the page log addr
        MOV     r0, r4, LSL #10
        ARMop   MMU_ChangingUncachedEntry,,,r3   ; Clean TLB
        MOV     r0, r5
        B       %BT10
 ]

70
        CLRV
        EXIT

80
        TST     r0, #alter_cacheable    ; If we haven't changed any cacheability stuff then
        BEQ     %FT90                   ;   just return error.

        AND     lr, r0, #all,wanted             ; Get wanted flags.
        LDMIA   sp, {r0,r1,r3}                  ; Get back original flags, pointer and count.
        ORR     r0, r0, lr, LSR #given-wanted   ; Wanted fields are now also given as we have done the conversion.
        BIC     r0, r0, #all:SHL:11             ; Clear wanted flags, we only want to change cacheability.
        ; r0 is the caller's (unmunged) flags word here, where the
        ; enable/disable choice is bit 14 and bit 15 is the "alter" bit.
        ; cacheable_bit is the munged position, so shift it back down one;
        ; toggling bit 15 would merely switch the cacheability change off.
        EOR     r0, r0, #cacheable_bit:SHR:1    ; If we made them uncacheable then make them cacheable again & v.v.
        SUB     r2, r3, r2
        SUBS    r2, r2, #1              ; Change back the entries we have changed up to (but excluding) the error entry.
        BLNE    MemoryConvertNoFIQCheck
90
        ADRL    r0, ErrorBlock_BadAddress
95
        STR     r0, [sp, #Proc_RegOffset+0]
        SETV
        EXIT

   [ AMB_LazyMapIn
;
;  entry: r3,r4,r8,r9 = provided PN,LA,PA triple for entry to make honest (at least one given)
;         r0 bits flag which of PN,LA,PA are given
;  exit:  mapping made honest (as if not lazily mapped) if necessary
;         r0, r3, r4 preserved
;
;  The first of LA, PN, PA found to be given decides how the page is
;  identified: LA goes to AMB_MakeHonestLA, PN to AMB_MakeHonestPN, and PA is
;  first converted to a PN. Nothing is done if the PA cannot be converted.
handle_AMBHonesty  ROUT
        Push    "r0, r3-r4, lr"
        TST     r0, #logical,given      ; LA given?
        BEQ     %FT10
        MOV     r0, r4
        BL      AMB_MakeHonestLA
        B       %FT90
10
        TST     r0, #ppn,given          ; PN given?
        BEQ     %FT20
15
        MOV     r0, r3
        BL      AMB_MakeHonestPN
        B       %FT90
20
        TST     r0, #physical,given     ; PA given?
        BEQ     %FT90
        Push    "r5, r7, r10-r11"       ; physical_to_ppn corrupts r5,r10,r11; r7 is reloaded below.
        LDR     r14, =ZeroPage
        LDR     r7, [r14, #MaxCamEntry]
        BL      physical_to_ppn         ; CC => r3 = page number
        Pull    "r5, r7, r10-r11"
        BCC     %BT15                   ; Valid PA, so treat as if PN had been given.
90
        Pull    "r0, r3-r4, pc"

   ] ;AMB_LazyMapIn


;----------------------------------------------------------------------------------------
; ppn_to_logical
;
;       In:     r0 = munged MemoryConvert flags (only 'physical given' is tested)
;               r3 = page number
;               r8,r9 = physical address if given
;               r6 = CamEntriesPointer
;               r7 = MaxCamEntry
;
;       Out:    r5 corrupted
;               CC => r4 = logical address
;               CS => invalid page number
;
;       Convert physical page number to logical address. When a physical
;       address was given, its low 12 bits (the offset within the page) are
;       copied into the returned logical address.
;
ppn_to_logical
        CMP     r7, r3                  ; Validate page number.
        BCC     meminfo_returncs        ; Invalid so return C set.

        ASSERT  CAM_LogAddr=0
        LDR     r4, [r6, r3, LSL #CAM_EntrySizeLog2] ; If valid then lookup logical address.
        TST     r0, #physical,given     ; If physical address was given then
      [ NoARMT2
        LDRNE   r5, =&FFF
        ANDNE   r5, r8, r5              ;   mask off page offset
        ORRNE   r4, r4, r5              ;   and combine with logical address.
      |
        BFINE   r4, r8, #0, #12         ;   apply page offset
      ]
        CLC
        MOV     pc, lr

; Shared failure exits for the conversion routines: return with C set, the
; first form also restoring the r8 stacked by physical_to_ppn.
meminfo_returncs_pullr8
        Pull    "r8"
meminfo_returncs
        SEC
        MOV     pc, lr

;----------------------------------------------------------------------------------------
; physical_to_ppn
;
;       Translate a physical address into its physical page number by
;       walking the block list held in PhysRamTable.
;
;       In:     r8,r9 = physical address (low word, high word)
;               r7 = MaxCamEntry
;
;       Out:    r5,r10 corrupted
;               CC => r3 = page number, low 12 bits of r11 = PhysRamTable flags
;               CS => invalid physical address, r3+r11 corrupted
;               r8 is preserved in both cases
;
physical_to_ppn ROUT
        Push    "r8"                    ; r8 is reused below to hold the address in page units.
        MOV     r8, r8, LSR #12
        ORR     r8, r8, r9, LSL #20     ; r8 = physical address >> 12
        MOV     r3, #0                  ; Page number of the first page in the current block.
        LDR     r5, =ZeroPage+PhysRamTable
20
        CMP     r7, r3                  ; Gone beyond the last CAM entry?
        BLO     meminfo_returncs_pullr8 ;   Yes, so restore r8 and return C set.

        LDMIA   r5!, {r10,r11}          ; r10 = block start (page units), r11 = block size + flags.
        SUB     r10, r8, r10            ; Offset of our address from the start of the block.
        CMP     r10, r11, LSR #12       ; Does it fall inside the block?
        ADDHS   r3, r3, r11, LSR #12    ;   No, so count all of this block's pages
        BHS     %BT20                   ;   and examine the next block.

        ADD     r3, r3, r10             ; Page number = first page of block + offset into block.
        Pull    "r8"
        CLC
        MOV     pc, lr

;----------------------------------------------------------------------------------------
; ppn_to_physical
;
;       Translate a physical page number into a physical address by
;       walking the block list held in PhysRamTable.
;
;       In:     r3 = page number
;
;       Out:    r5 corrupted
;               CC => r8,r9 = physical address (low word, high word)
;               CS => invalid page number, r8,r9 corrupted
;               r3 is preserved in both cases
;
ppn_to_physical ROUT
        Push    "r3,lr"
        LDR     r5, =ZeroPage+PhysRamTable
        B       %FT20
10
        SUB     r3, r3, lr              ; Page is beyond this block: make r3 relative to the next one.
20
        LDMIA   r5!, {r8,lr}            ; r8 = block start (page units), lr = block size + flags.
        MOVS    lr, lr, LSR #12         ; lr = number of pages in the block.
        BEQ     %FT30                   ; An empty block marks the end of the table.
        CMP     r3, lr                  ; Is the page inside this block?
        BCS     %BT10                   ;   No, so try the next one.

        ADD     r8, r8, r3              ; Physical address of the page, in page units.
        MOV     r9, r8, LSR #20         ; Split into high
        MOV     r8, r8, LSL #12         ;   and low words of the byte address.
        Pull    "r3,pc"                 ; C is clear from the CMP above.
30
        SEC
        Pull    "r3,pc"


;----------------------------------------------------------------------------------------
; Symbols used in MemoryPhysSize and MemoryReadPhys
;
; The memory arrangement table holds one 4-bit entry per page, so a byte
; describes 2 pages and a word describes 8.
;

; Shifts to determine number of bytes/words to allocate in table.
ByteShift       *       1             ; 2^1 pages per byte
WordShift       *       ByteShift + 2 ; 2^3 pages per word

; Bit patterns for different types of memory.
; Each pattern repeats the 4-bit page type in every nibble of the word.
NotPresent      *       &00000000
DRAM_Pattern    *       &11111111
VRAM_Pattern    *       &22222222
ROM_Pattern     *       &33333333
IO_Pattern      *       &44444444
NotAvailable    *       &88888888       ; ORed into a type pattern to flag the page as unavailable.


;----------------------------------------------------------------------------------------
; MemoryPhysSize
;
;       Report the size of the memory arrangement table and the page size.
;       The table size is obtained from the HAL (HAL_PhysInfo, function
;       PhysInfo_GetTableSize).
;
;       In:     r0 = 6 (reason code with flag bits 8-31 clear)
;
;       Out:    r1 = table size (in bytes)
;               r2 = page size (in bytes)
;
MemoryPhysSize
        Entry   "r0-r1,r3,sb,ip"
        AddressHAL
        ADD     r1, sp, #4              ; Presumably -> stacked r1, so the HAL's answer is returned by EXIT - TODO confirm frame layout.
        MOV     r0, #PhysInfo_GetTableSize
        CallHAL HAL_PhysInfo
        MOV     r2, #4096               ; Page size is fixed at 4K.
        CLRV
        EXIT


;----------------------------------------------------------------------------------------
; MemoryReadPhys
;
;       In:     r0 = 7 (reason code with flag bits 8-31 clear)
;               r1 -> memory arrangement table to be filled in
;
;       Out:    r1 -> filled in memory arrangement table
;
;       Returns the physical memory arrangement table in the given block.
;
;       The table holds one 4-bit entry per page (8 pages per word, lowest
;       numbered page in the least significant nibble). The HAL writes
;       everything except DRAM; this routine then walks PhysRamTable and adds
;       a DRAM entry for every page of each non-VRAM block, flagged
;       NotAvailable when the page's CAM flags have PageFlags_Unavailable or
;       PageFlags_Reserved set. Finally, if the OS is running from RAM, the
;       ROM image's pages are marked as unavailable DRAM.
;
MemoryReadPhys  ROUT

        Entry   "r0-r12"
        AddressHAL
        MOV     r0, #PhysInfo_WriteTable
        SUB     sp, sp, #8              ; Temporary two word block for the HAL to return an address range in.
        MOV     r2, sp
        CallHAL HAL_PhysInfo            ; fills in everything except DRAM
        ADD     sp, sp, #8              ; We don't use this address range any more

        MOV     r5, #0                  ; Current page number.
        LDR     r6, =ZeroPage+PhysRamTable
        LDR     r7, [r6, #CamEntriesPointer-PhysRamTable]
        ADD     r7, r7, #CAM_PageFlags  ; Point to PPL entries.
        LDR     r8, [r6, #MaxCamEntry-PhysRamTable]
10
        LDMIA   r6!, {r9,r10}           ; Get physical address and size of next block.

        TST     r10, #OSAddRAM_IsVRAM   ; If not DRAM then
        ADDNE   r5, r5, r10, LSR #12    ;   adjust current page number
        BNE     %BT10                   ;   and try next block.

        MOV     r10, r10, LSR #12       ; Number of pages in this block.
        LDR     r1, [sp, #4]            ; Get table address back
        MOV     r3, r9, LSR #WordShift  ; Index of the table word holding the block's first page.
        LDR     r3, [r1, r3, LSL #2]!   ; Get first word of block
        MOV     r4, r9, LSL #2          ; 4 bits per page, 8 pages per word, so
        AND     r4, r4, #31             ; Bit offset of first page in the word
        RSB     r4, r4, #32             ; number of bits left to process
        MOV     r3, r3, LSL r4          ; Move the entries already in the word up out of the way.

        ; r1 -> current table location
        ; r3 = next word to store in table
        ; r4 = how much we have to shift r3 before storing it
20
        SUBS    r4, r4, #4              ; Reduce shift.
        MOVCS   r3, r3, LSR #4          ; If more space in current word then shift it.
        STRCC   r3, [r1], #4            ; Otherwise, store current word
        MOVCC   r3, #0                  ;   and start a new one.
        MOVCC   r4, #28

        LDR     lr, [r7, r5, LSL #CAM_EntrySizeLog2] ; Page is there so get PPL and determine if it's available or not.
        TST     lr, #PageFlags_Unavailable
        TSTEQ   lr, #PageFlags_Reserved
        ORREQ   r3, r3, #DRAM_Pattern :SHL: 28
        ORRNE   r3, r3, #(DRAM_Pattern :OR: NotAvailable) :SHL: 28
        ADD     r5, r5, #1              ; Increment page count.
30
        SUBS    r10, r10, #1            ; Decrease size of block.
        BNE     %BT20                   ; Stop if no more block left.

        ; Store the partial last word
        LDR     lr, [r1]
        MOV     r3, r3, LSR r4          ; put bits in correct position
        RSB     r4, r4, #32
        MOV     lr, lr, LSR r4          ; drop the low bits of lr
        ORR     r3, r3, lr, LSL r4      ; combine with r3
        STR     r3, [r1]                ; and store word.

        CMP     r8, r5                  ; Stop if we run out of pages.
        BCS     %BT10

        ; If softloaded, mark that as unavailable DRAM.
        MOV     r0, #8
        SWI     XOS_ReadSysInfo
        BVS     %FT40
        AND     r1, r1, r2
        ANDS    r1, r1, #1:SHL:4        ; Test OS-runs-from-RAM flag
        BEQ     %FT40
        LDR     r0, =ZeroPage
        LDR     r0, [r0, #ROMPhysAddr]
        LDR     r1, [sp, #4]
        ADD     r0, r1, r0, LSR #ByteShift+12   ; -> table byte for the first page of the ROM image.
        LDR     r1, =DRAM_Pattern :OR: NotAvailable
        MOV     r2, #(OSROM_ImageSize :SHR: 2) :SHR: ByteShift ; Table bytes covering the ROM image.
        BL      memset
40
        CLRV
        EXIT


;----------------------------------------------------------------------------------------
; MemoryAmounts
;
;       In:     r0 = flags
;                       bit     meaning
;                       0-7     8 (reason code)
;                       8-11    1=return amount of DRAM (excludes any soft ROM)
;                               2=return amount of VRAM
;                               3=return amount of ROM
;                               4=return amount of I/O space
;                               5=return amount of soft ROM (ROM loaded into hidden DRAM)
;                       12-31   reserved (set to 0)
;
;       Out:    r1 = number of pages of the specified type of memory
;               r2 = page size (in bytes)
;               VS => r0 -> BadParameters error (type 0, type above 5, or reserved bits set)
;
;       Return the amount of the specified type of memory.
;
MemoryAmounts   ROUT
        Entry   "r3"

        BICS    lr, r0, #&FF            ; Get type of memory required (leave bits 12-31, non-zero => error).
        CMP     lr, #6:SHL:8
        ADDCC   pc, pc, lr, LSR #8-2    ; Valid type: jump to entry 'type' of the table below (pc reads as the first B).
        NOP                             ; Padding so that the table starts where pc reads; out of range types drop through to the error branch.
        B       %FT99                   ; Don't understand 0 (so the spec says).
        B       %FT10                   ; DRAM
        B       %FT20                   ; VRAM
        B       %FT30                   ; ROM
        B       %FT40                   ; I/O
        B       %FT50                   ; Soft ROM

10
        LDR     r1, =ZeroPage
        LDR     r3, [r1, #VideoSizeFlags]
        TST     r3, #OSAddRAM_IsVRAM
        MOVNE   r3, r3, LSR #12         ; Extract size from flags when genuine VRAM
        MOVEQ   r3, #0
        LDR     r1, [r1, #RAMLIMIT]
        SUB     r1, r1, r3              ; DRAM = RAMLIMIT - VRAMSize
        B       %FT98                   ; NOTE(review): skips the LSR #12 at 97, so RAMLIMIT is presumably held in pages - confirm.
20
        LDR     r1, =ZeroPage
        LDR     r1, [r1, #VideoSizeFlags]
        TST     r1, #OSAddRAM_IsVRAM
        MOVNE   r1, r1, LSR #12
        MOVNE   r1, r1, LSL #12         ; VRAM = VRAMSize
        MOVEQ   r1, #0
        B       %FT97
30
        Push    "r0, sb, ip"
        AddressHAL
        MOV     r0, #PhysInfo_HardROM
        SUB     sp, sp, #8              ; Two word block for the HAL to return the ROM address range in.
        MOV     r2, sp
        CallHAL HAL_PhysInfo
        LDMIA   sp!, {r0-r1}            ; r0 = bottom, r1 = top of ROM; also drops the temporary block.
        SUBS    r1, r1, r0
        ADDNE   r1, r1, #1              ; ROM = ROMPhysTop + 1 - ROMPhysBot
        Pull    "r0, sb, ip"
        B       %FT97
40
        LDR     r1, =ZeroPage
        LDR     r3, [r1, #IOAllocTop]
        LDR     r1, [r1, #IOAllocLimit]
        SUB     r1, r3, r1              ; IO = IO ceiling - IO floor
        B       %FT97
50
        Push    "r0"
        MOV     r0, #8
        SWI     XOS_ReadSysInfo         ; Are we softloaded?
        Pull    "r0"
        AND     r1, r1, r2
        ANDS    r1, r1, #1:SHL:4        ; Test OS-runs-from-RAM flag
        MOVNE   r1, #OSROM_ImageSize*1024 ; Soft ROM size in bytes (r1 is already 0 when not softloaded).
        B       %FT97
97
        MOV     r1, r1, LSR #12         ; Return as number of pages.
98
        MOV     r2, #4*1024             ; Return page size.
        CLRV
        EXIT
99
        PullEnv
        ; Fall through...
; Common "bad parameters" error return: r0 -> error block, V set, return via lr.
MemoryBadParameters
        ADRL    r0, ErrorBlock_BadParameters ; n.b. MemReturn handles internationalisation
        SETV
        MOV     pc, lr


;----------------------------------------------------------------------------------------
; MemoryIOSpace
;
;       In:     r0 = 9 (reason code with flag bits 8-31 clear)
;               r1 = controller ID
;                       bit     meaning
;                       0-7     controller sequence number
;                       8-31    controller type:
;                               0 = EASI card access speed control
;                               1 = EASI space(s)
;                               2 = VIDC1
;                               3 = VIDC20
;                               4 = S space (IOMD,podules,NICs,blah blah)
;                               5 = Extension ROM(s)
;                               6 = Tube ULA
;                               7-31 = Reserved (for us)
;                               32 = Primary ROM
;                               33 = IOMD
;                               34 = FDC37C665/SMC37C665/82C710/SuperIO/whatever
;                               35+ = Reserved (for ROL)
;
;       Out:    r1 = controller base address or 0 if not present
;
;       Return the location of the specified controller.
;

MemoryIOSpace   ROUT
        ; Thin wrapper around the HAL: r0 and r1 are passed through to
        ; HAL_ControllerAddress unchanged, and the HAL's r0 result becomes r1.
        Entry   "r0,r2,r3,sb,ip"
        AddressHAL
        CallHAL HAL_ControllerAddress
        CMP     r0, #-1                 ; HAL result of -1 is treated as failure
        MOVNE   r1, r0                  ; Otherwise hand the result back in r1
        PullEnv                         ; Restore the saved registers; the NE/EQ result of the CMP is still needed below
        MOVNE   pc, lr                  ; Success: return directly to the caller
        B       MemoryBadParameters     ; Failure: frame already unwound, so share the common error return

;----------------------------------------------------------------------------------------
; MemoryFreePoolLock - removed now that free pool is a PMP

;----------------------------------------------------------------------------------------
;PCImapping - reserved for Acorn use (PCI manager)
;
; See code on Ursula branch


;----------------------------------------------------------------------------------------
;RecommendPage
;
;       In:     r0 bits 0..7  = 12 (reason code 12)
;               r0 bit 8 = 1 if region must be DMAable
;               r0 bit 9 = 1 if r4-r7 provided
;               r0 bits 10..31 = 0 (reserved flags)
;               r1 = size of physically contiguous RAM region required (bytes)
;               r2 = log2 of required alignment of base of region (eg. 12 = 4k, 20 = 1M)
;               r4,r5 = lowest acceptable physical address (inclusive) (if bit 9 of r0 set)
;               r6,r7 = highest acceptable physical address (inclusive) (if bit 9 of r0 set)
;
;       Out:    r3 = page number of first page of recommended region that could be
;                    grown as specific pages by dynamic area handler (only guaranteed
;                    if grow is next page claiming operation)
;        - or error if not possible (eg too big, pages unavailable)
;
; Notes:
; * Default address range in r4-r7 is for the lower 4GB of physical space
; * The high address in r6,r7 is for the end of the memory block, not the start
;
RecommendPage ROUT
        Entry   "r0-r2,r4-r12"
        CMP     r2,#30
        BHI     RP_failed         ;refuse to look for alignments above 1G
        ANDS    r11,r0,#1:SHL:8   ;convert flag into something usable in the loop
        MOVNE   r11,#OSAddRAM_NoDMA
;
; Work out the acceptable physical range in page units:
;   r10 = lowest acceptable page (inclusive)
;   r12 = highest acceptable page + 1 (exclusive)
;
        TST     r0,#1:SHL:9       ;If no range specified, limit to lower 4GB
        MOVEQ   r10,#0
        MOVEQ   r12,#1:SHL:20
        BEQ     %FT10
        CMP     r5,#1:SHL:8
        BHS     RP_failed         ; LPAE/long descriptor format limits us to 40 bit physical addresses (although technically PhysRamTable can store 44 bit addresses)
        CMP     r7,#1:SHL:8       ; Clamp high address
        MOVCS   r7,#&FF
        MOVCS   r6,#-1
        LDR     lr,=4095
        ADD     r10,r4,lr         ; Round up low address
        MOV     r10,r10,LSR #12
        ORR     r10,r10,r5,LSL #20
        MOV     r12,r6,LSR #12    ; Round down high address
        ORR     r12,r12,r7,LSL #20
        ADD     r12,r12,#1        ; Make exclusive
10
;
        ADD     r1,r1,#&1000
        SUB     r1,r1,#1
        MOV     r1,r1,LSR #12     ;size rounded up to whole no. of pages
;
        SUBS    r2,r2,#12         ;log2 alignment, in terms of pages
        MOVLT   r2,#0             ;must be at least zero
        MOV     r0,#1
        MOV     r4,r0,LSL r2      ;required alignment, page units
;
; Convert the exclusive upper limit into the last page at which a block of r1
; pages may START. If the block is bigger than everything below the limit the
; subtraction would wrap, so give up now rather than search with a bogus limit.
;
        SUBS    r12,r12,r1
        BLO     RP_failed         ;requested size cannot fit below the upper limit
        MOV     r12,r12,LSR r2
        MOV     r12,r12,LSL r2    ; Last acceptable block start address (inclusive)
;
        LDR     r0,=ZeroPage+PhysRamTable
        MOV     r3,#0            ;page number, starts at 0
        LDR     r5,=ZeroPage+CamEntriesPointer
        LDR     r5,[r5]
        ADD     r5,r5,#CAM_PageFlags ; [r5,<page no.>,LSL #CAM_EntrySizeLog2] addresses flags word in CAM
        LDMIA   r0!,{r7,r8}      ;address,size of video chunk (skip this one)
;
RP_nextchunk
        ADD     r3,r3,r8,LSR #12 ;page no. of first page of next chunk
        LDMIA   r0!,{r7,r8}      ;address,size of next physical chunk
; R0 -> PhysRamTable
; R1 = Required length in pages
; R2 = Required log2 alignment-12
; R3 = current phys page no.
; R4 = Required alignment, page units
; R5 -> CAM
; R7,R8 = Current PhysRamTable entry
; R10 = Low address limit
; R11 = Flags
; R12 = High address limit (last acceptable start page, inclusive)
; R6,R9 = spare
        CMP     r8,#0
        BEQ     RP_failed        ;zero size entry terminates the table
        TST     r8,r11           ;ignore non-DMA regions if bit 8 of R0 was set
        BNE     RP_nextchunk
;
        MOV     r8,r8,LSR #12
        CMP     r7,r10
        ADDLO   r6,r10,r4
        ADDHS   r6,r7,r4
        MOV     r8,r8,LSL #12    ;r8 = chunk size with the flag bits stripped
        SUB     r6,r6,#1         ;round up
        MOV     r6,r6,LSR r2
        MOV     r6,r6,LSL r2     ;address of first page of acceptable alignment
;
; lr = number of further start positions available above r6. r12 is inclusive,
; so lr = 0 is still valid (the block fits exactly at the top of the range);
; only a borrow means the first candidate is already past the limit.
;
        SUBS    lr,r12,r6
        BLO     RP_nextchunk     ;exceeded upper address limit
        SUB     r6,r6,r7         ;adjustment to first address of acceptable alignment
        CMP     r6,r8,LSR #12
        BHS     RP_nextchunk     ;negligible chunk
        ADD     r7,r3,r6         ;first page number of acceptable alignment
        RSB     r9,r6,r8,LSR #12 ;remaining size of chunk
;
; The pages we may examine are limited both by the end of the chunk (r9) and by
; the end of the last acceptable block (lr + r1 pages from the first candidate).
; Take the smaller of the two: r9 must never grow, otherwise the scan below
; would run off the end of this chunk into pages that are not contiguous with it.
;
        ADD     lr,lr,r1         ;pages from first candidate to end of last acceptable block
        CMP     r9,lr
        MOVHI   r9,lr            ;clamp effective chunk length if we're going to hit the upper address limit
;
;find first available page
RP_nextpage
        CMP     r9,r1
        BLO     RP_nextchunk      ;not enough pages left in chunk
        LDR     r6,[r5,r7,LSL #CAM_EntrySizeLog2] ;page flags from CAM
        ;must not be marked Unavailable or Required
        TST     r6,#PageFlags_Unavailable :OR: PageFlags_Required
        TSTEQ   r6,#PageFlags_Reserved
        BEQ     RP_checkotherpages
        CMP     r9,r4
        BLS     RP_nextchunk
        ADD     r7,r7,r4           ;next page of suitable alignment
        SUB     r9,r9,r4
        B       RP_nextpage
;
RP_nextpagecontinue
        ; r7 = start page, r6 = page that failed
        ; No point checking any of r7...r6 again, so skip ahead past r6
        SUB     r6,r6,r7           ;number of pages to skip (minus 1)
        ADD     r6,r6,r4
        MOV     r6,r6,LSR r2
        MOV     r6,r6,LSL r2       ;number to skip, rounded up by alignment
        CMP     r9,r6
        BLS     RP_nextchunk
        ADD     r7,r7,r6           ;next page of suitable alignment
        SUB     r9,r9,r6
        B       RP_nextpage
;
; First page is usable; check the rest of the block, working down from the last
; page so that a failure lets RP_nextpagecontinue skip as far ahead as possible.
;
RP_checkotherpages
        ADD     r6,r7,r1
        SUB     r6,r6,#1          ;last page required
RP_checkotherpagesloop
        LDR     lr,[r5,r6,LSL #CAM_EntrySizeLog2] ;page flags from CAM
        TST     lr,#PageFlags_Unavailable :OR: PageFlags_Required
        TSTEQ   lr,#PageFlags_Reserved
        BNE     RP_nextpagecontinue
        SUB     r6,r6,#1
        CMP     r6,r7
        BHI     RP_checkotherpagesloop
;
;success!
;
        MOV     r3,r7
        Exit

RP_failed
        MOV     r3,#0
        ADR     r0,ErrorBlock_NoMemChunkAvailable
        SETV
        FRAMSTR r0                ;store the error pointer over the stacked r0 so it survives the exit
        Exit

        MakeErrorBlock NoMemChunkAvailable

;----------------------------------------------------------------------------------------
;MapIOpermanent - map IO space (if not already mapped) and return logical address
;
;       In:     r0 bits 0..7  = 13 (reason code 13)
;               r0 bit  8     = 1 to map bufferable space (0 is normal, non-bufferable)
;               r0 bit  9     = 1 to map cacheable space (0 is normal, non-cacheable)
;               r0 bits 10..12 = cache policy
;               r0 bits 13..15 = 0 (reserved flags)
;               r0 bit  16    = 1 to doubly map
;               r0 bit  17    = 1 if access privileges specified
;               r0 bits 18..23 = 0 (reserved flags)
;               r0 bits 24..27 = access privileges (if bit 17 set)
;               r0 bits 28..31 = 0 (reserved flags)
;               r1 = physical address of base of IO space required
;               r2 = size of IO space required (bytes)
;
;       Out:    r3 = logical address of base of IO space
;        - or error if not possible (no room)
;
MapIOpermanent ROUT
        ; 32-bit entry point: rearrange the arguments into the layout used by
        ; MapIO64permanent (size in r3, high word of the physical address in r2)
        ; and join the shared code at label 10.
        Entry   "r0-r2,r12"
        MOV     r3, r2                  ; size moves from r2 to r3
        MOV     r2, #0                  ; high word of physical address = 0
        B       %FT10

;----------------------------------------------------------------------------------------
;MapIO64permanent - map IO space (if not already mapped) from 64-bit physical space
;and return logical address
;
;       In:     r0 bits 0..7  = 21 (reason code 21)
;               r0 bit  8     = 1 to map bufferable space (0 is normal, non-bufferable)
;               r0 bit  9     = 1 to map cacheable space (0 is normal, non-cacheable)
;               r0 bits 10..12 = cache policy
;               r0 bits 13..15 = 0 (reserved flags)
;               r0 bit  16    = 1 to doubly map
;               r0 bit  17    = 1 if access privileges specified
;               r0 bits 18..23 = 0 (reserved flags)
;               r0 bits 24..27 = access privileges (if bit 17 set)
;               r0 bits 28..31 = 0 (reserved flags)
;               r1,r2 = physical address of base of IO space required
;               r3 = size of IO space required (bytes)
;
;       Out:    r3 = logical address of base of IO space
;        - or error if not possible (no room)
;
MapIO64permanent
        ; NOTE(review): ALTENTRY presumably sets up the same stack frame as the
        ; Entry in MapIOpermanent, since both paths share the PullEnv below - confirm.
        ALTENTRY
10      ; Convert the input flags to some DA flags
        ; r12 accumulates the dynamic-area style flags: access privilege first...
        TST     r0, #1:SHL:17
        MOVEQ   r12, #2                 ; Default AP: SVC RW, USR none
        MOVNE   r12, r0, LSR #24        ; Else use given AP
        ANDNE   r12, r12, #DynAreaFlags_APBits
        ; ...then bufferable/cacheable. The caller's bits 8/9 mean "is bufferable /
        ; is cacheable" whereas the DA flags mean "NOT bufferable / NOT cacheable",
        ; hence the inversion before shifting down into bits 4/5.
        AND     lr, r0, #&300
        EOR     lr, lr, #&300
        ASSERT  DynAreaFlags_NotBufferable = 1:SHL:4
        ASSERT  DynAreaFlags_NotCacheable = 1:SHL:5
        ORR     r12, r12, lr, LSR #4
        ; ...and the cache policy, moved from bits 10-12 up to bits 12-14.
        AND     lr, r0, #7:SHL:10
        ASSERT  DynAreaFlags_CPBits = 7:SHL:12
        ORR     r12, r12, lr, LSL #2
        ; Calculate the extra flags needed for RISCOS_MapInIO
        AND     r0, r0, #1:SHL:16       ; keep only the "doubly map" request...
        ASSERT  MapInFlag_DoublyMapped = 1:SHL:20
        MOV     r0, r0, LSL #4          ; ...and move it to the MapInFlag_DoublyMapped position
        ; Call RISCOS_MapInIO (via pagetable-specific helper)
        PTOp    MapIO_Helper
        MOV     r3, r0                  ; result (logical address, 0 on failure) is returned in r3
        PullEnv                         ; restore the stacked registers without returning
        CMP     r3, #0              ;MOV,CMP rather than MOVS to be sure to clear V
        ADREQ   r0, ErrorBlock_NoRoomForIO ; null result => return an error instead
        SETV    EQ
        MOV     pc, lr

        MakeErrorBlock NoRoomForIO

;----------------------------------------------------------------------------------------
;AccessPhysAddr - claim temporary access to given physical address (in fact,
;                 controls access to the 1Mb aligned space containing the address)
;                 The access remains until the next AccessPhysAddr or until a
;                 ReleasePhysAddr (although interrupts or subroutines may temporarily
;                 make their own claims, but restore on Release before returning)
;
;       In:     r0 bits 0..7  = 14 (reason code 14)
;               r0 bit  8     = 1 to map bufferable space, 0 for unbufferable
;               r0 bits 9..31 = 0 (reserved flags)
;               r1 = physical address
;
;       Out:    r2 = logical address corresponding to phys address r1
;               r3 = old state (for ReleasePhysAddr)
;
; Use of multiple accesses: it is fine to make several Access calls, and
; clean up with a single Release at the end. In this case, it is the old state
; (r3) of the *first* Access call that should be passed to Release in order to
; restore the state before any of your accesses. (The r3 values of the other
; access calls can be ignored.)
;
AccessPhysAddr ROUT
        ; 32-bit entry point: supply a zero high word for the physical address
        ; and join the 64-bit code at label 10. Both entry points stack the same
        ; register list, so the unwinding below is shared.
        Push    "r0-r3,r12,lr"
        MOV     r2, #0
        B       %FT10

;----------------------------------------------------------------------------------------
;AccessPhysAddr64 - claim temporary access to given 64-bit physical address (in fact,
;                 controls access to the 1-16Mb aligned space containing the address)
;                 The access remains until the next AccessPhysAddr or until a
;                 ReleasePhysAddr (although interrupts or subroutines may temporarily
;                 make their own claims, but restore on Release before returning)
;
;       In:     r0 bits 0..7  = 22 (reason code 22)
;               r0 bit  8     = 1 to map bufferable space, 0 for unbufferable
;               r0 bits 9..31 = 0 (reserved flags)
;               r1,r2 = physical address
;
;       Out:    r2 = logical address corresponding to phys address r1,r2
;               r3 = old state (for ReleasePhysAddr)
;
; Use of multiple accesses: it is fine to make several Access calls, and
; clean up with a single Release at the end. In this case, it is the old state
; (r3) of the *first* Access call that should be passed to Release in order to
; restore the state before any of your accesses. (The r3 values of the other
; access calls can be ignored.)
;
AccessPhysAddr64
        Push    "r0-r3,r12,lr"
10      TST     r0, #&100           ;test bufferable bit
        LDR     r0, =OSAP_None+DynAreaFlags_NotCacheable ; LDR leaves the flags from the TST intact
        ORREQ   r0, r0, #DynAreaFlags_NotBufferable ; bit 8 clear => also not bufferable
        SUB     sp, sp, #4          ; word for old state
        MOV     r3, sp              ; pointer to word
        ; Stack is now (ascending): old state, r0, r1, r2, r3, r12, lr
        PTOp    AccessPhysicalAddress
        MOVS    r2, r0              ; null pointer means invalid physical address
        LDMIB   sp, {r0,r1}         ; recover the caller's r0,r1 (LDM does not disturb the flags)
        BEQ     %FT90
        LDR     r3, [sp], #5*4      ; load old state, and skip stacked r0-r3
        Pull    "r12,pc"

        ; Failure: return the error in r0 with V set. r1-r3 are restored to the
        ; caller's values, so the old-state word and stacked r0 are simply dropped.
90      ADRL    r0, ErrorBlock_CantGetPhysMem
        SETV
        ADD     sp, sp, #2*4        ; discard old-state word and stacked r0
        Pull    "r1-r3,r12,pc"

;----------------------------------------------------------------------------------------
;ReleasePhysAddr - release temporary access that was claimed by AccessPhysAddr
;
;       In:     r0 bits 0..7  = 15 (reason code 15)
;               r0 bits 8..31 = 0 (reserved flags)
;               r1 = old state to restore
;
ReleasePhysAddr
        ; Hands the saved state (r1) to the pagetable-specific release routine in
        ; r0. All of r0-r3 and r12 are restored from the stack on exit, so nothing
        ; the routine leaves in those registers is passed back to the caller.
        Push    "r0-r3,r12,lr"
        MOV     r0, r1              ; old state to restore
        PTOp    ReleasePhysicalAddress
        Pull    "r0-r3,r12,pc"

        LTORG

;----------------------------------------------------------------------------------------
;
;        In:    r0 = flags
;                       bit     meaning
;                       0-7     16 (reason code)
;                       8-15    1=cursor/system/sound
;                               2=IRQ stack
;                               3=SVC stack
;                               4=ABT stack
;                               5=UND stack
;                               6=Soft CAM
;                               7=Level 1 page tables
;                               8=Level 2 page tables
;                               9=HAL workspace
;                               10=Kernel buffers
;                               11=HAL uncacheable workspace
;                               12=Kernel 'ZeroPage' workspace
;                               13=Processor vectors
;                               14=DebuggerSpace
;                               15=Scratch space
;                               16=Compatibility page
;                               17=RW data section
;                       16-31   reserved (set to 0)
;
;       Out:    r1 = base of area
;               r2 = address space allocated for area (whole number of pages)
;               r3 = actual memory used by area (whole number of pages)
;               all values 0 if not present, or incorporated into another area
;
;       Return size of various low-level memory regions
MemoryAreaInfo ROUT
        Entry   "r0"
        ; Default result is "not present": handlers that are conditionally
        ; assembled out just EXIT and return these zeros.
        MOV     r1, #0
        MOV     r2, #0
        MOV     r3, #0
        ; Dispatch on the area number in bits 8-15 of r0. Bits 16-31 are not
        ; examined here.
        MOV     lr, r0, LSR #8
        AND     lr, lr, #&FF
        CMP     lr, #(MAI_TableEnd - MAI_TableStart)/4
        ADDLO   pc, pc, lr, LSL #2      ; pc reads as this instruction + 8 = MAI_TableStart
        B       %FT70                   ; area number beyond the table => bad parameters
MAI_TableStart
        B       %FT70                   ; 0 is not a valid area
        B       MAI_CursSysSound
        B       MAI_IRQStk
        B       MAI_SVCStk
        B       MAI_ABTStk
        B       MAI_UNDStk
        B       MAI_SoftCAM
        B       MAI_L1PT
        B       MAI_L2PT
        B       MAI_HALWs
        B       MAI_Kbuffs
        B       MAI_HALWsNCNB
        B       MAI_ZeroPage
        B       MAI_ProcVecs
        B       MAI_DebuggerSpace
        B       MAI_ScratchSpace
        B       MAI_CompatibilityPage
        B       MAI_RWData
MAI_TableEnd

        ; Unknown area: unwind our frame and use the shared error return.
70
        PullEnv
        B       MemoryBadParameters

        ; Each handler below returns r1 = base, r2 = address space allocated,
        ; r3 = memory actually used. Several use r0 as scratch; it is restored
        ; from the frame on EXIT.

MAI_CursSysSound
        LDR     r1, =CursorChunkAddress
        MOV     r2, #32*1024
        MOV     r3, r2
        EXIT

MAI_IRQStk
        ; Only reported when the IRQ stack lies outside the 32K cursor chunk
        ; reported above; otherwise the zeros set on entry are returned.
 [ IRQSTK < CursorChunkAddress :LOR: IRQSTK > CursorChunkAddress+32*1024
        LDR     r1, =IRQStackAddress
        MOV     r2, #IRQSTK-IRQStackAddress
        MOV     r3, r2
 ]
        EXIT

MAI_SVCStk
        LDR     r1, =SVCStackAddress
        MOV     r2, #SVCSTK-SVCStackAddress
        MOV     r3, r2
        EXIT

MAI_ABTStk
        LDR     r1, =ABTStackAddress
        MOV     r2, #ABTSTK-ABTStackAddress
        MOV     r3, r2
        EXIT

MAI_UNDStk
        ; Base is UNDSTK rounded down to a 1MB boundary; size is its offset
        ; within that megabyte.
        LDR     r1, =UNDSTK :AND: &FFF00000
        LDR     r2, =UNDSTK :AND: &000FFFFF
        MOV     r3, r2
        EXIT

MAI_SoftCAM
        ; Location and size are read from kernel workspace at run time.
        LDR     r0, =ZeroPage
        LDR     r1, [r0, #CamEntriesPointer]
        LDR     r2, [r0, #SoftCamMapSize]
        MOV     r3, r2
        EXIT

MAI_L1PT
        ; Three build variants: both page table formats supported (choose at run
        ; time using the flags left by PTWhich), long descriptor only, or short
        ; descriptor only.
        ; NOTE(review): the NE = long descriptor / EQ = short descriptor reading of
        ; PTWhich is inferred from the symbols loaded on each condition - confirm.
      [ LongDesc :LAND: ShortDesc
        PTWhich r1
        ; Combined L1PT & L2PT
        ASSERT  LL1PT = LL2PT+16*1024
        LDRNE   r1, =LL2PT
        MOVNE   r2, #16*1024+4096
        LDREQ   r1, =L1PT
        MOVEQ   r2, #16*1024
      ELIF LongDesc
        ; Combined L1PT & L2PT
        ASSERT  LL1PT = LL2PT+16*1024
        LDR     r1, =LL2PT
        MOV     r2, #16*1024+4096
      |
        LDR     r1, =L1PT
        MOV     r2, #16*1024
      ]
        MOV     r3, r2
        EXIT

MAI_L2PT
        ; Same three build variants as MAI_L1PT. The amount in use is read from
        ; workspace (LxPTUsed) rather than being a build-time constant.
        LDR     r0, =ZeroPage
      [ LongDesc :LAND: ShortDesc
        PTWhich r1
        ; Actually L3PT
        LDRNE   r1, =LL3PT
        MOVNE   r2, #8*1024*1024
        LDREQ   r1, =L2PT
        MOVEQ   r2, #4*1024*1024
      ELIF LongDesc
        ; Actually L3PT
        LDR     r1, =LL3PT
        MOV     r2, #8*1024*1024
      |
        LDR     r1, =L2PT
        MOV     r2, #4*1024*1024
      ]
        LDR     r3, [r0, #LxPTUsed]
        EXIT

MAI_HALWs
        LDR     r0, =ZeroPage
        LDR     r1, =HALWorkspace
        MOV     r2, #HALWorkspaceSize
        LDR     r3, [r0, #HAL_WsSize]   ; amount in use is read from workspace
        EXIT

MAI_HALWsNCNB
        ; Reported as 32K in use if the HAL descriptor has the NCNB workspace
        ; flag set, otherwise as 0 in use.
        LDR     r0, =ZeroPage
        LDR     r1, =HALWorkspaceNCNB
        MOV     r2, #32*1024
        LDR     r3, [r0, #HAL_Descriptor]
        LDR     r3, [r3, #HALDesc_Flags]
        ANDS    r3, r3, #HALFlag_NCNBWorkspace ; r3 = 0 if the flag is clear
        MOVNE   r3, r2
        EXIT

MAI_Kbuffs
        LDR     r1, =KbuffsBaseAddress
        MOV     r2, #KbuffsMaxSize
        LDR     r3, =(KbuffsSize + &FFF) :AND: :NOT: &FFF ; used size rounded up to a whole page
        EXIT

MAI_ZeroPage
        LDR     r1, =ZeroPage
        MOV     r2, #16*1024
        MOV     r3, #16*1024
        EXIT

MAI_ProcVecs
        ; Only reported separately when the processor vectors are not at the
        ; same address as ZeroPage.
      [ ZeroPage != ProcVecs
        LDR     r1, =ProcVecs
        MOV     r2, #4096
        MOV     r3, #4096
      ]
        EXIT

MAI_DebuggerSpace
        ; Only report if DebuggerSpace is a standalone page. The debugger module
        ; finds DebuggerSpace via OS_ReadSysInfo 6, this call is only for the
        ; benefit of the task manager.
      [ DebuggerSpace_Size >= &1000
        LDR     r1, =DebuggerSpace
        MOV     r2, #DebuggerSpace_Size
        MOV     r3, #DebuggerSpace_Size
      ]
        EXIT

MAI_ScratchSpace
        LDR     r1, =ScratchSpace
        MOV     r2, #16*1024
        MOV     r3, #16*1024
        EXIT

MAI_CompatibilityPage
        ; When built in: base 0, one page of address space, and one page in use
        ; only if the run-time CompatibilityPageEnabled byte is non-zero.
      [ CompatibilityPage
        MOV     r1, #0
        MOV     r2, #4096
        LDR     r0, =ZeroPage
        LDRB    r3, [r0,#CompatibilityPageEnabled]
        CMP     r3, #0
        MOVNE   r3, #4096
      ]
        EXIT

MAI_RWData
        ; Size = (ZI limit - RW base) rounded up to a whole page. The +&FFF is
        ; folded into the literal, and the two BICs together clear the low 12 bits.
        LDR     r1, =|Image$$RW$$Base|
        LDR     r2, =|Image$$ZI$$Limit|+&FFF
        SUB     r2, r2, r1
        BIC     r2, r2, #&FF
        BIC     r2, r2, #&F00
        MOV     r3, r2
        EXIT

;----------------------------------------------------------------------------------------
;
;        In:    r0 = flags
;                       bit     meaning
;                       0-7     17 (reason code)
;                       8-31    reserved (set to 0)
;               r1 = AP number to start search from (0 to start enumeration)
;                    increment by 1 on each call to enumerate all values
;
;       Out:    r1 = AP number (-1 if end of list reached)
;               r2 = Permissions:
;               bit 0: executable in user mode
;               bit 1: writable in user mode
;               bit 2: readable in user mode
;               bit 3: executable in privileged modes
;               bit 4: writable in privileged modes
;               bit 5: readable in privileged modes
;               bits 6+: reserved
;
;       Returns permission information for a given AP / enumerates all AP
        EXPORT  MemoryAccessPrivileges
MemoryAccessPrivileges ROUT
        ; Reject any flag bits: r0 must be exactly the reason code. This is done
        ; before Entry so the shared error return can be used with no frame to unwind.
        CMP     r0, #17
        BNE     MemoryBadParameters
        Entry   "r3-r4"
        LDR     r3, =ZeroPage
        MOV     lr, r1                  ; lr = countdown of entries still to skip
        LDR     r3, [r3, #MMU_PPLAccess] ; r3 -> table of access words, terminated by -1
        ; Currently we don't have any gaps in the table, so we can just index the r1'th element (being careful to not go past the table end)
10
        LDR     r4, [r3], #4            ; fetch next entry and advance
        CMP     r4, #-1
        BEQ     %FT98                   ; hit the terminator before reaching entry r1
        SUBS    lr, lr, #1
        BGE     %BT10                   ; not there yet
        BL      PPL_CMA_to_RWX          ; r4 = entry r1; convert to RWX flags in r2 (r1 is returned unchanged)
        EXIT
98
        ; End of list: r1 = -1, no permissions.
        MOV     r1, #-1
        MOV     r2, #0
        EXIT

; In: r4 = CMA-style AP/PPL access flags (from MMU_PPLAccess)
; Out: r2 = RWX-style AP/PPL access flags (for OS_Memory 17/18)
;
; Each of the six permission bits is isolated from r4 and shifted down to its
; position in the RWX layout; the ASSERTs pin the bit positions that the shift
; amounts depend on. lr is used as scratch (it is saved by Entry). Only r2 is
; changed, and no flag-setting instructions are used.
PPL_CMA_to_RWX ROUT
        Entry
        ; User read: bit 4 -> bit 2
        AND     r2, r4, #CMA_Partially_UserR
        ASSERT  CMA_Partially_UserR = 1<<4
        ASSERT  MemPermission_UserR = 1<<2
        MOV     r2, r2, LSR #4-2
        ; User write: bit 5 -> bit 1
        AND     lr, r4, #CMA_Partially_UserW
        ASSERT  CMA_Partially_UserW = 1<<5
        ASSERT  MemPermission_UserW = 1<<1
        ORR     r2, r2, lr, LSR #5-1
        ; User execute: bit 14 -> bit 0
        AND     lr, r4, #CMA_Partially_UserXN ; (internally, XN flags are stored inverted)
        ASSERT  CMA_Partially_UserXN = 1<<14
        ASSERT  MemPermission_UserX = 1<<0
        ORR     r2, r2, lr, LSR #14-0
        ; Privileged read: bit 6 -> bit 5
        AND     lr, r4, #CMA_Partially_PrivR
        ASSERT  CMA_Partially_PrivR = 1<<6
        ASSERT  MemPermission_PrivR = 1<<5
        ORR     r2, r2, lr, LSR #6-5
        ; Privileged write: bit 7 -> bit 4
        AND     lr, r4, #CMA_Partially_PrivW
        ASSERT  CMA_Partially_PrivW = 1<<7
        ASSERT  MemPermission_PrivW = 1<<4
        ORR     r2, r2, lr, LSR #7-4
        ; Privileged execute: bit 15 -> bit 3
        AND     lr, r4, #CMA_Partially_PrivXN
        ASSERT  CMA_Partially_PrivXN = 1<<15
        ASSERT  MemPermission_PrivX = 1<<3
        ORR     r2, r2, lr, LSR #15-3
        EXIT

;----------------------------------------------------------------------------------------
;
;        In:    r0 = flags
;                       bit     meaning
;                       0-7     18 (reason code)
;                       8-31    reserved (set to 0)
;               r1 = Permission flag values (as per OS_Memory 17)
;               r2 = Permission flag mask
;
;       Out:    r0 = AP number that gives closest permissions
;               r2 = Permission flags of that AP (== r1 if exact match)
;               Error if no suitable AP found
;
;       Searches for an AP where ((permissions AND r2) == r1), and which
;       grants the least extra permissions
;
;       Extra permissions are weighted as follows (least acceptable first):
;       * User write
;       * User execute
;       * User read
;       * Privileged write
;       * Privileged execute
;       * Privileged read
FindAccessPrivilege ROUT
        ; Parameter validation is done before Entry so that the shared error
        ; return can be used without a stack frame to unwind. Each BICEQS only
        ; executes while everything so far has been acceptable.
        CMP     r0, #18 ; No extra flags in r0
        BICEQS  r0, r1, r2 ; r1 must be a subset of r2
        BICEQS  r0, r2, #63 ; Only 6 known permission flags
        BNE     MemoryBadParameters
        ; n.b. r0 is now 0
        ; From here on r0 is the index (AP number) of the table entry being examined.
        Entry   "r3-r11"
        LDR     r3, =ZeroPage
        MOV     r5, r1                  ; r5 = required flag values
        LDR     r3, [r3, #MMU_PPLAccess] ; r3 -> table of access words, terminated by -1
        MOV     r6, r2                  ; r6 = flag mask
        MOV     r7, #-1 ; Best AP
        MOV     r8, #0 ; Best flags
        MOV     r9, #-1 ; Best difference
        ; Magic constants for weighting the difference
        ;
        ; r10 replicates the 6 permission bits at bit offsets 0,6,12,18,24,30.
        ; The offsets are exactly 6 apart so the copies never overlap and the
        ; multiply cannot generate carries between them. r11 then picks one bit
        ; out of each copy, so that the least acceptable extra permission lands in
        ; the most significant position:
        ;   UserW -> bit 31, UserX -> bit 24, UserR -> bit 20,
        ;   PrivW -> bit 16, PrivX -> bit 9,  PrivR -> bit 5
        ; An unsigned compare of two such values therefore ranks candidates by
        ; the weighting described in the routine header.
        LDR     r10, =(1<<0)+(1<<6)+(1<<12)+(1<<18)+(1<<24)+(1<<30)
        LDR     r11, =(MemPermission_PrivR<<0)+(MemPermission_PrivX<<6)+(MemPermission_PrivW<<12)+(MemPermission_UserR<<18)+(MemPermission_UserX<<24)+(MemPermission_UserW<<30)
10
        LDR     r4, [r3], #4
        CMP     r4, #-1
        BEQ     %FT50                   ; end of table
        BL      PPL_CMA_to_RWX ; -> r2 = flags
        ; Check it satisfies the mask
        AND     lr, r2, r6
        CMP     lr, r5
        BNE     %FT40
        ; Calculate diff
        BIC     lr, r2, r6  ; Permissions granted beyond those covered by the mask
        MUL     lr, r10, lr ; Replicate the six bits six times
        ANDS    lr, r11, lr ; Select just the bits that we care about
        BEQ     %FT80       ; Exact match found (no extra permissions): r0/r2 already hold the answer
        CMP     lr, r9
        MOVLO   r7, r0      ; Remember new result if better
        MOVLO   r8, r2
        MOVLO   r9, lr
40
        ADD     r0, r0, #1
        B       %BT10
50
        ; Table exhausted without an exact match: return the best candidate, if any.
        MOVS    r0, r7
        BMI     %FT90                   ; r7 still -1 => nothing satisfied the mask
        MOV     r2, r8
80
        CLRV
        EXIT

90
        MOV     r2, r6 ; Restore original r2
        ADR     r0, ErrorBlock_AccessPrivilegeNotFound
        SETV
        EXIT

        MakeErrorBlock AccessPrivilegeNotFound

;----------------------------------------------------------------------------------------
;
;        In:    r0 = flags
;                       bit     meaning
;                       0-7     19 (reason code)
;                       8       Input function provides physical addresses
;                       9       DMA is writing to RAM
;                       10      DMA is complete, perform any post-op cache maintenance
;                       11      Physical addresses are 64bit
;                       12-31   reserved (set to 0)
;               r1 = R12 value to provide to called functions
;               r2 = Initial R9 value to provide to input function
;               r3 -> Input function
;               r4 = Initial R9 value to provide to output function
;               r5 -> Output function (if bit 10 of R0 clear)
;
;       Out:    r2, r4 updated to match values returned by input/output calls
;               All other regs preserved
;
; Input function, 32bit version:
;  in:  r9 = r2 from SWI / value from previous call
;       r12 = r1 from SWI
;  out: r0 = start address of region
;       r1 = length of region (0 if end of transfer)
;       r2 = flags:
;            bit 0: Bounce buffer will be used
;       r9 = new r9 for next input call
;       r12 corrupt
;
; Output function, 32bit version:
;   in: r0 = logical address of start of region
;       r1 = physical address of start of region
;       r2 = length of region
;       r3 = flags:
;            bit 0: Bounce buffer must be used
;       r9 = r4 from SWI / value from previous call
;       r12 = r1 from SWI
;   out: r9 = new r9 value for next output call
;        r0-r3, r12 corrupt
;
; Input function, 64bit version:
;  in:  r9 = r2 from SWI / value from previous call
;       r12 = r1 from SWI
;  out: r0,r1 = start address of region
;       r2 = flags:
;            bit 0: Bounce buffer will be used
;       r3 = length of region (0 if end of transfer)
;       r9 = new r9 for next input call
;       r12 corrupt
;
; Output function, 64bit version:
;   in: r0 = logical address of start of region
;       r1,r2 = physical address of start of region
;       r3 = flags:
;            bit 0: Bounce buffer must be used
;       r4 = length of region
;       r9 = r4 from SWI / value from previous call
;       r12 = r1 from SWI
;   out: r9 = new r9 value for next output call
;        r0-r4, r12 corrupt
;
; Performs address translation and cache maintenance necessary to allow for DMA
; to be performed to/from cacheable memory.
;
; To allow Service_PagesUnsafe to be dealt with in a straightforward manner, we
; have to be careful not to cache the results of any address translations over
; calls to the input/output functions. E.g. if the output function tries to
; allocate from PCI RAM, that may trigger claiming of a specific page by the
; PCI DA, potentially invalidating any existing logical -> physical translation.
; This restriction hampers the routine's ability to merge together input and
; output blocks, and to perform minimal cache maintenance. However for typical
; scatter lists of low to medium complexity it should still produce acceptable
; output.
;
; Note that if the input function provides physical addresses, the caller must
; take care to abort the entire operation if one of the physical pages involved
; in the request becomes claimed by someone else while the OS_Memory call is in
; progress. This is because we have no sensible way of dealing with this case
; ourselves (even if we didn't attempt to call the input function multiple times
; and merge together the blocks, we'd still have to buffer things internally to
; deal with when blocks need splitting for cache alignment)
;
; Internally, blocks are stored in the following format:
;
; Word 0 = Start logical address (incl.)
; Word 1 = Logical -> physical address offset (low bits) + flags (high bits)
; Word 2 = End logical address (excl.)
;
; This minimises the number of registers needed to hold a block, and simplifies
; the merge calculation (blocks can be merged if words 2 + 1 of first block
; match words 0 + 1 of second block).
;
; Note: InChunk uses a slightly different format, which essentially assumes a
; flat 1:1 logical to physical mapping. I.e. start & end addresses are in
; whatever unit the input function provided, and only the upper 8 bits of the
; log -> phys offset are used (storing the high bits of large phys addresses)

; Workspace struct that's stored on the stack
; The first DMAPrepW_Size bytes are the local frame reserved by DMAPrep's
; Entry; the fields after DMAPrepW_Size overlay the registers stacked by that
; same Entry (r0 upwards), so they are addressed relative to sp like the rest.
                    ^ 0
DMAPrepW_InHold     # 12 ; Input block that couldn't be merged into InChunk (3-word block)
DMAPrepW_InChunk    # 12 ; Input block currently being processed (3-word block)
DMAPrepW_PhyChunk   # 12 ; Translated block currently being built (3-word block)
DMAPrepW_CacheMask  # 4 ; Cache line length - 1
DMAPrepW_ARMop      # 4 ; Cache maintenance ARMop to use
DMAPrepW_CamEntriesPointer # 4 ; CamEntriesPointer copy
DMAPrepW_MaxCamEntry # 4 ; MaxCamEntry copy
DMAPrepW_Size       # 0 ; Size of the local frame (everything above this line)
                        ; These next few correspond directly to the input registers in the stack frame
DMAPrepW_Flags      # 4 ; Stacked r0
DMAPrepW_R12        # 4 ; Stacked r1 (passed to input/output functions in r12)
DMAPrepW_InR9       # 4 ; Stacked r2 (r9 value for input function, updated after each call)
DMAPrepW_InFunc     # 4 ; Stacked r3 (input function address)
DMAPrepW_OutR9      # 4 ; Stacked r4 (r9 value for output function, updated after each call)
DMAPrepW_OutFunc    # 4 ; Stacked r5 (output function address)

DMAPrep_FlagOffset * 28 ; We need 28 address bits for 40 bit physical addresses (dropping the lower 12 bits which provide the page offset)
DMAPrep_NonCacheable * 1:SHL:29 ; Internal flag used for tracking non-cacheable pages

; DMAPrep
;
; Main loop of the DMA preparation operation. Repeatedly pulls regions from
; the input function, translates them page by page, merges contiguous pages,
; performs cache maintenance and hands the resulting blocks to the output
; function.
;
;   In: r0 = flags (values >= 1<<12 are rejected via MemoryBadParameters)
;       r1-r5 = stacked and accessed as DMAPrepW_R12, DMAPrepW_InR9,
;               DMAPrepW_InFunc, DMAPrepW_OutR9, DMAPrepW_OutFunc
;  Out: V set, r0 -> error block on failure
;       r10-r12 are not saved by the Entry frame
;
; Register conventions inside the main loop:
;       r5-r7 = block being processed (start, offset + flags, end)
;       r10   = cache line mask (reloaded from workspace where needed)
;
DMAPrep ROUT
        CMP     r0, #1<<12
        BHS     MemoryBadParameters
        ; The end of a read from RAM is a no-op (no cache maintenance required)
        AND     r11, r0, #DMAPrep_Write :OR: DMAPrep_End
        TEQ     r11, #DMAPrep_End
        MOVEQ   pc, lr
        Entry   "r0-r9", DMAPrepW_Size
        ; Determine the cache maintenance function we need to use
        CMP     r11, #DMAPrep_Write
        LDR     r10, =ZeroPage
        ASSERT  DMAPrep_End > DMAPrep_Write
        LDRLE   r11, [r10, #Proc_Cache_CleanRange] ; Start of DMA (read or write)
        LDRGT   r11, [r10, #Proc_Cache_InvalidateRange] ; End of DMA write
        STR     r11, [sp, #DMAPrepW_ARMop]
        ; Get the params needed for address translation
        LDR     r6, [r10, #CamEntriesPointer]
        LDR     r7, [r10, #MaxCamEntry]
        ; Init workspace
        STR     r6, [sp, #DMAPrepW_CamEntriesPointer]
        STR     r7, [sp, #DMAPrepW_MaxCamEntry]
        ; Get the cache line mask value
      [ MEMM_Type == "ARM600"
        LDRB    r1, [r10, #DCache_LineLen]
      |
        ; Yuck, need to probe for the last cache level
        MOV     r5, #Cache_Lx_MaxLevel-1
01
        MOV     r1, r5
        ARMop   Cache_Examine,,,r10
        CMP     r1, #0
        SUBEQ   r5, r5, #1              ; Level reported r1 = 0, try the next level down
        BEQ     %BT01
        CMP     r3, r1
        MOVHI   r1, r3                  ; Use the larger of the two values returned in r1 and r3
      ]
        SUB     r1, r1, #1
        STR     r1, [sp, #DMAPrepW_CacheMask]
        ; Get initial input region
        BL      DMAPrep_CallInputFunc
        CMP     r0, r3
        BEQ     %FT90                   ; Zero-length first region -> nothing to do
05
        ; On entry here: r0, r2, r3 = input block, lr -> InChunk, flags set
        ; from CMP r0, r3 (and r0 <> r3).
        ; r0 > r3 implies the input crosses a 4G barrier. Barriers are annoying
        ; for us to deal with using this 3-word chunk format, so split things
        ; up.
        STMLOIA lr, {r0, r2, r3}
        BLO     %FT10
        MOV     r4, #0
        STMIA   lr, {r0, r2, r4}        ; First part
        ; r3 (the wrapped end address) tells us whether anything lies beyond
        ; the barrier. r0 is never zero on this path (r0 > r3), so it must
        ; not be used for this test: doing so would turn a region ending
        ; exactly on the barrier into an empty InHold block, which the main
        ; loop treats as end of input.
        CMP     r3, #0
        ADDNE   r2, r2, #1:SHL:20       ; Second part lives in the next 4G of address space
        MOVNE   r0, #0                  ; Second part
        BLEQ    DMAPrep_CallInputFunc   ; Or, (non-merged) next chunk if we ended on a 4G barrier
        B       %FT19
10
        ; Get another input region, see if we can merge it with InChunk
        BL      DMAPrep_CallInputFunc
        CMP     r0, r3
        BHS     %FT19                   ; Zero-length (end of input), or 4G crossing
        LDMIB   lr, {r4, r5}            ; r4 = InChunk offset/flags word, r5 = InChunk end
        CMP     r4, r2
        CMPEQ   r5, r0
        STREQ   r3, [lr, #8]            ; Contiguous with matching flags -> extend InChunk
        BEQ     %BT10
19
        ; Can't merge this region, store it in InHold
        ASSERT  DMAPrepW_InHold = DMAPrepW_InChunk-12
        STMDB   lr, {r0, r2, r3}
20
        ; Perform address translation for the start of InChunk
        LDR     r5, [sp, #DMAPrepW_InChunk]
        BL      DMAPrep_Translate
        ; Store in PhyChunk
        ADD     lr, sp, #DMAPrepW_PhyChunk
        STMIA   lr, {r5-r7}
        ; Align start to cache boundary
        TST     r6, #DMAPrep_NonCacheable+(DMAPrep_UseBounceBuffer :SHL: DMAPrep_FlagOffset)
        BNE     %FT25
        LDR     lr, [sp, #DMAPrepW_Flags]
        LDR     r10, [sp, #DMAPrepW_CacheMask]
        TST     lr, #DMAPrep_Write
        TSTNE   r5, r10
        BEQ     %FT25
        ; Unaligned write to cacheable memory -> bounce required
        ADD     r1, r5, r10
        BIC     r1, r1, r10 ; End of current cache line
        ; Only round down to end of current cache line if the end of the chunk
        ; is at or beyond the end of the next cache line
        ADD     r2, r1, r10 ; Last byte we can accept without needing to truncate
        CMP     r7, r2
        MOVHI   r7, r1 ; Truncate! N.B. this compare may break if we map memory at &FFFFF000
        ORR     r6, r6, #DMAPrep_UseBounceBuffer :SHL: DMAPrep_FlagOffset
        B       %FT40
25
        ; Start doesn't need splitting, so translate + append more pages
        ADD     lr, sp, #DMAPrepW_InChunk
        ASSERT  DMAPrepW_PhyChunk = DMAPrepW_InChunk + 12
        LDMIA   lr, {r0-r2, r5-r7}      ; r0-r2 = InChunk, r5-r7 = PhyChunk
        SUB     r3, r7, r5 ; Length of translated region
        SUB     r2, r2, r0 ; Length of input region
        CMP     r3, r2
        BEQ     %FT30                   ; Whole of InChunk has been translated
        ADD     r5, r0, r3 ; Translate next address in input address space
        BL      DMAPrep_Translate
        ; Try and merge with PhyChunk
        ADD     lr, sp, #DMAPrepW_PhyChunk
        LDMIB   lr, {r0, r1}            ; r0 = PhyChunk offset/flags word, r1 = PhyChunk end
        CMP     r0, r6
        CMPEQ   r1, r5
        STREQ   r7, [sp, #DMAPrepW_PhyChunk + 8]
        BEQ     %BT25
        LDMIA   lr, {r5-r7}             ; No merge; go back to the block held in PhyChunk
30
        ; Can't merge any more pages into this chunk {r5-r7}
        ; Truncate / bounce the end if necessary
        TST     r6, #DMAPrep_NonCacheable+(DMAPrep_UseBounceBuffer :SHL: DMAPrep_FlagOffset)
        BNE     %FT50
        LDR     lr, [sp, #DMAPrepW_Flags]
        LDR     r10, [sp, #DMAPrepW_CacheMask]
        TST     lr, #DMAPrep_Write
        TSTNE   r7, r10
        BEQ     %FT40
        ; Unaligned write to cacheable memory -> bounce required
        BIC     r3, r7, r10             ; End rounded down to a cache line boundary
        CMP     r3, r5
        ORREQ   r6, r6, #DMAPrep_UseBounceBuffer :SHL: DMAPrep_FlagOffset ; Bounce
        MOVNE   r7, r3 ; Truncate
40
        ; Perform cache maintenance if necessary
        ; For safety we always perform this before calling the output function, rather than caching and attempting to merge the regions (output function may alter cacheability of pages?)
        TST     r6, #DMAPrep_NonCacheable+(DMAPrep_UseBounceBuffer :SHL: DMAPrep_FlagOffset)
        BNE     %FT50
        ADD     r1, r7, r10
        BIC     r0, r5, r10             ; r0 = start rounded down to cache line
        BIC     r1, r1, r10             ; r1 = end rounded up to cache line
        MOV     lr, pc
        LDR     pc, [sp, #DMAPrepW_ARMop]
50
        ; Call the output function
        LDR     lr, [sp, #DMAPrepW_Flags]
        TST     lr, #DMAPrep_End
        BNE     %FT60 ; No output func for end-of-op
        MOV     r0, r5
        ADDS    r1, r5, r6, LSL #12     ; Low word of physical address = logical + offset
        MOV     r2, r6, LSR #20
        ADC     r2, r2, #0              ; Yuck, need to deal with carry propagation
        AND     r2, r2, #255            ; ... and keep modulo 2^40
        SUB     r4, r7, r5              ; Length of block
        MOV     r3, r6, LSR #DMAPrep_FlagOffset
        LDR     r12, [sp, #DMAPrepW_R12]
        AND     r3, r3, #DMAPrep_UseBounceBuffer ; Mask out internal flags
        TST     lr, #DMAPrep_Phys64
        MOVEQ   r2, r4                  ; For the 32bit API, this will drop the high physical address bits. But that should be safe, since we force high addresses to use a bounce buffer (in which case the physical address *should* be completely ignored)
        ADD     r9, sp, #DMAPrepW_OutR9
        CLRV    ; Ensure V is clear on entry so simple functions don't confuse us
        MOV     lr, pc
        ASSERT  DMAPrepW_OutFunc = DMAPrepW_OutR9 + 4
        LDMIA   r9, {r9, pc}            ; Call output function
        STR     r9, [sp, #DMAPrepW_OutR9] ; Always write back updated R9
        BVS     %FT90
60
        ; Advance InChunk by the length of {r5-r7}
        LDR     r0, [sp, #DMAPrepW_InChunk]
        ADD     r0, r0, r7
        LDR     r1, [sp, #DMAPrepW_InChunk+8]
        SUB     r0, r0, r5
        STR     r0, [sp, #DMAPrepW_InChunk]
        CMP     r0, r1
        BNE     %BT20
        ; InChunk depleted, copy InHold to InChunk and try for more input
        ADD     lr, sp, #DMAPrepW_InChunk
        ASSERT  DMAPrepW_InHold = 0
        LDMIA   sp, {r0,r2,r3}
        CMP     r0, r3
        BNE     %BT05
        ; InHold was empty, so no more regions to process
90
        ; Common exit; on error (V set) r0 is written to the stacked r0 so it
        ; is returned to the caller
        FRAMSTR r0, VS
        EXIT

95
        ; Address could not be translated / was out of range
        ADRL    r0, ErrorBlock_BadAddress
        SETV
        B       %BT90

96
        ; Bad parameter detected after the frame was created
        PullEnv
        B       MemoryBadParameters

; Call the input function and pack its result into the 3-word block format.
; Must be called with sp pointing at the DMAPrep workspace; shares DMAPrep's
; local labels (exits via %BT90/%BT95/%BT96 bypass the return to the caller).
; Out: R0, R2, R3 = block
;      LR -> InChunk
;      R1, R4, R9, R12 corrupt
DMAPrep_CallInputFunc
        LDR     r12, [sp, #DMAPrepW_R12]
        ADD     r9, sp, #DMAPrepW_InR9  ; Compute before the push moves sp
        Push    "lr"
        CLRV    ; Ensure V is clear on entry so simple functions don't confuse us
        MOV     lr, pc
        ASSERT  DMAPrepW_InFunc = DMAPrepW_InR9 + 4
        LDMIA   r9, {r9, pc}            ; Call the input function
        Pull    "r12"                   ; r12 = our return address; sp is back at the workspace
        STR     r9, [sp, #DMAPrepW_InR9] ; Always write back updated R9
        BVS     %BT90
        ; Shuffle registers if we're using the 32bit API
        ; After this: r0 = address (low word), r1 = address (high word),
        ; r2 = flags, r3 = length
        LDR     r9, [sp, #DMAPrepW_Flags]
        TST     r9, #DMAPrep_Phys64
        MOVEQ   r3, r1
        MOVEQ   r1, #0
        CMP     r3, #0
        BEQ     %FT50                   ; Zero length = end of input
        CMP     r2, #DMAPrep_UseBounceBuffer
        BHI     %BT96                   ; Flag value above DMAPrep_UseBounceBuffer -> bad parameters
        CMP     r1, #255                ; Max 40 bit phys addr
        BHI     %BT95
        ; Pack into InChunk
        MOV     r2, r2, LSL #DMAPrep_FlagOffset
        ORR     r2, r2, r1, LSL #20     ; High address bits go in bits 20-27
        ADD     lr, sp, #DMAPrepW_InChunk
        ADD     r3, r0, r3              ; r3 = end address (may wrap past 4G)
        MOV     pc, r12
50
        ; End of input - just set everything to zero
        ; (r3 is already zero on this path)
        MOV     r0, #0
        MOV     r2, #0
        ADD     lr, sp, #DMAPrepW_InChunk
        MOV     pc, r12

; Translate the start of InChunk into a block
; The returned block covers at most the remainder of one 4K page, and never
; extends beyond the end of InChunk.
; Must be called with sp pointing at the DMAPrep workspace; shares DMAPrep's
; local labels (failures branch to %BT95 rather than returning).
; In: r5 = Address to translate
; Out: r5-r7 = block
;      r1, r3, r4, r8-r12 corrupt
DMAPrep_Translate
        MOV     r1, lr                  ; Keep return address in r1 (lr is reused below)
        LDR     r12, [sp, #DMAPrepW_InChunk+8]
        SUB     r12, r12, r5            ; Length of input region (guaranteed 32bit)
        LDR     lr, [sp, #DMAPrepW_Flags]
        LDR     r6, [sp, #DMAPrepW_CamEntriesPointer]
        LDR     r7, [sp, #DMAPrepW_MaxCamEntry]
        LDR     r9, [sp, #DMAPrepW_InChunk+4]
        TST     lr, #DMAPrep_PhysProvided
        BNE     %FT20
        ; Input address is logical
        TST     r9, #255:SHL:20         ; Logical addresses must be 32bit!
        BNE     %BT95
      [ AMB_LazyMapIn
        MOV     r9, r0                  ; Preserve r0 around the call
        MOV     r0, r5
        BL      AMB_MakeHonestLA
        MOV     r0, r9
      ]
        MOV     r4, r5
        PTOp    logical_to_physical     ; r4 -> r8, r9
        BLCC    physical_to_ppn         ; r7, r8, r9 -> r3, r11
        BCS     %BT95                   ; Either step failing gives a bad address error
        ; r5,r10 corrupt
        ; Grab page flags
        ADD     lr, r6, r3, LSL #CAM_EntrySizeLog2
        LDR     lr, [lr, #CAM_PageFlags]
        B       %FT30
20
        ; Input address is physical: r5 = low word, high bits packed in r9
        MOV     r8, r5
      [ NoARMT2
        MOV     r9, r9, LSR #20
        AND     r9, r9, #255
      |
        UBFX    r9, r9, #20, #8
      ]
        BL      physical_to_ppn         ; r7, r8, r9 -> r3, r11
        BCS     %BT95
        ; r5, r10 corrupt
        ; Manual ppn -> logical so we can get the page flags at the same time
        ; TODO this won't deal with mapped out pages in a sensible manner (will output them all individually)
      [ AMB_LazyMapIn
        MOV     r10, r0                 ; Preserve r0 around the call
        MOV     r0, r3
        BL      AMB_MakeHonestPN
        MOV     r0, r10
      ]
        ADD     lr, r6, r3, LSL #CAM_EntrySizeLog2
        ASSERT  CAM_LogAddr=0
        ASSERT  CAM_PageFlags=4
        LDMIA   lr, {r3, lr}            ; r3 = logical address of page, lr = page flags
        ; Merge in the offset within the page
      [ NoARMT2
        MOV     r3, r3, LSR #12
        ORR     r4, r3, r8, LSL #20
        MOV     r4, r4, ROR #20         ; r4 = page base (bits 12-31) + low 12 bits of r8
      |
        BFI     r3, r8, #0, #12
        MOV     r4, r3
      ]
30
        ; We now have r4 = log addr, r8,r9 = phys addr, lr = page flags, r11 = PhysRamTable flags
        LDR     r3, [sp, #DMAPrepW_InChunk+4]
        ; Combine the cacheability + phys offset into r6
        SUBS    r6, r8, r4              ; r6 = phys-log
        AND     r3, r3, #&FFFFFFFF:SHL:DMAPrep_FlagOffset ; Get the chunk flags
        ORR     r6, r3, r6, LSR #12
        SBC     r7, r9, #0              ; Borrow from the SUBS above propagates into the high bits
      [ NoARMT2
        AND     r7, r7, #255
        ORR     r6, r6, r7, LSL #20
      |
        BFI     r6, r7, #20, #8
      ]
        TST     lr, #DynAreaFlags_NotCacheable
        ORRNE   r6, r6, #DMAPrep_NonCacheable
        ; For the 32bit API, any large phys addresses get forced to use bounce
        ; buffers. Force it here, so that the main logic will know not to bother
        ; with cache maintenance for the region.
        ; (The conditional chain below leaves NE if either bounce condition
        ; applies.)
        CMP     r9, #0
        LDRNE   lr, [sp, #DMAPrepW_Flags]
        EORNE   lr, lr, #DMAPrep_Phys64
        TSTNE   lr, #DMAPrep_Phys64     ; NE if the Phys64 flag was clear (32bit API)
        ; We also want to force bounce buffer usage for RAM chunks which have
        ; been flagged by the HAL as not supporting DMA
        TSTEQ   r11, #OSAddRAM_NoDMA
        ORRNE   r6, r6, #DMAPrep_UseBounceBuffer:SHL:DMAPrep_FlagOffset
        ; Work out how much of r12 fits into this page
        ; This is done by comparing against the length of the input region,
        ; since the input could be logical or physical
        ADD     r7, r4, #4096
        MOV     r7, r7, LSR #12
        RSB     r7, r4, r7, LSL #12     ; r7 = bytes from r4 to the end of its 4K page
        CMP     r7, r12
        MOVHI   r7, r12                 ; Clamp to remaining input length
        ADD     r7, r4, r7              ; r7 = end logical address (excl.)
        MOV     r5, r4
        MOV     pc, r1

;----------------------------------------------------------------------------------------
;
;       In:     r0 = flags
;                       bit     meaning
;                       0-7     20 (reason code)
;                       8-31    reserved (set to 0)
;               r1 = 0 to disable compatibility page
;                    1 to enable compatibility page
;                    -1 to read state
;
;       Out:    r1 = new/current state:
;                    0 if disabled
;                    1 if enabled
;                    -1 if not supported
;
;       Controls the page zero compatibility page located at &0
;
;       If the compatibility page isn't supported, attempts to enable it will
;       silently fail, with a result of r1 = -1
;
; Enable, disable or read the state of the page zero compatibility page.
; Parameters are validated first: r1 must be -1, 0 or 1 and r0 must not
; exceed 255, otherwise we exit via MemoryBadParameters.
ChangeCompatibility ROUT
        CMP     r1, #-1
        CMPNE   r1, #1
        CMPLS   r0, #255
        BHI     MemoryBadParameters
 [ :LNOT: CompatibilityPage
        ; Feature not built in: always report "not supported"
        MOV     r1, #-1
        MOV     pc, lr
 |
        ; Local frame is used to hold a temporary DANode (see below)
        Entry   "r0-r11", DANode_NodeSize
        LDR     r12, =ZeroPage
        LDRB    r0, [r12, #CompatibilityPageEnabled]
        FRAMSTR r0,,r1 ; return pre-change state in r1 (will be updated later, as necessary)
        CMP     r1, #-1
        CMPNE   r0, r1
        EXIT    EQ                      ; Reading state, or already in the requested state
        ; If we're attempting to enable it, make sure nothing else has mapped itself in to page zero
        CMP     r1, #0
        BEQ     %FT05
        MOV     r4, #0
        PTOp    logical_to_physical
        MOVCC   r1, #-1                 ; Translation succeeded, so something is already mapped there
        FRAMSTR r1,CC
        EXIT    CC
05
        ; Set up temp DANode on the stack so we can use a Batcall to manage the mapping
        MOV     r2, sp
        MOV     r0, #DynAreaFlags_NotCacheable
        STR     r0, [r2, #DANode_Flags]
        MOV     r0, #0
        STR     r0, [r2, #DANode_Base]
        STR     r0, [r2, #DANode_Handler]
        CMP     r1, #1                  ; EQ = enabling, NE = disabling (flags used until the SWI)
        STREQ   r0, [r2, #DANode_Size]  ; Enabling: area currently has size 0
        MOV     r0, #4096
        STRNE   r0, [r2, #DANode_Size]  ; Disabling: area currently has size 4096
        STR     r0, [r2, #DANode_MaxSize]
        MOV     r0, #ChangeDyn_Batcall
        MOV     r1, #4096
        RSBNE   r1, r1, #0              ; Disabling: change size by -4096
        SWI     XOS_ChangeDynamicArea
        FRAMSTR r0,VS
        EXIT    VS
        ; If we just enabled the page, fill it with the special value and then change it to read-only
        FRAMLDR r1
        RSBS    r1, r1, #1 ; invert returned state, to be correct for the above action
        STRB    r1, [r12, #CompatibilityPageEnabled] ; Also update our state flag
        FRAMSTR r1
        EXIT    EQ                      ; New state is 0 (disabled), nothing more to do
        MOV     r0, #0
        ADR     r1, %FT20
10
        ; Copy the pattern at %FT20 to address 0 upwards; once past the end of
        ; the pattern r2 keeps its last loaded value, which fills the rest of
        ; the page
        CMP     r0, #%FT30-%FT20
        LDRLO   r2, [r1, r0]
        STR     r2, [r0], #4
        CMP     r0, #4096
        BNE     %BT10
        LDR     r7, [r12, #MaxCamEntry]
        MOV     r4, #0
        PTOp    logical_to_physical
        BL      physical_to_ppn
        ; r5, r10-r11 corrupt, r3 = page number, r8,r9 = phys addr
        MOV     r0, #OSMemReason_FindAccessPrivilege
        MOV     r1, #2_100100
        MOV     r2, #2_100100
        SWI     XOS_Memory ; Get AP number for read-only access (will make area XN on ARMv6+)
        ORRVC   r11, r0, #DynAreaFlags_NotCacheable
        MOVVC   r2, r3
        MOVVC   r3, #0
        PTOpVC  BangCamUpdate
        EXIT

20
        ; Pattern to place in compatibility page
        DCD     &FDFDFDFD ; A few words of invalid addresses, which should also be invalid instructions on ARMv5 (ARMv6+ will have this page non-executable, ARMv4 and lower can't have high processor vectors)
        DCD     &FDFDFDFD
        DCD     &FDFDFDFD
        DCD     &FDFDFDFD
        = "!!!!NULL.POINTER.DEREFERENCE!!!!", 0 ; Readable message if interpreted as a string. Also, all words are unaligned pointers.
        ALIGN
        DCD     0 ; Fill the rest with zero (typically, most of ZeroPage is zero)
30
 ]
        LTORG

;----------------------------------------------------------------------------------------
;
;       In:     r0 = flags
;                       bit     meaning
;                       0-7     23 (reason code)
;                       8       0 = reserve, 1 = release reservation
;                       9-31    reserved (set to 0)
;               r1 = base page number
;               r2 = page count
;
;       Attempts to reserve (or remove the reservation) on a range of pages.
;       Dynamic areas can still use the memory, but only the code that reserved
;       it will be allowed to claim exclusive use over it (i.e. perform an
;       action that will cause PageFlags_Unavailable to be set)
;
;       This is useful for systems such as the PCI heap, where physically
;       contiguous memory is required, but the memory isn't needed all of the
;       time. By reserving the pages, it allows other regular DAs to make use
;       of the memory when the PCI heap is small. But when the PCI heap needs
;       to grow, it guarantees that (if there's enough free memory in the
;       system) the previously reserved pages can be allocated to the PCI heap.
;
;       Notes:
;
;       * Reservations are handled on an honour system; there's no checking
;         that the program that reserved the memory is the one attempting to
;         mark it Unavailable.
;       * For regular NeedsSpecificPages DAs, reserved pages can only be used
;         if the special "RESV" R0 return value is used (DAHandler_RESV)
;       * For PMP DAs, reserved pages can only be made Unavailable if the entry
;         in the page block also specifies the Reserved page flag. The actual
;         state of the Reserved flag can't be modified via PMP DA ops; the flag
;         is only used to indicate the caller's permission/intent to make an
;         already Reserved page Unavailable.
;       * If a PMP DA tries to make a Reserved page Unavailable without
;         specifying the Reserved flag, the kernel will try to swap it out for
;         a replacement page taken from the free pool (preserving the contents
;         and generating Service_PagesUnsafe / Service_PagesSafe, as if another
;         DA had claimed the page)
;
; Set or clear PageFlags_Reserved on r2 consecutive CAM entries starting at
; page r1. Bit 8 of r0 selects release (set) or reserve (clear). A failed
; reserve undoes the reservations it has added before returning an error.
; On error, V is set and r0 -> error block (r0 is not part of the Entry frame).
ReservePages ROUT
        Entry   "r1-r5"
        LDR     r3, =ZeroPage+CamEntriesPointer
        LDR     r4, [r3, #MaxCamEntry-CamEntriesPointer]
        LDR     r3, [r3]
        ; Range check: fail if r1 > MaxCamEntry or r1 + r2 > MaxCamEntry
        SUBS    r4, r4, r1
        SUBHSS  r4, r4, r2
        BLO     %FT90
        ADD     r3, r3, #CAM_PageFlags
        ADD     r3, r3, r1, LSL #CAM_EntrySizeLog2 ; r3 -> page flags word of first page
        MOV     r5, r3                  ; r5 = moving pointer, r3 kept for rollback
        TST     r0, #1:SHL:8
        BEQ     %FT20
10
        ; Release: clear the Reserved flag on every page in the range
        SUBS    r2, r2, #1
        EXIT    LO
        LDR     r4, [r5]
        BIC     r4, r4, #PageFlags_Reserved
        STR     r4, [r5], #CAM_EntrySize
        B       %BT10

20
        ; Reserve: set the Reserved flag on every page in the range
        SUBS    r2, r2, #1
        EXIT    LO
        LDR     r4, [r5]
        TST     r4, #PageFlags_Unavailable  ; If already claimed
        TSTEQ   r4, #PageFlags_Reserved     ; Or already reserved
        BNE     %FT30                       ; Then complain
        ORR     r4, r4, #PageFlags_Reserved
        STR     r4, [r5], #CAM_EntrySize
        B       %BT20
        
30
        ; Roll back: walk r3 from the first page up to (not including) the
        ; page at r5 that caused the failure
        CMP     r3, r5
        LDRNE   r4, [r3]
        BICNE   r4, r4, #PageFlags_Reserved ; Remove reservations we just added
        STRNE   r4, [r3], #CAM_EntrySize
        BNE     %BT30
        ADRL    r0, ErrorBlock_CantGetPhysMem
        SETV
        EXIT

90
        ; Page range lies outside the CAM
        ADRL    r0, ErrorBlock_BadParameters
        SETV
        EXIT

;----------------------------------------------------------------------------------------
;
;        In:    r0 = flags
;                       bit     meaning
;                       0-7     24 (reason code)
;                       8-31    reserved (set to 0)
;               r1 = low address (inclusive)
;               r2 = high address (exclusive)
;
;       Out:    r1 = access flags:
;               bit 0: completely readable in user mode
;               bit 1: completely writable in user mode
;               bit 2: completely readable in privileged modes
;               bit 3: completely writable in privileged modes
;               bit 4: partially readable in user mode
;               bit 5: partially writable in user mode
;               bit 6: partially readable in privileged modes
;               bit 7: partially writable in privileged modes
;               bit 8: completely physically mapped (i.e. IO memory)
;               bit 9: completely abortable (i.e. custom data abort handler)
;               bit 10: completely non-executable in user mode
;               bit 11: completely non-executable in privileged modes
;               bit 12: partially physically mapped
;               bit 13: partially abortable
;               bit 14: partially non-executable in user mode
;               bit 15: partially non-executable in privileged modes
;               bits 16+: reserved
;
;       Return various attributes for the given memory region

; NOTE: To make the flags easier to calculate, this routine calculates executability rather than non-executability. This means that unmapped memory has flags of zero. On exit we invert the sense of the bits in order to get non-executability (so that the public values are backwards-compatible with OS versions that didn't return executability information)
CMA_Completely_Inverted * CMA_Completely_UserXN + CMA_Completely_PrivXN

CMA_CheckL2PT          * 1<<31 ; Pseudo flag used internally for checking sparse areas
CMA_DecodeAP           * 1<<30 ; Used with CheckL2PT to indicate AP flags should be decoded from L2PT

CheckMemoryAccess ROUT
        Entry   "r0,r2-r10"
        CMP     r0, #24
        BNE     %FT99
        LDR     r10, =ZeroPage
        ; Set all the 'completely' flags, we'll clear them as we go along
        LDR     r0, =&0F0F0F0F
        ; Make end address inclusive so we don't have to worry so much about
        ; wrap around at 4G
        TEQ     r1, r2
        SUBNE   r2, r2, #1
        ; Split memory up into five main regions:
        ; * scratchspace/zeropage
        ; * application space
        ; * dynamic areas
        ; * IO memory
        ; * special areas (stacks, ROM, HAL workspace, etc.)
        ; All ranges are checked in increasing address order, so the
        ; completeness flags are returned correctly if we happen to cross from
        ; one range into another
        ; Note that application space can't currently be checked in DA block as
        ; (a) it's not linked to DAList/DynArea_AddrLookup
        ; (b) we need to manually add the abortable flag
        CMP     r1, #32*1024
        BHS     %FT10
        ; Check zero page
        ASSERT  ProcVecs = ZeroPage
     [ ZeroPage = 0
        MOV     r3, #0
        MOV     r4, #16*1024
        LDR     r5, =CMA_ZeroPage
        BL      CMA_AddRange
     |
      [ CompatibilityPage
        ; Zero page compatibility page
        LDR     r3, =ZeroPage
        LDRB    r3, [r3, #CompatibilityPageEnabled]
        CMP     r3, #0
        BEQ     %FT05
        MOV     r3, #0
        MOV     r4, #4096
        ; This represents our ideal access flags; it may not correspond to reality
        LDR     r5, =CMA_Partially_UserR+CMA_Partially_PrivR
        BL      CMA_AddRange
05
      ]
        ; DebuggerSpace
        ASSERT  DebuggerSpace < ScratchSpace
        LDR     r3, =DebuggerSpace
        LDR     r4, =(DebuggerSpace_Size + &FFF) :AND: &FFFFF000
        LDR     r5, =CMA_DebuggerSpace
        BL      CMA_AddRange
     ]
        ; Scratch space
        LDR     r3, =ScratchSpace
        MOV     r4, #16*1024
        LDR     r5, =CMA_ScratchSpace
        BL      CMA_AddRange
10
        ; Application space
        ; Note - checking AplWorkSize as opposed to AplWorkMaxSize to cope with
        ; software which creates DAs within application space (e.g. Aemulor)
        LDR     r4, [r10, #AplWorkSize]
        CMP     r1, r4
        BHS     %FT20
        LDR     r3, [r10, #AMBControl_ws]
        LDR     r3, [r3, #:INDEX:AMBFlags]
        LDR     r5, =CMA_AppSpace
        TST     r3, #AMBFlag_LazyMapIn_disable :OR: AMBFlag_LazyMapIn_suspend
        MOV     r3, #32*1024
        ORREQ   r5, r5, #CMA_Partially_Abort
        BL      CMA_AddRange2
20
        ; Dynamic areas
        LDR     r7, [r10, #IOAllocLimit]
        CMP     r1, r7
        BHS     %FT30
        ; Look through the quick lookup table until we find a valid DANode ptr
        LDR     r6, [r10, #DynArea_ws]
        MOV     r3, r1
        TEQ     r6, #0 ; We can get called during ROM init, before the workspace is allocated (pesky OS_Heap validating its pointers)
        ADD     r6, r6, #(:INDEX:DynArea_AddrLookup) :AND: &00FF
        LDREQ   r9, [r10, #DAList] ; So just start at the first DA
        ADD     r6, r6, #(:INDEX:DynArea_AddrLookup) :AND: &FF00
        BEQ     %FT22
21
        AND     r8, r3, #DynArea_AddrLookupMask
        LDR     r9, [r6, r8, LSR #30-DynArea_AddrLookupBits]
        TEQ     r9, #0
        BNE     %FT22
        ; Nothing here, skip ahead to next block
        ADD     r3, r8, #DynArea_AddrLookupSize
        CMP     r3, r2
        BHI     %FT90 ; Hit end of search area
        CMP     r3, r7
        BLO     %BT21
        ; Hit end of DA area and wandered into IO area
        B       %FT30
22
        ; Now that we've found a DA to start from, walk through and process all
        ; the entries until we hit the end of the list, or any DAs above
        ; IOAllocLimit
        LDR     r3, [r9, #DANode_Base]
        LDR     r6, [r9, #DANode_Flags]
        CMP     r3, r7
        BHS     %FT30
        ; Decode AP flags
        LDR     r5, [r10, #MMU_PPLAccess]
        AND     lr, r6, #DynAreaFlags_APBits
        LDR     r5, [r5, lr, LSL #2]
        TST     r6, #DynAreaFlags_Abortable
        ORRNE   r5, r5, #CMA_Partially_Abort
        TST     r6, #DynAreaFlags_PMP
        ORRNE   r5, r5, #CMA_DecodeAP
        TSTEQ   r6, #DynAreaFlags_SparseMap
        LDREQ   lr, [r9, #DANode_Size]
        LDRNE   r4, [r9, #DANode_SparseHWM] ; Use HWM as bounds when checking sparse/PMP areas
        ORRNE   r5, r5, #CMA_CheckL2PT ; ... and request L2PT check
        ADDEQ   r4, r3, lr
        TST     r6, #DynAreaFlags_DoublyMapped ; Currently impossible for Sparse/PMP areas - so use of lr safe
        SUBNE   r3, r3, lr
        TSTNE   r6, #DynAreaFlags_Abortable
        BEQ     %FT23
        ; Doubly-mapped abortable DA; make sure the unallocated area between
        ; MaxSize and Size is marked as abortable
        MOV     r12, r5
        MOV     r5, #CMA_Partially_Abort
        MOV     r10, r4
        LDR     r4, [r9, #DANode_MaxSize]
        SUB     r4, r4, lr ; MaxSize-Size
        SUB     r3, r3, r4
        BL      CMA_AddRange
        MOV     r3, r4
        MOV     r4, r10
        MOV     r5, r12
23
        ; Map the allocated part of the DA
        BL      CMA_AddRange2
        TST     r6, #DynAreaFlags_Abortable
        BEQ     %FT24
        ; Abortable DA; make sure the unallocated area between R4 and
        ; Base+MaxSize is marked as abortable
        MOV     r5, #CMA_Partially_Abort
        MOV     r3, r4
        LDR     r4, [r9, #DANode_Base]
        LDR     lr, [r9, #DANode_MaxSize]
        ADD     r4, r4, lr
        BL      CMA_AddRange2
24
        LDR     r9, [r9, #DANode_Link]
        TEQ     r9, #0
        BNE     %BT22
        ; Hit the end of the list
30
        ; IO memory
        LDR     r9, [r10, #IOAllocTop]
        CMP     r1, r9
        BHS     %FT40
        MOV     r6, r1, LSR #20
        LDR     r4, [r10, #IOAllocPtr]
        MOV     r6, r6, LSL #20 ; Get MB-aligned addr of first entry to check
        CMP     r6, r4
        MOVLO   r6, r4 ; Skip all the unallocated regions
31
        Push    "r0-r2"
        MOV     r0, r6
        PTOp    LoadAndDecodeL1Entry    ; TODO bit wasteful. We only care about access privileges, but this call gives us cache info too.
        LDR     r5, [r10, #MMU_PPLAccess]
        AND     lr, r2, #DynAreaFlags_APBits
        LDR     r5, [r5, lr, LSL #2]
        Pull    "r0-r2"
        SUB     lr, r3, #1
        ADD     r4, r6, r3
        BIC     r3, r6, lr              ; Aligned start addr
        BIC     r4, r4, lr              ; Aligned end addr
        ORR     r5, r5, #CMA_Partially_Phys
        BL      CMA_AddRange2
        CMP     r4, r9
        MOV     r6, r4
        BNE     %BT31
40
        ; Everything else!
        ASSERT  CAMTop <= HALWorkspace
        LDR     r3, [r10, #CamEntriesPointer]
        LDR     r4, [r10, #SoftCamMapSize]
        LDR     r5, =CMA_CAM
        BL      CMA_AddRange
        ASSERT  HALWorkspace >= CAMTop
        LDR     r3, =HALWorkspace
        LDR     r4, [r10, #HAL_WsSize]
        LDR     r5, =CMA_HALWorkspace
        BL      CMA_AddRange
        ASSERT  IRQStackAddress > HALWorkspace
        LDR     r3, =IRQStackAddress
        LDR     r4, =IRQStackSize
        LDR     r5, =CMA_IRQStack
        BL      CMA_AddRange
        ASSERT  SVCStackAddress > IRQStackAddress
        LDR     r3, =SVCStackAddress
        LDR     r4, =SVCStackSize
        LDR     r5, =CMA_SVCStack
        BL      CMA_AddRange
        ASSERT  ABTStackAddress > SVCStackAddress
        LDR     r3, =ABTStackAddress
        LDR     r4, =ABTStackSize
        LDR     r5, =CMA_ABTStack
        BL      CMA_AddRange
        ASSERT  UNDStackAddress > ABTStackAddress
        LDR     r3, =UNDStackAddress
        LDR     r4, =UNDStackSize
        LDR     r5, =CMA_UNDStack
        BL      CMA_AddRange
        ASSERT  DCacheCleanAddress > UNDStackAddress
        LDR     r4, =DCacheCleanAddress+DCacheCleanSize
        CMP     r1, r4
        BHS     %FT60
        ; Check that DCacheCleanAddress is actually used
        Push    "r0-r2,r9"
        AddressHAL r10
        MOV     a1, #-1
        CallHAL HAL_CleanerSpace
        CMP     a1, #-1
        Pull    "r0-r2,r9"
        BEQ     %FT60
        SUB     r3, r4, #DCacheCleanSize
        MOV     r4, #DCacheCleanSize
        ; Mark as IO, it may not be actual memory there
        LDR     r5, =CMA_DCacheClean+CMA_Partially_Phys
        BL      CMA_AddRange
60
        ASSERT  KbuffsBaseAddress > DCacheCleanAddress
        LDR     r3, =KbuffsBaseAddress
        LDR     r4, =(KbuffsSize + &FFF) :AND: &FFFFF000
        LDR     r5, =CMA_Kbuffs
        BL      CMA_AddRange
        ASSERT  HALWorkspaceNCNB > KbuffsBaseAddress
        LDR     r3, [r10, #HAL_Descriptor]
        LDR     r3, [r3, #HALDesc_Flags]
        TST     r3, #HALFlag_NCNBWorkspace
        BEQ     %FT70
        LDR     r3, =HALWorkspaceNCNB
        LDR     r4, =32*1024
        LDR     r5, =CMA_HALWorkspaceNCNB
        BL      CMA_AddRange
70
    [ LongDesc :LAND: ShortDesc
        PTWhich r3
        BEQ     %FT71
    ]
    [ LongDesc
        ASSERT  LL3PT > HALWorkspaceNCNB
        LDR     r3, =LL3PT
        MOV     r4, #8*1024*1024
        LDR     r5, =CMA_PageTablesAccess+CMA_CheckL2PT ; L3PT contains gaps due to logical indexing
        BL      CMA_AddRange
        ASSERT  LL2PT > LL3PT
        ASSERT  LL1PT = LL2PT+16*1024
        LDR     r3, =LL2PT
        MOV     r4, #16*1024+4096
        LDR     r5, =CMA_PageTablesAccess
        BL      CMA_AddRange
        ASSERT  CursorChunkAddress > LL1PT
      [ ShortDesc
        B       %FT72
71
      ]
    ]
    [ ShortDesc
        ASSERT  L2PT > HALWorkspaceNCNB
        LDR     r3, =L2PT
        MOV     r4, #4*1024*1024
        LDR     r5, =CMA_PageTablesAccess+CMA_CheckL2PT ; L2PT contains gaps due to logical indexing
        BL      CMA_AddRange
        ASSERT  L1PT > L2PT
        LDR     r3, =L1PT
        MOV     r4, #16*1024
        LDR     r5, =CMA_PageTablesAccess
        BL      CMA_AddRange
        ASSERT  CursorChunkAddress > L1PT
72
    ]
        LDR     r3, =CursorChunkAddress
        MOV     r4, #32*1024
        LDR     r5, =CMA_CursorChunk
        BL      CMA_AddRange

        ASSERT  PhysicalAccess > CursorChunkAddress
        CMP     r1, #ROM
        BHS     %FT80
        Push    "r0-r2"
        LDR     r0, =PhysicalAccess
        PTOp    LoadAndDecodeL1Entry
        CMP     r2, #-2
        AND     lr, r2, #DynAreaFlags_APBits
        Pull    "r0-r2"
        BHS     %FT80
        ADD     r4, r3, #PhysicalAccess
        LDR     r5, [r10, #MMU_PPLAccess]
        LDR     r3, =PhysicalAccess
        LDR     r5, [r5, lr, LSL #2]
        ORR     r5, r5, #CMA_Partially_Phys
        BL      CMA_AddRange2
80
        ASSERT  ROM > PhysicalAccess
        LDR     r3, =ROM
        LDR     r4, =OSROM_ImageSize*1024
        LDR     r5, =CMA_ROM
        BL      CMA_AddRange
        ASSERT  RWBase > ROM
        LDR     r3, =RWBase
        LDR     r4, =|Image$$ZI$$Limit|+&FFF-RWBase
        BIC     r4, r4, #&FF
        BIC     r4, r4, #&F00
        LDR     r5, =CMA_RWArea
        BL      CMA_AddRange
        ; Finally, high processor vectors/relocated zero page
        ASSERT  ProcVecs = ZeroPage
      [ ZeroPage > 0
        ASSERT  ZeroPage > ROM
        MOV     r3, r10
        LDR     r4, =16*1024
        LDR     r5, =CMA_ZeroPage
        BL      CMA_AddRange
      ]
90
        ; If there's anything else, we've wandered off into unallocated memory
        LDR     r3, =&0F0F0F0F
        BIC     r1, r0, r3
        B       CMA_Done

99
        PullEnv
        B       MemoryBadParameters

        ; CMA_AddRange / CMA_AddRange2
        ;
        ; Add range r3..r4 to attributes in r0
        ;
        ; Merges the access flags of one logical address range into the
        ; result being accumulated for the query r1..r2.
        ;
        ; In:   r0 = flags accumulated so far. As used below, the low nibble
        ;            of each byte holds 'completely' flags and the high nibble
        ;            the matching 'partially' flags (mask &0F0F0F0F selects
        ;            the 'completely' set)
        ;       r1 = current search position (first address not yet covered)
        ;       r2 = last address of the query (inclusive)
        ;       r3 = start of range
        ;       r4 = length of range (CMA_AddRange)
        ;            or end of range, exclusive (CMA_AddRange2)
        ;       r5 = CMA_ 'partially' flags describing the range, optionally
        ;            with CMA_CheckL2PT (range is sparse, probe each page)
        ;            and CMA_DecodeAP (take per-page flags from the page
        ;            table entry rather than from r5)
        ;
        ; Out:  If the range lies entirely below r1, returns with r0, r1
        ;       unchanged.
        ;       If the query extends to or beyond the end of the range,
        ;       returns with r0 updated and r1 = r4 = end of range.
        ;       Otherwise the query is complete: does NOT return to lr, but
        ;       falls into CMA_Done, which EXITs from the enclosing routine.
        ;
        ; Corrupts r8, exits with r4 = end addr
        ; r5 may also be modified (CMA_CheckL2PT cleared, EmulateAP1 fixups)
CMA_AddRange ROUT ; r3 = start, r4 = length
        ADD     r4, r3, r4
CMA_AddRange2 ; r3 = start, r4 = end (excl.)
        LDR     r8, =&0F0F0F0F ; Mask of all 'completely' flags
        ; Increment r1 and exit if we hit r2
        ; Ignore any ranges which are entirely before us
        CMP     r1, r4
        MOVHS   pc, lr
        ; Check for any gap at the start, i.e. r3 > r1
        ; A gap means nothing can be 'completely' true for the query, so
        ; every completion flag is dropped
        CMP     r3, r1
        BICHI   r0, r0, r8
        MOVHI   r1, r3 ; Update r1 for L2PT check code
        ; Exit if the range starts after our end point
        CMP     r3, r2
        BHI     %FT10
    [ EmulateAP1
        Push    "r8-r9"
      [ ShortDesc
        ; Skip the fixup when PTWhich reports EQ
        ; NOTE(review): presumably EQ selects the short descriptor format - confirm against the PTWhich macro
        PTWhich r8
        BEQ     %FT03
      ]
        ; Detect AP1 areas and flag them as UserXN + Abort
        LDR     r9, =CMA_Read :AND: :NOT: CMA_Partially_UserXN ; Might already be flagged as UserXN, depending on how the flags were fetched
        BIC     r8, r5, #CMA_Partially_UserXN+CMA_Partially_Phys
        TEQ     r8, r9
        BICEQ   r5, r5, #CMA_Partially_UserXN
        ORREQ   r5, r5, #CMA_Partially_Abort
03
        Pull    "r8-r9" ; r8 = &0F0F0F0F again
    ]
        ; Process the range
        TST     r5, #CMA_CheckL2PT
        BNE     %FT20
        CMP     r3, r4 ; Don't apply any flags for zero-length ranges
        ORR     r8, r5, r8 ; r8 ROR #4 = every 'partially' bit + the 'completely' bits matching r5
        ORRNE   r0, r0, r5 ; Set new partial flags
        ANDNE   r0, r0, r8, ROR #4 ; Discard completion flags which aren't for this range
05
        CMP     r4, r2
        MOV     r1, r4 ; Continue search from the end of this range
        MOVLS   pc, lr ; Query reaches r4 or beyond, so the caller offers the next range
10
        ; We've ended inside this range
        MOV     r1, r0
CMA_Done
        ; In:   r1 = accumulated flags
        ; Out:  r1 = final flags, V clear; r0 is used as scratch here
        ;       Leaves the enclosing routine via EXIT
        ;
        ; Invert the sense of the executability flags
        ;               Completely_X Partially_X -> Completely_XN Partially_XN
        ; Completely X             1           1                0            0
        ; Partially X              0           1                0            1
        ; XN                       0           0                1            1
        ; I.e. swap the positions of the two bits and invert them
        EOR     r0, r1, r1, LSR #4      ; Completely EOR Partially
        MVN     r0, r0                  ; Invert as well as swap
        AND     r0, r0, #CMA_Completely_Inverted ; Only touch these bits
        EOR     r1, r1, r0              ; Swap + invert Completely flags
        EOR     r1, r1, r0, LSL #4      ; Swap + invert Partially flags
        CLRV
        EXIT

20
        ; Check L2PT for sparse region r1..min(r2+1,r4)
        ; r4 guaranteed page aligned
        CMP     r3, r4
        BIC     r5, r5, #CMA_CheckL2PT
        BEQ     %BT05 ; Zero-length range, nothing to probe
        Push    "r2,r4,r5,r8,r9,r10,lr"
        LDR     lr, =&FFF
        ; r2 is an inclusive end address and r4 an exclusive one, so the
        ; query only stops short of the range end when r2 < r4. If r2 = r4
        ; the query's last byte is the first byte AFTER this range; that
        ; page belongs to whichever range follows and must not be probed
        ; here, so equality takes the 'clamp to r4' path.
        CMP     r4, r2
        ADDHI   r2, r2, #4096 ; r2 < r4: stop at the start of the page after r2
        BICHI   r2, r2, lr
        MOVLS   r2, r4        ; r2 >= r4: stop at the end of the range
        ; r2 is now page aligned min(r2+1,r4)
        TST     r5, #CMA_DecodeAP
        BIC     r4, r1, lr ; r4 = page containing the current search position
        BNE     %FT35
        ; Without AP decoding: just record whether any pages are mapped
        ; (r10 bit 0) and whether any are unmapped (r10 bit 1)
        MOV     r10, #0
30
        PTOp    logical_to_physical ; Carry set on exit is treated as 'page not mapped'
        ORRCC   r10, r10, #1
        ADD     r4, r4, #4096 ; Flags preserved for the ORRCS
        ORRCS   r10, r10, #2
        CMP     r4, r2
        BNE     %BT30
        Pull    "r2,r4,r5,r8" ; r9, r10, lr stay stacked until the flags are merged
        CMP     r10, #2
        ; 01 -> entirely mapped
        ; 10 -> entirely unmapped
        ; 11 -> partially mapped
        MOVNE   r9, r5
        ANDEQ   r9, r5, #CMA_Partially_Abort ; Completely unmapped, only flag as abortable
        ANDHI   r10, r5, #CMA_Partially_Abort ; Partially mapped, retain the
        ORRHI   r8, r8, r10                   ; abortable completion flag
        ORR     r8, r9, r8
        ORR     r0, r0, r9 ; Set new partial flags
        AND     r0, r0, r8, ROR #4 ; Discard completion flags which aren't for this range
        Pull    "r9,r10,lr"
        B       %BT05

35
        ; Check L2PT, with AP decoding on a per-page basis
        ; r10 = abortability flag requested by the caller, applied to every page
        AND     r10, r5, #CMA_Partially_Abort
40
        PTOp    logical_to_physical ; Carry set on exit is treated as 'page not mapped'
        LDR     r8, =&0F0F0F0F
        MOVCS   r5, r10 ; Unmapped page, only take the abortable flag
        BCS     %FT45
        ; Get the L2PT entry and decode the flags
        Push    "r0-r3"
        MOV     r0, r4
        PTOp    LoadAndDecodeL2Entry    ; TODO bit wasteful. We only care about access privileges, but this call gives us cache info too. Also, if we know the L2PT backing exists (it should do) we could skip the logical_to_physical call
        ; r2 = DA flags
        ; Extract and decode AP
        LDR     r0, =ZeroPage
        LDR     r5, [r0, #MMU_PPLAccess] ; Table of CMA_ flag words, indexed by AP value
        AND     lr, r2, #DynAreaFlags_APBits
        LDR     r5, [r5, lr, LSL #2]
        ORR     r5, r5, r10 ; Merge in any abortability flag from the caller
    [ EmulateAP1
      [ ShortDesc
        PTWhich r0
        BEQ     %FT42
      ]
        ; Detect AP1 areas and flag them as UserXN + Abort
        TEQ     lr, #OSAP_Read
        BICEQ   r5, r5, #CMA_Partially_UserXN
        ORREQ   r5, r5, #CMA_Partially_Abort
42
    ]
        Pull    "r0-r3"
45
        ; Merge this page's flags (r5) into the running result
        ORR     r8, r5, r8
        ORR     r0, r0, r5 ; Set new partial flags
        AND     r0, r0, r8, ROR #4 ; Discard completion flags which aren't for this range
        ADD     r4, r4, #4096
        CMP     r4, r2
        BNE     %BT40
        Pull    "r2,r4,r5,r8,r9,r10,lr"
        B       %BT05

;----------------------------------------------------------------------------------------
;
;       In:     r0 = flags
;                       bit     meaning
;                       0-7     65 (reason code)
;                       8-31    reserved (set to 0)
;               r1 = logical address
;
;       Out:    r0,r1 = physical address
;               r2 = size/alignment of mapping
;               V clear
;               For invalid addresses:
;               r0 = "Address not recognised" error
;               r1 corrupt
;               r2 = size/alignment of mapping (so caller knows how much
;                    to skip)
;               V set
;
;       Convert a logical address to a physical address. Supports all page types
;       (unlike other logical -> physical SWIs, which only cope with regular
;       4KB RAM pages).
;
;       If any of the reserved flag bits in r0 are set, control passes to
;       MemoryBadParameters instead.
;
MemoryLogToPhys ROUT
        CMP     r0, #OSMemReason_LogToPhys      ; Whole of r0 must match, so reserved bits must be zero
        BNE     %FT99
        Entry   "r3"
        MOV     r0, r1
        BL      RISCOS_LogToPhys
        MOV     r2, r3                          ; Size/alignment is handed back in r3; r3 itself is restored by EXIT
        ; A result of r0 = r1 = -1 is treated as "address not mapped"
        CMP     r0, #-1
        CMPEQ   r1, #-1
        BEQ     %FT90
        ; Success. The comparisons above are really additions of 1, so they
        ; leave V set when the word compared is &7FFFFFFF. V must be cleared
        ; explicitly or a valid address would be returned as an error.
        CLRV
        EXIT
90
        ADRL    r0, ErrorBlock_BadAddress
        SETV
        EXIT
99
        B       MemoryBadParameters

        LTORG

        END