; Copyright 1996 Acorn Computers Ltd
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;     http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;
; > ARM600


        GBLL    DebugAborts
DebugAborts SETL {FALSE}

 [ Simulator
        ! 0, "**** Warning - IOMD Simulator debugging included - will crash on real thing! ****"
 ]

; MMU interface file - ARM600 version

; Created by TMD 15-Jul-92
; Comments updated by TMD 04-Aug-93

;24-01-96 MJS  now effectively codes for ARM 6 onwards (6,7,8,A, where A = StrongARM)
;              but ARM8 not properly supported (not needed for RO 3.70)
;07-10-96 MJS  proper support for ARM810 added


; Workspace needed for ARM600 work is as follows:
;
; * Level 2 page tables for a contiguous logical area starting at zero
;     This consists of:
;       a) a fixed size bit covering 0 to 192M (currently)
;       b) a variable size bit covering the free pool - 192M to 192M + (memsize rounded up to 4M)
;     Note that the 192M value is sufficient to cover all the fixed size areas at present.
;     As more areas switch to new world, this limit will come down and down, but free pool must always
;      start at the end of the fixed areas.
;     (Level 2 for areas outside this region are allocated dynamically afterwards)
;
; * Level 1 page table (16K, stored in the middle of L2PT, where the I/O + ROM would be if it wasn't section mapped)
;
; * Undefined32 mode stack (8K)
;
; * Soft CAM map (variable size = memsize/4K*8, rounded up to 4K)
;
; In order to make the memory models for MEMC1 and IOMD harmonious, the MEMC1 system is considered as a section of
; video RAM starting at &02000000 size 480K, and an area of "non-video RAM" starting at &02078000, size (totalRAM-480K)
; IOMD has 1 area of video RAM and up to 4 areas of non-video RAM.
;
; (Note: when OS is soft-loaded, a 2 Mbyte chunk of DRAM is removed from the RAM map, therefore the model allows for
;  1 area of video RAM and up to 5 areas of non-video RAM)
;
; The fixed system pages (which include those described above) start at the base of the first bank of non-video RAM
; (on IOMD we allow this to be in any of the 4 RAM sites, ie you don't have to have RAM in any particular SIMM site)
; Consequently the base of the fixed system pages is not known at assembly time, so has to be passed in a register
; to the generic code.

; Fixed page allocation is as follows

        ^       0
DRAMOffset_CursorChunk  #       32*1024         ; ie on MEMC1 this is the last 32K of DAG-addressable memory
DRAMOffset_PageZero     #       32*1024         ; 32K at location zero
DRAMOffset_SystemHeap   #       32*1024         ; system heap/svc stack
DRAMOffset_L2PT         #       0               ; static L2PT (variable size, with embedded L1PT)
DRAMOffset_L1PT         *       DRAMOffset_L2PT + 48*1024

; Undefined stack memory (size 8K) starts immediately after end of L2PT (which is variable size)
; Soft CAM map (variable size) starts immediately after end of UndStack

StaticPagesSize         *       @

; Logical addresses are as follows

L2PT                    *       &02C00000       ; size 256K
L1PT                    *       &02C0C000       ; in the middle of L2PT, where the mapping for 03000000 to 03FFFFFF would be

FixedAreasL2Size        *       96*1024        ; amount of L2 to cover fixed areas, excluding free pool

UndStackSoftCamChunk    *       &01E00000
UndStackSize            *       8*1024
CamEntriesForVicky      *       UndStackSoftCamChunk + &2000
UNDSTK                  *       CamEntriesForVicky ; points to end of stack
PhysSpace               *       &80000000       ; Map of MEMC/IOMD physical space (64M/512M in size)


; - address for virtual area for StrongARM data cache cleaning (32k, for two 16k areas)
; - the two areas are used in strict rotation for each full clean, so that we can do a full
;   clean (and not flush) with interrupts on
; - the address must be aligned such that EOR with 16*1024 flipflops between the two addresses
ARMA_Cleaners_address  * 31*1024*1024 + 64*1024


arm600stuff_before_align
         ALIGN   4096     ;align to page boundary to allow for easy ROMpatch

arm600stuff_startofstuff
  ! 0, "-- start of (4k aligned) ARM600+ stuff at ":CC::STR:(arm600stuff_startofstuff)

;note that we use the R bit if supported (not 610), so that we can write protect ROM space
;fully (user and supervisor)
;
ARM_default_MMU_CR_table
;
;ARM 6              SBLDPWCAM
         DCD  2_0000001111101
;
;ARM 7            FRSB1DPWCAM
         DCD  2_0011001111101
;
  [ ARM810bpbroken  ;branch prediction broken!
;ARM 8           Z0RSB111WCAM
         DCD  2_0001001111101
  |
;ARM 8           Z0RSB111WCAM
         DCD  2_0101001111101
  ]
;
;ARM 9 ??
         DCD  0
;
  [ SAWBbroken  ;write buffer broken! - turn off write buffer (safe-ish - safer would turn off DC as well)
;StrongARM      I00RSB111WCAM
         DCD  2_1001001110101
  |
;StrongARM      I00RSB111WCAM
         DCD  2_1001001111101
  ]
;


ARM_cacheoff_MMU_CR_table
;
;ARM 6              SBLDPWCAM
         DCD  2_0000001110001
;
;ARM 7            FRSB1DPWCAM
         DCD  2_0011001110001
;
;ARM 8           Z0RSB111WCAM
         DCD  2_0001001110001
;
;ARM 9 ??
         DCD  0
;
;StrongARM      I00RSB111WCAM
         DCD  2_0001001110001
;


OneMByte                EQU     (1024*1024)
SixteenMByte            EQU     (1024*1024 * 16)

 [ MEMC_Type = "IOMD"
; *****************************************************************************
;
;       SetDAG - Program DMA address generator R1 with physical address R0
;       NB on IOMD this is the true physical address, not just offset into VRAM or DRAM
;
; in:   r0 = physical address
;       r1 = index of DMA address generator to program, as defined in vdudecl
;
; out:  All registers preserved, operation ignored if illegal
;

SetDAG  ENTRY   "r0-r1,r12"
        MOV     r12, #IOMD_Base
        CMP     r1, #1
        BEQ     %FT10
        BHI     %FT20

; Program VInit

00
        ASSERT  MEMCDAG_VInit = 0
        MOV     r14, #0
        STR     r0, [r14, #VInitSoftCopy]       ; save VInit so that writes to VEnd can check
        LDR     r14, [r14, #VEndSoftCopy]
        CMP     r0, r14                         ; if VInit >= VEnd then set L bit
        ORRCS   r0, r0, #IOMD_DMA_L_Bit
        STR     r0, [r12, #IOMD_VIDINIT]

        MOV     r1, #0
        LDRB    r1, [r1, #LCD_Active]
        TST     r1, #&80
        EXIT    EQ                              ;Exit if not a dual-panel LC display

        ;Otherwise, we are going to have to update VIDINITB too...
        MOV     r1, #VduDriverWorkSpace
        LDR     r1, [r1, #ScreenSize]
        BIC     r0, r0, #IOMD_DMA_L_Bit
        ADD     r0, r0, r1, LSR #1              ;R0 = VIDINIT+(screensize/2)
        CMP     r0, r14                         ;If VIDINITB>=VEnd...
        ORREQ   r0, r0, #IOMD_DMA_L_Bit         ;Set the L bit if =
        SUBGT   r0, r0, r14                     ;VIDINITB=VIDINITB-VEnd
        MOVGT   r14, #0
        LDRGT   r1, [r14, #VStartSoftCopy]
        ADDGT   r0, r0, r1                      ;VIDINITB=VIDINITB+VStart
        SUBGT   r0, r0, #16                     ;Quad word correction. /** You are not expected to understand this **/ :-)
        STR     r0, [r12, #IOMD_VIDINITB]
        EXIT

; Program VStart

10
        ASSERT  MEMCDAG_VStart = 1
        MOV     r14, #0
        STR     r0, [r14, #VStartSoftCopy]
        STR     r0, [r12, #IOMD_VIDSTART]
        EXIT

20
        CMP     r1, #3
        EXIT    HI
        BEQ     %FT30

; Program VEnd

        ASSERT  MEMCDAG_VEnd = 2
        MOV     r14, #0
        STR     r0, [r14, #VEndSoftCopy]        ; remember old VEnd value
        LDR     r14, [r14, #VInitSoftCopy]      ; load old VInit
        CMP     r14, r0                         ; if VInit >= VEnd
        ORRCS   r14, r14, #IOMD_DMA_L_Bit       ; then set L bit
        STR     r14, [r12, #IOMD_VIDINIT]       ; store VInit
        STR     r0, [r12, #IOMD_VIDEND]         ; and VEnd

        MOV     r14, #0
        LDRB    r14, [r14, #LCD_Active]
        TST     r14, #&80
        EXIT    EQ                              ; Not a dual-panel LCD so no need to hang around....

        ;Check whether we need to update VIDINITB or not...

        EXIT

; Program CInit

30
        ASSERT  MEMCDAG_CInit = 3
        STR     r0, [r12, #IOMD_CURSINIT]
        EXIT

 |

; DMA address generators - still controlled by MEMC1 currently

VInit   * &03600000
VStart  * &03620000
VEnd    * &03640000
CInit   * &03660000

; *****************************************************************************
;
;       SetDAG - Program DMA address generator R1 with physical address R0
;
; in:   r0 = physical address offset from 32M
;       r1 = index of DMA address generator to program, as defined in vdudecl
;
; out:  All registers preserved, operation ignored if illegal
;

SetDAG  ENTRY   "r0"
        CMP     r1, #MEMCDAG_MaxReason
        EXIT    HI
        ADR     r14, DAGAddressTable
        LDR     r14, [r14, r1, LSL #2]          ; load base address in MEMC1
        MOV     r0, r0, LSR #4                  ; bottom 4 bits irrelevant
        CMP     r0, #(1 :SHL: 15)               ; ensure in range
        ORRCC   r14, r14, r0, LSL #2
        STRCC   r14, [r14]                      ; any old data will do
        EXIT

        GBLA    DAGIndex
DAGIndex SETA   0

        MACRO
        DAGTab  $reason, $address
        ASSERT  ($reason)=DAGIndex
        &       $address
DAGIndex SETA   DAGIndex + 1
        MEND

DAGAddressTable
        DAGTab  MEMCDAG_VInit, VInit
        DAGTab  MEMCDAG_VStart, VStart
        DAGTab  MEMCDAG_VEnd, VEnd
        DAGTab  MEMCDAG_CInit, CInit
 ]


; **************** CAM manipulation utility routines ***********************************

; **************************************************************************************
;
;       BangCamUpdate - Update CAM entry and soft copy
;
; This part of the routine has to do more work on ARM600
;
; First look in the CamEntries table to find the logical address L this physical page is
; currently allocated to. Then check in the Level 2 page tables to see if page L is currently
; at page R2. If it is, then map page L to be inaccessible, otherwise leave page L alone.
; Then map logical page R3 to physical page R2.
;
; in:   r2 = physical page number
;       r3 = logical address (2nd copy if doubly mapped area)
;       r9 = offset from 1st to 2nd copy of doubly mapped area (either source or dest, but not both)
;       r11 = PPL + CB bits
;
; out:  r0, r1, r4, r6 corrupted
;       r2, r3, r5, r7-r12 preserved
;
; NB Use of stack is allowed in this routine

BangCamUpdate ROUT
        TST     r11, #DynAreaFlags_DoublyMapped ; if moving page to doubly mapped area
        SUBNE   r3, r3, r9                      ; then CAM soft copy holds ptr to 1st copy

        MOV     r1, #0
        LDR     r1, [r1, #CamEntriesPointer]
        ADD     r1, r1, r2, LSL #3              ; point at cam entry (logaddr, PPL)
        LDMIA   r1, {r0, r6}                    ; r0 = current logaddress, r6 = current PPL
        STMIA   r1, {r3, r11}                   ; store new address, PPL
        Push    "r0, r6"                        ; save old logical address, PPL
        MOV     r1, #PhysRamTable               ; go through phys RAM table
        MOV     r6, r2                          ; make copy of r2 (since that must be preserved)
10
        LDMIA   r1!, {r0, r4}                   ; load next address, size
        SUBS    r6, r6, r4, LSR #12             ; subtract off that many pages
        BCS     %BT10                           ; if more than that, go onto next bank

        ADD     r6, r6, r4, LSR #12             ; put back the ones which were too many
        ADD     r0, r0, r6, LSL #12             ; move on address by the number of pages left
        LDMFD   r13, {r6}                       ; reload old logical address

; now we have r6 = old logical address, r2 = physical page number, r0 = physical address

        TEQ     r6, r3                          ; TMD 19-Jan-94: if old logaddr = new logaddr, then
        BEQ     %FT20                           ; don't remove page from where it is, to avoid window
                                                ; where page is nowhere.
        LDR     r1, =L2PT
        ADD     r6, r1, r6, LSR #10             ; r6 -> L2PT entry for old log.addr
        MOV     r4, r6, LSR #12                 ; r4 = word offset into L2 for address r6
        LDR     r4, [r1, r4, LSL #2]            ; r4 = L2PT entry for L2PT entry for old log.addr
        TST     r4, #3                          ; if page not there
        BEQ     %FT20                           ; then no point in trying to remove it

        LDR     r4, [r6]                        ; r4 = L2PT entry for old log.addr
        MOV     r4, r4, LSR #12                 ; r4 = physical address for old log.addr
        TEQ     r4, r0, LSR #12                 ; if equal to physical address of page being moved
        BNE     %FT20                           ; if not there, then just put in new page

        Push    "r0, r3, r11, r14"              ; save phys.addr, new log.addr, new PPL, lr
        ADD     r3, sp, #4*4
        LDMIA   r3, {r3, r11}                   ; reload old logical address, old PPL
        MOV     r0, #0                          ; cause translation fault
        BL      BangL2PT                        ; map page out
        Pull    "r0, r3, r11, r14"
20
        ADD     sp, sp, #8                      ; junk old logical address, PPL
        B       BangCamAltEntry                 ; and branch into BangCam code

; **************************************************************************************
;
;       BangCam - Update CAM entry, but not soft copy
;
; This routine maps a physical page to a given logical address
; For ARM600, I assume that the physical page was previously not mapped
; anywhere else - on MEMC1 it would automatically unmap any logical
; address that the physical page was previously at, but on ARM600 it won't
;
; in:   r2 = physical page number
;       r3 = logical address (2nd copy if doubly mapped)
;       r9 = offset from 1st to 2nd copy of doubly mapped area (either source or dest, but not both)
;       r11 = PPL
;
; out:  r0, r1, r4, r6 corrupted
;       r2, r3, r5, r7-r12 preserved
;
; NB Can't use stack - there might not be one!
;
; NB Also - the physical page number MUST be in range.

; This routine must work in 32-bit mode

        GBLL    UsePPLCBBits
UsePPLCBBits    SETL    {TRUE}

;if we can assume no code above 64Mb (ie. 26bit code space), big optimise for StrongARM
        GBLL    AssumeNoCodeAbove64Mb
AssumeNoCodeAbove64Mb  SETL    {TRUE}

;if we just use sledgehammer approach anyway
        GBLL    AlwaysSledgehammer
AlwaysSledgehammer SETL {FALSE}

BangCam ROUT
        TST     r11, #DynAreaFlags_DoublyMapped ; if area doubly mapped
        SUBNE   r3, r3, r9              ; then move ptr to 1st copy

        MOV     r1, #PhysRamTable       ; go through phys RAM table
        MOV     r6, r2                  ; make copy of r2 (since that must be preserved)
10
        LDMIA   r1!, {r0, r4}           ; load next address, size
        SUBS    r6, r6, r4, LSR #12     ; subtract off that many pages
        BCS     %BT10                   ; if more than that, go onto next bank

        ADD     r6, r6, r4, LSR #12     ; put back the ones which were too many
        ADD     r0, r0, r6, LSL #12     ; move on address by the number of pages left
BangCamAltEntry
        ADR     r1, PPLTrans
        AND     r4, r11, #3             ; first use PPL bits
        LDR     r1, [r1, r4, LSL #2]    ; get PPL bits and SmallPage indicator
 [ UsePPLCBBits
        TST     r11, #DynAreaFlags_NotCacheable
        TSTEQ   r11, #PageFlags_TempUncacheableBits
        ORREQ   r1, r1, #L2_C           ; if cacheable (area bit CLEAR + temp count zero), then OR in C bit
        TST     r11, #DynAreaFlags_NotBufferable
        ORREQ   r1, r1, #L2_B           ; if bufferable (area bit CLEAR), then OR in B bit
 ]
        ORR     r0, r0, r1

        LDR     r1, =L2PT               ; point to level 2 page tables

;internal entry point for updating L2PT entry
;
; entry: r0 = new L2PT value, r1 -> L2PT, r3 = logical address (4k aligned), r11 = PPL
;
; exit: r0,r1,r4,r6 corrupted
;
BangL2PT                                        ; internal entry point used only by BangCamUpdate
  [ AlwaysSledgehammer
        B       BangL2PT_sledgehammer
  |
        TST     r11, #DynAreaFlags_DoublyMapped
        BNE     BangL2PT_sledgehammer           ;if doubly mapped, don't try to be clever
  ]
  [ ARM810support
    ;if we are mapping out a cacheable page on an ARM810, must clean+flush cache _before_
    ;remapping, since ARM810 relies on virtual addresses for writebacks
        ARM_read_ID r4
        AND     r4,r4,#&F000                    ;ARM ID nibble now in r4
        CMP     r0,#0                           ;EQ if map out
        TSTEQ   r11,#DynAreaFlags_NotCacheable  ;EQ if also cacheable
        CMPEQ   r4,#&8000                       ;EQ if also ARM 8
        BNE     BangL2PT_noARM810flush
    [ ARM810cleanflushbroken
        ARM8_cleanflush_IDC r6,r4
        MOV     r4,#&8000
    |
        ARM8_cleanflush_IDC r6
    ]
BangL2PT_noARM810flush
  ]
        STR     r0, [r1, r3, LSR #10]           ;update L2PT
  [ :LNOT: ARM810support
        ARM_read_ID r4
        AND     r4,r4,#&F000                    ;ARM ID nibble in r4
  ]
        CMP     r0,#0
        BEQ     BangL2PT_mapout                 ;the update is a map out => cache(s) may need clean/flush
;else update is a map in (and nothing should be there at the moment) => no cache worries
        CMP     r4,#&A000
        BEQ     BangL2PT_mapin_StrongARM
  [ ARM810support
        CMP     r4,#&8000
        ARM8_flush_TLBentry r3,EQ               ;flush TLB entry for this page, ARM 8
        ARM67_flush_TLBentry r3,NE              ;flush TLB entry for this page, ARM 6,7
        MOV     pc,lr
  |
;else assume ARM 6,7
        ARM67_flush_TLBentry r3                 ;flush TLB entry for this page
        MOV     pc,lr
  ]

BangL2PT_mapin_StrongARM
        ARMA_drain_WB                           ;in case L2PT entry itself is in a bufferable area
        ARMA_flush_DTLBentry r3                 ;flush data TLB entry for this page
  [ AssumeNoCodeAbove64Mb
        CMP     r3,#64*1024*1024                ;if logical address above 64Mb, assume no code (26 bit)
        MOVHS   pc,lr
  ]
        ARMA_flush_ITLB                         ;but if there is code, we must flush instruction TLB
        MOV     pc,lr

BangL2PT_mapout
        CMP     r4,#&A000
        BEQ     BangL2PT_mapout_StrongARM
  [ ARM810support
        CMP     r4,#&8000
        ARM8_flush_TLBentry r3,EQ                 ;flush TLB entry for this page, ARM 8
        MOVEQ   pc,lr                             ;ARM8 cache already flushed, if necessary
  ]
;else assume ARM 6,7
        TST     r11,#DynAreaFlags_NotCacheable
        ARM67_flush_cache EQ                    ;flush instruction/data cache if necessary
        ARM67_flush_TLBentry r3                 ;flush TLB entry for this page
        MOV     pc,lr

BangL2PT_mapout_StrongARM
        TST     r11,#DynAreaFlags_NotCacheable
        BNE     BangL2PT_mapin_StrongARM        ;if NotCacheable, no flush needed (ie. same as mapin)
;note that we are cleaning *after* remapping, so relying on StrongARM writebacks using physical address
        MOV     r4,r3
        ADD     r6,r3,#4*1024                   ;clean/flush data cache over 4k range of page

  [ SAcleanflushbroken        ; ARMA_cleanflush_DCentry instruction seems to be ineffective
01
        ARMA_clean_DCentry r4
        ARMA_flush_DCentry r4
        ADD     r4,r4,#32
        ARMA_clean_DCentry r4
        ARMA_flush_DCentry r4
        ADD     r4,r4,#32
        ARMA_clean_DCentry r4
        ARMA_flush_DCentry r4
        ADD     r4,r4,#32
        ARMA_clean_DCentry r4
        ARMA_flush_DCentry r4
        ADD     r4,r4,#32
        CMP     r4,r6
        BLO     %BT01
  |
01
        ARMA_cleanflush_DCentry r4
        ADD     r4,r4,#32
        ARMA_cleanflush_DCentry r4
        ADD     r4,r4,#32
        ARMA_cleanflush_DCentry r4
        ADD     r4,r4,#32
        ARMA_cleanflush_DCentry r4
        ADD     r4,r4,#32
        CMP     r4,r6
        BLO     %BT01
  ]

        ARMA_drain_WB
        ARMA_flush_DTLBentry r3                 ;flush data TLB entry for this page
  [ AssumeNoCodeAbove64Mb
        CMP     r3,#64*1024*1024                ;if logical address above 64Mb, assume no code (26 bit)
        MOVHS   pc,lr
  ]
        ARMA_flush_IC WithoutNOPs
        MOV     r0,r0                           ;NOPs to ensure 4 instructions before return, after IC flush
        MOV     r0,r0
        ARMA_flush_ITLB
        MOV     pc,lr

BangL2PT_sledgehammer
  [ ARM810support
        ;if necessary, clean+flush _before_ reamapping, since ARM810 writebacks use virtual addresses
        ARM_read_ID r4
        AND     r4,r4,#&F000
        CMP     r4,#&8000
        TSTEQ   r11,#DynAreaFlags_NotCacheable
        BNE     BangL2PT_sledge_noARM810flush
    [ ARM810cleanflushbroken
        ARM8_cleanflush_IDC r4,r6
    |
        ARM8_cleanflush_IDC r4
    ]
BangL2PT_sledge_noARM810flush
  ]
        BICS    r4, r3, #(3 :SHL: 10)   ; ensure going to be on word boundary
 [ {FALSE}      ; this breaks too many things at the moment
        BICEQ   r0, r0, #&30            ; if logical page zero, then make 1st 1K no user access
        ORREQ   r0, r0, #&10
 ]
 [ :LNOT: UsePPLCBBits
        LDR     r6, [r1, r4, LSR #10]   ; read current contents
        AND     r6, r6, #L2_C :OR: L2_B ; preserve old CB bits (set up by soft loader)
        ORR     r0, r0, r6              ; but OR in new address and PPL bits
 ]
        STR     r0, [r1, r4, LSR #10]!  ; update level 2 page table (and update pointer so we can use bank-to-bank offset
        TST     r11, #DynAreaFlags_DoublyMapped ; if area doubly mapped
        STRNE   r0, [r1, r9, LSR #10]   ; then store entry for 2nd copy as well
        ADDNE   r3, r3, r9              ; and point logical address back at 2nd copy

        ARM_read_ID r4
        AND     r4,r4,#&F000
        CMP     r4,#&A000
        BEQ     BangL2PT_sledgehammer_StrongARM
  [ ARM810support
        CMP     r4,#&8000
        ARM8_flush_TLB EQ
        MOVEQ   pc, lr      ;ARM8 cache already flushed if necessary
  ]
;else assume ARM 6,7
        TST     r11,#DynAreaFlags_NotCacheable
        ARM67_flush_cache EQ
        ARM67_flush_TLB
        MOV     pc, lr
BangL2PT_sledgehammer_StrongARM
        TST     r11,#DynAreaFlags_NotCacheable
        BNE     BangL2PT_sledgehammer_StrongARM_NotC

        MOV     r4,#ARMA_Cleaner_flipflop
        LDR     r0,[r4]                   ;last cleaner address
        EOR     r0,r0,#16*1024            ;flip it (r0 -> cleaner address to use)
        STR     r0,[r4]

        ARMA_clean_DC r0,r4,r6            ;effectively, clean/flush DC fully with respect to non-interrupt stuff
        ARMA_drain_WB
        ARMA_flush_IC WithoutNOPs         ;do *not* flush DC - there may be some stuff from interrupt routines
        MOV     r0,r0                     ;NOPs to ensure 4 instructions before return, after IC flush
        MOV     r0,r0
        ARMA_flush_TLBs
        MOV     pc,lr

BangL2PT_sledgehammer_StrongARM_NotC    ;no flush necessary if NotCacheable
        ARMA_drain_WB
        ARMA_flush_TLBs
        MOV     pc,lr


PPLTrans
        &       (AP_Full * L2_APMult) + L2_SmallPage      ; R any W any
        &       (AP_Read * L2_APMult) + L2_SmallPage      ; R any W sup
        &       (AP_None * L2_APMult) + L2_SmallPage      ; R sup W sup
        &       (AP_None * L2_APMult) + L2_SmallPage      ; R sup W sup

PageSizes
        &       4*1024                  ; 0 is 4K
        &       8*1024                  ; 4 is 8K
        &       16*1024                 ; 8 is 16
        &       32*1024                 ; C is 32

PageShifts
        =       12, 13, 0, 14           ; 1 2 3 4
        =       0,  0,  0, 15           ; 5 6 7 8

; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; SWI OS_UpdateMEMC: Read/write MEMC1 control register

SSETMEMC ROUT

        AND     r10, r0, r1
        MOV     r12, #0
        TEQP    pc, #SVC_mode+I_bit+F_bit
        LDR     r0, [r12, #MEMC_CR_SoftCopy] ; return old value
        BIC     r11, r0, r1
        ORR     r11, r11, R10
        BIC     r11, r11, #&FF000000
        BIC     r11, r11, #&00F00000
        ORR     r11, r11, #MEMCADR
        STR     r11, [r12, #MEMC_CR_SoftCopy]

; We now have to mimic the relevant bits of the MEMC1 control register
;
; bits 0,1 => unused
; bits 2,3 => page size, irrelevant since always 4K
; bits 4,5 => low ROM access time (mostly irrelevant but set it up anyway)
; bits 6,7 => hi  ROM access time (definitely irrelevant but set it up anyway)
; bits 8,9 => DRAM refresh control
; bit 10   => Video/cursor DMA enable
; bit 11   => Sound DMA enable
; bit 12   => OS mode

 [ MEMC_Type = "IOMD"
        Push    "r10"
        MOV     r12, #IOMD_Base
        TST     r11, #1 :SHL: 10   ; see if video DMA wants to be enabled
        LDRB    r11, [r12, #IOMD_VIDCR]
        AND     r11, r11, #(&7F :AND: :NOT: IOMD_VIDCR_Enable)  ; knock out bit 7 and video DMA enable bit
        ORRNE   r11, r11, #IOMD_VIDCR_Enable
        MOV     r10, #0
        LDRB    r10, [r10, #LCD_Active]
        TST     r10, #&80
        ORRNE   r11, r11, #IOMD_VIDCR_Dup                       ;Set bit 7 if we're on an LCD dual-panel display
        Pull    "r10"
        STRB    r11, [r12, #IOMD_VIDCR]
 |

; Just change the page size to the real MEMC page size and leave other bits alone

        ORR     r11, r11, #&0C          ; force 32K page size
        STR     r11, [r11]              ; any old data will do
 ]

        TEQP    pc, #SVC_mode+I_bit
        ExitSWIHandler

; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
;
;       ClearPhysRAM - Routine to clear "all" memory
;
; While this routine is running, keyboard IRQs may happen. For this reason
; it avoids LogRAM 0..31 (where hardware IRQ vector is) and PhysRAM
; 0..31 where the IRQ workspace is.
;
; We also have to avoid the L2PT (inc L1PT) and the PhysRamTable.
; The latter is also used to tell us which areas of memory we should clear.

; We don't have to worry about trampling on the ROM image as it's
; already been excluded from PhysRamTable.

; This routine must work in 32-bit mode.

; in:   r7 = memory speed
;       r8 = page size
;       r9 = MEMC control register
;       r13 = total RAM size
;
; None of the above are actually used by this routine
;
; out:  r7-r9, r13 preserved
;

     GBLL ClearPhysRAMspeedup
ClearPhysRAMspeedup SETL {TRUE}

ClearPhysRAM ROUT

    [ :LNOT: RunningOnEmul

;StrongARM - We will make the logical representation of physical space for RAM temporarily bufferable
;            (on any ARM). This is small boost for ARM 6,7,8 but a big speed benefit for StrongARM (which
;            won't burst write in non bufferable areas).

        LDR     r0,  =L1PT
        LDR     r12, =PhysRamTable
        ADD     r4, r12, #PhysRamTableEnd-PhysRamTable  ; r4 -> end of table
02
        LDMIA   r12!, {r10, r11}                        ; load next address, size
        SUB     r11,r11,#&100000                        ; 1 Mb will be done on first L1PT update
        ORR     r10, r10, #PhysSpace                    ; point to logical representation of physical space
        ADD     r1,r0,r10,LSR #(20-2)                   ; L1PT address for same
;MJS bug fix (since 3.70) for memory fragments not necessarily 1Mb aligned (eg 2 Mb Kryten)
        BIC     r1,r1,#3
;
04
        LDR     r2,[r1]
        ORR     r2,r2,#4                                ; bufferable bit
        STR     r2,[r1],#4
        SUBS    r11,r11,#&100000                        ; another 1 Mb done
        BPL     %BT04
        TEQ     r12, r4                                 ; have we done all areas?
        BNE     %BT02

;now let us do the clear
  [ ClearPhysRAMspeedup
        MOV     r0,#48                                  ;we can preserve r7-r9,r13 at logical address 48..63
        STMIA   r0,{r7-r9,r13}
        MOV     r7,  #0
        MOV     r8,  #0
        MOV     r9,  #0
        MOV     r13, #0
  ]
        MOV     r0, #0
        MOV     r1, #0
        MOV     r2, #0
        MOV     r3, #0
        LDR     r12, =PhysRamTable                      ; point to 5 lots of (physaddr,size)
        ADR     r6, RamSkipTable
        ADD     r4, r12, #PhysRamTableEnd-PhysRamTable  ; r4 -> end of table
10
        LDR     r5, [r6], #4                            ; load first skip offset

        LDMIA   r12!, {r10, r11}                        ; load next address, size

        ORR     r10, r10, #PhysSpace                    ; point to logical representation of physical space
        ADD     r11, r11, r10                           ; r11 -> end address of this area
15
        ADD     r5, r5, r10                             ; r5 -> skip address if any
20
        TEQ     r10, r11                                ; test for end of this area?
        BEQ     %FT30
        TEQ     r10, r5                                 ; test for the start of a skipped region
  [ ClearPhysRAMspeedup
        STMNEIA r10!, {r0-r3,r7-r9,r13}
  |
        STMNEIA r10!, {r0-r3}
  ]
        BNE     %BT20

        LDR     r5, [r6], #4                            ; load skip amount
        CMP     r5, #0                                  ; if negative, then it's an offset from start of skipped bit
        LDRLT   r5, [r10, r5]                           ; to address of word holding skip amount
        ADD     r10, r10, r5                            ; and skip it
        LDR     r5, [r6], #4                            ; load next skip offset (NB relative to end of last skip)
        B       %BT15

30
        TEQ     r12, r4                                 ; have we done all areas?
        BNE     %BT10

  [ ClearPhysRAMspeedup
        MOV     r12,#48
        LDMIA   r12,{r7-r9,r13}                         ;restore
        MOV     r12,#32                                 ;clear our speed up workspace
        STMIA   r12!,{r0-r3}
        STMIA   r12!,{r0-r3}
  ]

;StrongARM - now let us remove bufferable status of logical representation of physical space (perhaps we could
;            leave it? not sure at the mo.)

        LDR     r0,  =L1PT
        LDR     r12, =PhysRamTable
        ADD     r4, r12, #PhysRamTableEnd-PhysRamTable  ; r4 -> end of table
32
        LDMIA   r12!, {r10, r11}                        ; load next address, size
        SUB     r11,r11,#&100000                        ; 1 Mb will be done on first L1PT update
        ORR     r10, r10, #PhysSpace                    ; point to logical representation of physical space
        ADD     r1,r0,r10,LSR #(20-2)                   ; L1PT address for same
;MJS bug fix (since 3.70) for memory fragments not necessarily 1Mb aligned (eg 2 Mb Kryten)
        BIC     r1,r1,#3
;
34
        LDR     r2,[r1]
        BIC     r2,r2,#4                                ; bufferable bit
        STR     r2,[r1],#4
        SUBS    r11,r11,#&100000                        ; another 1 Mb done
        BPL     %BT34
        TEQ     r12, r4                                 ; have we done all areas?
        BNE     %BT32

    ] ;RunningOnEmul

        LDR     r0, =OsbyteVars + :INDEX: LastBREAK

        MOV     r1, #&80
        STRB    r1, [r0]                                ; flag the fact that RAM cleared

        ARM_number r0
        SUB     r0,r0,#6
        ADRL    r1,ARM_default_MMU_CR_table
        LDR     r1,[r1,r0,LSL #2]
        MOV     r0, #0
        STR     r1, [r0, #MMUControlSoftCopy]           ; set up MMU soft copy

        MOV     pc, lr

        LTORG

        GBLA    lastaddr
lastaddr SETA   0
        GBLA    lastregion
lastregion SETA 0

        MACRO
        MakeSkipTable $region, $addr, $size
 [ ($region)<>lastregion
        &       -1
lastaddr SETA   0
 ]
        &       ($addr)-lastaddr, $size
lastaddr SETA   ($addr)+($size)
lastregion SETA $region
        MEND

        MACRO
        EndSkipTables
        WHILE   lastregion < (PhysRamTableEnd-PhysRamTable)/8
        &       -1
lastregion SETA   lastregion +1
        WEND
        MEND

; Note (TMD 04-Aug-93): Special bodge put in here to allow variable size skip for L2PT.
; If skip size field is negative, then it's an offset from the start of this skipped bit to a word holding
; the size of the skip. This relies on the L2PTSize being in page zero, which is at a lower physical address than
; the L2 itself. Also assumes that there are no more skips in the 1st DRAM chunk after the L2PT, since the offset
; to the next skip is relative to the end of the previous one, which isn't known at assembly time!

; Tim says "Yuk, yuk, yuk!!"

RamSkipTable
 [ :LNOT: NewStyle_Screen
        MakeSkipTable   0, 0, 32                        ; in video RAM skip 1st 32 bytes (kbd workspace)
 ]
 [ ClearPhysRAMspeedup ; allow some workspace to speed up ClearPhysRAM  Mike says whoosh
        MakeSkipTable   1, DRAMOffset_PageZero + 0, 64  ; skip 1st 32 bytes of LogRAM, so IRQs work!
                                                        ; additional 32 bytes for workspace
 |
        MakeSkipTable   1, DRAMOffset_PageZero + 0, 32  ; skip 1st 32 bytes of LogRAM, so IRQs work!
 ]
        MakeSkipTable   1, DRAMOffset_PageZero + SkippedTables, SkippedTablesEnd-SkippedTables
        MakeSkipTable   1, DRAMOffset_L2PT, DRAMOffset_PageZero + L2PTSize - DRAMOffset_L2PT
        EndSkipTables

        ASSERT  DRAMOffset_PageZero + L2PTSize < DRAMOffset_L2PT


; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
;
;       InitMEMC - Initialise memory controller
;
; in:   If ResetIndirection assembly flag set, then
;         r1 = 0 if reset, 1 if break
;       else
;         r1 undefined
;       endif

InitMEMC ROUT
 [ MEMC_Type = "IOMD"

; Note: On IOMD, all accesses go to ROM until the first write cycle.

        MOV     r12, #IOMD_Base

 [ MorrisSupport
; Perform a dummy write to IOMD (some harmless register) to get it out of ROM force mode.
; Reads from IOMD will return garbage before this has happened. If we're actually running out
; of 32-bit wide ROMs on MORRIS, a write will already have happened, to get ROMCR0 from
; 16 to 32-bit wide mode, but we can't yet determine for sure (by reading it back), so do it
; anyway.

        STRB    r12, [r12, #IOMD_DMAREQ]              ; writes to DMAREQ are ignored

        LDRB    r0, [r12, #IOMD_ID0]
        CMP     r0, #&E7
        LDRB    r0, [r12, #IOMD_ID1]
        CMPEQ   r0, #&D4
       ;MOVEQ   r3, #xxxx
        BEQ     MedusaInit                            ; Medusa, else assume Morris
;
; MORRIS contains IOMD equivalant circuitry. Due to lack of VRAM, presence of 16/32 bit support
; and a different ROM speed register, we program it slightly differently.
;

  [ RO371Timings
        ;IOMD_ID1 still in r0 - check for 7500FE
        CMP     r0, #&aa         ; EQ if 7500FE   => assume 64 MHz bus, EDO RAM
                                 ; NE assume 7500 => assume 32 MHz bus
;7500FE
;set memory to 32MHz for early boot (avoid probs with POST and with power-on key detection)
        MOVEQ   r0, #&12                 ; 5-3 cycle ROM access, Half speed (ie. 10-6)
        STREQB  r0, [r12, #IOMD_ROMCR0]
        STREQB  r0, [r12, #IOMD_ROMCR1]
        MOVEQ   r0, #&70                 ; EDO RAM selected, 32 bit wide, conservative RAS,CAS timing
        STREQB  r0, [r12, #IOMD_DRAMWID] ; DRAM control reg. (more than just width on FE)
        MOVEQ   r0, #&04                 ; clock dividers: /1 for CPU, /2 for memory, /2 for I/O
        STREQB  r0, [r12, #IOMD_CLKCTL]
;7500
        MOVNE   r0, #&32                 ; 5-3 cycle ROM access
        STRNEB  r0, [r12, #IOMD_ROMCR0]
        STRNEB  r0, [r12, #IOMD_ROMCR1]
        MOVNE   r0, #&07                 ; clock dividers: /1 for I/O, /1 for CPU, /1 for memory
        STRNEB  r0, [r12, #IOMD_CLKCTL]

  | ;else if not RO371Timings
;
; PSwindell wants all prescalers set to divide by 1
;
        MOV     r0, #IOMD_CLKCTL_CpuclkNormal + IOMD_CLKCTL_MemclkNormal + IOMD_CLKCTL_IOclkNormal
        STRB    r0, [r12, #IOMD_CLKCTL] ; initialise all prescalers to div1

;
; Set ROM speed, take care to preserve 16-bit mode bit...
;
; According to RJKing on 6/5/94, Kryten will use burst mode roms: use 93nS burst, 156nS initial.
;
; We assume that the extension ROMs are the same access time and width as the main OS ROMS.
;
        LDRB    r0, [r12, #IOMD_ROMCR0]
        AND     r0, r0, #&40            ; clear all but 16-bit mode bit, giving us the slowest ROMs possible
 [ :LNOT: AutoSpeedROMS
  [ NormalSpeedROMS
   ;Normal code
        ORR     r0, r0, #IOMD_ROMCR_Normal + IOMD_ROMCR_156 + IOMD_ROMCR_Burst93
                                                                ; initialise ROM speed to 156.25nS, 93.75nS burst
        ; the fast EPROMS used for Kryten testing should be able to cope even though they aren't
        ; burst devices
  |
   ;Slow ROM access for PSwindells test EPROMS. Paul requested 156nS (or slower), burst off.
        ORR     r0, r0, #IOMD_ROMCR_Normal + IOMD_ROMCR_187 + IOMD_ROMCR_BurstOff

        ! 0, "*** WARNING *** Slow ROM version ment for PSwindell"
  ]
 ]

        STRB    r0, [r12, #IOMD_ROMCR0]
        STRB    r0, [r12, #IOMD_ROMCR1]         ; and do the same for extension ROMs (just in case)

  ] ;RO371Timings conditional

;
; MORRIS doesn't support VRAM. Kryten has same DRAM speed as Medusa
;
        MOV     r0, #IOMD_VREFCR_REF_16                         ; select 16�s refresh
        STRB    r0, [r12, #IOMD_VREFCR]

        MOV     r0, #IOMD_IOTCR_Network_TypeA :OR: IOMD_IOTCR_Combo_TypeB :OR: IOMD_IOTCR_Sound_TypeB :OR: IOMD_IOTCR_Sound_Word
        STRB    r0, [r12, #IOMD_IOTCR]

        MOV     r0, #0                          ; Podule manager wants TypeA setting by default for all podules
        STRB    r0, [r12, #IOMD_ECTCR]

 [ Select16BitSound
; All MORRIS based machines have 16bit 'Japanese' format sound DAC's
        MOV     r0, #2_10
        STRB    r0, [r12, #IOMD_VIDMUX]
 ]
        B       CommonInit

MedusaInit
 ]


  [ RO371Timings
        MOV     r0, #&12    ; 5-3 cycle ROM access
  |

 [ Simulator
        MOV     r0, #IOMD_ROMCR_62 + IOMD_ROMCR_BurstOff        ; make faster for simulation (no point in burst mode, it's
                                                                ; no faster than the fastest initial speed)
 |
  [ RISCPCBurstMode
   [ 1 = 1
        ReadCop r0, CR_ID
        BIC     r0, r0, #&F     ;ignore 4 bit revision field
        LDR     r2, =&41007100                                  ;Test for early 710's
        CMP     r0, r2                                          ;
        MOVEQ   r0, #IOMD_ROMCR_156 + IOMD_ROMCR_BurstOff       ;cos they can't work in burst mode!
        MOVNE   r0, #IOMD_ROMCR_156 + IOMD_ROMCR_Burst93        ;610's 710A's and beyond can
        ! 0, "*** WARNING *** Burst mode enabled on RISC PC iff processor can cope"
   |
        MOV     r0, #IOMD_ROMCR_156 + IOMD_ROMCR_Burst93
        ! 0, "*** WARNING *** Burst mode enabled on RISC PC"
   ]
  |
        MOV     r0, #IOMD_ROMCR_156 + IOMD_ROMCR_BurstOff       ; initialise ROM speed to 156.25ns (changed from 187ns 21-Jan-94)
  ]
 ]

  ] ;RO371Timings conditional

        STRB    r0, [r12, #IOMD_ROMCR0]
        STRB    r0, [r12, #IOMD_ROMCR1]         ; and do the same for extension ROMs (just in case)

        MOV     r0, #IOMD_VREFCR_VRAM_256Kx64 :OR: IOMD_VREFCR_REF_16   ; select 16�s refresh, assume 2 banks of VRAM
        STRB    r0, [r12, #IOMD_VREFCR]

        MOV     r0, #IOMD_IOTCR_Network_TypeA :OR: IOMD_IOTCR_Combo_TypeB :OR: IOMD_IOTCR_Sound_TypeB :OR: IOMD_IOTCR_Sound_Word
        STRB    r0, [r12, #IOMD_IOTCR]

        MOV     r0, #0                          ; Podule manager wants TypeA setting by default for all podules
        STRB    r0, [r12, #IOMD_ECTCR]

 |
        LDR     R0, ResetMemC_Value
        STR     R0, [R0]     ; set ROM access times, refresh on flyback, no DMA
 ]
CommonInit
; On breaks (ie software resets) we have to turn the MMU off.
; This is slightly tricky if we've been soft-loaded!

;mjs - must now (if not before) be true, since 26 bit configuration does not exist in Architecture 4 (ARM 810/StrongARM)
        ASSERT ResetIndirected

 [ ResetIndirected
        TEQ     r1, #0                  ; r1 = 0 if reset, 1 if break
        BEQ     %FT03                   ; [it's a reset]

        SetMode SVC32_mode, r0          ; select 32-bit mode (we know we're in 32-bit config)
        B       %FT05
03
 |

; We check for breaks by testing if we're in 32-bit configuration:
;  - on reset we'll be put into 26-bit config, MMU off, 26-bit mode
;  - on breaks we'll be left in 32-bit config, MMU on,  26-bit mode

; In both cases we want to end up in 32-bit config with MMU off, in 32-bit mode

        SetMode SVC32_mode, r1, r0      ; try to select SVC32 mode
        mrs     AL, r2, CPSR_all        ; read back PSR
        AND     r2, r2, #&1F            ; extract mode bits from PSR we tried to modify
        TEQ     r2, #SVC32_mode         ; and see if we made it into SVC32
        BEQ     %FT05                   ; [we made it so must be a Break]
 ]

; It's a reset, so select 32-bit config, MMU off

 [ LateAborts
        MOV     r2, #MMUC_P :OR: MMUC_D :OR: MMUC_L ; select 32-bit config, MMU off, late aborts
 |
        MOV     r2, #MMUC_P :OR: MMUC_D ; select 32-bit config, MMU off
 ]
        SetCop  r2, CR_Control
        SetMode SVC32_mode, r1, r0      ; and re-select 32-bit mode (this time it'll work)
        AND     r0, r0, #&1F            ; check original mode
        TEQ     r0, #SVC26_mode         ; if we were in a 26-bit mode,
        BICEQ   lr, lr, #&FC000003      ; then knock off 26-bit style PSR bits from link register
                                        ; don't knock them off otherwise, since we may be soft-loaded above 64M
        MOV     pc, lr                  ; and exit

; It's a Break

; The MMU is on and we want it off: whether we're executing out of ROM or RAM, we
; have to jump to the physical location of our image, which means paging it in at its
; own physical address.

; On MEMC1 systems it's possible that the L1/L2 logical address is the same as the image's physical
; address, which causes a headache, so we'd best use the physical mapping of the page tables (this
; can't clash as IOMD only goes up to 2000 0000 and our physical mapping is above that).

05
        MOV     r0, #0
        LDR     r0, [r0, #DRAMPhysAddrA]        ; get address of 1st DRAM bank
        LDR     r1, =PhysSpace + DRAMOffset_L1PT ; offset to start of L1
        ADD     r0, r0, r1                      ; r0 -> L1 in physical mapped logical space

        LDR     r1, [r0, #ROM :SHR: (20-2)]     ; load L1 entry for 1st Mbyte of ROM
        MOV     r1, r1, LSR #20                 ; knock off other bits
        LDR     r2, =(AP_None * L1_APMult) + L1_Section
                                                ; (svc-only access) + ~ucb + section mapped
        ORR     r2, r2, r1, LSL #20             ; merge in address
        STR     r2, [r0, r1, LSL #2]!           ; store in L1PT for 1st Mbyte
        ADD     r2, r2, #1 :SHL: 20             ; move on to 2nd Mbyte
        STR     r2, [r0, #4]                    ; and store in next entry

        ARM_flush_cacheandTLB r0

        MOV     r0, r1, LSL #20
        SUB     r0, r0, #ROM                    ; form RAM-ROM offset
        ADD     pc, pc, r0                      ; jump to RAM code (when we get onto IOMD, we'll have to be in 32-bit mode)
        NOP                                     ; this instruction will be skipped

; we're now in RAM, so it's safe to turn the MMU off, but leave us in 32-bit config (and 32-bit mode)

        BIC     lr, lr, #&FC000003              ; knock out PSR bits from return address
                                                ; (we know we were in 32-bit config, 26-bit mode on entry)
        ADD     lr, lr, r0                      ; and add on offset - NB this may now be above 64MB (on IOMD)

;mjs - the MMU off values are ok for all ARMs; some will ignore P,D,L bits
 [ LateAborts
        MOV     r0, #MMUC_P :OR: MMUC_D :OR: MMUC_L ; turn MMU off, but leave us in 32-bit config, late aborts
 |
        MOV     r0, #MMUC_P :OR: MMUC_D         ; turn MMU off, but leave us in 32-bit config
 ]
        ARM_write_control r0

        MOV     pc, lr                          ; return to caller, but in physical address space

        LTORG
; -> MemSize

; (non-destructive) algorithm to determine MEMC RAM configuration
;
; Dave Flynn and Alasdair Thomas
; 17-March-87
;
; Spooling checkered by NRaine and SSwales !
; 8MByte check bodged in by APT
;
; NOTE: Routines MemSize and TimeCPU are called by the power-on test software,
; so their specifications MUST not change.
;
; Set MEMC for 32-k page then analyse signature of possible
; external RAM configurations...
; The configurations are:
;
; Ram Size    Page Size    Configuration    (Phys RAM) Signature
;--------------------------------------------------------------------
;  16MByte      32k        4*32*1Mx1         A13,A20,A21,A22,A23,A23.5 distinct
;  16MByte      32k        16*8*256kx4       A13,A20,A21,A22,A23,A23.5 distinct
;
;  12MByte      32k        3*32*1Mx1         A13,A20,A21,A22,A23 OK, A23.5 fail
;  12MByte      32k        12*8*256kx4       A13,A20,A21,A22,A23 OK, A23.5 fail
;
;   8MByte      32k        2*32*1Mx1         A13,A20,A21,A22 distinct, A23 fail
;   8MByte      32k         8*8*256kx4       A13,A20,A21,A22 distinct, A23 fail
;
;   4Mbyte      32k          32*1Mx1         A13,A21,A20 distinct, A22,A23 fail
;   4Mbyte      32k         4*8*256kx4       A13,A21,A20 distinct, A22,A23 fail
;
;   2Mbyte      32k    expandable 2*8*256kx4 A13,A20 distinct, A21 fails
;   2Mbyte ???  16k      fixed 2*8*256kx4    A13,A21 distinct, A20 fails
;
;   1Mbyte       8k          32*256kx1       A13,A20 fail, A19,A18,A12 distinct
;   1Mbyte       8k           8*256kx1       A13,A20 fail, A19,A18,A12 distinct
;   1Mbyte       8k          4*8*64kx4       A13,A20 fail, A19,A18,A12 distinct
;
; 512Kbyte       8k    expandable 2*8*64kx4  A13,A20,A19 fail, A12,A18 distinct
; 512Kbyte       4k      fixed 2*8*64kx4     A13,A20,A12 fail, A19,A18 distinct
;
; 256Kbyte       4K           8*64kx4        A13,A20,A12,A18 fail, A21,A19 ok
; 256Kbyte       4K          32*64kx1        A13,A20,A12,A18 fail, A21,A19 ok
;

Z_Flag     * &40000000

; MemSize routine... enter with 32K pagesize set
; R0 returns page size
; R1 returns memory size
; R2 returns value set in MEMC
; Can corrupt R3-R14

; Note that on a soft-loaded system, the 1st word of the image may be
; temporarily overwritten, but this is just the reset branch so it's OK.

; MMU is always off at this point, so we must use the physical address of PhysRAM
; Also we are entered in 32-bit config, 32-bit mode,
; but we exit in 32-bit config, 26-bit mode

 [ MorrisSupport
funnypatterns
        &       &66CC9933   ; 0110 1100 1001 0011
        &       &CC993366   ; 1100 1001 0011 0110
 ]

MemSize ROUT
        MOV     r13, lr                                 ;save in a register, cos we've got no stack

 [ MEMC_Type = "IOMD"

        MOV     r12, #IOMD_Base

 [ MorrisSupport
;
        LDRB    r0, [r12, #IOMD_ID0]
        CMP     r0, #&E7
        LDRB    r0, [r12, #IOMD_ID1]
        CMPEQ   r0, #&D4
       ;MOVEQ   r3, #xxxx
        BEQ     MemSizeIOMD                             ; IOMD (Medusa), else assume Morris
;
; MemSize for Morris
;
  [ RO371Timings
        MOV     r11, #&70     ;all 4 banks assumed 32 bit - EDO and timing bits set in case 7500FE (don't care bits otherwise)
  |
        MOV     r11, #IOMD_DRAMWID_DRAM_32bit * &0F     ;set all 4 banks to be 32bit initially
  ]
        MOV     r14, #IOMD_Base
        STRB    r11, [r14, #IOMD_DRAMWID]
 [ 1 = 0
        MOV     R10, #DRAM0PhysRam
        MOV     R11, #(2*OneMByte)
        STMIA   R10!, {R10, R11}
        B       AllocateTheRAM
        ! 0, "*** WARNING *** Bodged RAM sizing version ment for PSwindell"
 ]
        MOV     r10, #0                                 ;indicate no RAM found yet
        MOV     r9, #IOMD_DRAMWID_DRAM_16bit            ;bit to OR into DRAMWID to set 16bit
        MOV     r0, #DRAM0PhysRam
;
; r0    DRAM address
; r9    IOMD_DRAMWID_DRAM_16bit for current DRAM bank
; r11   current IOMD_DRAMWID register contents
;
ExamineDRAMBank                                         ;examine first/next DRAM bank
;
        LDMIA   r0, {r1, r2}                            ;Preserve the two locations that we widdle on

        ADR     r3, funnypatterns                       ;We write different values to two locations
        LDMIA   r3, {r3, r4}                            ; incase bus capacitance holds our value
        STMIA   r0, {r3, r4}
        LDMIA   r0, {r5, r6}                            ;Reread test locations
        EORS    r5, r5, r3                              ;Both locations should read correctly
        EOR     r6, r6, r4                              ; if memory is 32bits wide
       ;TEQ     r5, #0
        TEQEQ   r6, #0
        BEQ     %FT1010                                   ;32bit wide memory

        TST     r5, #&00FF                              ;If the bottom 16bits of each location
        TSTEQ   r5, #&FF00                              ; are correct, the memory is 16bits wide
        TSTEQ   r6, #&00FF
        TSTEQ   r6, #&FF00
        BNE     NoRamInBank                             ;No memory in this bank

        ORR     r11, r11, r9                            ;Bank is 16bits wide
1010
        STMIA   r0, {r1, r2}                            ;Restore the two locations we widdled on
                                                        ;Must do BEFORE poking the DRAMWID register
        MOV     r14, #IOMD_Base                         ;
        STRB    r11, [r14, #IOMD_DRAMWID]               ;
;
; minimum ram test
;
        ADD     r1, r0, #A18
        BL      DistinctAddresses
        BNE     NoRamInBank                             ;Less than 512KBytes, so ignore this bank

        MOV     r6, #0                                  ;Fragment address
        MOV     r7, #0                                  ;Fragment address
        MOV     r8, #A19                                ; now go through address lines A19-A25
1015
        ADD     r1, r0, r8                              ; see if this address line is unique
        BL      DistinctAddresses
        BNE     %FT1020                                   ; if we've failed then r8 is true size, so exit
        MOV     r8, r8, LSL #1                          ; else shift up to next
        TEQ     r8, #A26                                ; only test up to A25
        BNE     %BT1015
        BEQ     %FT1035                           ;Bank fully occupied, DON'T test for higher fragments
1020
;
; Found some DRAM, at address r0, size r8.
; There may be one or two higher address lines connected, so scan upto A25 looking for
; extra DRAM chunks.
;
        MOV     r1, r8
1025
        TEQ     r1, #A25
        BEQ     %FT1035                           ;No higher active address lines found ie one lump of DRAM
        ADD     r1, r0, r1,LSL #1
        BL      DistinctAddresses
        SUB     r1, r1, r0                              ;Recover bit value
        BNE     %BT1025
;
; Got a 2nd fragment, at address r1 (also of size r8)
;
        MOV     r6, r1
1030
        TEQ     r1, #A25
        BEQ     %FT1035                           ;No higher active address lines found ie two lumps of DRAM
        ADD     r1, r0, r1,LSL #1
        BL      DistinctAddresses
        SUB     r1, r1, r0                              ;Recover bit value
        BNE     %BT1030
;
; Got another active address line (ie total four fragments)
;
        MOV     r7, r1
;
1035
;
; Found 1, 2 or 4 lumps of DRAM
;
  [ 1 = 1
;
; New code which relies on reflection to merge fragments into larger blocks
;
        TEQ     r10, #0                         ;Need some ram to dump block/fragment data
        MOVEQ   r10, r0                         ;

        TEQ     r6, #0                          ;Do we have one fragment?
        MOVEQ   r1, r0                          ;EQ: First and only fragment in this bank
        MOVEQ   r2, r8                          ;EQ:   so save actual address and size
        ADDNE   r1, r0, r6                      ;NE: Use reflection to make 1st fragment appear
        SUBNE   r1, r1, r8                      ;NE:   to start just below 2nd fragment
        MOVNE   r2, r8, LSL #1                  ;NE:   treat as one double size fragment

        STMIA   r10!, {r1, r2}                  ; {address, size}

        TEQ     r7, #0                          ;Do 3rd and 4th fragments exist
        ADDNE   r1, r1, r7                      ;NE: yes, merge 3 and 4 together
        STMNEIA r10!, {r1, r2}                  ;  {address, size}
  |
;
; Old code which enters each fragment as found
;
        TEQ     r10, #0                         ;Need some ram to dump block/fragment data
        MOVEQ   r10, r0                         ;

        STMIA   r10!, {r0, r8}                  ;First fragment

        TEQ     r6, #0
        ADDNE   r1, r0, r6
        STMNEIA r10!, {r1, r8}                  ;Second fragment

        TEQ     r7, #0
        ADDNE   r1, r0, r7
        STMNEIA r10!, {r1, r8}                  ;Third
        ADDNE   r1, r1, r6
        STMNEIA r10!, {r1, r8}                  ;and fourth fragments
  ]
  [ Simulator
        TubeString r2, r3, r4, "Address  Size"
        TubeDumpNoStack r0, r2, r3, r4
        TubeDumpNoStack r8, r2, r3, r4
        TubeNewlNoStack r3, r4

        TEQ     R7, #0
        BEQ     skip1
        TubeString r2, r3, r4, "Fragment (1): "
        TubeDumpNoStack r7, r2, r3, r4
        TubeNewlNoStack r3, r4
skip1
        TEQ     R6, #0
        BEQ     skip2
        TubeString r2, r3, r4, "Fragment (2): "
        TubeDumpNoStack r6, r2, r3, r4
        TubeNewlNoStack r3, r4
skip2
  ]



NoRamInBank
        ADD     r0, r0, #DRAM1PhysRam-DRAM0PhysRam      ; move onto next bank
        MOV     r9, r9, LSL #1                          ; shunt up position in DRAMWID
        CMP     r9, #&0010                              ; if more banks to do
        BLT     ExamineDRAMBank                         ; then loop

        TEQ     r10, #0                                 ; have we got any DRAM?
;NoDRAMPanic
        BEQ     NoDRAMPanic                             ; we'd better stop now

;
; Having dumped our block/fragment data to the first bit of DRAM that we found.
; We now go back through it, allocating some for the screen, and some for 'PageZero'.
; The data has been dumped into RAM that we now allocate as screen ram, so it needs
; to be copied into 'PageZero'.
;
; r10 -> byte after last fragment(address, size) pair
;
AllocateTheRAM
        AND     r7, r10, #DRAMBaseAddressMask           ;point to first fragment data
        MOV     r2, #0                                  ;MOS workspace not yet allocated

        LDMIA   r7!, {r4, r5}                           ;first fragment address & size
        CMP     r10, r7                                 ;is there only 1 fragment
 [ 1 = 1
;
; New - requested by Tim Dobson
;
        MOVHI   r1, r5                                  ;if >1 fragments, take first fragment for the screen
        SUBLS   r1, r5, #OneMByte                       ;if this is the only fragment, take all but 1MByte of it
        MOV     r0, r4                                  ;screen starts at beginning of fragment
  [ 1 = 1
;
; New - also requested by Tim Dobson
;
        CMP     r1, #SixteenMByte                       ;Limit our claim to 16Mbyte
        MOVGT   r1, #SixteenMByte
  ]
 |
        MOVHI   r1, r5                                  ;if >1 fragments, consider taking first fragment for the screen
        MOVLS   r1, r5, LSR #1                          ;if this is the only fragment, try for half of it
        MOV     r0, r4                                  ;screen starts at beginning of fragment

        CMP     r1, #OneMByte                           ;Limit our claim to 1Mbyte
        MOVGT   r1, #OneMByte
 ]
        ADD     r4, r4, r1                              ;adjust fragment for amount claimed by screen
        SUBS    r5, r5, r1
        BEQ     %FT1065                                 ;EQ whole fragment used
                                                        ;NE part of fragment remains to be allocated
1060
        TEQ     r2, #0                                  ;allocate MOS workspace if not already done so
        LDREQ   r2, =DRAMOffset_PageZero + DRAMPhysAddrA
        ADDEQ   r2, r2, r4
        MOVEQ   r3, r2

        STMIA   r3!, {r4, r5}                           ;write fragment data to correct place in PageZero
1065
        CMP     r10, r7                                 ;any more fragment (address, size) pairs?
        LDMHIIA r7!, {r4, r5}                           ;HI, yes so load next fragment pair (size
        BHI     %BT1060                                 ;HI, mustbe non-zero) and loop back

        STMDB   r2!, {r0, r1}                           ;write VideoPhysAddr, VideoSize
;
; r2 -> start of PhysRamTable
; r3 -> byte after last used entry in PhysRamTable
;
        MOV     r7, r2
       ;MOV     r2, r2                                  ; r2 -> start of PhysRamTable
        MOV     r10, r3
       ;MOV     r3, r3                                  ; r3 -> byte after last used entry in PhysRamTable


;
; r0 screen start address
; r1 screen size
; r2 -> start of PhysRamTable
; r3 -> byte after last used entry in PhysRamTable

        MOV     r4, #0                                  ;Morris cannot support VRAM, so...
        STR     r4, [r2, #VRAMWidth-PhysRamTable]       ; store width of VRAM (0,1 or 2)
        STR     r4, [r2, #VRAMSize-PhysRamTable]        ; and size of VRAM (fixes DForth's bug of 6/3/95)

        MOV     r14, #IOMD_Base

        MOV     r4, #IOMD_VIDCR_DRAMMode :OR: &10       ; if no VRAM, then turn on DRAM mode, and set increment to &10
        STRB    r4, [r14, #IOMD_VIDCR]
        STR     r0, [r14, #IOMD_VIDCUR]                 ; set up VIDCUR to start of video RAM
        STR     r0, [r14, #IOMD_VIDSTART]               ; do same for VIDSTART
        STR     r0, [r14, #IOMD_VIDINIT]                ; and for VIDINIT
                                                        ; so we don't get a mess when we turn video DMA on later

        LDR     r4, =46500000                           ; if no VRAM, then 46.5E6 bytes/sec bandwidth
        STR     r4, [r2, #VideoBandwidth-PhysRamTable]  ; store video bandwidth

        ADD     r4, r0, r1                              ;form VIDEND (will be on mult. of SAM)
        SUB     r4, r4, #4096
        STR     r4, [r14, #IOMD_VIDEND]                 ;this instruction put in on 6/3/95 after inspection of RPC code
;
;
;
        MOV     r7, r2
        MOV     r10, r3

        B       MemSizeTotalRAM

MemSizeIOMD
 ]

; Right, let's find out where our memory is

; First, we check out the VRAM. This is so that if there's no VRAM, we know to take out the 1st Mbyte of DRAM
; that we find.

; Don't bother checking for more than 2M of VRAM, because we don't know what the 1/2 SAM length is for larger sizes

; StrongARM - aha! but we still have no nice MMU, nor even fast core clock, and this memory sizing type
; stuff is going to be very slow for large memory. So turn on I cache (allowed with MMU off), and fast
; core clock now - this is then ok until MMU etc comes on (near CritStart)

        ARM_read_ID r2
        AND     r2,r2,#&F000
        CMP     r2,#&A000
        BNE     MemSizeIOMD_notSA
        ARM_read_control r2
        ORR     r2,r2,#&1000     ;I cache bit is bit 12
        ARM_write_control r2
        ARMA_fastcoreclock
  [ ARM810fastclock
        B       MemSizeIOMD_not810
  ]
MemSizeIOMD_notSA
  [ ARM810fastclock
    ;fast clock for ARM 810 now
        ARM_read_ID r2
        AND     r2,r2,#&F000
        CMP     r2,#&8000
        BNE     MemSizeIOMD_not810
    [ ARM810usePLL
        ARM8_pll_fclk r2
    |
        ARM8_refclk_fclk r2
    ]
MemSizeIOMD_not810
  ]
        MOV     r2, #IOMD_VREFCR_VRAM_256Kx64 :OR: IOMD_VREFCR_REF_16 ; assume 2 banks of VRAM by default
        STRB    r2, [r12, #IOMD_VREFCR]

        MOV     r0, #VideoPhysRam                       ; point at VRAM
        ADD     r1, r0, #A2                             ; test A2
        BL      DistinctAddresses
        MOVEQ   r6, #2                                  ; we've got 2M of VRAM
        BEQ     %FT08

        MOV     r2, #IOMD_VREFCR_VRAM_256Kx32 :OR: IOMD_VREFCR_REF_16
        STRB    r2, [r12, #IOMD_VREFCR]
        ADD     r1, r0, #A2                             ; check for any VRAM at all
        BL      DistinctAddresses
        MOVEQ   r6, #1                                  ; we've got 1M of VRAM
        MOVNE   r6, #0                                  ; no VRAM
08
 [ IgnoreVRAM
        MOV     r6, #0                                  ; pretend there's no VRAM
 ]
        MOVS    r12, r6                                 ; if no VRAM, then video RAM has yet to be found
        MOVNE   r12, r0                                 ; else point at VRAM

; Now, we have to find a bank of DRAM, so we've got somewhere to store our results!

        MOV     r11, #IOMD_DRAMCR_DRAM_Large * &55      ; set all banks to be large initially
        MOV     r14, #IOMD_Base
        STRB    r11, [r14, #IOMD_DRAMCR]

        MOV     r10, #0                                 ; indicate no RAM found yet
        MOV     r9, #IOMD_DRAMCR_DRAM_Small             ; bit to OR into DRAMCR
        MOV     r0, #DRAM0PhysRam
10
        ADD     r1, r0, #A10                            ; this should be OK for both configurations
        BL      DistinctAddresses
        BNE     %FT25                                   ; [no RAM in this bank at all]

        ADD     r1, r0, #A11                            ; test for 256K DRAM
        BL      DistinctAddresses
        ORRNE   r11, r11, r9                            ; it is, so select small multiplexing
        MOVNE   r14, #IOMD_Base
        STRNEB  r11, [r14, #IOMD_DRAMCR]                ; store new value of DRAMCR, so we can use memory immediately
        MOVNE   r8, #1024*1024                          ; must be 1Mbyte at this address
        BNE     %FT20

; it's bigger than 256K words, so test address lines A21-A25 in sequence
; we assume that the size of each bank is a power of 2

        MOV     r8, #A21                                ; now go through address lines A21-A25
15
        ADD     r1, r0, r8                              ; see if this address line is unique
        BL      DistinctAddresses
        BNE     %FT20                                   ; if we've failed then r8 is true size, so exit
        MOV     r8, r8, LSL #1                          ; else shift up to next
        TEQ     r8, #A26                                ; only test up to A25
        BNE     %BT15
20
        TEQ     r12, #0                                 ; have we found any video RAM yet?
        BNE     %FT22                                   ; yes, so no worries

        MOV     r12, r0                                 ; no, so use this as video RAM
        ADD     r0, r0, #1024*1024                      ; advance RAM pointer by 1M
        SUBS    r8, r8, #1024*1024                      ; take 1 Mbyte off the size
        BEQ     %FT25                                   ; if that's all there was, then go look for the next bank
22
        TEQ     r10, #0                                 ; is this the first lot we've found?
        LDREQ   r10, =DRAMOffset_PageZero + DRAMPhysAddrA
        ADDEQ   r10, r10, r0                            ; then point r10 at DRAM part of PhysRamTable
        MOVEQ   r7, r10                                 ; points to beginning of table
        STMIA   r10!, {r0, r8}                          ; store address, size
25
        AND     r0, r0, #DRAMBaseAddressMask            ; move back to start of DRAM bank (in case we stole some video DRAM)
        ADD     r0, r0, #DRAM1PhysRam-DRAM0PhysRam      ; move onto next bank
        MOV     r9, r9, LSL #2                          ; shunt up position in DRAMCR
        CMP     r9, #&100                               ; if more banks to do
        BCC     %BT10                                   ; then loop

        TEQ     r10, #0                                 ; have we got any DRAM?
NoDRAMPanic
        BEQ     NoDRAMPanic                             ; we'd better stop now

; Now go back and put the VRAM information in, and also program VIDCR and VIDCUR

        STR     r6, [r7, #VRAMWidth-DRAMPhysAddrA]      ; store width of VRAM (0,1 or 2)
        CMP     r6, #1
        MOVCC   r2, #IOMD_VIDCR_DRAMMode :OR: &10       ; if no VRAM, then turn on DRAM mode, and set increment to &10
        MOVEQ   r2, #SAMLength/2/256                    ; if 1M VRAM, then use VRAM mode, and set increment for 1/2 SAM
        MOVHI   r2, #SAMLength/2/256*2                  ; if 2M VRAM, then use VRAM mode, and set increment for 2*1/2 SAM
        LDRCC   r3, =46500000                           ; if no VRAM, then 46.5E6 bytes/sec bandwidth
        LDREQ   r3, =80000000                           ; if 1M VRAM, then 80E6   ---------""--------
        LDRHI   r3, =160000000                          ; if 2M VRAM, then 160E6  ---------""--------
        MOV     r14, #IOMD_Base
        STRB    r2, [r14, #IOMD_VIDCR]
        STR     r12, [r14, #IOMD_VIDCUR]                ; set up VIDCUR to start of video RAM
        STR     r12, [r14, #IOMD_VIDSTART]              ; do same for VIDSTART
        STR     r12, [r14, #IOMD_VIDINIT]               ; and for VIDINIT
                                                        ; so we don't get a mess when we turn video DMA on later
        STR     r3, [r7, #VideoBandwidth-DRAMPhysAddrA] ; store video bandwidth

        ADD     r3, r12, #1024*1024-4096                ; add on a bit to form VIDEND (will be on mult. of SAM)
        STR     r3, [r14, #IOMD_VIDEND]                 ; yes I know it's a bit of a bodge

        MOVS    r14, r6, LSL #20                        ; convert amount of VRAM to bytes
        STR     r14, [r7, #VRAMSize-DRAMPhysAddrA]      ; and store

        MOVEQ   r14, #1024*1024                         ; if no VRAM, then video RAM size is 1M
        STMDB   r7!, {r12, r14}                         ; store video information

        MOV     r2, r7                                  ; r2 -> start of PhysRamTable
        MOV     r3, r10                                 ; r3 -> byte after last used entry in PhysRamTable
MemSizeTotalRAM
; Now we have to work out the total RAM size

  [ Simulator
        TubeString r4, r5, r6, "Address  Size"
  ]
        MOV     r1, #0
26
        LDMIA   r7!, {r4, r5}                           ; get address, size
        ADD     r1, r1, r5                              ; add on size
  [ Simulator
        TubeDumpNoStack r4, r6, r8, r9
        TubeDumpNoStack r5, r6, r8, r9
        TubeNewlNoStack r6, r8
  ]
        TEQ     r7, r10
        BNE     %BT26

        MOV     r0, #Page4K                             ; something to put in MEMC CR soft copy
                                                        ; (it's probably irrelevant)
 |

; MEMC based memory sizing

        MOV     r0, #PhysRamPhys

        ADD     r1, r0, #A13
        BL      DistinctAddresses
        BNE     %05
        ADD     r1, r0, #A21
        BL      DistinctAddresses
        MOVNE   r0, #Page32K
        MOVNE   r1, #2048*1024
        BNE     MemSizeDone

        ADD     r1, r0, #4*1024*1024
        BL      DistinctAddresses
        MOVNE   r0, #Page32K
        MOVNE   r1, #4*1024*1024
        BNE     MemSizeDone

        ADD     r1, r0, #8*1024*1024
        BL      DistinctAddresses
        MOVNE   r0, #Page32K
        MOVNE   r1, #8*1024*1024
        BNE     MemSizeDone

        ADD     r1, r0, #12*1024*1024
        BL      DistinctAddresses
        MOV     r0, #Page32K
        MOVNE   r1, #12*1024*1024
        MOVEQ   r1, #16*1024*1024
        B       MemSizeDone

05
        ADD     r1, r0, #A20
        BL      DistinctAddresses
        BNE     %10
        MOV     r0, #Page16K
        MOV     r1, #2048*1024
        B       MemSizeDone

10
        ADD     r1, r0, #A19
        BL      DistinctAddresses
        BEQ     %15
        MOV     r0, #Page8K
        MOV     r1, #512*1024
        B       MemSizeDone

15
        ADD     r1, r0, #A18
        BL      DistinctAddresses
        BEQ     %20
        MOV     r0, #Page4K
        MOV     r1, #256*1024
        B       MemSizeDone

20
        ADD     r1, r0, #A12
        BL      DistinctAddresses
        BEQ     %25
        MOV     r0, #Page4K
        MOV     r1, #512*1024
        B       MemSizeDone

25
        MOV     r0, #Page8K
        MOV     r1, #1024*1024

MemSizeDone
        LDR     r3, =PhysRamPhys                ; Base of "video RAM"
        MOV     r4, #480*1024                   ; 480K of "video RAM"
        ADD     r5, r3, r4                      ; Base of "1st bank of DRAM"
        SUB     r6, r1, r4                      ; Size of "1st bank of DRAM"

        LDR     r2, =DRAMOffset_PageZero + PhysRamTable
        ADD     r2, r5, r2                      ; physical address of table of physical RAM addresses/sizes
        STMIA   r2, {r3-r6}

        ADD     r3, r2, #4*4                    ; indicate just 2 areas

        LDR     r4, =38400000                   ; indicate 38.4E6 bytes/sec video bandwidth
        STR     r4, [r2, #VideoBandwidth-PhysRamTable]
 ]

        ADRL    r4, ROM                         ; use PC-relative addressing to get to start of image
        TEQ     r4, #PhysROM                    ; then see if it's the normal ROM address
        BEQ     %FT55                           ; if so, then we're OK

        SUB     r1, r1, #OSROM_ImageSize*1024   ; if we've been soft-loaded, then we have ?M less than we thought
        ADD     r5, r4, #OSROM_ImageSize*1024   ; point r5 at end of ROM

; now go through the declared RAM areas looking for our image (we assume it's not split across two banks)

        MOV     r6, r2                          ; make copy of start pointer
30
        TEQ     r6, r3
        BEQ     %FT55
        LDMIA   r6!, {r7,r8}                    ; load address, size
        ADD     r8, r8, r7                      ; r8 -> end
        SUBS    r9, r8, r5
        SUBCSS  r10, r4, r7                     ; test for r4>=r7 and r5<=r8
        BCC     %BT30

; we've found where the ROM is

        BNE     %FT40                           ; if NE from last comparison then r4<>r7 ie start(ROM)<>start(region)
                                                ; else start(ROM)=start(region), so modify region to be end(ROM)..end(region)
        TEQ     r9, #0                          ; test if region completely removed
        STMNEDB r6, {r5, r9}                    ; if region not completely removed, store updated (address, size)
        BNE     %FT55

; region completely removed, so shunt down remaining ones

35
        TEQ     r6, r3                          ; if not got to end
        LDMNEIA r6, {r7, r8}                    ; then load next pair
        STMNEDB r6, {r7, r8}                    ; and store in last
        ADD     r6, r6, #8                      ; advance pointer
        BNE     %BT35
        SUB     r3, r3, #8                      ; move back end pointer
        B       %FT55

; there is a lump at the start

40
        STMDB   r6, {r7, r10}                   ; reduce start bit
        TEQ     r9, #0                          ; is there an end bit?
        BEQ     %FT55                           ; [no, so we've finished]

; now shunt up all remaining areas, to make room for new one

        MOV     r8, r3
45
        TEQ     r8, r6
        LDMNEDB r8, {r7, r10}
        STMNEIA r8, {r7, r10}
        SUBNE   r8, r8, #8
        BNE     %BT45
        STMIA   r8, {r5, r9}                    ; store end lump
        ADD     r3, r3, #8                      ; advance end pointer

; now store zeros to fill out table

55
        ADD     r5, r2, #PhysRamTableEnd-PhysRamTable
        MOV     r6, #0
        MOV     r7, #0
57
        CMP     r3, r5
        STMCCIA r3!, {r6, r7}
        BCC     %BT57

; Now set up L1 + L2
; - first work out how big static L2 needs to be
; - then zero L1 + L2 (L1 is actually inside L2)

        MOV     r3, r1, LSR #22                 ; r3 = memsize / 4M
        TEQ     r1, r3, LSL #22                 ; if any remainder
        ADDNE   r3, r3, #1                      ; then round up (r3 is now how many pages of L2 needed for free pool)
        MOV     r3, r3, LSL #12                 ; convert to bytes
        ADD     r3, r3, #FixedAreasL2Size       ; add on size of L2 for other fixed areas
        STR     r3, [r2, #L2PTSize-PhysRamTable] ; save away for future reference

        LDR     r2, [r2, #DRAMPhysAddrA-PhysRamTable]   ; get address of 1st DRAM bank
        LDR     r5, =DRAMOffset_L2PT
        ADD     r2, r2, r5                              ; make r2 -> L2PT
        MOV     r5, #0                          ; value to initialise L1 and L2 to (translation faults)
        MOV     r6, r5
        MOV     r7, r5
        MOV     r8, r5
        MOV     r9, r5
        MOV     r10, r5
        MOV     r11, r5
        MOV     r12, r5
 [ :LNOT: Simulator                             ; don't bother zeroing L1/2 for Mark
        ADD     r2, r2, r3                      ; start at end and work back
60
        STMDB   r2!, {r5-r12}
        SUBS    r3, r3, #8*4
        BNE     %BT60
 ]

; r2 ends up pointing at L2

        ADD     r3, r2, #DRAMOffset_L1PT-DRAMOffset_L2PT        ; r3 -> L1Phys

; now initialise all the L1 for the area covered by the static L2, as if it were all page mapped
; - the section mapped stuff will be overwritten when we go thru MemInitTable shortly

        ORR     r5, r2, #L1_Page + L1_U         ; take phys base of L2, and or in other bits to form an L1 entry
        LDR     r6, =L2PTSize+DRAMOffset_PageZero-DRAMOffset_L2PT
        LDR     r10, [r2, r6]                   ; r10 = size of L2 (used after this loop, too)
        ADD     r6, r5, r10                     ; r6 = value in r5 when we've finished
        MOV     r7, r3                          ; r7 -> where we're storing L1 entries
61
        STR     r5, [r7], #4                    ; store L1 entry
        ADD     r5, r5, #1024                   ; advance L2 pointer
        TEQ     r5, r6
        BNE     %BT61

; now go through memory initialisation table, setting up entries

        ADR     r5, MemInitTable
65
        LDMIA   r5!, {r6-r8}                    ; load size, logaddr, indicator
        TEQ     r6, #0                          ; if size field is zero
        BEQ     %FT90                           ; then we've finished going through table

        TST     r8, #1                          ; if bit 0 of indicator is set, then it's page mapped
        BNE     %FT75

        TST     r8, #2                          ; is it abort?
        BNE     %FT68                           ; [no]

; it's a section abort (r8=0)

66
        STR     r8, [r3, r7, LSR #20-2]         ; store zero in L1 table
        ADD     r7, r7, #&00100000              ; increment logical address by 1M
        SUBS    r6, r6, #&00100000              ; and decrement size by 1M
        BNE     %BT66                           ; loop until done
        B       %BT65


68
; it's section mapped

        TST     r8, #ROMbit                     ; is it a ROM image offset
        ADDNE   r8, r8, r4                      ; if so, then add in image offset
        BICNE   r8, r8, #ROMbit                 ; and knock out the dodgy bit

        TST     r8, #Vidbit                     ; is it a video memory offset
        LDRNE   r9, =VideoPhysAddr+DRAMOffset_PageZero-DRAMOffset_L2PT
        LDRNE   r9, [r2, r9]                    ; get physical address of video RAM
        ADDNE   r8, r8, r9                      ; add on offset
        BICNE   r8, r8, #Vidbit                 ; and knock out the dodgy bit
70
        STR     r8, [r3, r7, LSR #20-2]         ; store entry in L1 table (assumes bits 18, 19 are clear!)
        ADD     r7, r7, #&00100000              ; increment logical address by 1M
        ADD     r8, r8, #&00100000              ; and physical address by 1M
        SUBS    r6, r6, #&00100000              ; and decrement size by 1M
        BNE     %BT70                           ; if we've not finished then loop
        B       %BT65                           ; else go back to main loop

; explicit L2 setup

75
        CMP     r6, #-1                         ; if size <> -1
        BNE     %FT80                           ; then normal

; size = -1 => this is the chunk with the soft CAM map in it,
; so we must work out a suitable size (and store it in SoftCamMapSize)
; we also have to work out the correct offset in the DRAM bank, since this is
; after variable size L2PT

        MOV     r6, r1, LSR #24-3               ; number of pages for cam map
        CMP     r1, r6, LSL #24-3               ; if bits dropped off
        ADDNE   r6, r6, #1                      ; then need one more page
        MOV     r6, r6, LSL #12
        LDR     r9, =DRAMOffset_PageZero-DRAMOffset_L2PT+SoftCamMapSize
        STR     r6, [r2, r9]                    ; store size used
        ADD     r6, r6, #UndStackSize           ; chunk also includes undstack
        ADD     r9, r10, #DRAMOffset_L2PT       ; undstack/cammap starts at offset L2PT + L2PTSize
        ORR     r8, r8, r9                      ; OR in other misc bits from table
80
        LDR     r9, =DRAMOffset_PageZero-DRAMOffset_L2PT+DRAMPhysAddrA
                                                ; offset from L2 to word containing physical address of 1st DRAM bank
        LDR     r9, [r2, r9]                    ; r9 = address of 1st DRAM bank
        ADD     r8, r8, r9                      ; convert offset to address
        EOR     r8, r8, #L2_SmallPage :EOR: 1   ; make bottom 2 bits correct for L2
        ADD     r9, r2, r7, LSR #10             ; r9 -> L2 for this page
85
        STR     r8, [r9], #4                    ; store entry in L2
        ADD     r8, r8, #4*1024                 ; advance physical page address
        SUBS    r6, r6, #4*1024                 ; one less page to do
        BNE     %BT85
        B       %BT65

; L1 is now set up correctly, and L2 has the correct CB bits, but no accessible pages
; Put in the L2 entries for the logical area we are going to access the L2 (and L1) at
; r10 still holds L2PT size

90
        ADD     r5, r2, #(L2PT :SHR: 10)        ; r5 -> start of L2PT for L2 logical address
        LDR     r6, =(AP_None * L2_APMult) + L2_SmallPage ; r6 = other gubbins to put in L2 entries (not C or B)
        ORR     r6, r6, r2                      ; OR in physical address of L2
        MOV     r7, r10                         ; amount to put in (L2PTSize)
95
        STR     r6, [r5], #4                    ; store entry
        ADD     r6, r6, #4096                   ; move onto next page
        SUBS    r7, r7, #4096                   ; one less page to do
        BNE     %BT95                           ; loop until done

; But before we turn on, we have to temporarily make the addresses we are currently executing out of
; into a section mapped area straight through, so we don't crash before we can jump up into ROM area

        ASSERT ((CritStart :EOR: CritEnd) :AND: &FFF00000)=0    ; make sure start and end are in the same MB chunk

        ADR     r5, CritStart                   ; point at critical region start
        MOV     r5, r5, LSR #20                 ; divide by 1MB
        LDR     r6, [r3, r5, LSL #2]            ; get current L1 entry to put back later
        MOV     r7, r5, LSL #20                 ; r7 = physical address of base of section
        ORR     r7, r7, #(AP_None * L1_APMult)
        ORR     r7, r7, #L1_Section
        STR     r7, [r3, r5, LSL #2]            ; store replacement entry in L1 (not U,C or B)

        ARM_MMU_transbase r3                    ; set up MMU pointer to L1
        ADD     r3, r3, #PhysSpace              ; when we put L1 entry back later, we need to use the copy in PhysSpace area

        MOV     r7, #1
        ARM_MMU_domain r7                       ; only use domain 0

        ARM_flush_cacheandTLB r7                ; flush cache + TLB just in case

        ARM_number r2                           ;should be 6,7,8 or &A
 [ ARM810support
   ;ARM810 has already had fast clock selected (MemSizeIOMD) - so has StrongARM, therefore code below removed
 |
        CMP     r2,#&A
        ARMA_fastcoreclock EQ                   ;otherwise StrongARM is going to have a limp wrist
 ]
        SUB     r2,r2,#6                        ; r2 := 0..4 for ARM 6,7,8,(9),&A
        ADRL    r7,ARM_default_MMU_CR_table
        LDR     r7,[r7,r2,LSL #2]               ;get appropriate default value for MMU control reg

CritStart
        ARM_write_control r7

; now we can jump into the ROM space (if we're not already there)

        RSB     r4, r4, #ROM                    ; make offset from current address to ROM
        ADD     pc, pc, r4                      ; jump up into ROM area
        NOP                                     ; this instruction will be skipped

; now put back the L1 entry we messed up

        STR     r6, [r3, r5, LSL #2]
CritEnd                                         ; 2 words after we go up into ROM
        ARM_flush_TLB r2                        ; flush TLB (no need to flush cache, as there's nothing in it)

        SetMode UND32_mode, r7
        LDR     r13_undef, =UNDSTK              ; set up undefined mode stack pointer

        SetMode SVC26_mode, r7                  ; switch into 26-bit mode
        ADD     r13, r13, r4                    ; adjust return address

        LDR     r2, ResetMemC_Value
        BIC     r2, r2, #&C
        ORR     r2, r2, r0
 [ MEMC_Type <> "IOMD"
        STR     r2, [r2]                        ; set MEMC to right state
 ]
        MOV     r0, #4*1024                     ; r0 = true page size (now split off
                                                ; from MEMC control register)
 [ MEMC_Type = "IOMD"
  [ Simulator
        TubeString r4, r5, r6, "Got through all of MemSize, and we're still here!"
        TubeChar r4, r5, "MOV r5, #4", NoStack
  ]
 ]
        MOV     pc, r13

; Memory map initialisation table
; Consists of word triplets (size,logaddr,type)
; where size    is size in bytes of area (size=0 terminates list)
;       logaddr is the base logical address of area
;       type is one of 5 formats:
;       a) a standard section-mapped L1 entry (physical address gets incremented for each MB in size)
;       b) like a section-mapped L1 entry, but with bit 12 set (address field holds base offset from "ROM" image)
;       c) like a section-mapped L1 entry, but with bit 13 set (address field holds base offset from start of video RAM)
;       d) like a page-mapped L1 entry, which indicates a page-mapped area to fill in
;          the L2 for. In this case the other bits are as follows:-
;               Bits 3,2   - CB respectively
;               Bits (11,10),(9,8),(7,6),(5,4) - access privileges
;               Bits 31-12 - offset in 1st DRAM bank to start of these pages (in units of pages)
;          If the size field contains -1, then it is the SoftCAMMap, and the appropriate size should be worked out,
;           and stored in SoftCamMapSize. Also, since the size of the L2 is variable the offset into the DRAM bank
;           of the SoftCamMap is unknown at assembly time, so the offset bits in table are zero.
;       e) zero - indicating that this area should abort (only necessary for section mapped bits in 48M-64M, cause they
;           have no level 2, therefore section must abort) - used for VIDC1 emulation area.
;       Note in case d), the L1 is not actually touched (it should have already been set up to point to the right L2)
;

ROMbit  *       1 :SHL: 12
Vidbit  *       1 :SHL: 13
PSS     *       PhysSpaceSize           :SHR: 20  ; Number of megabytes in physical space (used in table generation)

        MACRO
        MemInitSection  $size, $U, $C, $B, $logaddr, $ap, $physaddr
        &       ($size)*&00100000
        &       $logaddr
        &       (($U)*L1_U):OR:(($C)*L1_C):OR:(($B)*L1_B):OR:(($ap)*L1_APMult):OR:$physaddr:OR:L1_Section
        MEND

        MACRO
        MemInitROMs     $size, $U, $C, $B, $logaddr, $ap
        &       ($size)*&00100000
        &       $logaddr
        &       (($U)*L1_U):OR:(($C)*L1_C):OR:(($B)*L1_B):OR:(($ap)*L1_APMult):OR:ROMbit:OR:L1_Section
        MEND

        MACRO
        MemInitVideo    $size, $U, $C, $B, $logaddr, $ap
        &       ($size)*&00100000
        &       $logaddr
        &       (($U)*L1_U):OR:(($C)*L1_C):OR:(($B)*L1_B):OR:(($ap)*L1_APMult):OR:Vidbit:OR:L1_Section
        MEND

        MACRO
        MemInitAbort    $size, $logaddr
        &       ($size)*&00100000
        &       $logaddr
        &       0
        MEND

        MACRO
        MemInitPagesL2  $size, $C, $B, $logaddr, $ap, $dramoffset
        &       ($size)
        &       $logaddr
        &       (($C)*L1_C):OR:(($B)*L1_B):OR:(($ap)*L2_APMult):OR:$dramoffset:OR:L1_Page
        MEND

MemInitTable    ;       sz, U, C, B, logaddr,   (ap,     (physaddr))
        MemInitSection   4, 1, 0, 0, &03000000, AP_None, &03000000      ; I/O

        MemInitAbort     1,          &03400000                          ; VIDC1 emulation zone
        MemInitSection   1, 1, 0, 0, &03500000, AP_None, &03400000      ; VIDC20 space
        MemInitSection   2, 1, 0, 0, &03600000, AP_None, &03600000      ; LAGs


        [ OSROM_ImageSize = 4096
        MemInitROMs      4, 1, 1, 1, &03800000, AP_Read                 ; ROM
        MemInitROMs      4, 1, 1, 1, &03C00000, AP_Read                 ; ROM
        |
        MemInitROMs      2, 1, 1, 1, &03800000, AP_Read                 ; ROM
        MemInitROMs      2, 1, 1, 1, &03A00000, AP_Read                 ; ROM
        MemInitROMs      2, 1, 1, 1, &03C00000, AP_Read                 ; ROM
        MemInitROMs      2, 1, 1, 1, &03E00000, AP_Read                 ; ROM
        ]

 [ :LNOT: NewStyle_Screen
        MemInitVideo    16, 1, 0, 1, &05000000, AP_None                 ; real screen (section-mapped)
 ]
        MemInitSection PSS, 1, 0, 0, PhysSpace, AP_None, &00000000      ; map of physical space

        MemInitROMs      2, 1, 1, 1, &FF800000, AP_Read                 ; ROM
        MemInitROMs      2, 1, 1, 1, &FFA00000, AP_Read                 ; ROM
        MemInitROMs      2, 1, 1, 1, &FFC00000, AP_Read                 ; ROM
        MemInitROMs      2, 1, 1, 1, &FFE00000, AP_Read                 ; ROM

; Now explicit initialisation of L2 for static pages

        MemInitPagesL2  &8000, 0, 0, CursorChunkAddress, AP_Read, DRAMOffset_CursorChunk  ;but see L1L2PTenhancements
        MemInitPagesL2  &8000, 1, 1, &00000000, AP_Full, DRAMOffset_PageZero
        MemInitPagesL2  &8000, 1, 1, SysHeapChunkAddress, AP_Full, DRAMOffset_SystemHeap

  [ StrongARM
;StrongARM requires 2*16k of private logical space (used for absolutely nothing else), which is
;readable and cacheable, for data cache cleaning purposes. We want to map the space to
;start of ROM bank 1 (physical target), so that IOMD timings can be poked for maximum read speed
;(only requirement of physical space is that it is readable without h/w abort). Here, though,
;we have to conform to format for MemInitPagesL2, so we just point to some convenient RAM,
;and fix things up later (see L1L2PTenhancements)
;
        MemInitPagesL2  &8000, 1, 1, ARMA_Cleaners_address, AP_Read, DRAMOffset_PageZero
  ]

        MemInitPagesL2     -1, 1, 1, UndStackSoftCamChunk, AP_Full, 0   ; variable offset and size

        &       0, 0, 0                                                 ; terminate table

        LTORG

; DistinctAddresses routine...
; r0,r1 are the addresses to check
; uses r2-5
; writes interleaved patterns (to prevent dynamic storage...)
; checks writing every bit low and high...
; return Z-flag set if distinct

; This routine must work in 32-bit mode

DistinctAddresses ROUT
        LDR     r2, [r0] ; preserve
        LDR     r3, [r1]
        LDR     r4, Pattern
        STR     r4, [r0] ; mark first
        MOV     r5, r4, ROR #16
        STR     r5, [r1] ; mark second
        LDR     r5, [r0]
        CMP     r5, r4 ; check first
        BNE     %10    ; exit with Z clear
        LDR     r5, [r1] ; check second
        CMP     r5, r4, ROR #16 ; clear Z if not same
        BNE     %10
; now check inverse bit writes
        STR     r4, [r1] ; mark second
        MOV     r5, r4, ROR #16
        STR     r5, [r0] ; mark first
        LDR     r5, [r1]
        CMP     r5, r4 ; check second
        BNE     %10   ; exit with Z clear
        LDR     r5, [r0] ; check first
        CMP     r5, r4, ROR #16 ; clear Z if not same
10      STR     r3, [r1] ; restore
        STR     r2, [r0]
        MOV     pc, lr                  ; Z flag is already set up, and other flags don't matter

Pattern
        &       &AAFF5500 ; shiftable bit check pattern

; init state with masked out page size

ResetMemC_Value
        & &E010C :OR: MEMCADR       ; slugged ROMs + flyback refresh only + 32K page

; Constants
;
A0      *       1 :SHL: 00
A1      *       1 :SHL: 01
A2      *       1 :SHL: 02
A3      *       1 :SHL: 03
A4      *       1 :SHL: 04
A5      *       1 :SHL: 05
A6      *       1 :SHL: 06
A7      *       1 :SHL: 07
A8      *       1 :SHL: 08
A9      *       1 :SHL: 09
A10     *       1 :SHL: 10
A11     *       1 :SHL: 11
A12     *       1 :SHL: 12
A13     *       1 :SHL: 13
A14     *       1 :SHL: 14
A15     *       1 :SHL: 15
A16     *       1 :SHL: 16
A17     *       1 :SHL: 17
A18     *       1 :SHL: 18
A19     *       1 :SHL: 19
A20     *       1 :SHL: 20
A21     *       1 :SHL: 21
A22     *       1 :SHL: 22
A23     *       1 :SHL: 23
A24     *       1 :SHL: 24
A25     *       1 :SHL: 25
A26     *       1 :SHL: 26
A27     *       1 :SHL: 27
A28     *       1 :SHL: 28
A29     *       1 :SHL: 29
A30     *       1 :SHL: 30
A31     *       1 :SHL: 31

Page32K * &C ; in MEMC control reg patterns...
Page16K * &8
Page8K  * &4
Page4K  * &0

; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; In    r0=0 -> Coming from the Test routine - no fancy business!
;       r1-r6 trashable
;       [[[ r9 = Current MEMC CR (true MEMC value, not fudged to look like 4K page size) ]]]

; Out   [[[ r9 MEMC value with slowest ROM speed, correct pagesize ]]]
;       r7 processor speed in kHz, bit 16 => can do STM to I/O (ie MEMC1a, MEMC2), bit 17 => MEMC2

; This routine must work in 32-bit mode, and should not use any memory!!!!

  [ RO371Timings

TimeCPU ROUT         ;does not actually measure anything - assumes timings (and EDO for 7500FE) according to IOMD id

        MOV     r2, #IOC                ; Address of the IO controller  (IOMD)

        LDRB    r7, [r2, #IOMD_ID0]     ; Is
        CMP     r7, #&E7                ; It
        LDRB    r7, [r2, #IOMD_ID1]     ; A
        CMPEQ   r7, #&D4                ; Risc PC ?
        BEQ     timecpuriscpc
        CMP     r7, #&AA                ; assume 7500 or 7500FE
        BEQ     timecpu7500FE
;7500 then
        MOV     r7, #&32                      ; 5-3 cycle ROM access
        STRB    r7, [r2, #IOMD_ROMCR0]
        STRB    r7, [r2, #IOMD_ROMCR1]
        MOV     r7, #&07                      ; clock dividers: /1 for I/O, /1 for CPU, /1 for memory
        STRB    r7, [r2, #IOMD_CLKCTL]
        LDR     r7, =(1 :SHL: 16) :OR: 16000  ; assumed 16MHz RAM (32 MHz bus)
        MOV     pc, lr

timecpu7500FE
;set memory to 32MHz for early boot (avoid probs with POST and with power-on key detection)
        MOV     r7, #&12                      ; 5-3 cycle ROM access, half speed (ie. 10-6)
        STRB    r7, [r2, #IOMD_ROMCR0]
        STRB    r7, [r2, #IOMD_ROMCR1]
        MOV     r7, #&70                      ; EDO RAM, 32 bit wide, conservative RAS and CAS timing
        STRB    r7, [r2, #IOMD_DRAMWID]       ; DRAM control reg. (more than just width on FE)
        MOV     r7, #&04                      ; clock dividers: /1 for CPU, /2 for memory, /2 for I/O
        STRB    r7, [r2, #IOMD_CLKCTL]
        LDR     r7, =(1 :SHL: 16) :OR: 32000  ; assumed 32MHz RAM (64 MHz bus), even though /2 at the moment
        MOV     pc, lr

timecpuriscpc
        MOV     r7, #&12                      ; 5-3 cycle ROM access
        STR     r7, [r2, #IOMD_ROMCR0]
        STR     r7, [r2, #IOMD_ROMCR1]
        LDR     r7, =(1 :SHL: 16) :OR: 16000  ; assumed 16MHz RAM (32 MHz bus)
        MOV     pc, lr

;used by NewReset, after main kernel boot
;sets full 64MHz memory if on 7500FE
;preserves registers _and_ flags
;
finalmemoryspeed ROUT
        Push    "r0,lr"
        MOV     lr, #IOC
        LDRB    r0, [lr, #IOMD_ID0]     ; Is
        CMP     r0, #&E7                ; It
        LDRB    r0, [lr, #IOMD_ID1]     ; A
        CMPEQ   r0, #&D4                ; Risc PC ?
        BEQ     fmspeed_done
        CMP     r0, #&AA                ; EQ if 7500FE
        MOVEQ   r0, #&80
        STREQB  r0, [lr, #&CC]          ; ASTCR register: set i/o asynchronous timing for fast memory clock
        MOVEQ   r0, #&06                ; clock dividers: /1 for CPU, /1 for memory, /2 for I/O
        STREQB  r0, [lr, #IOMD_CLKCTL]
fmspeed_done
        Pull    "r0,pc",,^

  | ; else if not RO371Timings

ncpuloops * 1024 ; don't go longer than 4ms without refresh !
nmulloops * 128

TimeCPU ROUT            ;ONLY WORKS FOR IOMD(L) machines - this shouldn't be a problem though
 [ :LNOT: AutoSpeedROMS
        LDR     r7, =(1 :SHL: 16) :OR: 16000    ; indicate 16MHz RAM
 |

   [ {TRUE}
;don't do timing for Risc PC
;
;MJS bug fix (since 3.70) - setup r3 properly, and don't corrupt r0 you fool
;
        MOV     r3, #IOC                ; Address of the IO controller
        LDRB    r7, [r3, #IOMD_ID0]     ; Is
        CMP     r7, #&E7                ; It
        LDRB    r7, [r3, #IOMD_ID1]     ; A
        CMPEQ   r7, #&D4                ; Medusa?
        MOVEQ   r7,#&3e00               ;for non-Morris force 16MHz timing, assumed Risc PC
        ORREQ   r7,r7,#&80
        ORREQ   r7,r7,#&10000           ;and note we're on IOMD
        MOVEQ   pc,lr
  ]

; Time CPU/Memory speed
        LDR     r1, =&7FFE              ; 32K @ 2MHz = ~16ms limit
        MOV     r3, #IOC                ; Address of the IO controller

        CMP     r0, #0
        LDREQ   r7, =(1 :SHL: 16) :OR: 16000    ; indicate 16MHz RAM - a little lie :-)
        MOVEQ   pc, lr                          ; Quick, leg it while they're not looking!

        ;Turn off the CPU cache
        ARM_number r4
        SUB     r4,r4,#6
        ADRL    r2,ARM_cacheoff_MMU_CR_table
        LDR     r2,[r2,r4,LSL #2]                 ;get appropriate cache-off value for MMU control reg
        ARM_write_control r2

        ;And don't forget to flush afterwards :-)
        ;SetCop r0, CR_IDCFlush
        ;SetCop  r0, CR_TLBFlush

        ;Turn off DMA/refreshes, but keep the reg contents for future restoration
        LDRB    r4, [r3, #IOMD_VREFCR]  ;Refresh
        LDRB    r5, [r3, #IOMD_SD0CR]   ;Sound
        LDRB    r6, [r3, #IOMD_VIDCR]   ;Video
        MOV     r2, #0
        STRB    r2, [r3, #IOMD_VREFCR]  ;Refresh off
        STRB    r2, [r3, #IOMD_SD0CR]   ;Sound off
        STRB    r2, [r3, #IOMD_VIDCR]   ;Video off

        MOV     r2, r1, LSR #8
        STRB    r1, [r3, #Timer1LL]
        STRB    r2, [r3, #Timer1LH]
        LDR     r2, =ncpuloops
        STRB    r2, [r3, #Timer1GO]     ; start the timer NOW
        B       %FT10                   ; Looks superfluous, but is required
                                        ; to get ncpuloops pipeline breaks
10
        SUBS    r2, r2, #1              ; 1S
        BNE     %BT10                   ; 1N + 2S

        STRB    r2, [r3, #Timer1LR]     ; latch count NOW
        LDRB    r2, [r3, #Timer1CL]
        LDRB    r7, [r3, #Timer1CH]
        ADD     r2, r2, r7, LSL #8      ; count after looping is ...

        SUB     r2, r1, r2              ; decrements !
        MOV     r7, r2, LSR #1          ; IOC clock decrements at 2MHz, so we now have ticks in 1MHz

        ;Put DMA/refreshes back to what they were
        STRB    r4, [r3, #IOMD_VREFCR]  ;Refresh back
        STRB    r5, [r3, #IOMD_SD0CR]   ;Sound back
        STRB    r6, [r3, #IOMD_VIDCR]   ;Video back

        ;And don't forget to flush first
        ;SetCop r0, CR_IDCFlush
        ;SetCop r0, CR_TLBFlush

        ;Turn on the CPU cache
        ARM_number r4
        SUB     r4,r4,#6
        ADRL    r2,ARM_default_MMU_CR_table
        LDR     r2,[r2,r4,LSL #2]                 ;get appropriate default value for MMU control reg
        ARM_write_control r2

        MOV     r2, r7
; In ROM - each cpu loop took 4R cycles @ [MEMCLK cycles+1]/f*500ns/cycle

 [ MorrisSupport
        LDRB    r0, [r3, #IOMD_ID0]     ; Is
        CMP     r0, #&E7                ; It
        LDRB    r0, [r3, #IOMD_ID1]     ; A
        CMPEQ   r0, #&D4                ; Medusa?
        LDRNE   r0, =(4*15*500*ncpuloops)               ;Morris timing values [reordered to prevent miscalculation
                                                        ;due to Aasm integering mid-calculation] (30720000)
        LDREQ   r0, =(4*(8*500/1000)*ncpuloops*1000)    ;RiscPC/IOMD timing values      (16384000)
        DivRem  r7, r0, r2, r1          ; r2 preserved,   R7=memory speed in kHz (MEMCLK/2)
 |
        LDR     r0, =(4*(8*500/1000)*ncpuloops*1000)    ;RiscPC/IOMD timing values      (16384000)
        DivRem  r7, r0, r2, r1          ; r2 preserved,   R7=memory speed in kHz (MEMCLK/2)
 ]

        ;Set the ROM speeds appropriately here, including Burst/NoBurst
        MOV     r4, #SystemROMspeed     ;
        MUL     r0, r7, r4              ; r0 = number of cycles/ROM access *500000
        LDR     r1, =500000
        DivRem  r2, r0, r1, r4          ; r2 = divisor, r0 = remainder, r4 is trashed
        CMP     r0, #0
        ADDGT   r2, r2, #1              ; Always round _UPWARDS_
        CMP     r2, #14
        MOVGT   r2, #14                 ; Top out at 14 cycles

        MOV     r5, #BurstROMspeed      ;
        MUL     r0, r7, r5              ; r0 = number of cycles/ROM burst access *500000
        DivRem  r3, r0, r1, r4          ; r3 = divisor, r0 = remainder, r4 is trashed
        CMP     r0, #0
        ADDGT   r3, r3, #1              ; Always round _upwards_
        CMP     r3, #4
        MOVGT   r3, #4                  ; Top out at 4 cycles

  [ :LNOT: NormalSpeedROMS
    ;limit speeds to 4 cycles minimum (125 ns) - eg. for StrongARM with EPROM
    CMP   r2,#4
    MOVLT r2,#4
    CMP   r3,#4
    MOVLT r3,#4
    ! 0, "*** WARNING Autospeed ROM speed limited to 4 cycles (125 ns) minimum ***"
  ]
        ;So we have R2=cycles for normal access, R3=cycles for burst access
        ;Load the iomd reg into R1, clear the bits we're messing with
        MOV     r4, #IOC
        LDRB    r1, [r4, #IOMD_ROMCR0]  ; Read ROMCR0
        AND     r1, r1, #2_11000000     ; Only preserve bits 6 & 7

        ADR     r0, MemClkTable
        LDRB    r5, [r0, r2]            ; Grab the relevant byte
        ORR     r1, r1, r5

        ADR     r0, BurstTable
        LDRB    r5, [r0, r3]            ; Grab the relevant info
        ORR     r1, r1, r5

        STRB    r1, [r4, #IOMD_ROMCR0]  ; Write ROMCR0
        STRB    r1, [r4, #IOMD_ROMCR1]  ; Write ROMCR1

        ORR     r7, r7, #1 :SHL: 16     ; Note MEMC1a presence (we're on IOMD)
  ]
timecpu_sodthefancytimingstuff
        MOV     pc, lr

  ] ;RO371Timings conditional

MemClkTable
        DCB     2_100101                ; 0 cycles (set to min which is 2)
        DCB     2_100101                ; 1 cycle (set to min. which is 2)
        DCB     2_100101                ; 2 cycles
        DCB     2_100100                ; 3 cycles
        DCB     2_100011                ; 4 cycles
        DCB     2_100010                ; 5 cycles
        DCB     2_100001                ; 6 cycles
        DCB     2_100000                ; 7 cycles
        DCB     2_000011                ; 8 cycles (2x4)
        DCB     2_000010                ; 9 cycles (same as 10)
        DCB     2_000010                ; 10 cycles (2x5)
        DCB     2_000001                ; 11 cycles (same as 12)
        DCB     2_000001                ; 12 cycles (2x6)
        DCB     2_000000                ; 13 cycles (same as 14)
        DCB     2_000000                ; 14 cycles (2x7)

        ALIGN
BurstTable
        DCB     2_000000                ; 0 cycles (no burst)
        DCB     2_011000                ; 1 cycle
        DCB     2_011000                ; 2 cycles
        DCB     2_010000                ; 3 cycles
        DCB     2_001000                ; 4 cycles

        ALIGN

; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
;
;       SWI OS_MMUControl
;
; in:   r0 = 0 (reason code 0, for modify control register)
;       r1 = EOR mask
;       r2 = AND mask
;
;       new control = ((old control AND r2) EOR r1)
;
; out:  r1 = old value
;       r2 = new value
;
; in:   r0 bits 1 to 28 = 0, bit 0 = 1  (reason code 1, for flush request)
;          r0 bit 31 set if cache(s) to be flushed
;          r0 bit 30 set if TLB(s) to be flushed
;          r0 bit 29 set if flush of entry only (else whole flush)
;       r1 = entry specifier, if r0 bit 29 set
;       (currently, flushing by entry is ignored, and just does full flush)
;

        ^       0
MMUCReason_ModifyControl        # 1    ; reason code 0
MMUCReason_Flush                # 1    ; reason code 1
MMUCReason_Unknown              # 0

MMUControlSWI   ENTRY
        BL      MMUControlSub
        PullEnv
        ORRVS   lr, lr, #V_bit
        ExitSWIHandler

MMUControlSub
        Push    lr
        AND     lr,r0,#&FF
        CMP     lr, #MMUCReason_Unknown
        ADDCC   pc, pc, lr, LSL #2
        B       MMUControl_Unknown
        B       MMUControl_ModifyControl
        B       MMUControl_Flush

MMUControl_Unknown
        ADRL    r0, ErrorBlock_HeapBadReason
 [ International
        BL      TranslateError
 ]
        Pull    lr
        SETV
        MOV     pc, lr

MMUControl_ModifyControl ROUT
        Push    "r3,r4,r5"
        CMP     r1,#0
        CMPEQ   r2,#&FFFFFFFF
        BEQ     MMUC_modcon_readonly
        MOV     r4, pc
        ORR     lr, r4, #I_bit
        TEQP    lr, #0                  ; disable IRQs while we modify soft copy
        ARM_number r5
        MOV     r3, #0
        LDR     lr, [r3, #MMUControlSoftCopy]
        CMP     r5,#&A
  [ ARM810support
        CMPNE   r5,#8                   ; can read control reg on ARM 810 too
  ]
        ARM_read_control lr,EQ
        MOVEQ   lr,lr,LSL #19
        MOVEQ   lr,lr,LSR #19           ; if StrongARM then we can read control reg. - trust this more than soft copy
        AND     r2, r2, lr
        EOR     r2, r2, r1
        MOV     r1, lr
  [ ARM810support
        CMP     r5,#8
        BNE     %FT03
        TST     r2,#4
        ORRNE   r2,r2,#&800            ; if ARM810 then Z bit (branch prediction) mirrors C bit
        BICEQ   r2,r2,#&800
03
  ]
        CMP     r5,#&A
        BNE     %FT05
        TST     r2,#&4                  ; if StrongARM, then I bit mirrors C bit
        ORRNE   r2,r2,#&1000
        BICEQ   r2,r2,#&1000
05
   [ SAWBbroken :LOR: ARM810bpbroken
        MOV     lr,r2
   [ SAWBbroken
        CMP     r5,#&A
        BICEQ   lr,lr,#&0008                   ;sorry guys, we can't use write buffer (safe-ish - safer would zap data cache bit as well)
     ]
     [ ARM810bpbroken
        CMP     r5, #8
        BICEQ   lr,lr,#&0800                   ;sorry guys (or sorry Guy!), we can't use branch predictor
     ]
        STR     lr, [r3, #MMUControlSoftCopy]
   |
        STR     r2, [r3, #MMUControlSoftCopy]
   ]
        BIC     lr, r2, r1              ; lr = bits going from 0->1
        TST     lr, #MMUC_C             ; if cache turning on then flush cache before we do it
        BEQ     %FT10
        ARM_flush_cache r3
10
  [ ARM810support
        CMP     r5,#8
        BNE     %FT12
        BIC     lr, r1, r2              ; lr = bits going from 1->0
        TST     lr, #MMUC_C             ; if cache turning off then clean/flush cache first
        BEQ     %FT12
    [ ARM810cleanflushbroken
        Push    "r0,r1"
        ARM8_cleanflush_IDC r0,r1
        ARM8_branchpredict_off r0       ; and turn off branch predict cleanly (must go off with cache)
        Pull    "r0,r1"
    |
        Push    "r0"
        ARM8_cleanflush_IDC r0
        ARM8_branchpredict_off r0       ; and turn off branch predict cleanly (must go off with cache)
        Pull    "r0"
    ]
12
  ]
        CMP     r5,#&A
        BNE     %FT15
        BIC     lr, r1, r2              ; lr = bits going from 1->0
        TST     lr, #MMUC_C             ; if cache turning off then clean StrongARM data cache first
        BEQ     %FT15
        Push    "r0-r2"
        MOV     r1,#ARMA_Cleaner_flipflop
        LDR     r0,[r1]
        EOR     r0,r0,#16*1024
        STR     r0,[r1]
        ARMA_clean_DC r0,r1,r2
        Pull    "r0-r2"
15
   [ SAWBbroken :LOR: ARM810bpbroken
        MOV     lr,r2
   [ SAWBbroken
        CMP     r5,#&A
        BICEQ   lr,lr,#&0008            ;sorry guys, we can't use write buffer
     ]
     [ ARM810bpbroken
        CMP     r5,#8
        BICEQ   lr,lr,#&0800            ;sorry guys, we can't use branch predictor
     ]
        ARM_write_control lr
   |
        ARM_write_control r2
   ]
        BIC     lr, r1, r2              ; lr = bits going from 1->0
        TST     lr, #MMUC_C             ; if cache turning off then flush cache afterwards
        BEQ     %FT20
        ARM_flush_cache r3
20
        TEQP    r4, #0                  ; restore IRQ state
        Pull    "r3,r4,r5,pc"

MMUC_modcon_readonly
        ARM_number r5
        MOV     r3, #0
        LDR     lr, [r3, #MMUControlSoftCopy]
        CMP     r5,#&A
  [ ARM810support
        CMPNE   r5,#8                   ; can read control reg on ARM 810 too
  ]
        ARM_read_control lr,EQ
        MOVEQ   lr,lr,LSL #19
        MOVEQ   lr,lr,LSR #19           ; if StrongARM then we can read control reg. - trust this more than soft copy
        STREQ   lr, [r3, #MMUControlSoftCopy]
        MOV     r1, lr
        MOV     r2, lr
        Pull    "r3,r4,r5,pc"

MMUControl_Flush
       Push     "r0-r4"
       ARM_read_ID r4
       AND      r4,r4,#&F000
       TST      r0,#&80000000
       BEQ      MMUC_flush_flushT
;flush cache
       CMP      r4,#&A000
  [ ARM810support
       CMPNE    r4,#&8000
  ]
       ARM67_flush_cache NE       ;if not StrongARM or ARM810, assume 6,7
       BNE      MMUC_flush_flushT
  [ ARM810support
       CMP      r4,#&A000
       BEQ      MMUC_flush_SA
;ARM810 then
    [ ARM810cleanflushbroken
       ARM8_cleanflush_IDC r1,r4
       MOV      r4,#&8000
    |
       ARM8_cleanflush_IDC r1
    ]
       B        MMUC_flush_flushT
MMUC_flush_SA
  ]
;StrongARM then
       MOV     r2,#ARMA_Cleaner_flipflop
       LDR     r1,[r2]
       EOR     r1,r1,#16*1024
       STR     r1,[r2]
       ARMA_clean_DC r1,r2,r3     ;effectively, fully clean/flush wrt non-interrupt stuff
       ARMA_drain_WB
       ARMA_flush_IC              ;do *not* flush DC - may be interrupt stuff in it
MMUC_flush_flushT
       TST     r0,#&40000000
       BEQ     MMUC_flush_done
  [ ARM810support
    ;there is a general macro, should have used this before anyway
       ARM_flush_TLB r1
  |
       CMP     r4,#&A000
       ARMA_flush_TLBs EQ
       ARM67_flush_TLB NE         ;if not StrongARM, assume 6,7
  ]
MMUC_flush_done
       Pull     "r0-r4,pc"

; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
;
;       Exception veneers

; Undefined instruction trap pre-veneer
; in:   r13_undef -> a FD stack
;       r14_undef -> undefined instruction +4
;       psr_undef = PSR at time of undef'd instruction

UndPreVeneer    ROUT

; entered in undef32, and we've branched up to &FF80xxxx, so we need to jump
; back to normal ROM space

        BIC     pc, pc, #&FC000000      ; remove silly bits from pc
        NOP                             ; (this instruction skipped)

        Push    "r0-r7,r14"             ; push r0-r7 on undef stack, and make room for return address
        MOV     r0, r13_undef

; for the time being just merge lr and psr

        mrs     AL, r1, SPSR_all                        ; r1 = saved PSR
        AND     r2, r1, #&F0000003                      ; get saved NZCV and 26 bit modes
        ORR     lr_undef, lr_undef, r2
        AND     r2, r1, #I32_bit + F32_bit              ; extract I and F from new place
        ORR     r1, lr_undef, r2, LSL #IF32_26Shift     ; r1 = combined lr and psr

        mrs     AL, r2, CPSR_all        ; now switch into SVC26
        BIC     r3, r2, #&1F
        ORR     r3, r3, #SVC26_mode
        msr     AL, SPSR_all, r3        ; set SPSR_undef to be CPSR but with SVC26
        msr     AL, CPSR_all, r3        ; and select this mode now

        MOV     lr_svc, r1              ; lr_svc = PC + PSR from exception

        msr     AL, CPSR_all, r2        ; go back into undef mode

        LDR     r1, =UndHan             ; work out address of undefined instruction handler
        LDR     r1, [r1]
        STR     r1, [r0, #8*4]          ; and store it as return address
        Pull    "r0-r7, pc",,^          ; exit to handler, restoring sp_undef and entering SVC26 mode

; Instruction fetch abort pre-veneer

PAbPreVeneer    ROUT

; entered in abort32, and we've branched up to &FF80xxxx, so we need to jump
; back to normal ROM space

        BIC     pc, pc, #&FC000000      ; remove silly bits from pc
        NOP                             ; (this instruction skipped)
        LDR     r13_abort, =PreVeneerRegDump
        STMIA   r13_abort, {r0-r7}
        MOV     r0, r13_abort

; for the time being just merge lr and psr

        mrs     AL, r1, SPSR_all                        ; r1 = saved PSR
        AND     r2, r1, #&F0000003                      ; get saved NZCV and 26 bit modes
        ORR     lr_abort, lr_abort, r2
        AND     r2, r1, #I32_bit + F32_bit              ; extract I and F from new place
        ORR     r1, lr_abort, r2, LSL #IF32_26Shift     ; r1 = combined lr and psr

        mrs     AL, r2, CPSR_all        ; now switch into SVC26
        BIC     r2, r2, #&1F
        ORR     r2, r2, #SVC26_mode
        msr     AL, CPSR_all, r2

        MOV     lr_svc, r1              ; lr_svc = PC + PSR from exception
        LDR     r1, =PAbHan
        LDR     r1, [r1]
        STR     r1, [r0, #8*4]
        LDMIA   r0, {r0-r7, pc}         ; jump to prefetch abort handler

; Preliminary layout of abort indirection nodes

        ^       0
AI_Link #       4
AI_Low  #       4
AI_High #       4
AI_WS   #       4
AI_Addr #       4

DAbPreVeneer    ROUT

; entered in abort32, and we've branched up to &FF80xxxx, so we need to jump
; back to normal ROM space

        BIC     pc, pc, #&FC000000              ; remove silly bits from pc
        NOP                                     ; (this instruction skipped)

        LDR     r13_abort, =PreVeneerRegDump
        STMIA   r13_abort, {r0-r7}              ; save unbanked registers anyway
        STR     lr_abort, [r13_abort, #15*4]    ; save old PC, ie instruction address

        mrs     AL, r0, SPSR_all                ; r0 = PSR when we aborted
        mrs     AL, r1, CPSR_all                ; r1 = CPSR
        ADD     r2, r13_abort, #8*4             ; r2 -> saved register bank for r8 onwards

        MOV     r4, lr_abort                    ; move address of aborting instruction into an unbanked register
        BIC     r1, r1, #&1F                    ; knock out current mode bits
        ANDS    r3, r0, #&1F                    ; extract old mode bits (and test for USR26_mode (=0))
        TEQNE   r3, #USR32_mode                 ; if usr26 or usr32 then use ^ to store registers
  [ SASTMhatbroken
        STMEQIA r2!,{r8-r12}
        STMEQIA r2 ,{r13,r14}^
        SUBEQ   r2, r2, #5*4
  |
        STMEQIA r2, {r8-r14}^
  ]
        BEQ     %FT05

        ORR     r3, r3, r1                      ; and put in user's
        msr     AL, CPSR_all, r3                ; switch to user's mode

        STMIA   r2, {r8-r14}                    ; save the banked registers

        mrs     AL, r5, SPSR_all                ; get the SPSR for the aborter's mode
        STR     r5, [r2, #8*4]                  ; and store away in the spare slot on the end
                                                ; (this is needed for LDM with PC and ^)
05
        ORR     r1, r1, #SVC26_mode
        msr     AL, CPSR_all, r1                ; then switch to SVC26 for the rest of this

        Push    "r0, lr_svc"                    ; save SPSR_abort and lr_svc
        SUB     sp_svc, sp_svc, #8*4            ; make room for r8_usr to r14_usr and PC
  [ SASTMhatbroken
        STMIA   sp_svc!,{r8-r12}
        STMIA   sp_svc, {r13,r14}^
        NOP
        STR     pc, [sp_svc,#2*4]
        SUB     sp_svc, sp_svc, #5*4            ; save USR bank in case STM ^, and also so we can corrupt them
  |
        STMIA   sp_svc, {r8-r15}^               ; save USR bank in case STM ^, and also so we can corrupt them
  ]

        SUB     r11, r2, #8*4                   ; r11 -> register bank
        STR     r4, [sp_svc, #7*4]              ; store aborter's PC in user register bank

;ARM 810 or StrongARM allow signed byte load or half-word load/stores - not supported at present
        LDR     r10, [r4, #-8]!                 ; r10 = actual instruction that aborted, and r4 points to it
        AND     r9, r10, #&0E000000
        TEQ     r9, #&08000000                  ; test for LDM/STM
        BNE     %FT50                           ; if not LDM/STM, then it's an "easy" LDR/STR

;        Write   "It's an LDM/STM"

 [ DebugAborts
        DLINE   "It's an LDM/STM"
 ]

; First count the number of transferred registers, and undo any writeback

        MOV     r9, #0                          ; r9 = no. of registers in list
        MOVS    r8, r10, LSL #16
        BEQ     %FT20
10
        MOVS    r8, r8, LSL #1
        ADDCS   r9, r9, #1
        BNE     %BT10
20
        MOV     r8, r10, LSR #16
        AND     r8, r8, #&0F                    ; base register number
        LDR     r7, [r11, r8, LSL #2]           ; ------""----- value

        TST     r10, #1 :SHL: 23                ; test up/down
        MOVNE   r1, r9                          ; if up, r1 = +ve no. of regs
        RSBEQ   r1, r9, #0                      ; if down, r1 = -ve no. of regs

;initially assume writeback
;we want r6 = base reg value before assumed writeback (r7 is base reg value after abort)
;ARM 6/7 will have performed any writeback, ARM 8,StrongARM will not

        ARM_read_ID r6
        AND     r6, r6, #&F000
        CMP     r6, #&8000                      ;ARM 8 or
        CMPNE   r6, #&A000                      ;StrongARM
        MOVEQ   r6, r7
        SUBNE   r6, r7, r1, ASL #2

;now we want r6 to be the base register value before the abort, so we will discard
;our adjusted value and take r7, if the instruction in fact had no writeback

        TST     r10, #1 :SHL: 21                ; test if write-back bit set
        TEQNE   r8, #15                         ; (if base is PC then write-back not allowed)
        MOVEQ   r6, r7                          ; if not wb, reg after abort is correct

        MOV     r1, sp                          ; r1 -> end of stack frame, and start of user-mode register bank
        SUB     sp, sp, r9, LSL #2              ; make stack frame for registers
        TST     r10, #1 :SHL: 20                ; if its an STM, we have to load up the stack frame
        BNE     %FT30                           ; but if it's an LDM, we call trap routine first

        STR     r6, [r11, r8, LSL #2]           ; store original base in register list, to be overwritten after 1st transfer

; now go through registers, storing them into frame

        MOV     r5, sp                          ; pointer to position in stack frame
        MOV     lr, r10, LSL #16                ; extract bottom 16 bits
        MOVS    lr, lr, LSR #16                 ; ie bitmask of which registers r0-r15 stored
        BEQ     %FT30                           ; this shouldn't happen (it's illegal)

        MOV     r3, r11                         ; current pointer into register bank
21
        TST     r10, #1 :SHL: 22                ; is it STM with ^
        ANDNE   lr, lr, #&FF                    ; if so then extract bottom 8 bits (r0-r7 on 1st pass, r8-r15 on 2nd)
22
        MOVS    lr, lr, LSR #1                  ; shift bit into carry
        LDRCS   r2, [r3], #4                    ; if set bit then transfer word from register bank
        STRCS   r2, [r5], #4                    ; into stack frame
        STRCS   r7, [r11, r8, LSL #2]           ; and after 1st transfer, store updated base into register bank
        ADDCC   r3, r3, #4                      ; else just increment register bank pointer
        BNE     %BT22                           ; if more bits to do, then loop

        TEQ     r5, r1                          ; have we done all registers?
        MOVNE   lr, r10, LSR #8                 ; no, then must have been doing STM with ^, and have some user-bank regs to store
        MOVNE   r3, r1                          ; so point r3 at user-mode register bank
        BNE     %BT21                           ; and go back into loop

30

; now work out address of 1st transfer

        ANDS    r5, r10, #(3 :SHL: 23)          ; bit 24 set => pre, bit 23 set => inc
        SUBEQ   r2, r6, r9, LSL #2              ; if post-dec, then 1st address = initial-nregs*4+4
        ADDEQ   r2, r2, #4
        BEQ     %FT32

        CMP     r5, #2 :SHL: 23
        MOVCC   r2, r6                          ; CC => post-inc, so 1st address = initial
        SUBEQ   r2, r6, r9, LSL #2              ; EQ => pre-dec,  so 1st address = initial-nregs*4
        ADDHI   r2, r6, #4                      ; HI => pre-inc,  so 1st address = initial+4
32
        ANDS    r0, r10, #1 :SHL: 20            ; r0 = 0 => STM
        MOVNE   r0, #1                          ;    = 1 => LDM
        LDR     r1, [r1, #8*4]                  ; get SPSR_abort
        TST     r1, #3                          ; test if transfer took place in USR mode
        ORRNE   r0, r0, #2                      ; if not then set bit 1 of flags word in r0
        MOV     r1, sp                          ; block to transfer from/into
        BIC     r2, r2, #3                      ; LDM/STM always present word-aligned address
        MOV     r3, r9, LSL #2                  ; length of transfer in bytes, and r4 still points to aborting instruction
        BL      ProcessTransfer
        ADDVS   sp, sp, r9, LSL #2              ; if invalid transfer then junk stack frame
        BVS     %FT90                           ; and generate an exception

; we transferred successfully, so now check if LDM and load up register bank from block

        TST     r10, #1 :SHL: 20
        ADDEQ   sp, sp, r9, LSL #2              ; it's an STM, so junk stack frame and tidy up
        BEQ     %FT70

; now go through registers, loading them from frame

        ADD     r1, sp, r9, LSL #2              ; r1 -> end of stack frame, and start of user-mode bank registers
        MOV     r5, sp                          ; pointer to position in stack frame
        MOV     r4, r10, LSL #16                ; extract bottom 16 bits
        MOVS    r4, r4, LSR #16                 ; ie bitmask of which registers r0-r15 stored
        BEQ     %FT40                           ; this shouldn't happen (it's illegal)

        SUB     r3, r1, #8*4                    ; r3 -> notional start of user bank, if it began at r0 (it actually starts at r8)
        MOV     r0, #0                          ; assume no user registers by default
        TST     r10, #1 :SHL: 15                ; is PC in list
        BNE     %FT34                           ; then can't be LDM of user bank
        TST     r10, #1 :SHL: 22                ; is it LDM with ^
        BEQ     %FT34                           ; no, then use main bank for all registers
        LDR     r2, [r1, #8*4]                  ; get SPSR
        ANDS    r2, r2, #3                      ; get bottom 2 bits of mode (EQ => USR26 or USR32)
        BEQ     %FT34                           ; if USR mode then use main bank for all
        TEQ     r2, #FIQ26_mode                 ; if FIQ mode then put r8-r14 in user bank
        LDREQ   lr, =&7F00                      ; then put r8-r14 in user bank
        LDRNE   lr, =&6000                      ; else put r13,r14 in user bank
        AND     r0, r4, lr                      ; r0 = mask of registers to put into user bank
        BIC     r4, r4, lr                      ; r4 = mask of registers to put into main bank
        MOV     lr, #0
34
        MOVS    r4, r4, LSR #1                  ; shift bit into carry
        LDRCS   r2, [r5], #4                    ; if set bit then transfer word from stack frame
        STRCS   r2, [r11, lr, LSL #2]           ; into main register bank
        MOVS    r0, r0, LSR #1                  ; shift bit into carry
        LDRCS   r2, [r5], #4                    ; if set bit then transfer word from stack frame
        STRCS   r2, [r3, lr, LSL #2]            ; into user register bank
        ADD     lr, lr, #1
        ORRS    r6, r0, r4                      ; have we finished both banks?
        BNE     %BT34                           ; no, then loop

; If LDM with PC in list, then add 4 to it, so the exit procedure is the same as if PC not loaded
; Also, if it was an LDM with PC and ^, then we have to update the stacked SPSR

40
        MOV     sp, r1                          ; junk frame

        TST     r10, #1 :SHL: 15                ; check PC in list
        ADDNE   r2, r2, #4                      ; since PC is last, r2 will still hold the value loaded
        STRNE   r2, [r11, #15*4]                ; store back into main register bank
        TSTNE   r10, #1 :SHL: 22                ; now check LDM ^
        BEQ     %FT70                           ; [not LDM with PC in list]

        LDR     r9, [sp, #8*4]                  ; get SPSR_abort
        AND     r8, r9, #&1F                    ; r8 = aborter's mode
        TEQ     r8, #USR32_mode                 ; if in USR32
        BEQ     %FT70                           ; then the ^ has no effect (actually uses CPSR)
        TST     r8, #&10                        ; if 32-bit mode
        LDRNE   r7, [r11, #16*4]                ; then use SPSR for the aborter's mode else use updated r15 in r2 (26-bit format)
        ANDEQ   r7, r2, #&F0000003              ; flag and mode bits in same place
        ANDEQ   r2, r2, #&0C000000              ; but I and F have to move to bits 7 and 6
        ORREQ   r7, r7, r2, LSR #(26-6)

; r7 is now desired PSR (in 32-bit format) to update to
; now check which bits can actually be updated

        TEQ     r8, #USR26_mode
        BICEQ   r9, r9, #&F0000000              ; if USR26 then we can only update NZCV
        ANDEQ   r7, r7, #&F0000000
        ORREQ   r9, r9, r7
        MOVNE   r9, r7                          ; else can update all bits
        STR     r9, [sp, #8*4]                  ; store back updated SPSR_abort (to become CPSR)
        B       %FT70                           ; now tidy up

50

; it's an LDR/STR - first work out offset

 [ DebugAborts
        DLINE   "It's an LDR/STR"
 ]

        TST     r10, #1 :SHL: 25                ; if immediate
        MOVEQ   r9, r10, LSL #(31-11)           ; then extract bottom 12 bits
        MOVEQ   r9, r9, LSR #(31-11)
        BEQ     %FT60

        AND     r8, r10, #&0F                   ; register to shift
        LDR     r9, [r11, r8, LSL #2]           ; get actual value of register

        MOV     r8, r10, LSR #7                 ; extract shift amount
        ANDS    r8, r8, #&1F                    ; (bits 7..11)
        MOVEQ   r8, #32                         ; if zero then make 32

        ANDS    r7, r10, #&60
        ANDEQ   r8, r8, #&1F                    ; LSL 0 is really zero
        MOVEQ   r9, r9, LSL r8
        TEQ     r7, #&20
        MOVEQ   r9, r9, LSR r8
        TEQ     r7, #&40
        MOVEQ   r9, r9, ASR r8
        TEQ     r7, #&60
        MOVEQ   r9, r9, ROR r8                  ; if 32 then we haven't spoilt it!
        TEQEQ   r8, #32                         ; if ROR #32 then really RRX
        BNE     %FT60
        LDR     r7, [sp, #8*4]                  ; get SPSR
        AND     r7, r7, #C_bit
        CMP     r7, #1                          ; set carry from original user
        MOV     r9, r9, RRX
60
        TST     r10, #1 :SHL: 23                ; test for up/down
        RSBEQ   r9, r9, #0                      ; if down then negate

;;;assume ARM 6 configured for LateAbort - others cannot be configured
;;;so, at run time, ARM 6 or 7 means late, ARM 8 or StrongARM means early
;;;
;;; [ LateAborts
;;;        TST     r10, #1 :SHL: 21                ; if write-back
;;;        MOVNE   r8, #0                          ; then no post-inc
;;;        RSBEQ   r8, r9, #0                      ; else post-inc = - pre-inc
;;;        ADD     r0, r8, r9                      ; amount to subtract off base register for correction

;;;        TST     r10, #1 :SHL: 24                ; however, if we're doing post-increment
;;;        MOVEQ   r8, r9                          ; then post-inc = what was pre-inc
;;;        MOVEQ   r0, r9                          ; and adjustment is what was added on
;;;        RSB     r9, r8, #0                      ; and pre-inc = -post-inc
;;; |
;;;        TST     r10, #1 :SHL: 21                ; if write-back
;;;        MOVNE   r8, #0                          ; then no post-inc
;;;        RSBEQ   r8, r9, #0                      ; else post-inc = - pre-inc

;;;        TST     r10, #1 :SHL: 24                ; however, if we're doing post-increment
;;;        MOVEQ   r8, r9                          ; then post-inc = what was pre-inc
;;;        MOVEQ   r9, #0                          ; and pre-inc = 0
;;; ]

        ARM_read_ID r8
        AND     r8, r8, #&F000
        CMP     r8, #&8000
        CMPNE   r8, #&A000
        BEQ     %FT62
;ARM 6 or 7 (late)
        TST     r10, #1 :SHL: 21                ; if write-back
        MOVNE   r8, #0                          ; then no post-inc
        RSBEQ   r8, r9, #0                      ; else post-inc = - pre-inc
        ADD     r0, r8, r9                      ; amount to subtract off base register for correction

        TST     r10, #1 :SHL: 24                ; however, if we're doing post-increment
        MOVEQ   r8, r9                          ; then post-inc = what was pre-inc
        MOVEQ   r0, r9                          ; and adjustment is what was added on
        RSB     r9, r8, #0                      ; and pre-inc = -post-inc
        B       %FT63
62
;ARM 8 or StrongARM (early)
        TST     r10, #1 :SHL: 21                ; if write-back
        MOVNE   r8, #0                          ; then no post-inc
        RSBEQ   r8, r9, #0                      ; else post-inc = - pre-inc

        TST     r10, #1 :SHL: 24                ; however, if we're doing post-increment
        MOVEQ   r8, r9                          ; then post-inc = what was pre-inc
        MOVEQ   r9, #0                          ; and pre-inc = 0

63
        MOV     r7, r10, LSL #31-19
        MOV     r7, r7, LSR #28                 ; r7 = base register number
        LDR     r6, [r11, r7, LSL #2]           ; r6 = base register value

;;; [ LateAborts
;;;        SUB     r0, r6, r0                      ; compute adjusted base register
;;;        STR     r0, [r11, r7, LSL #2]           ; and store back in case we decide to abort after all
;;; ]

        ARM_read_ID r1
        AND     r1, r1, #&F000
        CMP     r1, #&8000
        CMPNE   r1, #&A000
        SUBNE   r0, r6, r0                      ; compute adjusted base register (if late)
        STRNE   r0, [r11, r7, LSL #2]           ; and store back in case we decide to abort after all

; no need to clear PSR bits out of R15, because PSR is separate

        ADD     r9, r9, r6                      ; r2 = offset+base = illegal address

 [ DebugAborts
        DREG    r9, "Aborting address = "
        DREG    r8, "Post-increment = "
        DREG    r4, "Instruction where abort happened = "
 ]

        ANDS    r0, r10, #1 :SHL: 20            ; if an LDR then bit 20 set
        MOVNE   r0, #1                          ; so make 1
        SUBNE   sp, sp, #4                      ; then just create 1 word stack frame
        BNE     %FT65

        MOV     r5, r10, LSR #12                ; else it's an STR (r0 = 0)
        AND     r5, r5, #&0F                    ; r5 = source register number
        LDR     r5, [r11, r5, LSL #2]           ; r5 = value of source register
 [ DebugAborts
        DREG    r5, "Data value to store = "
 ]
        Push    "r5"                            ; create stack frame with this value in it
65
        LDR     r1, [sp, #(1+8)*4]              ; get SPSR_abort
        TST     r1, #3                          ; test if transfer took place in USR mode
        ORRNE   r0, r0, #2                      ; if not then set bit 1 of flags word in r0

        MOV     r1, sp                          ; r1 -> data block
        TST     r10, #1 :SHL: 22                ; if byte transfer
        MOVNE   r3, #1                          ; then length of transfer = 1
        MOVNE   r2, r9                          ; and use unmolested address
        MOVEQ   r3, #4                          ; else length = 4
        BICEQ   r2, r9, #3                      ; and mask out bottom 2 bits of address

        BL      ProcessTransfer
        ADDVS   sp, sp, #4                      ; if illegal transfer, junk stack frame
        BVS     %FT90                           ; and cause exception

        ADD     r6, r9, r8                      ; update base register with offset
        STR     r6, [r11, r7, LSL #2]           ; and store back (NB if LDR and dest=base, the load overwrites the updated base)

        TST     r10, #1 :SHL: 20                ; if it's STR (not LDR)
        ADDEQ   sp, sp, #4                      ; then junk stack frame
        BEQ     %FT70                           ; and tidy up

        Pull    "r6"                            ; LDR/LDRB, so get value to load into register
        TST     r10, #1 :SHL: 22                ; if LDRB
        ANDNE   r6, r6, #&FF                    ; then put zero in top 3 bytes of word
        ANDEQ   r9, r9, #3                      ; else rotate word to correct position - r9 = bottom 2 bits of address
        MOVEQ   r9, r9, LSL #3                  ; multiply by 8 to get rotation factor
        MOVEQ   r6, r6, ROR r9                  ; rotate to correct position in register

        MOV     r5, r10, LSR #12                ; test for LDR PC
        AND     r5, r5, #&0F                    ; r5 = dest register number
        TEQ     r5, #15                         ; if PC
        ADDEQ   r6, r6, #4                      ; then adjust for abort exit
        STR     r6, [r11, r5, LSL #2]           ; store into register bank

70

; Tidy up routine, common to LDR/STR and LDM/STM

        ADD     r2, r11, #8*4                   ; point r2 at 2nd half of main register bank
        LDMIA   sp, {r8-r14}^                   ; reload user bank registers
        NOP                                     ; don't access banked registers after LDM^
        ADD     sp, sp, #8*4                    ; junk user bank stack frame

        Pull    "r0, lr"                        ; r0 = (possibly updated) SPSR_abort, restore lr_svc

        SetMode ABT32_mode, r1                  ; leaves r1 = current PSR

        mrs     AL, r6, SPSR_all                ; get original SPSR, with aborter's original mode
        AND     r7, r6, #&1F
        TEQ     r7, #USR26_mode
        TEQNE   r7, #USR32_mode                 ; test also for USR32
        LDMEQIA r2, {r8-r14}^                   ; if user mode then just use ^ to reload registers
        NOP
        BEQ     %FT80

        ORR     r6, r6, #I32_bit                ; use aborter's flags and mode but set I
        msr     AL, CPSR_all, r6                ; switch to aborter's mode
        LDMIA   r2, {r8-r14}                    ; reload banked registers
        msr     AL, CPSR_all, r1                ; switch back to ABT32

80
        LDR     lr_abort, [r13_abort, #15*4]    ; get PC to return to
        msr     AL, SPSR_all, r0                ; set up new SPSR (may have changed for LDM {PC}^)

        LDMIA   r13_abort, {r0-r7}              ; reload r0-r7
        SUBS    pc, lr_abort, #4                ; go back 8 to adjust for PC being 2 words out,
                                                ; then forward 4 to skip instruction we've just executed

; Call normal exception handler

90

; for the time being just merge lr and psr

        LDR     r0, [sp, #8*4]                          ; r0 = original SPSR (can't have been modified)

        LDR     lr, [r11, #15*4]                        ; get PC of aborter
        AND     r1, r0, #&F0000000                      ; get saved NZCV
        ORR     lr, lr, r1
        AND     r1, r0, #I32_bit + F32_bit              ; extract I and F from new place
        ORR     lr, lr, r1, LSL #IF32_26Shift           ; and merge
        AND     r1, r0, #3                              ; get old mode bits (have to assume a 26-bit mode!)
        ORR     lr, lr, r1                              ; lr = combined lr and psr
        STR     lr, [sp, #9*4]                          ; overwrite stacked lr_svc
        TEQ     r1, #SVC26_mode                         ; if aborter was in SVC mode
        STREQ   lr, [r11, #14*4]                        ; then also overwrite r14 in aborter's register bank

        BIC     r0, r0, #&1F                            ; clear mode bits in SPSR
        ORR     r0, r0, #SVC26_mode :OR: I32_bit        ; and force SVC26 with I set
 [ DebugAborts
        DLINE   "Going to call data abort handler"
        DREG    lr, "lr_svc will be "
        DREG    r0, "PSR going to exit with = "
 ]
        STR     r0, [sp, #8*4]                          ; overwrite stacked SPSR

 [ NewStyle_All
        MOV     r0, #0                                  ; we're going to call abort handler
        STR     r0, [r0, #CDASemaphore]                 ; so allow recovery if we were in CDA
 ]

        CMP     r2, #&20                                ; if aborting address <&20
        ADRCCL  r0, ABORTD                              ; then use MOS's default abort handler
        LDRCS   r0, =DAbHan                             ; else use current handler
        LDRCS   r0, [r0]                                ; get address of data abort handler
 [ DebugAborts
        DREG    r0, "Handler address = "
 ]
        ADD     r0, r0, #4                              ; add on 4 to adjust for abort exit
        STR     r0, [r11, #15*4]                        ; and store in pc in register bank
        B       %BT70                                   ; then junk to normal tidy-up routine

; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
;
;       ProcessTransfer - Process an abort transfer
;
; in:   r0 = flags
;               bit 0 = 0 => Store to memory
;                       1 => Load from memory
;               bit 1 = 0 => Transfer executed in user mode
;                       1 => Transfer executed in non-user mode
;       r1 = block of data to transfer from/into
;       r2 = illegal address
;       r3 = length of transfer in bytes
;       r4 -> instruction which aborted
;       SVC26 mode
;
; out:  V=0 => transfer accomplished
;       V=1 => transfer not accomplished
;       All registers preserved
;

SectionSizeShift *      20
SectionSize     *       1 :SHL: SectionSizeShift

LargePageSizeShift *    16
LargePageSize   *       1 :SHL: LargePageSizeShift

SmallPageSizeShift *    12
SmallPageSize   *       1 :SHL: SmallPageSizeShift

ProcessTransfer ENTRY "r1-r7,r12"

 [ DebugAborts
        DLINE   "ProcessTransfer entered"
        DREG    r2, "Illegal address = "
        DREG    r3, "Length of transfer = "
        DREG    r4, "Abort happened at address "
        DREG    r0, "Flags = "
        DLINE   "Data = ",cc

        MOV     r5, r3
        MOV     r6, r1
01
        LDR     r7, [r6], #4
        DREG    r7," ",cc
        SUBS    r5, r5, #4
        BHI     %BT01
        DLINE   ""
 ]


; First identify if start address should have aborted

10
        LDR     r7, =L1PT
        MOV     lr, r2, LSR #SectionSizeShift           ; r2 as a multiple of 1Mb
        EOR     r5, r2, lr, LSL #SectionSizeShift       ; r5 = offset within section
        SUB     r5, r2, r5                              ; r5 -> start of section containing r2
        ADD     r5, r5, #SectionSize                    ; r5 -> start of section after r2

        LDR     lr, [r7, lr, LSL #2]                    ; get L1PT entry
        ANDS    r7, lr, #3                              ; 00 => trans.fault, 01 => page, 10 => section, 11 => reserved (fault)
        TEQNE   r7, #3
        BEQ     Fault
        TEQ     r7, #1
        BEQ     CheckPage

; it's section mapped - check section access privileges

15
        ANDS    r7, lr, #3 :SHL: 10                     ; extract ap
        BEQ     Fault                                   ; 00 => no access for anyone (at the moment)
        TST     r0, #2                                  ; test for non-usr access
        BNE     %FT20                                   ; if non-usr then OK to access here
        CMP     r7, #2 :SHL: 10
        BCC     Fault                                   ; 01 => no usr access
        BHI     %FT20                                   ; 11 => full user access, so OK
        TST     r0, #1
        BEQ     Fault                                   ; 10 => usr read-only, so stores not allowed

; access OK, so copy up to end of section/sub-page

20
;ARM 8 and StrongARM will abort for vector reads (as well as writes) in 26bit mode, so we must
;handle vector reads properly as well now
;In fact, StrongARM does not abort (optional in architecture 4), but ARM 8 does - MJS 08-10-96
  [ {FALSE}
        TST     r0, #1                                  ; if load from memory
        BNE     %FT60                                   ; then skip
  ]

; it's a store to memory (may be a vector write), or a read from memory (may be a vector read)
; do it in words if >= 4 bytes, so word writes to VIDC work for example

25
 [ {TRUE}
        CMP     r2, #&1C                                ; if in abort area (but allow any access to &1C)
 |
        CMP     r2, #&20                                ; if in abort area
 ]
        CMPCC   r4, #ROM                                ; and executing out of RAM
        MOVCC   r5, #&20                                ; then set end-of-section = 32
        BCC     Fault                                   ; and check user list

        SetMode SVC32_mode, lr                          ; go into SVC32 so we can poke or peek vector area

        TST     r0, #1                                  ; test for peek/poke
        BEQ     %FT30
26
;peeking
        TEQ     r2, r5                                  ; have we gone onto a new block?
        BEQ     %FT50                                   ; if so then exit if finished else go back to outer loop
        SUBS    r3, r3, #4                              ; have we got at least a word to do?
        LDRCS   lr, [r2], #4                            ; if so then copy word
        STRCS   lr, [r1], #4
        BHI     %BT26                                   ; and if not all done then loop
        BEQ     %FT50                                   ; if all done then switch back to SVC26 and exit

        ADDS    r3, r3, #4
27
        LDRB    lr, [r2], #1                            ; read byte from register bank
        STRB    lr, [r1], #1                            ; and store to memory
        SUBS    r3, r3, #1                              ; decrement byte count
        BEQ     %FT50                                   ; if finished then switch back to SVC26 and exit
        TEQ     r2, r5                                  ; have we gone onto a new block?
        BNE     %BT27                                   ; no, then loop
        B       %FT50

30
;poking
        TEQ     r2, r5                                  ; have we gone onto a new block?
        BEQ     %FT50                                   ; if so then exit if finished else go back to outer loop
        SUBS    r3, r3, #4                              ; have we got at least a word to do?
        LDRCS   lr, [r1], #4                            ; if so then copy word
        STRCS   lr, [r2], #4
        BHI     %BT30                                   ; and if not all done then loop
        BEQ     %FT50                                   ; if all done then switch back to SVC26 and exit

        ADDS    r3, r3, #4
40
        LDRB    lr, [r1], #1                            ; read byte from register bank
        STRB    lr, [r2], #1                            ; and store to memory
        SUBS    r3, r3, #1                              ; decrement byte count
        BEQ     %FT50                                   ; if finished then switch back to SVC26 and exit
        TEQ     r2, r5                                  ; have we gone onto a new block?
        BNE     %BT40                                   ; no, then loop

50
        SetMode SVC26_mode, lr
        CMP     r3, #0
        BNE     %BT10
        EXIT                                            ; exit (VC from CMP)

  [ {FALSE}
; it's a load from memory
60
        LDRB    lr, [r2], #1                            ; read byte from memory
        STRB    lr, [r1], #1                            ; and store to memory bank
        SUBS    r3, r3, #1                              ; decrement byte count
        EXIT    EQ                                      ; if finished then exit (VC from SUBS)
        TEQ     r2, r5                                  ; have we gone onto a new block?
        BNE     %BT60                                   ; no, then loop
        B       %BT10                                   ; yes, then go back to start
  ]

; it's page mapped, so check L2PT
; lr = L1 table entry
; We use the logical copy of physical space here, in order to access the entry pointed to by the L1 entry

CheckPage
        MOV     r5, r2, LSR #SmallPageSizeShift         ; r2 as a multiple of 4K
        MOV     r5, r5, LSL #SmallPageSizeShift
        ADD     r5, r5, #SmallPageSize                  ; if translation fault, then it applies to small page

        MOV     lr, lr, LSR #10                         ; remove domain and U bits
        MOV     lr, lr, LSL #10
        ORR     lr, lr, #PhysSpace                      ; now physical address is converted to a logical one (in physspace)
        AND     r7, r2, #&000FF000                      ; extract bits which are to form L2 offset

        LDR     lr, [lr, r7, LSR #10]                   ; lr = L2PT entry
        ANDS    r7, lr, #3                              ; 00 => trans.fault, 01 => large page
                                                        ; 10 => small page, 11 => reserved (fault)
        TEQNE   r7, #3
        BEQ     Fault
        TEQ     r7, #2                          ; if small page
        MOVEQ   r7, #SmallPageSizeShift-2       ; then sub-page size = 1<<10
        MOVNE   r7, #LargePageSizeShift-2       ; else sub-page size = 1<<14

        MOV     r5, r2, LSR r7                  ; round down to start of sub-page
        MOV     r5, r5, LSL r7
        MOV     r6, #1
        ADD     r5, r5, r6, LSL r7              ; then move on to start of next sub-page

        MOV     r7, r2, LSR r7                  ; put sub-page number in bits 1,2
        AND     r7, r7, #3                      ; and junk other bits
        RSB     r7, r7, #3                      ; invert sub-page ordering
        MOV     r7, r7, LSL #1                  ; and double it
        MOV     lr, lr, LSL r7                  ; then shift up access privileges so that correct ones appear in bits 10,11
        B       %BT15                           ; re-use code to check access privileges

Fault
        SUB     r5, r5, r2                      ; r5 = number of bytes we can do in this section/page/sub-page
        Push    "r3"                            ; save number of bytes to do
        CMP     r3, r5                          ; if more bytes than there are in this block
        MOVHI   r3, r5

; Now scan list of user abort addresses

        MOV     r6, #0
        LDR     r6, [r6, #AbortIndirection]
        TEQ     r6, #0
        BEQ     %FT85                           ; address not in any abort node
75
        LDR     r5, [r6, #AI_Low]
        CMP     r2, r5
        BCC     %FT80
        LDR     r5, [r6, #AI_High]
        CMP     r2, r5
        BCS     %FT80

        Push    "r3"                            ; save number of bytes we can do in this section/page/sub-page
        SUB     r5, r5, r2                      ; number of bytes we can do for this node
        CMP     r3, r5                          ; if bigger than the size of this node
        MOVHI   r3, r5                          ; then restrict number of bytes

        ADD     r5, r6, #AI_WS
        MOV     lr, pc
        LDMIA   r5, {r12, pc}

; returns to here

        ADDVS   sp, sp, #8                      ; if user abort failed, then junk both pushed r3's
        EXIT    VS                              ; and exit

        ADD     r1, r1, r3                      ; advance register block
        ADD     r2, r2, r3                      ; and illegal address pointer

        LDR     r5, [sp, #4]                    ; subtract amount done from stacked total amount to do
        SUBS    r5, r5, r3
        STR     r5, [sp, #4]                    ; and store back

        Pull    "r5"
        SUBS    r3, r5, r3                      ; is there more to do in this section/page/sub-page?
        BEQ     %FT90                           ; no then skip
80
        LDR     r6, [r6, #AI_Link]              ; else try next node
        TEQ     r6, #0
        BNE     %BT75
85
        ADD     sp, sp, #4                      ; junk pushed r3
        SETV                                    ; indicate access invalid
        EXIT                                    ; and exit

90
        Pull    "r3"                            ; restore total amount left to do
        TEQ     r3, #0
        BNE     %BT10                           ; yes, then loop
        EXIT                                    ; no, then exit (V=0 from SUBS)


;some tricks to improve performance, looking at MMU level 1 and level 2 page tables
L1L2PTenhancements ROUT
        Push    "r0-r5,lr"

;if the MMU control reg (soft copy) has R bit set (bit 9), then adjust the L1 entries for ROM
;space to give full write protection (user and supervisor)
        MOV     r0,#0
        LDR     r1,[r0,#MMUControlSoftCopy]
        TST     r1,#&200
        BEQ     L1L2PTe_WPROMdone              ;ARM 610 has no R bit, for example
        LDR     r0,=L1PT
        ADD     r0,r0,#ROM :SHR: (20-2)        ;address of first L1PT entry for ROM space
        MOV     r1,#8                          ;8 entries (8 Mbytes)
L1L2PTe_WPROMloop
        LDR     r2,[r0]
        BIC     r2,r2,#&C00                    ;set AP (access permission) bits to 00
        STR     r2,[r0],#4
        SUBS    r1,r1,#1
        BNE     L1L2PTe_WPROMloop
L1L2PTe_WPROMdone

;go for best available memory speed for data cache cleaner area (StrongARM)
        LDR     r0,=L2PT :OR: (ARMA_Cleaners_address :SHR: 10)  ;address of 1st L2PT word for cleaner area
        LDR     r1,[r0]
        MOV     r1,r1,LSL #20
        MOV     r1,r1,LSR #20                   ;zap physical address field
        ORR     r1,r1,#&01000000                ; = physical address of start of ROM bank 1
        MOV     r2,#8                           ;8 L2PT entries to fiddle
00
        STR     r1,[r0],#4
        SUBS    r2,r2,#1
        BNE     %BT00
        MOV     r0,#IOC
        MOV     r1,#5
        STRB    r1,[r0, #IOMD_ROMCR1]           ;ROM bank 1 speed = fastest (62.5 ns)

;make first 5 pages of cursor chunk cacheable and bufferable - this is rather handy, 'coz things
;like the SWI dispatcher, IRQ dispatcher are here. May be a slight worry over cursor data
;being write-back cached (StrongARM) - should strictly clean,drain write buffer or whatever for shape change.
        LDR     r0,=L2PT :OR: (CursorChunkAddress :SHR: 10)  ;address of 1st L2PT word for CursorChunk
        MOV     R2,#5                           ;5 entries to adjust
01
        LDR     r1,[r0]
        ORR     r1,r1,#&C                       ;make page cacheable and bufferable
        STR     r1,[r0],#4
        SUBS    r2,r2,#1
        BNE     %BT01

;make other 3 pages of chunk bufferable
        MOV     R2,#3
02
        LDR     r1,[r0]
        ORR     r1,r1,#&4                       ;make page bufferable
        STR     r1,[r0],#4
        SUBS    r2,r2,#1
        BNE     %BT02

;if we are on StrongARM, make the pages of the L2PT itself (for AppSpace only), bufferable (improves task swap speed)
;AppSpace is 0-28M
        ARM_read_ID r0
        AND     r0,r0,#&F000
        CMP     r0,#&A000
        BNE     %FT04
        MOV     r0,#L2PT
        ADD     r0,r0,#(L2PT :SHR: 10) ;the L2PT of the L2PT (and first 7 entries are for App Space)
        ADD     r1,r0,#7*4             ;7 L2PT-of-L2PT entries for 28M of space
03
        LDR     r2,[r0]
        ORR     r2,r2,#4               ;bufferable bit
        STR     r2,[r0],#4
        CMP     r0,r1
        BNE     %BT03
        ARMA_drain_WB                  ;let us be paranoid
04

;try to rescue some pages from the L2PT itself, in the AppSpace region - ie. AppSpace max size can really
;be total RAM size, if that is less than 28 Mb, and for every 4Mb less that is we can rescue a 4k page
;and return it to the free pool - handy on a 2Mb Kryten for instance!

        LDR     r0,=MaxCamEntry
        LDR     r0,[r0]
        ADD     r0,r0,#1               ; = no. of 4k RAM pages in machine
        MOV     r0,r0,LSR #8           ; = no. of Mbytes in machine
        ADD     r0,r0,#3
        BIC     r0,r0,#3               ; round up to next 4 Mb
        CMP     r0,#28                 ; if 28Mb or more, no pages to be rescued from L2PT AppSpace
        BHS     %FT09
        LDR     r1,=AppSpaceDANode
        MOV     r2,r0,LSL #20
        STR     r2,[r1,#DANode_MaxSize] ; update AppSpace max size
        MOV     r0,r0,LSR #2           ; no. of L2PT AppSpace pages which cannot be rescued
        MOV     r1,#L2PT
        ADD     r1,r1,#(L2PT :SHR: 10) ;the L2PT of the L2PT (and first 7 entries are for App Space)
        ADD     r1,r1,r0,LSL #2        ;first entry for rescue
        LDR     r3,=FreePoolDANode
        LDR     r2,[r3,#DANode_Base]
        LDR     r3,[r3,#DANode_Size]
        ADD     r2,r2,r3               ; r2 -> next logical address for a rescued page
        MOV     r5,r3                  ; FreePool size so far

        SUB     sp,sp,#16              ; room for 1 page block entry + terminator
        MOV     r3,sp
05
        LDR     r4,[r1],#4             ; pick up the L2PT entry
        BIC     r4,r4,#&0FF
        BIC     r4,r4,#&F00            ; mask to leave physical address only
        STR     r4,[r3,#8]             ; store physical address in word 2 of page block entry
        Push    "r0-r2"
        MOV     r0,#&0C00
        MOV     r1,r3
        MOV     r2,#1
        SWI     XOS_Memory             ; fill in page number, given physical address
        Pull    "r0-r2"
        MOV     r4,#2                  ; means inaccessible in user mode (destined for FreePool)
        STR     r4,[r3,#8]
        MOV     r4,#-1
        STR     r4,[r3,#12]            ; terminator
        STR     r2,[r3,#4]             ; new logical address for page
        Push    "r0"
        MOV     r0,r3
        SWI     XOS_SetMemMapEntries
        Pull    "r0"
        ADD     r2,r2,#4096
        ADD     r5,r5,#4096            ; next page
        ADD     r0,r0,#1
        CMP     r0,#7                  ;7 entries in total for full 28Mb AppSpace
        BNE     %BT05
        ADD     sp,sp,#16              ;drop the workspace

        LDR     r4,=FreePoolDANode
        STR     r5,[r4,#DANode_Size]   ;update FreePoolSize

09
        Pull    "r0-r5,pc"

;
; ---------------- XOS_SynchroniseCodeAreas implementation ---------------
;

;this SWI effectively implements IMB and IMBrange (Instruction Memory Barrier)
;for newer ARMs

;max address range before IMBrange is treated as IMB (performance issue,
;since range clean can only specify entries by virtual address)
ARMA_IMBrange_threshold * 128*1024

;entry:
;   R0 = flags
;        bit 0 set ->  R1,R2 specify virtual address range to synchronise
;                      R1 = start address (word aligned, inclusive)
;                      R2 = end address (word aligned, inclusive)
;        bit 0 clear   synchronise entire virtual space
;        bits 1..31    reserved
;
;exit:
;   R0-R2 preserved
;
;method:
;  ARMs 6,7 need do nothing (no IMB consideration)
;  ARM 8 need do nothing (SWI call itself flushes prefetch unit)
;  StrongARM must:
;    (1) clean data cache, (2) drain write buffer, (3) flush instruction cache
;    - the clean is either whole cache or range as appropriate
;
SyncCodeAreasSWI ROUT
        BIC     LR,LR,#V_bit                 ;clear return V
        ARM_read_ID R10
        AND     R10,R10,#&F000
        CMP     R10,#&A000
        BNE     SLVK                         ;not StrongARM
        TST     R0,#1                        ;range variant of SWI?
        BEQ     %FT01
        MOV     R11,R1                       ;R11 := low address (inclusive)
        ADD     R12,R2,#4                    ;R12 := high address (exclusive)
        SUB     R12,R12,R11
        CMP     R12,#ARMA_IMBrange_threshold
        BHS     %FT01                        ;do full IMB
        ADD     R12,R12,R11
        ARMA_clean_DCrange R11,R12
        ARMA_drain_WB
        ARMA_flush_IC WithoutNOPs
        MOV     R0,R0                  ;NOPs to ensure 4 instructions after IC flush before return
        MOV     R0,R0
        MOV     R0,R0
        B       SLVK
01      ;full IMB required
        MOV     R10,PC
        ORR     R12,R10,#I_bit
        TEQP    R12,#0                     ;disable IRQs to read-modify-write semaphore
        MOV     R12,#0
        LDRB    R11,[R12,#SyncCodeA_sema]
        CMP     R11,#0
        TEQNEP  R10,#0                     ;restore and...
        BNE     SLVK                       ;semaphore set, avoid reentrancy, let first call do it
        MOV     R11,#1
        STRB    R11,[R12,#SyncCodeA_sema]  ;set semaphore
        TEQP    R10,#0                     ;restore IRQ state
        MOV     R12,#ARMA_Cleaner_flipflop
        LDR     R11,[R12]
        EOR     R11,R11,#16*1024
        STR     R11,[R12]
        ARMA_clean_DC R11,R12,R10  ;fully clean/flush DC wrt non-interrupt stuff
        ARMA_drain_WB
        ARMA_flush_IC WithoutNOPs  ;do *not* flush DC - may be stuff from interrupt routines
        MOV     R12,#0
        STRB    R12,[R12,#SyncCodeA_sema]  ;reset semaphore
        MOV     R0,R0              ;NOP to ensure 4 instructions after IC flush before return
        B       SLVK

        LTORG

;
;some service routines here for easy patchability
        ALIGN

dtgps_SAcleanflush  ; used during pages_unsafe/safe in ChangeDyn
    ADR     r0,PageBlock1
    ADD     r0,r0,#4        ; r0 -> page block (logical addresses)
    LDR     r1,NumEntries
dtgps_SAloop0
    LDR     r2,[r0],#12
    ADD     r3,r2,#4096     ; 4k page
dtgps_SAloop1
 [ SAcleanflushbroken         ; 2 separate instructions 'coz SA110 cleanflush (1 instruction) seems ineffective
    ARMA_clean_DCentry r2
    ARMA_flush_DCentry r2
    ADD     r2,r2,#32
    ARMA_clean_DCentry r2
    ARMA_flush_DCentry r2
    ADD     r2,r2,#32
    ARMA_clean_DCentry r2
    ARMA_flush_DCentry r2
    ADD     r2,r2,#32
    ARMA_clean_DCentry r2
    ARMA_flush_DCentry r2
    ADD     r2,r2,#32
 |
    ARMA_cleanflush_DCentry r2
    ADD     r2,r2,#32
    ARMA_cleanflush_DCentry r2
    ADD     r2,r2,#32
    ARMA_cleanflush_DCentry r2
    ADD     r2,r2,#32
    ARMA_cleanflush_DCentry r2
    ADD     r2,r2,#32
  ]
    CMP     r2,r3
    BLO     dtgps_SAloop1
    SUBS    r1,r1,#1
    BNE     dtgps_SAloop0
    ARMA_drain_WB           ; squeeze out those last drops
    MOV     pc,lr

;ARM810 equiv of dtgps_SAcleanflush must clean/flush whole cache (cannot clean/flush by virtual address)
;and is shared with code in meminfo_flushplease, below

meminfo_flushplease         ; used by MemInfo
        ARM_read_ID r0
        AND     r0,r0,#&F000
        CMP     r0,#&A000
        BEQ     mifp_SA
  [ ARM810support
        CMP     r0,#&8000
        BEQ     mifp_810
  ]
;assume ARM 6 or 7 - simple!
        ARM67_flush_cache
        MOV     pc,lr
mifp_SA
;StrongARM - this could take a while...
        MOV     r1,#ARMA_Cleaner_flipflop
        LDR     r0,[r1]
        EOR     r0,r0,#16*1024
        STR     r0,[r1]
        ARMA_clean_DC r0,r1,r2          ;clean/flush data cache wrt non-interrupt stuff (trashes r0,r1,r2)
        ARMA_flush_IC                   ;do *not* flush DC - may be interrupt stuff
        MOV     pc,lr
  [ ARM810support
mifp_810
dtgps_810cleanflush                     ;entry point also used during pages_unsafe/safe in ChangeDyn
    [ ARM810cleanflushbroken
        ARM8_cleanflush_IDC r0,r1
    |
        ARM8_cleanflush_IDC r0
    ]
        MOV     pc,lr
  ]
;

        DCB     "GROT"                  ;spare words marker
        ALIGN   4096                    ;align to page boundary for easy ROMpatch
arm600stuff_endofstuff
  ! 0,"-- size of ARM600+ stuff (4k aligned) is ":CC::STR:(arm600stuff_endofstuff - arm600stuff_startofstuff)


 [ DebugAborts
        InsertDebugRoutines
 ]
        END