; Copyright 2000 Pace Micro Technology plc
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;     http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;

        GBLL    MinorL2PThack
MinorL2PThack SETL {TRUE}

; Fixed page allocation is as follows

        ^       0
DRAMOffset_FirstFixed   # 0
DRAMOffset_ScratchSpace # 16*1024
DRAMOffset_PageZero     # 16*1024
DRAMOffset_L1PT         # 16*1024               ; L1PT must be 16K-aligned
DRAMOffset_LastFixed    # 0

;       IMPORT  Init_ARMarch
;       IMPORT  ARM_Analyse

 [ MEMM_Type = "VMSAv6"
mmuc_init_new
        ; MMUC initialisation flags for ARMv6/ARMv7
        ; This tries to leave the reserved/unpredictable bits untouched, while
        ; initialising everything else to what we want
        ; ARMv7MP (probably) wants SW. ARMv6 wants U+XP (which should both be fixed at 1 on ARMv7)
        DCD     MMUC_SW+MMUC_U+MMUC_XP
        ; M+C+W+Z+I+L2 clear to keep MMU/caches off.
        ; A to keep alignment exceptions off (for now at least)
        ; B+EE clear for little-endian
        ; S+R+RR clear to match mmuc_init_old
        ; V+VE clear to keep processor vectors at &0
        ; FI clear to disable fast FIQs (interruptible LDM/STM)
        ; TRE+AFE clear for our VMSAv6 implementation
        ; TE clear for processor vectors to run in ARM mode
        DCD     MMUC_M+MMUC_A+MMUC_C+MMUC_W+MMUC_B+MMUC_S+MMUC_R+MMUC_Z+MMUC_I+MMUC_V+MMUC_RR+MMUC_FI+MMUC_VE+MMUC_EE+MMUC_L2+MMUC_TRE+MMUC_AFE+MMUC_TE
mmuc_init_old
        ; MMUC initialisation flags for ARMv5 and below, as per ARM600 MMU code
        ; Late abort (ARM6 only), 32-bit Data and Program space. No Write buffer (ARM920T
        ; spec says W bit should be set, but I reckon they're bluffing).
        ;
        ; The F bit's tricky. (1 => CPCLK=FCLK, 0 => CPCLK=FCLK/2). The only chip using it was
        ; the ARM700, it never really reached the customer, and it's always been programmed with
        ; CPCLK=FCLK. Therefore we'll keep it that way, and ignore the layering violation.
        DCD     MMUC_F+MMUC_L+MMUC_D+MMUC_P
        ; All of these bits should be off already, but just in case...
        DCD     MMUC_B+MMUC_W+MMUC_C+MMUC_A+MMUC_M+MMUC_RR+MMUC_V+MMUC_I+MMUC_Z+MMUC_R+MMUC_S
 ]

; void RISCOS_InitARM(unsigned int flags)
;
RISCOS_InitARM
        MOV     a4, lr
        ; Check if we're architecture 3. If so, don't read the control register.
        BL      Init_ARMarch
        MOVEQ   a3, #0
        ARM_read_control a3, NE
 [ MEMM_Type = "VMSAv6"
        CMP     a1, #ARMv6
        CMPNE   a1, #ARMvF
        ADREQ   a2, mmuc_init_new
        ADRNE   a2, mmuc_init_old
        LDMIA   a2, {a2, lr}
        ORR     a3, a3, a2
        BIC     a3, a3, lr
 |
        ; Late abort (ARM6 only), 32-bit Data and Program space. No Write buffer (ARM920T
        ; spec says W bit should be set, but I reckon they're bluffing).
        ;
        ; The F bit's tricky. (1 => CPCLK=FCLK, 0 => CPCLK=FCLK/2). The only chip using it was
        ; the ARM700, it never really reached the customer, and it's always been programmed with
        ; CPCLK=FCLK. Therefore we'll keep it that way, and ignore the layering violation.
        ORR     a3, a3, #MMUC_F+MMUC_L+MMUC_D+MMUC_P
        ; All of these bits should be off already, but just in case...
        BIC     a3, a3, #MMUC_B+MMUC_W+MMUC_C+MMUC_A+MMUC_M
        BIC     a3, a3, #MMUC_RR+MMUC_V+MMUC_I+MMUC_Z+MMUC_R+MMUC_S
 ]
        ; Off we go.
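        ; (Illustrative note, not in the original source: on the VMSAv6 path
        ; the two words at mmuc_init_new are applied by the LDMIA/ORR/BIC
        ; sequence above as
        ;     new_CR = (old_CR ORR word0) BIC word1
        ; so SW/U/XP are forced on, the M/A/C/W/.../TE group is forced off,
        ; and any reserved or unpredictable bits keep whatever value the
        ; reset left in them.)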
        ARM_write_control a3
        MOV     a2, #0
 [ MEMM_Type = "VMSAv6"
        myISB   ,a2,,y                          ; Ensure the update went through
 ]
        ; In case it wasn't a hard reset
 [ MEMM_Type = "VMSAv6"
        CMP     a1, #ARMvF
        ; Assume that all ARMvF ARMs have multi-level caches and thus no single
        ; MCR op for invalidating all the caches
        MCRNE   ARM_config_cp,0,a2,ARMv4_cache_reg,C7   ; invalidate I+D caches
        BLEQ    HAL_InvalidateCache_ARMvF
 ]
        CMP     a1, #ARMv3
        MCREQ   ARM_config_cp,0,a2,ARMv3_TLBflush_reg,C0        ; flush TLBs
        MCRNE   ARM_config_cp,0,a2,ARMv4_TLB_reg,C7             ; flush TLBs
 [ MEMM_Type = "VMSAv6"
        myDSB   ,a2,,y
        myISB   ,a2,,y
 ]
        ; We assume that ARMs with an I cache can have it enabled while the MMU is off.
 [ :LNOT:CacheOff
        ORRNE   a3, a3, #MMUC_I
        ARM_write_control a3, NE                ; whoosh
 [ MEMM_Type = "VMSAv6"
        myISB   ,a2,,y                          ; Ensure the update went through
 ]
 ]
        ; Check if we are in a 26-bit mode.
        MRS     a2, CPSR
        ; Keep a soft copy of the CR in a banked register (R13_und)
        MSR     CPSR_c, #F32_bit+I32_bit+UND32_mode
        MOV     sp, a3
        ; Switch into SVC32 mode (we may have been in SVC26 before).
        MSR     CPSR_c, #F32_bit+I32_bit+SVC32_mode
        ; If we were in a 26-bit mode, the lr value given to us would have had PSR flags in.
        TST     a2, #2_11100
        MOVNE   pc, a4
        BICEQ   pc, a4, #ARM_CC_Mask

; void *RISCOS_AddRAM(unsigned int flags, void *start, void *end, uintptr_t sigbits, void *ref)
;   Entry:
;     flags   bit 0: video memory (currently only one block permitted)
;             bit 1: video memory is not suitable for general use
;             bits 8-11: speed indicator (arbitrary, higher => faster)
;             other bits reserved (SBZ)
;     start   = start address of RAM (inclusive) (no alignment requirements)
;     end     = end address of RAM (exclusive) (no alignment requirements, but must be >= start)
;     sigbits = significant address bit mask (1 => this bit of addr decoded, 0 => this bit ignored)
;     ref     = reference handle (NULL for first call)
;
; A table is built up at the head of the first block of memory.
; The table consists of (addr, len, flags) pairs, terminated by a count of those pairs;
; ref points to that counter.
; Twelve bits of flags are stored at the bottom of the length word.

        ROUT
RISCOS_AddRAM
        Push    "v1,v2,v3,v4,lr"
        LDR     v4, [sp, #20]                   ; Get ref

        ; Round to pages. If we were extra sneaky we could not do this and chuck out
        ; incomplete pages after concatenation, but it would be a weird HAL that gave
        ; us pages split across calls.
        ;
        ADD     a2, a2, #4096                   ; round start address up
        SUB     a2, a2, #1
        MOV     a2, a2, LSR #12
        MOV     a2, a2, LSL #12
        MOV     a3, a3, LSR #12                 ; round end address down
        MOV     a3, a3, LSL #12

        CMP     a3, a2
        BLS     %FT90                           ; check we aren't now null
        CMP     v4, #0
        BEQ     %FT20

        ; We are not dealing with the first block since v4 != 0. Make an attempt to merge
        ; this block with the previous block.
        LDMDB   v4, {v1, v2}                    ; Get details of the previous block
        MOV     v3, v2, LSL #20                 ; Isolate flags
        BIC     v2, v2, v3, LSR #20             ; And strip from length
        ADD     v2, v1, v2                      ; Get the end address
        EOR     v2, v2, a2                      ; Compare with the current block start address...
        TST     v2, a4                          ; ... but only check the decoded bits.
        EOR     v2, v2, a2                      ; Restore the previous block end address.
        TEQEQ   v3, a1, LSL #20                 ; And are the page flags the same?
        BNE     %FT10                           ; We can't merge it after the previous block

        ; v1 = previous start
        ; v2 = previous end
        ; The block is just after the previous block. That means the start address is
        ; unchanged, but the length is increased.
        SUB     v2, v2, v1                      ; Calculate the previous block length.
        SUB     a3, a3, a2                      ; Find the length of the new block.
        ; a3 = length of block
        ADD     v2, v2, a3                      ; Add it to the previous length.
        ORR     v2, v2, v3, LSR #20             ; And put the flags back in.
        STR     v2, [v4, #-4]                   ; Update the block size in memory.
        MOV     a1, v4
        Pull    "v1,v2,v3,v4,pc"

        ; The block is not just after the previous block, but it may be just before.
        ; This may be the case if we are softloaded.
10      SUB     v1, v1, #1                      ; Compare the address before the previous block start ...
        SUB     a3, a3, #1                      ; ... with the address of the last byte in this block ...
        EOR     v1, v1, a3
        TST     v1, a4                          ; ... but check only the decoded bits.
        ADD     a3, a3, #1                      ; Restore the end address.
        TEQEQ   v3, a1, LSL #20                 ; And are the page flags the same?
        BNE     %FT20                           ; Skip if we cannot merge the block.

        ; The block is just before the previous block. The start address and length both change.
        LDR     v1, [v4, #-8]                   ; Get the previous block start again.
        SUB     a3, a3, a2                      ; Calculate the current block size.
        SUB     v1, v1, a3                      ; Subtract from the previous block start address.
        SUB     v2, v2, v1                      ; Calculate the new length = end - start
        ORR     v2, v2, v3, LSR #20             ; And put the flags back in.
        STMDB   v4, {v1, v2}                    ; Update the block info in memory.
        MOV     a1, v4
        Pull    "v1,v2,v3,v4,pc"

        ; We now have a region which does not merge with a previous region. We move it up
        ; to the highest address we can, in the hope that this block will merge with the
        ; next block.
20      SUB     a3, a3, a2                      ; Calculate the block size
        MOV     a1, a1, LSL #20
        ORR     a3, a3, a1, LSR #20             ; Put the flags at the bottom
        MVN     v1, a4                          ; Get the non-decoded address lines.
        ORR     a2, v1, a2                      ; Set the non-decoded address bits in the start address.
30      CMP     v4, #0                          ; If the workspace has not been allocated...
        MOVEQ   v4, a2                          ; ... use this block.
        MOVEQ   v1, #0                          ; Initialise the counter.
        ; The block/fragment to be added is between a2 and a2+a3.
        LDRNE   v1, [v4]                        ; Get the old counter if there was one.
        STMIA   v4!, {a2, a3}                   ; Store address and size.
        ADD     v1, v1, #1                      ; Increment the counter.
        STR     v1, [v4]                        ; Store the counter.
90      MOV     a1, v4
        Pull    "v1,v2,v3,v4,pc"                ; We're done with this block now.

; Subtractv1v2fromRAMtable
;
; On entry: v1 = base of memory area
;           v2 = size of memory area
;           a4 = RAM table handle (ie pointer to terminator word containing number of entries)
;
; On exit:  a1-a3 preserved
;           a4 and RAM table updated
;           other registers corrupted
Subtractv1v2fromRAMtable
        ADD     v2, v1, v2                      ; v2 = end address
        MOV     v1, v1, LSR #12
        MOV     v1, v1, LSL #12                 ; round base down
        ADD     v2, v2, #4096
        SUB     v2, v2, #1
        MOV     v2, v2, LSR #12
        MOV     v2, v2, LSL #12                 ; round end up

        LDR     v5, [a4]
        SUB     v8, a4, v5, LSL #3
10      TEQ     v8, a4
        MOVEQ   pc, lr
        LDMIA   v8!, {v3, v4}
        MOV     v6, v4, LSR #12
        ADD     v6, v3, v6, LSL #12             ; v6 = end of RAM block
        CMP     v2, v3                          ; if our end <= RAM block start
        CMPHI   v6, v1                          ; or RAM block end <= our start
        BLS     %BT10                           ; then no intersection
        MOV     v4, v4, LSL #20                 ; extract flags

        CMP     v1, v3
        BHI     not_bottom

        ; our area is at the bottom
        CMP     v2, v6
        BHS     remove_block
        SUB     v6, v6, v2                      ; v6 = new size
        ORR     v6, v6, v4, LSR #20             ; + flags
        STMDB   v8, {v2, v6}                    ; store new base (= our end) and size
        B       %BT10

        ; we've completely covered a block. Remove it.
remove_block
        MOV     v6, v8
20      TEQ     v6, a4                          ; shuffle down subsequent blocks in table
        LDMNEIA v6, {v3, v4}
        STMNEDB v6, {v3, v4}
        ADDNE   v6, v6, #8
        BNE     %BT20
        SUB     v5, v5, #1
        SUB     a4, a4, #8
        STR     v5, [a4]
        SUB     v8, v8, #8
        B       %BT10

        ; our area is not at the bottom.
not_bottom
        CMP     v2, v6
        BLO     split_block

        ; our area is at the top
        SUB     v6, v1, v3                      ; v6 = new size
        ORR     v6, v6, v4, LSR #20             ; + flags
        STMDB   v8, {v3, v6}                    ; store original base and new size
        B       %BT10

split_block
        MOV     v6, a4
30      TEQ     v6, v8                          ; shuffle up subsequent blocks in table
        LDMNEDB v6, {v3, v4}
        STMNEIA v6, {v3, v4}
        SUBNE   v6, v6, #8
        BNE     %BT30
        ADD     v5, v5, #1
        ADD     a4, a4, #8
        STR     v5, [a4]
        SUB     v7, v1, v3                      ; v7 = size of first half
        SUB     v6, v6, v2                      ; v6 = size of second half
        ORR     v7, v7, v4, LSR #20
        ORR     v6, v6, v4, LSR #20             ; + flags
        STMDB   v8, {v3, v7}
        STMIA   v8!, {v2, v6}
        B       %BT10

; void RISCOS_Start(unsigned int flags, int *riscos_header, int *hal_header, void *ref)
;
; We don't return, so no need to obey ATPCS, except for parameter passing.
;
; register usage: v4 = location of VRAM
;                 v6 = amount of VRAM

        ROUT
RISCOS_Start
        TEQ     a4, #0
01      BEQ     %BT01                           ; Stop here if no RAM

        ; subtract the HAL and OS from the list of RAM areas
        MOV     v1, a2
        LDR     v2, [a2, #OSHdr_ImageSize]
        BL      Subtractv1v2fromRAMtable
        LDR     v1, [a3, #HALDesc_Start]
        ADD     v1, a3, v1
        LDR     v2, [a3, #HALDesc_Size]
        BL      Subtractv1v2fromRAMtable

        LDR     v5, [a4]                        ; v5 = the number of RAM blocks
        SUB     v8, a4, v5, LSL #3              ; Jump back to the start of the list.

        ; Search for some VRAM
05      LDMIA   v8!, {v1, v2}                   ; Get a block from the list. (v1,v2)=(addr,size+flags)
        TST     v2, #1                          ; Is it VRAM?
        BNE     %FT20                           ; If so, deal with it below
        TEQ     v8, a4                          ; Carry on until end of list or we find some.
        BNE     %BT05

        ; Extract some pseudo-VRAM from first RAM block
        SUB     v8, a4, v5, LSL #3              ; Rewind again.
        LDMIA   v8!, {v1, v2}
        MOV     v2, v2, LSR #12                 ; Remove flags
        MOV     v2, v2, LSL #12
        MOV     v4, v1                          ; Allocate first block as video memory
        MOV     v6, v2
        TEQ     v8, a4                          ; Was this the only block? If so, leave 16M
        SUBEQS  v6, v6, #16*1024*1024
        MOVCC   v6, v2, LSR #1                  ; If that overflowed, take half the bank.
        CMP     v6, #32*1024*1024
        MOVHS   v6, #32*1024*1024               ; Limit allocation to 32M (arbitrary)
        ADD     v1, v1, v6                      ; Adjust the RAM block base...
        SUBS    v2, v2, v6                      ; ... and the size
        LDMEQIA v8!, {v1, v2}                   ; Fetch the next block if we claimed it all
        B       %FT30

        ; Note real VRAM parameters
20      MOV     v6, v2                          ; Remember the size and address
        MOV     v4, v1                          ; of the VRAM
22      TEQ     v8, a4                          ; if not at the end of the array
        LDMNEIA v8, {v1, v2}                    ; pack the array tighter
        STMNEDB v8, {v1, v2}
        ADDNE   v8, v8, #8
        BNE     %BT22
25      SUB     v5, v5, #1                      ; decrease the counter
        STR     v5, [a4, #-8]!                  ; and move the end marker down
        SUB     v8, a4, v5, LSL #3              ; Rewind to start of list
        LDMIA   v8!, {v1, v2}

        ; Fill in the Kernel's permanent memory table
30      ADD     ip, v1, #DRAMOffset_PageZero
        ADD     v7, ip, #DRAMPhysAddrA
        ADD     sp, v1, #DRAMOffset_ScratchSpace + ScratchSpaceSize
        Push    "a1,a2,a3"                      ; Remember our arguments
        CMP     v5, #DRAMPhysTableSize          ; Don't overflow our table
        ADDHI   a4, v8, #DRAMPhysTableSize*8 - 8
35      MOV     v2, v2, LSR #12
        MOVS    v2, v2, LSL #12                 ; strip out flags
        STMNEIA v7!, {v1, v2}                   ; if non-zero length, add it to real list
        TEQ     v8, a4
        LDMNEIA v8!, {v1, v2}
        BNE     %BT35

        ; Now go back and put the VRAM information in
        STR     v6, [ip, #VRAMFlags]
        MOV     v6, v6, LSR #12
        MOV     v6, v6, LSL #12                 ; strip out flags
        ADD     a3, ip, #VideoPhysAddr
        STMIA   a3, {v4, v6}

        ; Now we have to work out the total RAM size
        MOV     a2, #0
        MOV     v6, a3
40      LDMIA   v6!, {v1, v2}                   ; get address, size
        ADD     a2, a2, v2                      ; add on size
        TEQ     v6, v7
        BNE     %BT40

        ; a2 = Total memory size (bytes)
        ; a3 = PhysRamTable
        ; v7 = After last used entry in PhysRamTable
        ; now store zeros to fill out table
55      ADD     v2, a3, #PhysRamTableEnd-PhysRamTable
        MOV     v3, #0
        MOV     v4, #0
57      CMP     v7, v2
        STMLOIA v7!, {v3, v4}
        BLO     %BT57

        ; Time to set up the L1PT. Just zero it out for now.
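        ; (Illustrative note, not in the original source: on this style of MMU
        ; the L1PT is 4096 words, one per 1MB of virtual space - the 16K that
        ; the loop below zeroes, and the reason DRAMOffset_L1PT above must be
        ; 16K-aligned. Per the ARM ARM, and matching the ASSERTs later in this
        ; file, bits 1:0 of each word give the mapping type: 00 = fault,
        ; 01 = page table, 10 = section - so an all-zero table means
        ; "everything faults".)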
        LDR     a3, [a3, #DRAMPhysAddrA-PhysRamTable]   ; get address of 1st RAM bank
        ADD     a3, a3, #DRAMOffset_L1PT+16*1024        ; make a3 -> L1PT end
        MOV     a4, #16*1024
        MOV     v2, #0
        MOV     v3, #0
        MOV     v4, #0
        MOV     v5, #0
        MOV     v6, #0
        MOV     v7, #0
        MOV     v8, #0
        MOV     ip, #0
60      STMDB   a3!, {v2-v8,ip}                 ; start at end and work back
        SUBS    a4, a4, #8*4
        BNE     %BT60

        ADD     v1, a3, #DRAMOffset_PageZero - DRAMOffset_L1PT
        ADD     v2, a3, #DRAMOffset_LastFixed - DRAMOffset_L1PT
        STR     a2, [v1, #RAMLIMIT]             ; remember the RAM size
        MOV     lr, a2, LSR #12
        SUB     lr, lr, #1
        STR     lr, [v1, #MaxCamEntry]
        MOV     lr, a2, LSR #12-3+12
        CMP     a2, lr, LSL #12-3+12
        ADDNE   lr, lr, #1
        MOV     lr, lr, LSL #12
        STR     lr, [v1, #SoftCamMapSize]
        STR     a3, [v1, #InitUsedStart]        ; store start of L1PT
        ADD     v1, v1, #DRAMPhysAddrA
        MOV     v3, a3

        ; For the next batch of allocation routines, v1-v3 are treated as globals.
        ;   v1 -> current entry in PhysRamTable
        ;   v2 -> next address to allocate in v1 (may point at end of v1)
        ;   v3 -> L1PT (or 0 if MMU on - not yet)

        ; Allocate the L2PT backing store for the logical L2PT space, to
        ; prevent recursion.
        LDR     a1, =L2PT
        MOV     a2, #&00400000
        LDR     a3, =(AP_None * L2X_APMult)
        BL      AllocateL2PT

        ; Allocate workspace for the HAL
        ADD     a4, v3, #DRAMOffset_PageZero - DRAMOffset_L1PT
        LDR     a3, [sp, #8]                    ; recover pushed HAL header
        LDR     a1, =HALWorkspace
        LDR     a2, =(AP_Read * L2X_APMult) + L2_C + L2_B
        LDR     lr, [a3, #HALDesc_Workspace]    ; their workspace
        LDR     ip, [a3, #HALDesc_NumEntries]   ; plus 1 word per entry
        CMP     ip, #KnownHALEntries
        MOVLO   ip, #KnownHALEntries
        ADD     lr, lr, ip, LSL #2
        MOV     a3, lr, LSR #12                 ; round workspace up to whole
        MOV     a3, a3, LSL #12                 ; number of pages
        CMP     a3, lr
        ADDNE   a3, a3, #&1000
        STR     a3, [a4, #HAL_WsSize]           ; Make a note of allocated space
        ADD     ip, a1, ip, LSL #2              ; Their workspace starts
        STR     ip, [a4, #HAL_Workspace]        ; after our table of entries
        BL      Init_MapInRAM

        LDR     a3, [sp, #8]                    ; recover pushed HAL header
        LDR     lr, [a3, #HALDesc_Flags]
        TST     lr, #HALFlag_NCNBWorkspace      ; do they want uncacheable workspace?
        LDRNE   a1, =HALWorkspaceNCNB
        LDRNE   a2, =(AP_None * L2X_APMult)
        LDRNE   a3, =32*1024
        BLNE    Init_MapInRAM

        ; Bootstrap time. We want to get the MMU on ASAP. We also don't want to have to
        ; clear up too much mess later. So what we'll do is map in the three fixed areas
        ; (L1PT, scratch space and page zero), the CAM, ourselves, and the HAL,
        ; then turn on the MMU. The CAM will be filled in once the MMU is on, by
        ; reverse-engineering the page tables?
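        ; (Illustrative note, not in the original source: the Init_MapIn calls
        ; that follow all use the register interface given in that routine's
        ; own header further down this file:
        ;     a1 = physical address, a2 = logical address,
        ;     a3 = access permissions + C + B bits, a4 = size (4K multiple)
        ; so "Map in page zero" below is roughly, in C-like shorthand,
        ;     Init_MapIn(dram_base + DRAMOffset_PageZero, ZeroPage,
        ;                AP_Read|C|B, 16*1024);
        ; )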
        ; Map in page zero
        ADD     a1, v3, #DRAMOffset_PageZero - DRAMOffset_L1PT
        LDR     a2, =ZeroPage
 [ ECC
        LDR     a3, =(AP_Read * L2X_APMult) + L2_C + L2_B + 1:SHL:31
 |
        LDR     a3, =(AP_Read * L2X_APMult) + L2_C + L2_B
 ]
        MOV     a4, #16*1024
        BL      Init_MapIn

        ; Map in scratch space
        ADD     a1, v3, #DRAMOffset_ScratchSpace - DRAMOffset_L1PT
        MOV     a2, #ScratchSpace
 [ ECC
        LDR     a3, =(AP_Read * L2X_APMult) + L2_C + L2_B + 1:SHL:31
 |
        LDR     a3, =(AP_Read * L2X_APMult) + L2_C + L2_B
 ]
        MOV     a4, #16*1024
        BL      Init_MapIn

        ; Map in L1PT
        MOV     a1, v3
        LDR     a2, =L1PT
 [ ECC
        LDR     a3, =(AP_None * L2X_APMult) + 1:SHL:31
 |
        LDR     a3, =(AP_None * L2X_APMult)
 ]
        MOV     a4, #16*1024
        BL      Init_MapIn

        ; Map in L1PT again in PhysicalAccess (see below)
        MOV     a1, v3, LSR #20
        MOV     a1, a1, LSL #20                 ; megabyte containing L1PT
        LDR     a2, =PhysicalAccess
 [ ECC
        LDR     a3, =(AP_None * L2X_APMult) + 1:SHL:31
 |
        LDR     a3, =(AP_None * L2X_APMult)
 ]
        MOV     a4, #1024*1024
        BL      Init_MapIn

        ; Examine HAL and RISC OS locations
        LDMFD   sp, {v4,v5,v6}                  ; v4 = flags, v5 = RO desc, v6 = HAL desc
        LDR     lr, [v6, #HALDesc_Size]
        LDR     v7, [v6, #HALDesc_Start]
        ADD     v6, v6, v7                      ; (v6,v8) = (start,end) of HAL
        ADD     v8, v6, lr
        LDR     v7, [v5, #OSHdr_ImageSize]
        ADD     v7, v5, v7                      ; (v5,v7) = (start,end) of RISC OS

        TEQ     v8, v5                          ; check contiguity (as in a ROM image)
        BNE     %FT70

        ; HAL and RISC OS are contiguous. Yum.
        MOV     a1, v6
        LDR     a2, =RISCOS_Header
        SUB     a2, a2, lr
        SUB     ip, a2, a1                      ; change physical addresses passed in
        LDMIB   sp, {a3, a4}                    ; into logical addresses
        ADD     a3, a3, ip
        ADD     a4, a4, ip
        STMIB   sp, {a3, a4}
        LDR     a3, [v5, #OSHdr_DecompressHdr]  ; check if ROM is compressed, and if so, make writeable
        CMP     a3, #0
        MOVNE   a3, #(AP_Full * L2X_APMult) + L2_C + L2_B
        MOVEQ   a3, #(AP_ROM * L2X_APMult) + L2_C + L2_B
        SUB     a4, v7, v6
        BL      Init_MapIn
        MOV     a3, v6
        B       %FT75

70      ; HAL is separate. (We should cope with larger images)
        LDR     a2, =ROM
        MOV     a1, v6
        SUB     ip, a2, a1                      ; change physical address passed in
        LDR     a3, [sp, #8]                    ; into logical address
        ADD     a3, a3, ip
        STR     a3, [sp, #8]
        SUB     a4, v8, v6
        MOV     a3, #(AP_ROM * L2X_APMult) + L2_C + L2_B
        BL      Init_MapIn

        ; And now map in RISC OS
        LDR     a2, =RISCOS_Header              ; Hmm - what if position independent?
        MOV     a1, v5
        SUB     ip, a2, a1                      ; change physical address passed in
        LDR     a3, [sp, #4]                    ; into logical address
        ADD     a3, a3, ip
        STR     a3, [sp, #4]
        SUB     a4, v7, v5
        LDR     a3, [v5, #OSHdr_DecompressHdr]
        CMP     a3, #0
        MOVNE   a3, #(AP_Full * L2X_APMult) + L2_C + L2_B
        MOVEQ   a3, #(AP_ROM * L2X_APMult) + L2_C + L2_B
        BL      Init_MapIn
        MOV     a3, v5

75      ; We've now allocated all the pages we're going to before the MMU comes on.
        ; Note the end address (for RAM clear)
        ADD     a1, v3, #DRAMOffset_PageZero - DRAMOffset_L1PT
        STR     v1, [a1, #InitUsedBlock]
        STR     v2, [a1, #InitUsedEnd]
        STR     a3, [a1, #ROMPhysAddr]

        ; Note the HAL flags passed in.
        LDR     a2, [sp, #0]
        STR     a2, [a1, #HAL_StartFlags]

        ; Set up a reset IRQ handler (used during RAM clear for keyboard
        ; scan, and later for IIC CMOS access)
        MSR     CPSR_c, #IRQ32_mode + I32_bit + F32_bit
        LDR     sp_irq, =ScratchSpace + 1024    ; 1K is plenty since Reset_IRQ_Handler now runs in SVC mode
        MSR     CPSR_c, #SVC32_mode + I32_bit + F32_bit
        LDR     a2, =Reset_IRQ_Handler
        STR     a2, [a1, #InitIRQHandler]

        ; Fill in some initial processor vectors. These will be used during ARM
        ; analysis, once the MMU is on. We do it here before the data cache is
        ; activated to save any IMB issues.
        ADRL    a2, InitProcVecs
        ADD     a3, a2, #InitProcVecsEnd - InitProcVecs
76      LDR     a4, [a2], #4
        CMP     a2, a3
        STR     a4, [a1], #4
        BLO     %BT76

MMU_activation_zone
; The time has come to activate the MMU. Steady now... Due to the unpredictability of MMU
; activation, we need to ensure that mapped and unmapped addresses are equivalent. To
; do this, we temporarily make the section containing virtual address MMUon_instr map
; to the same physical address. In case the code crosses a section boundary, do the
; next section as well.
;
; Also note, no RAM access until we've finished the operation, as we don't know it's
; available, and we might lose it due to lack of cache cleaning.
;
        MOV     a1, #4_3333333333333333         ; All domain manager - in case MMU already on
        ARM_MMU_domain a1                       ; (who knows what domains/permissions they are using)

        ADR     a1, MMU_activation_zone
        MOV     a1, a1, LSR #20                 ; a1 = megabyte number (stays there till end)
        ADD     lr, v3, a1, LSL #2              ; lr -> L1PT entry
        LDMIA   lr, {a2, a3}                    ; remember old mappings
 [ MEMM_Type = "VMSAv6"
        LDR     ip, =(AP_ROM * L1_APMult) + L1_Section
 |
 [ ARM6support
        LDR     ip, =(AP_None * L1_APMult) + L1_U + L1_Section
 |
        LDR     ip, =(AP_ROM * L1_APMult) + L1_U + L1_Section
 ]
 ]
        ORR     a4, ip, a1, LSL #20             ; not cacheable, as we don't want
        ADD     v4, a4, #1024*1024              ; to fill the cache with rubbish
        STMIA   lr, {a4, v4}
        MOV     a4, a1
        BL      Init_ARMarch                    ; corrupts a1 and ip
        MOV     ip, a1                          ; Remember architecture for later
        MOV     a1, a4
        MSREQ   CPSR_c, #F32_bit+I32_bit+UND32_mode     ; Recover the soft copy of the CR
        MOVEQ   v5, sp
        ARM_read_control v5, NE
 [ CacheOff
        ORR     v5, v5, #MMUC_M                 ; MMU on
        ORR     v5, v5, #MMUC_R                 ; ROM mode enable
 |
        ORR     v5, v5, #MMUC_W+MMUC_C+MMUC_M   ; Write buffer, data cache, MMU on
        ORR     v5, v5, #MMUC_R+MMUC_Z          ; ROM mode enable, branch predict enable
 ]
        ARM_MMU_transbase v3                    ; Always useful to tell it where L1PT is...
        MOV     lr, #0
        MCREQ   p15, 0, lr, c5, c0              ; MMU may already be on (but flat mapped)
        MCRNE   p15, 0, lr, c8, c7              ; if HAL needed it (eg XScale with ECC)
                                                ; so flush TLBs now
 [ MEMM_Type = "VMSAv6"
        CMP     ip, #ARMv6
        MCRGE   p15, 0, lr, c2, c0, 2           ; Ensure only TTBR0 is used (v6)
        MCRGT   p15, 0, lr, c12, c0, 0          ; Ensure exception vector base is 0 (Cortex)
        myISB   ,lr,,y
        ORRGE   v5, v5, #MMUC_XP                ; Extended pages enabled (v6)
        BICGE   v5, v5, #MMUC_TRE+MMUC_AFE      ; TEX remap, Access Flag disabled
        BICGE   v5, v5, #MMUC_EE+MMUC_TE+MMUC_VE ; Exceptions = nonvectored LE ARM
        CMP     ip, #0
 ]
 [ NoUnaligned
        ORR     v5, v5, #MMUC_A                 ; Alignment exceptions on
 ]
 [ HiProcVecs
        ORR     v5, v5, #MMUC_V                 ; High processor vectors enabled
 ]
MMUon_instr
        ARM_write_control v5
 [ MEMM_Type = "VMSAv6"
        MOV     lr, #0
        myISB   ,lr,,y                          ; Just in case
 ]
        MOVEQ   sp, v5
        MSREQ   CPSR_c, #F32_bit+I32_bit+SVC32_mode
 [ MEMM_Type = "VMSAv6"
        CMP     ip, #ARMvF
        MCRNE   ARM_config_cp,0,lr,ARMv4_cache_reg,C7   ; junk MMU-off contents of I-cache (works on ARMv3)
        MCREQ   p15, 0, lr, c7, c5, 0           ; invalidate instruction cache
        MCREQ   p15, 0, lr, c8, c7, 0           ; invalidate TLBs
        MCREQ   p15, 0, lr, c7, c5, 6           ; invalidate branch predictor
        myISB   ,lr,,y                          ; Ensure below branch works
        BLEQ    HAL_InvalidateCache_ARMvF       ; invalidate data cache (and instruction+TLBs again!)
 |
        MOV     lr, #0                          ; junk MMU-off contents of I-cache
        MCR     ARM_config_cp,0,lr,ARMv4_cache_reg,C7   ; (works on ARMv3)
 ]
        MOV     ip, #4_0000000000000001         ; domain 0 client only
        ARM_MMU_domain ip

; MMU now on. Need to jump to logical copy of ourselves. Complication arises if our
; physical address overlaps our logical address - in that case we need to map
; in another disjoint copy of ourselves and branch to that first, then restore the
; original two sections.
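        ; (Illustrative note, not in the original source: a sketch of the two
        ; cases handled below, assuming the usual memory layout.
        ;   no overlap: compute ip = logical - physical, then "ADD pc, pc, ip"
        ;               lands in the ROM's logical mapping at MMUon_resume.
        ;   overlap:    the two sections at 00100000-002FFFFF are temporarily
        ;               pointed at this code, execution detours there via
        ;               MMUon_overlapresume, the original mappings are
        ;               restored, and only then is the logical-copy jump
        ;               taken.)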
        ADRL    a4, RISCOS_Header
        LDR     ip, =RISCOS_Header
        SUB     ip, ip, a4
        ADR     a4, MMU_activation_zone
        MOV     a4, a4, LSR #20
        MOV     a4, a4, LSL #20                 ; a4 = base of scrambled region
        ADD     v4, a4, #2*1024*1024            ; v4 = top of scrambled region
        SUB     v4, v4, #1                      ; (inclusive, in case wrapped to 0)
        ADR     v5, MMUon_resume
        ADD     v5, v5, ip                      ; v5 = virtual address of MMUon_resume
        CMP     v5, a4
        BLO     MMUon_nooverlap
        CMP     v5, v4
        BHI     MMUon_nooverlap

        ASSERT  ROM > 3*1024*1024
        ; Oh dear. We know the ROM lives high up, so we'll mangle 00100000-002FFFFF.
        ; But as we're overlapping the ROM, we know we're not overlapping the page tables.
        LDR     lr, =L1PT                       ; accessing the L1PT virtually now
 [ MEMM_Type = "VMSAv6"
        LDR     ip, =(AP_ROM * L1_APMult) + L1_Section
 |
 [ ARM6support
        LDR     ip, =(AP_None * L1_APMult) + L1_U + L1_Section
 |
        LDR     ip, =(AP_ROM * L1_APMult) + L1_U + L1_Section
 ]
 ]
        ORR     v6, a4, ip
        ADD     ip, v6, #1024*1024
        LDMIB   lr, {v7, v8}                    ; sections 1 and 2
        STMIB   lr, {v6, ip}
        RSB     ip, a4, #&00100000
        ADD     pc, pc, ip
        NOP
MMUon_overlapresume                             ; now executing from 00100000
        ADD     ip, lr, a4, LSR #18
        STMIA   ip, {a2, a3}                    ; restore original set of mappings
        BL      Init_PageTablesChanged
        MOV     a2, v7                          ; arrange for code below
        MOV     a3, v8                          ; to restore section 1+2 instead
        MOV     a1, #1

MMUon_nooverlap
        ADRL    lr, RISCOS_Header
        LDR     ip, =RISCOS_Header
        SUB     ip, ip, lr
        ADD     pc, pc, ip
        NOP
MMUon_resume
        ; What if the logical address of the page tables is at the physical address of the code?
        ; Then we have to access it via PhysicalAccess instead.
        LDR     lr, =L1PT
        CMP     lr, a4
        BLO     MMUon_nol1ptoverlap
        CMP     lr, v4
        BHI     MMUon_nol1ptoverlap
        ; PhysicalAccess points to the megabyte containing the L1PT. Find the L1PT within it.
        LDR     lr, =PhysicalAccess
        MOV     v6, v3, LSL #12
        ORR     lr, lr, v6, LSR #12
MMUon_nol1ptoverlap
        ADD     lr, lr, a1, LSL #2
        STMIA   lr, {a2, a3}
        BL      Init_PageTablesChanged

        ; The MMU is now on. Wahey. Let's get allocating.
        LDR     sp, =ScratchSpace + ScratchSpaceSize - 4*3      ; 3 items already on stack :)
        LDR     a1, =ZeroPage
        ADD     lr, v3, #DRAMOffset_PageZero-DRAMOffset_L1PT    ; lr = PhysAddr of zero page
        SUB     v1, v1, lr
        ADD     v1, v1, a1                      ; turn v1 from PhysAddr to LogAddr
        LDR     a2, [a1, #InitUsedBlock]        ; turn this from Phys to Log too
        SUB     a2, a2, lr
        ADD     a2, a2, a1
        STR     a2, [a1, #InitUsedBlock]

        ; Store the logical address of the HAL descriptor
        LDR     a2, [sp, #8]
        STR     a2, [a1, #HAL_Descriptor]
        MOV     v3, #0                          ; "MMU is on" signal
        BL      ARM_Analyse
        ChangedProcVecs a1

        MOV     a1, #L1_Fault
        BL      RISCOS_ReleasePhysicalAddress

        LDR     a1, =HALWorkspace
        LDR     a2, =ZeroPage
        LDR     a3, [a2, #HAL_WsSize]
 [ ZeroPage <> 0
        MOV     a2, #0
 ]
        BL      memset

        LDR     a2, =ZeroPage
        LDR     a1, =IOLimit
        STR     a1, [a2, #IOAllocLimit]
        LDR     a1, =IO
        STR     a1, [a2, #IOAllocPtr]
        BL      SetUpHALEntryTable

        ; Initialise the HAL. Due to its memory claiming we need to get our v1 and v2
        ; values into workspace and out again around it.
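        ; (Illustrative note, not in the original source: v1/v2 are the
        ; "current PhysRamTable entry" / "next free physical address" globals
        ; used by Init_MapInRAM and friends, so they are parked in zero page
        ; across the HAL_Init call in case the HAL's own memory claims come
        ; back through those allocation routines.)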
        LDR     a1, =ZeroPage
        STR     v1, [a1, #InitUsedBlock]
        STR     v2, [a1, #InitUsedEnd]
        LDR     a1, =RISCOS_Header
        LDR     a2, =HALWorkspaceNCNB
        AddressHAL
        CallHAL HAL_Init
 [ DebugHALTX
        BL      DebugHALPrint
        =       "HAL initialised", 0
        ALIGN
 ]
        LDR     a1, =ZeroPage
        LDR     v1, [a1, #InitUsedBlock]
        LDR     v2, [a1, #InitUsedEnd]

        ; Start timer zero, at 100 ticks per second
        MOV     a1, #0
        CallHAL HAL_TimerGranularity
        MOV     a2, a1
        MOV     a1, #100
        BL      __rt_udiv
        MOV     a2, a1
        MOV     a1, #0
        CallHAL HAL_TimerSetPeriod

        LDR     a1, =ZeroPage+InitIRQWs
        MOV     a2, #1
        STRB    a2, [a1, #KbdScanActive]
        CallHAL HAL_KbdScanSetup
        MSR     CPSR_c, #F32_bit+SVC32_mode     ; enable IRQs for scan

        ; Remember some stuff that's about to get zapped
        LDR     v8, =ZeroPage
        LDR     v4, [v8, #ROMPhysAddr]
        LDR     v5, [v8, #RAMLIMIT]
        LDR     v7, [v8, #MaxCamEntry]

        LDR     a1, [v8, #HAL_StartFlags]
        TST     a1, #OSStartFlag_RAMCleared
        ; Clear the memory.
        BLEQ    ClearPhysRAM

        ; Put it back
        STR     v4, [v8, #ROMPhysAddr]
        STR     v5, [v8, #RAMLIMIT]
        STR     v7, [v8, #MaxCamEntry]

        ; Set v4 to XCB bits for default cacheable+bufferable
        LDR     v4, [v8, #MMU_PCBTrans]
        LDRB    v4, [v4, #0]
        ; Set v5 to XCB bits for default bufferable
        LDR     v5, [v8, #MMU_PCBTrans]
        LDRB    v5, [v5, #XCB_NC]

        ; Set up the data cache cleaner space if necessary (eg. for StrongARM core)
        MOV     a1, #-1
        CallHAL HAL_CleanerSpace
        CMP     a1, #-1                         ; -1 means none needed (HAL only knows this if for specific ARM core eg. system-on-chip)
        BEQ     %FT20
        LDR     a2, =DCacheCleanAddress
        ORR     a3, v4, #AP_None * L2X_APMult   ; ideally, svc read only, user none, but hey ho
        ASSERT  DCacheCleanSize = 4*&10000      ; 64k of physical space used 4 times (allows large page mapping)
        MOV     a4, #&10000
        MOV     ip, #4
        SUB     sp, sp, #5*4                    ; room for a1-a4, ip
10      STMIA   sp, {a1-a4, ip}
        BL      Init_MapIn
        LDMIA   sp, {a1-a4, ip}
        SUBS    ip, ip, #1
        ADD     a2, a2, #&10000
        BNE     %BT10
        ADD     sp, sp, #5*4
20
        ; Decompress the ROM
        LDR     a1, =RISCOS_Header
        LDR     a2, [a1, #OSHdr_DecompressHdr]
        CMP     a2, #0
        BEQ     %FT30
        ADD     ip, a1, a2
        ASSERT  OSDecompHdr_WSSize = 0
        ASSERT  OSDecompHdr_Code = 4
        LDMIA   ip, {a3-a4}
        ADRL    a2, SyncCodeAreas
        CMP     a3, #0                          ; Any workspace required?
        ADD     a4, a4, ip
 [ DebugHALTX
        BNE     %FT25
        DebugTX "Decompressing ROM, no workspace required"
 [ NoARMv5
        MOV     lr, pc
        MOV     pc, a4
 |
        BLX     a4
 ]
        DebugTX "Decompression complete"
        B       %FT27
25
 |
        ADREQ   lr, %FT27
        MOVEQ   pc, a4
 ]
        Push    "a1-a4,v1-v2,v5-v7"
        ; Allocate workspace for decompression code
        ; Workspace is located at a 4MB-aligned log addr, and is a multiple of 1MB in
        ; size. This greatly simplifies the code required to free the workspace, since
        ; we can guarantee it will have been section-mapped, and won't hit any
        ; partially-allocated L2PT blocks (where 4 L1PT entries point to subsections of
        ; the same L2PT page)
        ; This means all we need to do to free the workspace is zap the L1PT entries
        ; and roll back v1 & v2
        ; Note: This is effectively a MB-aligned version of Init_MapInRAM
        DebugTX "Allocating decompression workspace"
        LDR     v5, =(1<<20)-1
        ADD     v7, a3, v5
        BIC     v7, v7, v5                      ; MB-aligned size
        STR     v7, [sp, #8]                    ; Overwrite stacked WS size
        MOV     v6, #4<<20                      ; Current log addr
26      ADD     v2, v2, v5
        BIC     v2, v2, v5                      ; MB-aligned physram
        LDMIA   v1, {a2, a3}
        SUB     a2, v2, a2                      ; Amount of bank used
        SUB     a2, a3, a2                      ; Amount of bank remaining
        MOVS    a2, a2, ASR #20                 ; Round down to nearest MB
        LDRLE   v2, [v1, #8]!                   ; Move to next bank if 0MB left
        BLE     %BT26
        CMP     a2, v7, LSR #20
        MOVHS   a4, v7
        MOVLO   a4, a2, LSL #20                 ; a4 = amount to take
        MOV     a1, v2                          ; set up parameters for MapIn call
        MOV     a2, v6
        ORR     a3, v4, #AP_None * L2X_APMult
        SUB     v7, v7, a4                      ; Decrease amount to allocate
        ADD     v2, v2, a4                      ; Increase physram ptr
        ADD     v6, v6, a4                      ; Increase logram ptr
        BL      Init_MapIn
        CMP     v7, #0
        BNE     %BT26
        Pull    "a1-a2,v1-v2"                   ; Pull OS header, IMB func ptr, workspace size, decompression code
        MOV     a3, #4<<20
        DebugTX "Decompressing ROM"
 [ NoARMv5
        MOV     lr, pc
        MOV     pc, v2
 |
        BLX     v2
 ]
        DebugTX "Decompression complete"
        ; Before we free the workspace, make sure we zero it
        MOV     a1, #4<<20
        MOV     a2, #0
        MOV     a3, v1
        BL      memset
        ; Flush the workspace from the cache & TLB so we can unmap it
        MOV     a1, #4<<20
        MOV     a2, v1, LSR #12
        ARMop   MMU_ChangingEntries
        ; Zero each L1PT entry
        LDR     a1, =L1PT+(4<<2)
        MOV     a2, #0
        MOV     a3, v1, LSR #18
        BL      memset
        ; Pop our registers and we're done
        Pull    "v1-v2,v5-v7"
        DebugTX "ROM decompression workspace freed"
27
        ; Now that the ROM is decompressed we need to change the ROM page mapping to
        ; read-only. The easiest way to do this is to make another call to Init_MapIn.
        ; But before we can do that we need to work out if the HAL+OS are contiguous in
        ; physical space. To do this we can just check if the L1PT entry for the OS is a
        ; section mapping.
        LDR     a1, =L1PT+(ROM>>18)
        LDR     a1, [a1]
        ASSERT  L1_Section = 2
        ASSERT  L1_Page = 1
        TST     a1, #2
        ; Section mapped, get address from L1PT
        MOVNE   a1, a1, LSR #20
        MOVNE   a1, a1, LSL #20
        MOVNE   a2, #ROM
        MOVNE   a4, #OSROM_ImageSize*1024
        BNE     %FT29
        ; Page/large page mapped, get address from L2PT
        LDR     a2, =RISCOS_Header
        LDR     a1, =L2PT
        LDR     a1, [a1, a2, LSR #10]
        LDR     a4, [a2, #OSHdr_ImageSize]
        MOV     a1, a1, LSR #12
        MOV     a1, a1, LSL #12
29      Push    "a2,a4"
        MOV     a3, #(AP_ROM * L2X_APMult) + L2_C + L2_B
        BL      Init_MapIn
        ; Flush & invalidate cache/TLB to ensure everything respects the new page access.
        ; Putting a flush here also means the decompression code doesn't have to worry
        ; about IMB'ing the decompressed ROM.
        Pull    "a1,a2"
        MOV     a2, a2, LSR #12
        ARMop   MMU_ChangingEntries
        DebugTX "ROM access changed to read-only"
30
        ; Allocate the CAM
        LDR     a3, [v8, #SoftCamMapSize]
        ORR     a2, v4, #AP_None * L2X_APMult
        LDR     a1, =CAM
        BL      Init_MapInRAM

        ; Allocate the supervisor stack
        LDR     a1, =SVCStackAddress
        ORR     a2, v4, #AP_Read * L2X_APMult
        LDR     a3, =SVCStackSize
        BL      Init_MapInRAM

 [ HAL32
        ; Allocate the interrupt stack
        LDR     a1, =IRQStackAddress
        ORR     a2, v4, #AP_None * L2X_APMult
        LDR     a3, =IRQStackSize
        BL      Init_MapInRAM
 ]

        ; Allocate the abort stack
        LDR     a1, =ABTStackAddress
        ORR     a2, v4, #AP_None * L2X_APMult
        LDR     a3, =ABTStackSize
        BL      Init_MapInRAM

        ; Allocate the undefined stack
        LDR     a1, =UNDStackAddress
        ORR     a2, v4, #AP_None * L2X_APMult
        LDR     a3, =UNDStackSize
        BL      Init_MapInRAM

        ; Allocate the system heap
        LDR     a1, =SysHeapAddress
        ORR     a2, v4, #AP_Full * L2X_APMult
        LDR     a3, =32*1024
        BL      Init_MapInRAM

        ; Allocate the cursor/system/sound block - first the cached bit
        LDR     a1, =CursorChunkAddress
        ORR     a2, v4, #AP_Read * L2X_APMult   ; Should be AP_None?
        LDR     a3, =SoundDMABuffers - CursorChunkAddress
        BL      Init_MapInRAM

        ; then the uncached bit
        LDR     a1, =SoundDMABuffers
        ORR     a2, v5, #AP_Read * L2X_APMult   ; Should be AP_None?
        LDR     a3, =?SoundDMABuffers
        BL      Init_MapInRAM

 [ LongCommandLines
        LDR     a1, =KbuffsBaseAddress
        ORR     a2, v4, #AP_Read * L2X_APMult
        LDR     a3, =(KbuffsSize + &FFF) :AND: &FFFFF000        ; (round to 4k)
        BL      Init_MapInRAM
 ]

 [ HiProcVecs
        ; Map in DebuggerSpace
        LDR     a1, =DebuggerSpace
        ORR     a2, v5, #AP_Read * L2X_APMult
        LDR     a3, =(DebuggerSpace_Size + &FFF) :AND: &FFFFF000
        BL      Init_MapInRAM
 ]

 [ MinorL2PThack
        ; Allocate backing L2PT for the free pool
        MOV     a1, #FreePoolAddress
        LDR     a2, [v8, #RAMLIMIT]
        ; Need to round this up to 4M boundary, as AllocateL2PT only does
        ; individual (1M) sections, rather than 4 at a time, corresponding
        ; to a L2PT page. The following space is available for dynamic areas,
        ; and ChangeDyn.s will get upset if it sees only some out of a set of 4
        ; section entries pointing to the L2PT page.
        ASSERT  FreePoolAddress :MOD: (4*1024*1024) = 0
        ADD     a2, a2, #4*1024*1024
        SUB     a2, a2, #1
        MOV     a2, a2, LSR #22
        MOV     a2, a2, LSL #22
        ORR     a3, v5, #AP_None * L2X_APMult
        BL      AllocateL2PT
        ; And for application space
        MOV     a1, #0
        MOV     a2, #AplWorkMaxSize             ; Not quite right, but the whole thing's wrong anyway
        ORR     a3, v4, #AP_Full * L2X_APMult
        BL      AllocateL2PT
        ; And for the system heap. Sigh
        LDR     a1, =SysHeapAddress
        LDR     a2, =SysHeapMaxSize
        ORR     a3, v4, #AP_Full * L2X_APMult
        BL      AllocateL2PT
 ]

        LDR     a1, =ZeroPage
        STR     v2, [a1, #InitUsedEnd]

        MSR     CPSR_c, #F32_bit+I32_bit+IRQ32_mode
        LDR     sp, =IRQSTK
        MSR     CPSR_c, #F32_bit+I32_bit+ABT32_mode
        LDR     sp, =ABTSTK
        MSR     CPSR_c, #F32_bit+I32_bit+UND32_mode
        LDR     sp, =UNDSTK
        MSR     CPSR_c, #F32_bit+SVC2632
        LDR     sp, =SVCSTK

        LDR     ip, =CAM
        STR     ip, [a1, #CamEntriesPointer]
        BL      ConstructCAMfromPageTables

        MOV     a1, #4096
        STR     a1, [v8, #Page_Size]

        BL      CountPageTablePages

 [ {FALSE}
        MOV     a1, #InitIRQWs
        MOV     a2, #0
        MOV     a3, #0
        STMIA   a1!, {a2,a3}
        STMIA   a1!, {a2,a3}
 ]
        B       Continue_after_HALInit

        LTORG

 [ MEMM_Type = "VMSAv6"
HAL_InvalidateCache_ARMvF
        ; Cache invalidation for ARMs with multiple cache levels, used before ARMop
        ; initialisation. This function gets called before we have a stack set up, so
        ; we've got to preserve as many registers as possible. The only register we can
        ; safely change is ip, but we can switch into FIQ mode with interrupts disabled
        ; and use the banked registers there.
        MRS     ip, CPSR
        MSR     CPSR_c, #F32_bit+I32_bit+FIQ32_mode
        MOV     r9, #0
        MCR     p15, 0, r9, c7, c5, 0           ; invalidate instruction cache
        MCR     p15, 0, r9, c8, c7, 0           ; invalidate TLBs
        MCR     p15, 0, r9, c7, c5, 6           ; invalidate branch target predictor
        myDSB   ,r9,,y                          ; Wait for completion
        myISB   ,r9,,y
        ; Check whether we're ARMv7 (and thus multi-level cache) or ARMv6 (and thus
        ; single-level cache)
        MRC     p15, 0, r8, c0, c0, 1
        TST     r8, #&80000000                  ; EQ=ARMv6, NE=ARMv7
        MCREQ   ARM_config_cp,0,r9,ARMv4_cache_reg,C7   ; ARMv3-ARMv6 I+D cache flush
        BEQ     %FT50                           ; Skip to the end
        ; This is basically the same algorithm as the MaintainDataCache_WB_CR7_Lx macro,
        ; but tweaked to use fewer registers and to read from CP15 directly
        TST     r8, #&07000000
        BEQ     %FT50
        MOV     r11, #0                         ; Current cache level
10      ; Loop1
        ADD     r10, r11, r11, LSR #1           ; Work out 3 x cachelevel
        MOV     r9, r8, LSR r10                 ; bottom 3 bits are the Cache type for this level
        AND     r9, r9, #7                      ; get those 3 bits alone
        CMP     r9, #2
        BLT     %FT40                           ; no cache or only instruction cache at this level
        MCR     p15, 2, r11, c0, c0, 0          ; write CSSELR from r11
        myISB   ,r9
        MRC     p15, 1, r9, c0, c0, 0           ; read current CCSIDR to r9
        AND     r10, r9, #&7                    ; extract the line length field
        ADD     r10, r10, #4                    ; add 4 for the line length offset (log2 16 bytes)
        LDR     r8, =&3FF
        AND     r8, r8, r9, LSR #3              ; r8 is the max number on the way size (right aligned)
        CLZ     r13, r8                         ; r13 is the bit position of the way size increment
        LDR     r12, =&7FFF
        AND     r12, r12, r9, LSR #13           ; r12 is the max number of the index size (right aligned)
20      ; Loop2
        MOV     r9, r12                         ; r9 working copy of the max index size (right aligned)
30      ; Loop3
        ORR     r14, r11, r8, LSL r13           ; factor in the way number and cache number into r14
        ORR     r14, r14, r9, LSL r10           ; factor in the index number
        MCR     p15, 0, r14, c7, c6, 2          ; Invalidate
        SUBS    r9, r9, #1                      ; decrement the index
        BGE     %BT30
        SUBS    r8, r8, #1                      ; decrement the way number
        BGE     %BT20
        MRC     p15, 0, r8, c0, c0, 1
40      ; Skip
        ADD     r11, r11, #2
        AND     r14, r8, #&07000000
        CMP     r14, r11, LSL #23
        BGT     %BT10
50      ; Finished
        ; Wait for clean to complete
        MOV     r8, #0
        myDSB   ,r8,,y
        MCR     p15, 0, r8, c7, c5, 0           ; invalidate instruction cache
        MCR     p15, 0, r8, c8, c7, 0           ; invalidate TLBs
        MCR     p15, 0, r8, c7, c5, 6           ; invalidate branch target predictor
        myDSB   ,r8,,y                          ; Wait for completion
        myISB   ,r8,,y
        ; All caches clean; switch back to SVC, then recover the stored PSR from ip
        ; (although we can be fairly certain we started in SVC anyway)
        MSR     CPSR_c, #F32_bit+I32_bit+SVC32_mode
        MSR     CPSR_cxsf, ip
        MOV     pc, lr
 ] ; MEMM_Type = "VMSAv6"

CountPageTablePages ROUT
        LDR     a1, =ZeroPage
        LDR     a2, =CAM
        LDR     a3, [a1, #MaxCamEntry]
 [ ZeroPage <> 0
        MOV     a1, #0
 ]
        ADD     a3, a3, #1
        ADD     a4, a2, a3, LSL #3
10      LDR     ip, [a4, #-8]!
        ASSERT  (L2PT :AND: &3FFFFF) = 0
        MOV     ip, ip, LSR #22
        TEQ     ip, #L2PT :SHR: 22
        ADDEQ   a1, a1, #4096
        TEQ     a4, a2
        BNE     %BT10
        LDR     a2, =ZeroPage
        STR     a1, [a2, #L2PTUsed]
        MOV     pc, lr

; int PhysAddrToPageNo(void *addr)
;
; Converts a physical address to the page number of the page containing it.
; Returns -1 if address is not in RAM.

PhysAddrToPageNo
        MOV     a4, #0
        LDR     ip, =ZeroPage + PhysRamTable
10      LDMIA   ip!, {a2, a3}                   ; get phys addr, size
        TEQ     a3, #0                          ; end of list? (size = 0)
        BEQ     %FT90                           ; then it ain't RAM
        SUB     a2, a1, a2                      ; a2 = amount into this bank
        CMP     a2, a3                          ; if more than size
        ADDHS   a4, a4, a3                      ; increase counter by size of bank
        BHS     %BT10                           ; and move to next
        ADD     a4, a4, a2                      ; add offset to counter
        MOV     a1, a4, LSR #12                 ; convert counter to a page number
        MOV     pc, lr

90      MOV     a1, #-1
        MOV     pc, lr

; A routine to construct the soft CAM from the page tables. This is used
; after a soft reset, and also on a hard reset as it's an easy way of
; clearing up after the recursive page table allocation.

        ROUT
ConstructCAMfromPageTables
        Push    "v1-v8, lr"
        LDR     a1, =ZeroPage
        LDR     a2, [a1, #MaxCamEntry]
        LDR     ip, [a1, #MMU_PCBTrans]
        LDR     v1, =CAM                        ; v1 -> CAM (for whole routine)
        ADD     a2, a2, #1
        ADD     a2, v1, a2, LSL #3
        LDR     a3, =DuffEntry                  ; Clear the whole CAM, from
        MOV     a4, #AP_Duff                    ; the top down.
10      STMDB   a2!, {a3, a4}
        CMP     a2, v1
        BHI     %BT10

        MOV     v2, #0                          ; v2 = logical address
        LDR     v3, =L1PT                       ; v3 -> L1PT (not used much)
        LDR     v4, =L2PT                       ; v4 -> L2PT
30      LDR     v5, [v3, v2, LSR #18]           ; v5 = first level descriptor
        ASSERT  L1_Fault = 0
        AND     a1, v5, #2_11                   ; move to next section if not
        TEQ     a1, #L1_Page                    ; a page table (we ignore section maps
        BEQ     %FT40                           ; and we don't do fine tables)
        ADDS    v2, v2, #&00100000
        BCC     %BT30
        Pull    "v1-v8, pc"

40      LDR     v5, [v4, v2, LSR #10]           ; v5 = second level descriptor
        ASSERT  L2_Fault = 0
        ANDS    v6, v5, #2_11                   ; move to next page if fault
        BEQ     %FT80
 [ MEMM_Type <> "VMSAv6"
        TEQ     v6, #L2_SmallPage               ; convert small pages to extended pages
        BNE     %FT50
        LDR     a2, =ZeroPage                   ; if we now know that CPU supports them
        LDR     a1, [a2, #ProcessorFlags]
        TST     a1, #CPUFlag_ExtendedPages
        BEQ     %FT50
        LDR     a1, [a2, #MMU_PCBTrans]         ; reprocess C and B bits as per XCB table
        MOV     lr, #0
        TST     v5, #L2_C                       ; (eg if C and B both set, replace with
        ORREQ   lr, lr, #DynAreaFlags_NotCacheable      ; default cacheable+bufferable XCB)
        TST     v5, #L2_B
        ORREQ   lr, lr, #DynAreaFlags_NotBufferable
        LDRB    lr, [a1, lr, LSR #2]
        EOR     v5, v5, #L2_ExtPage :EOR: L2_SmallPage
        BIC     v5, v5, #2_111111000000         ; remove excess 3 AP fields
        BIC     v5, v5, #2_000000001100         ; remove old C+B
        BIC     a1, v5, #2_000000110011         ; remove all other bits for just address
        ORR     v5, v5, lr                      ; put in new XCB
        ARMop   MMU_ChangingEntry,,,a2
        STR     v5, [v4, v2, LSR #10]           ; update page table
 ]
50      MOV     a1, v5, LSR #12
        MOV     a1, a1, LSL #12                 ; a1 = address (flags stripped out)
        TEQ     v6, #L2_LargePage
        BICEQ   a1, a1, #&0000F000              ; large pages get bits 12-15
        ANDEQ   lr, v2, #&0000F000              ; from the virtual address
        ORREQ   a1, a1, lr
        BL      PhysAddrToPageNo
        CMP     a1, #-1
        BEQ     %FT80

        ADD     a2, v1, a1, LSL #3              ; a2 -> CAM entry
 [ MEMM_Type = "VMSAv6"
        AND     a1, v5, #L2_AP                  ; a1 = access permission
        MOV     a1, a1, LSR #L2_APShift
        ; Map AP_ROM to 0
        CMP     a1, #AP_ROM
        MOVEQ   a1, #0
        ; Now ARM access goes 0 => all R/O, 1 => user none, 2 => user R/O, 3 => user R/W
        ; PPL access goes 0 => user R/W, 1 => user R/O, 2 => user none, (and let's say 3 all R/O)
        RSB     v6, a1, #3                      ; v6 = PPL access

        AND     a1, v5, #2_11                   ; a1 = page type
        CMP     a1, #L2_ExtPage
        ANDHS   a1, v5, #L2_TEX+L2_C+L2_B       ; Extended TEX and CB bits
        ANDLO   a1, v5, #L2_C+L2_B              ; Large CB bits only
        ANDLO   lr, v5, #L2L_TEX                ; Large TEX bits
        ORRLO   a1, a1, lr, LSR #L2L_TEXShift-L2_TEXShift       ; Move Large TEX back to Extended TEX position
        MOV     lr, #3                          ; lr = PCB value (funny loop to do NCNB first)
60      LDRB    a3, [ip, lr]                    ; look in XCBTrans table
        TEQ     a3, a1                          ; found a match for our XCB?
        BEQ     %FT70
        TST     lr, #2_11
        SUBNE   lr, lr, #1                      ; loop goes 3,2,1,0,7,6,5,4,...,31,30,29,28
        ADDEQ   lr, lr, #7
        TEQ     lr, #35
        BNE     %BT60
70      AND     a1, lr, #2_00011
        ORR     v6, v6, a1, LSL #4              ; extract NCNB bits
        AND     a1, lr, #2_11100
        ORR     v6, v6, a1, LSL #10             ; extract P bits
        ORR     v6, v6, #PageFlags_Unavailable  ; ???? pages from scratch to cam only?
        STMIA   a2, {v2, v6}                    ; store logical address, PPL
 |
        AND     a1, v5, #&30                    ; a1 = access permission
        MOV     a1, a1, LSR #4
        ; ARM access goes 0 => all R/O, 1 => user none, 2 => user R/O, 3 => user R/W
        ; PPL access goes 0 => user R/W, 1 => user R/O, 2 => user none, (and let's say 3 all R/O)
        RSB     v6, a1, #3                      ; v6 = PPL access

        AND     a1, v5, #2_11                   ; a1 = page type
        CMP     a1, #L2_SmallPage
        ANDHI   a1, v5, #2_0000001111001100     ; Extended TEX and CB bits
        ANDLS   a1, v5, #2_0000000000001100     ; Small/Large CB bits only
        ANDLO   lr, v5, #2_1111000000000000     ; Large TEX bits
        ORRLO   a1, a1, lr, LSR #6              ; Move Large TEX back to Extended TEX position
        MOV     lr, #3                          ; lr = PCB value (funny loop to do NCNB first)
60      LDRB    a3, [ip, lr]                    ; look in XCBTrans table
        TEQ     a3, a1                          ; found a match for our XCB?
        BEQ     %FT70
        TST     lr, #2_11
        SUBNE   lr, lr, #1                      ; loop goes 3,2,1,0,7,6,5,4,...,31,30,29,28
        ADDEQ   lr, lr, #7
        TEQ     lr, #35
        BNE     %BT60
70      AND     a1, lr, #2_00011
        ORR     v6, v6, a1, LSL #4              ; extract NCNB bits
        AND     a1, lr, #2_11100
        ORR     v6, v6, a1, LSL #10             ; extract P bits
        ORR     v6, v6, #PageFlags_Unavailable  ; ???? pages from scratch to cam only?
        STMIA   a2, {v2, v6}                    ; store logical address, PPL
 ]
80      ADD     v2, v2, #&00001000
        TST     v2, #&000FF000
        BNE     %BT40
        TEQ     v2, #0                          ; yuck (could use C from ADDS, but TST corrupts C
        BNE     %BT30                           ; because of big constant)
        Pull    "v1-v8, pc"

; Allocate a physical page from DRAM
;
; On entry:
;   v1 -> current entry in PhysRamTable
;   v2 -> end of last used physical page
; On exit:
;   a1 -> next free page
;   v1, v2 updated
;
; No out of memory check...

Init_ClaimPhysicalPage
        MOV     a1, v2
        LDMIA   v1, {a2, a3}
        ADD     a2, a2, a3                      ; a2 = end of this bank
        CMP     v2, a2                          ; advance v2 to next bank if
        LDRHS   a1, [v1, #8]!                   ; this bank is fully used
        ADD     v2, a1, #4096
        MOV     pc, lr

; Allocate and map in some RAM.
;
; On entry:
;   a1 = logical address
;   a2 = access permissions (see Init_MapIn)
;   a3 = length (4K multiple)
;   v1 -> current entry in PhysRamTable
;   v2 = next physical address
;   v3 -> L1PT
;
; On exit:
;   a1 -> physical address of start of RAM (deduce the rest from PhysRamTable)
;
; No out of memory check...

Init_MapInRAM ROUT
        Push    "v4-v8,lr"
        MOV     v8, #-1
        MOV     v5, a3                          ; v5 = amount of memory required
        MOV     v6, a1                          ; v6 = logical address
        MOV     v7, a2                          ; v7 = access permissions
10      LDMIA   v1, {v4, ip}                    ; v4 = addr of bank, ip = len
        SUB     v4, v2, v4                      ; v4 = amount of bank used
        SUBS    v4, ip, v4                      ; v4 = amount of bank left
        LDREQ   v2, [v1, #8]!                   ; move to next bank if 0 left
        BEQ     %BT10
        CMP     v8, #-1                         ; is this the first bank?
        MOVEQ   v8, v2                          ; remember it
        CMP     v4, v5                          ; sufficient in this bank?
        MOVHS   a4, v5
        MOVLO   a4, v4                          ; a4 = amount to take
        MOV     a1, v2                          ; set up parameters for MapIn call
        MOV     a2, v6                          ; then move globals (in case MapIn
 [ ECC
        ORR     a3, v7, #1:SHL:31
 |
        MOV     a3, v7                          ; needs to allocate for L2PT)
 ]
        ADD     v2, v2, a4                      ; advance physaddr
        SUB     v5, v5, a4                      ; decrease wanted
        ADD     v6, v6, a4                      ; advance address pointer
        BL      Init_MapIn                      ; map in the RAM
        TEQ     v5, #0                          ; more memory still required?
        BNE     %BT10

        MOV     a1, v8
        Pull    "v4-v8,pc"

; Map a range of physical addresses to a range of logical addresses.
;
; On entry:
;   a1 = physical address
;   a2 = logical address
;   a3 = access permissions + C + B bits (bits 11-2 of an L2 extended small page)
;        (also set bit 31 to indicate that the P bit in the L1PT should be set)
;   a4 = area size (4K multiple)
;   v1 -> current entry in PhysRamTable
;   v2 = last used physical address
;   v3 -> L1PT (or 0 if MMU on)

Init_MapIn ROUT
        Push    "lr"
        ORR     lr, a1, a2                      ; OR together physaddr, logaddr
        ORR     lr, lr, a4                      ; and size.
        MOVS    ip, lr, LSL #12                 ; If all bottom 20 bits 0
        BEQ     Init_MapIn_Sections             ; it's section mapped

        MOVS    ip, lr, LSL #16                 ; If bottom 16 bits not all 0
        ORRNE   a3, a3, #L2_ExtPage             ; then extended small pages (4K)
        BNE     %FT10
 [ MEMM_Type = "VMSAv6"
        ORR     a3, a3, #L2_LargePage           ; else large pages (64K)
        AND     lr, a3, #L2_TEX                 ; extract TEX from ext page flags
        BIC     a3, a3, #L2_TEX                 ; small page TEX bits SBZ for large pages
        ORR     a3, a3, lr, LSL #L2L_TEXShift-L2_TEXShift       ; replace TEX in large page position
 |
        ORR     a3, a3, #L2_LargePage           ; else large pages (64K)
        AND     lr, a3, #L2_TEX                 ; extract TEX from ext page flags
        AND     ip, a3, #L2X_AP                 ; extract AP from ext page flags
        BIC     a3, a3, #L2_AP                  ; clear way for large page AP
        ORR     ip, ip, ip, LSL #2              ; duplicate up AP to 4 sub-pages
        ORR     ip, ip, ip, LSL #4
        ORR     a3, a3, lr, LSL #6              ; replace TEX in large page position
        ORR     a3, a3, ip                      ; replace quadrupled AP
 ]
10      Push    "v4-v7"
        MOV     v4, a1                          ; v4 = physaddr
        MOV     v5, a2                          ; v5 = logaddr
        MOV     v6, a3                          ; v6 = access permissions
        MOV     v7, a4                          ; v7 = area size
20      MOV     a1, v4
        MOV     a2, v5
        MOV     a3, v6
        BL      Init_MapInPage                  ; Loop through mapping in each
        ADD     v4, v4, #4096                   ; page in turn
        ADD     v5, v5, #4096
        SUBS    v7, v7, #4096
        BNE     %BT20
        Pull    "v4-v7,pc"

 [ MEMM_Type = "VMSAv6"
Init_MapIn_Sections
        MOVS    ip, v3                          ; is MMU on?
        LDREQ   ip, =L1PT                       ; then use virtual address
        AND     lr, a3, #L2_TEX + L2_AP         ; extract TEX, AP, APX bits (input is extended small page)
        BIC     a3, a3, #L2_TEX + L2_AP         ; and clear them
        ORR     a3, a3, lr, LSL #6              ; put TEX and AP bits back in new position
        ORR     a3, a3, #L1_Section             ; Mark as section
        ORR     a1, a1, a3                      ; Merge with physical address
        ADD     a2, ip, a2, LSR #18             ; a2 -> L1PT entry
70      STR     a1, [a2], #4                    ; And store in L1PT
        ADD     a1, a1, #1024*1024              ; Advance one megabyte
        SUBS    a4, a4, #1024*1024              ; and loop
        BNE     %BT70
        Pull    "pc"
 |
Init_MapIn_Sections
        MOVS    ip, v3                          ; is MMU on?
        LDREQ   ip, =L1PT                       ; then use virtual address
        AND     lr, a3, #4_033300               ; extract TEX and AP bits
        BIC     a3, a3, #4_033300               ; and clear them (now P, Domain and U bits)
        ORR     a3, a3, lr, LSL #6              ; put TEX and AP bits back in new position
 [ ARM6support
        ASSERT  AP_ROM = 0
        TST     a3, #4_300000                   ; If ROM permission
        ARM_6   lr,EQ                           ; and ARM 6
        ORREQ   a3, a3, #AP_Read * L1_APMult    ; then make it Read permission, non-updateable
        ORRNE   a3, a3, #L1_U
        ORRS    a3, a3, #L1_Section             ; Add section indicator to permission
 |
        ORRS    a3, a3, #L1_U+L1_Section        ; Add section + U indicators to permission
 ]
        ORRMI   a3, a3, #L1_P
        BICMI   a3, a3, #1:SHL:31
        ORR     a1, a1, a3                      ; Merge with physical address
        ADD     a2, ip, a2, LSR #18             ; a2 -> L1PT entry
70      STR     a1, [a2], #4                    ; And store in L1PT
        ADD     a1, a1, #1024*1024              ; Advance one megabyte
        SUBS    a4, a4, #1024*1024              ; and loop
        BNE     %BT70
        Pull    "pc"
 ]

; Map a logical page to a physical page, allocating L2PT as necessary.
;
; On entry:
;   a1 = physical address
;   a2 = logical address
;   a3 = access permissions + C + B bits + size (all non-address bits, of appropriate type)
;        (also set bit 31 to indicate that the P bit in the L1PT should be set)
;   v1 -> current entry in PhysRamTable
;   v2 = last used physical address
;   v3 -> L1PT (or 0 if MMU on)
; On exit:
;   a1 = logical address
;   a2-a4, ip corrupt
;   v1, v2 updated
;
; ROM permission is caught if on an ARM 6 and turned into Read
; Extended pages are caught if not available (or MMU off) and turned into Small

Init_MapInPage
        Push    "v4-v6, lr"
        MOV     v4, a1                          ; v4 = physical address
        MOV     v5, a2                          ; v5 = logical address
        MOV     v6, a3                          ; v6 = access permissions
        MOV     a1, v5
        MOV     a2, #4096
        BL      AllocateL2PT
        TEQ     v3, #0                          ; if MMU on, access L2PT virtually...
        LDREQ   a1, =L2PT                       ; a1 -> L2PT virtual address
        MOVEQ   ip, v5                          ; index using whole address
        BEQ     %FT40
        MOV     ip, v5, LSR #20
        LDR     a1, [v3, ip, LSL #2]            ; a1 = level one descriptor
        MOV     a1, a1, LSR #10
        MOV     a1, a1, LSL #10                 ; a1 -> L2PT tables for this section
        AND     ip, v5, #&000FF000              ; extract L2 table index bits
40      AND     lr, v6, #3
        TEQ     lr, #L2_LargePage               ; strip out surplus address bits from
        BICEQ   v6, v6, #&0000F000              ; large page descriptors
 [ MEMM_Type <> "VMSAv6"
        TEQ     lr, #L2_ExtPage
        BNE     %FT50
        TEQ     v3, #0                          ; if we've been given an extended page
        BNE     %FT45                           ; must check that (a) the MMU is on
        LDR     lr, =ZeroPage
        LDR     lr, [lr, #ProcessorFlags]       ; and (b) the CPU supports them
        TST     lr, #CPUFlag_ExtendedPages
        BNE     %FT50
45      EOR     v6, v6, #L2_SmallPage :EOR: L2_ExtPage
        AND     lr, v6, #L2X_AP                 ; convert the extended page descriptor
        BIC     v6, v6, #L2_AP                  ; into a small page descriptor
        ORR     lr, lr, lr, LSL #2              ; (losing the TEX bits in the
        ORR     v6, v6, lr                      ; process, but they should have been 0)
        ORR     v6, v6, lr, LSL #4
 ]
50      ORR     lr, v4, v6                      ; lr = value for L2PT entry
 [ ARM6support
        ASSERT  AP_ROM = 0
        TST     lr, #4_333300                   ; if ROM permission
        ARM_6   a2,EQ                           ; and ARM 6
        ORREQ   lr, lr, #AP_Read * L2_APMult    ; then make it Read permission
 ]
        STR     lr, [a1, ip, LSR #10]           ; update L2PT entry
        MOV     a1, v5
        Pull    "v4-v6, pc"

; On entry:
;   a1 = virtual address L2PT required for
;   a2 = number of bytes of virtual space
;   a3 = access permissions that will be placed in L2PT (bits 11-2 of Extended L2 descriptor)
;        (also set bit 31 to indicate that the P bit in the L1PT should be set)
;   v1 -> current entry in PhysRamTable
;   v2 = last used physical address
;   v3 -> L1PT (or 0 if MMU on)
; On exit:
;   a1-a4, ip corrupt
;   v1, v2 updated

AllocateL2PT
        Push    "a3,v4-v8,lr"
        MOV     v8, a1, LSR #20                 ; round base address down to 1M
        ADD     lr, a1, a2
        MOV     v7, lr, LSR #20
        TEQ     lr, v7, LSL #20
        ADDNE   v7, v7, #1                      ; round end address up to 1M

        MOVS    v6, v3
        LDREQ   v6, =L1PT                       ; v6 -> L1PT (whole routine)

05      LDR     v5, [v6, v8, LSL #2]            ; L1PT contains 1 word per M
        TEQ     v5, #0                          ; if non-zero, the L2PT has
                                                ; already been allocated
 [ ARM6support
        BEQ     %FT10
        TST     v5, #L1_U                       ; if section is already updateable
        BNE     %FT40                           ; leave it.
        LDR     a3, [sp, #0]
        TST     a3, #4_333300                   ; if they want anything other than
        ORRNE   v5, v5, #L1_U                   ; ROM access, make the section
        STRNE   v5, [v6, v8, LSL #2]            ; updateable.
        B       %FT40
10
 |
        BNE     %FT40
 ]
        BIC     lr, v8, #3                      ; round down to 4M - each page
        ADD     lr, v6, lr, LSL #2              ; of L2PT maps to 4 sections
        LDMIA   lr, {a3,a4,v5,ip}               ; check if any are page mapped
        ASSERT  L1_Fault = 2_00 :LAND: L1_Page = 2_01 :LAND: L1_Section = 2_10
        TST     a3, #1
        TSTEQ   a4, #1
        TSTEQ   v5, #1
        TSTEQ   ip, #1
        BEQ     %FT20                           ; nothing page mapped - claim a page

        TST     a4, #1                          ; at least one of the sections is page mapped
        SUBNE   a3, a4, #1*1024                 ; find out where it's pointing to and
        TST     v5, #1                          ; derive the corresponding address for our
        SUBNE   a3, v5, #2*1024                 ; section
        TST     ip, #1
        SUBNE   a3, ip, #3*1024
        AND     lr, v8, #3
        ORR     a3, a3, lr, LSL #10
 [ ARM6support
        ASSERT  AP_ROM = 0
        ARM_6   lr                              ; if ARM 6
        BNE     %FT15
        LDR     lr, [sp, #0]                    ; do they want ROM access?
        TST     lr, #4_000300
        BICEQ   a3, a3, #L1_U                   ; set the updateable bit accordingly
        ORRNE   a3, a3, #L1_U
15
 ]
        STR     a3, [v6, v8, LSL #2]            ; fill in the L1PT entry
        B       %FT40                           ; no more to do

20      BL      Init_ClaimPhysicalPage          ; Claim a page to put L2PT in
        MOV     v4, a1
 [ MEMM_Type = "VMSAv6"
        ORR     a3, a1, #L1_Page
 |
 [ ARM6support
        ASSERT  AP_ROM = 0
        ARM_6   lr                              ; if ARM 6
        LDREQ   lr, [sp, #0]                    ; do they want ROM access?
        TSTEQ   lr, #4_000300
        ORREQ   a3, a1, #L1_Page                ; set the updateable bit accordingly
        ORRNE   a3, a1, #L1_Page + L1_U
 |
        ORR     a3, a1, #L1_Page + L1_U
 ]
 [ ECC
        ORR     a3, a3, #L1_P
 ]
 ]
        AND     lr, v8, #3
        ORR     a3, a3, lr, LSL #10
        STR     a3, [v6, v8, LSL #2]            ; fill in the L1PT

        ; Need to zero the L2PT. Must do it before calling Init_MapInPage, as that may
        ; well want to put something in the thing we are clearing. If the MMU is off,
        ; no problem, but if the MMU is on, then the L2PT isn't accessible until we've
        ; called Init_MapInPage. Solution is to use the AccessPhysicalAddress call.
        TEQ     v3, #0                          ; MMU on?
        MOVNE   a1, v4                          ; if not, just access v4
        MOVEQ   a1, #L1_B                       ; if so, map in v4
        MOVEQ   a2, v4
        MOVEQ   a3, #0
        BLEQ    RISCOS_AccessPhysicalAddress
        MOV     a2, #0
        MOV     a3, #4*1024
        BL      memset

        MOV     a1, v4                          ; Map in the L2PT page itself
        LDR     a2, =L2PT                       ; (can't recurse, because L2PT
        ADD     a2, a2, v8, LSL #10             ; backing for L2PT is preallocated)
        BIC     a2, a2, #&C00
        LDR     a3, =(AP_None * L2X_APMult) + L2_ExtPage
 [ ECC
        ORR     a3, a3, #1:SHL:31
 ]
        BL      Init_MapInPage
40      ADD     v8, v8, #1                      ; go back until all
        CMP     v8, v7                          ; pages allocated
        BLO     %BT05
        Pull    "a3,v4-v8,pc"

; void *RISCOS_AccessPhysicalAddress(unsigned int flags, void *addr, void **oldp)

RISCOS_AccessPhysicalAddress
 [ ECC
        Push    "a1-a3,lr"
        MOV     a1, a2
        BL      PhysAddrToPageNo
        CMP     a1, #-1
        Pull    "a1-a3,lr"
 ]
        LDR     ip, =L1PT + (PhysicalAccess:SHR:18)     ; ip -> L1PT entry
 [ MEMM_Type = "VMSAv6"
        LDR     a4, =(AP_None * L1_APMult) + L1_Section
 |
        LDR     a4, =(AP_None * L1_APMult) + L1_U + L1_Section
 ]
        AND     a1, a1, #L1_B                   ; user can ask for bufferable
 [ ECC
        ORRNE   a1, a1, #L1_P
 ]
        ORR     a1, a4, a1                      ; a1 = flags for 1st level descriptor
        MOV     a4, a2, LSR #20                 ; rounded to section
        ORR     a1, a1, a4, LSL #20             ; a1 = complete descriptor
        TEQ     a3, #0
        LDRNE   a4, [ip]                        ; read old value (if necessary)
        STR     a1, [ip]                        ; store new one
        STRNE   a4, [a3]                        ; put old one in [oldp]
        LDR     a1, =PhysicalAccess
        MOV     a3, a2, LSL #12                 ; take bottom 20 bits of address
        ORR     a3, a1, a3, LSR #12             ; and make an offset within PhysicalAccess
        Push    "a3,lr"
        ARMop   TLB_InvalidateEntry             ; sufficient, because not cacheable
        Pull    "a1,pc"

; void RISCOS_ReleasePhysicalAddress(void *old)

RISCOS_ReleasePhysicalAddress
        LDR     ip, =L1PT + (PhysicalAccess:SHR:18)     ; ip -> L1PT entry
        STR     a1, [ip]
        LDR     a1, =PhysicalAccess
        ARMop   TLB_InvalidateEntry,,tailcall   ; sufficient, because not cacheable

; void Init_PageTablesChanged(void)
;
; A TLB+cache invalidation that works on all known ARMs. Invalidate all I+D TLB is the _only_ TLB
; op that works on ARM720T, ARM920T and SA110. Ditto invalidate all I+D cache.
;
; DOES NOT CLEAN THE DATA CACHE. This is a helpful simplification, but requires that we don't
; use this routine after we've started using normal RAM.

Init_PageTablesChanged
        MOV     a3, lr
        BL      Init_ARMarch
        MOV     ip, #0
        MCREQ   ARM_config_cp,0,ip,ARMv3_TLBflush_reg,C0
        MCRNE   ARM_config_cp,0,ip,ARMv4_TLB_reg,C7
 [ MEMM_Type = "VMSAv6"
        CMP     a1, #ARMvF
        MCRNE   ARM_config_cp,0,ip,ARMv4_cache_reg,C7   ; works on ARMv3
        BLEQ    HAL_InvalidateCache_ARMvF
 |
        MCR     ARM_config_cp,0,ip,ARMv4_cache_reg,C7   ; works on ARMv3
 ]
        MOV     pc, a3

;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
;
; ClearPhysRAM - Routine to clear "all" memory
;
; While this routine is running, keyboard IRQs may happen. For this reason
; it avoids the base of logical RAM (hardware IRQ vector and workspace).
;
; We also have to avoid the page tables and the PhysRamTable.
; The latter is also used to tell us which areas of memory we should clear.
; We don't have to worry about trampling on the ROM image as it's
; already been excluded from PhysRamTable.
; This routine must work in 32-bit mode.
;
; in:  r7  = memory speed
;      r8  = page size
;      r9  = MEMC control register
;      r13 = total RAM size
;
;      None of the above are actually used by this routine
;
; out: r7-r9, r13 preserved
;
ClearPhysRAM ROUT
        MSR     CPSR_c, #F32_bit+FIQ32_mode     ; get some extra registers
        MOV     r8, #0
        MOV     r9, #0
        MOV     r10, #0
        MOV     r11, #0
        MOV     r12, #0
        MOV     r13, #0
        MOV     r14, #0
        MSR     CPSR_c, #F32_bit+SVC32_mode
 [ EmulatorSupport
        ARM_on_emulator r0
        BEQ     CPR_skipped
 ]
        ; now let us do the clear
        LDR     r0, =ZeroPage+InitClearRamWs    ; we can preserve r7-r9, r13 at logical address 52..67
        STMIA   r0, {r4-r11,lr}

        LDR     r8, =ZeroPage
        MOV     r9, #0
        LDR     r7, =ZeroPage+PhysRamTable      ; point to 5 lots of (physaddr,size)
        ADR     r6, RamSkipTable
        ADD     r4, r7, #PhysRamTableEnd-PhysRamTable   ; r4 -> end of table
10      LDR     r5, [r6], #4                    ; load first skip offset
        LDMIA   r7, {r10, r11}                  ; load next address, size
        TEQ     r11, #0
        BEQ     %FT50
15      ADD     r11, r10, r11                   ; r11 = end address

        ; Need to check for RAM we've already used
        LDR     r0, [r8, #InitUsedStart]        ; check for intersection with used space
        CMP     r0, r10
        BLO     %FT18
        CMP     r0, r11
        BHS     %FT18
;       BKPT    0
        LDR     r1, [r8, #InitUsedEnd]
        TEQ     r0, r10                         ; if at start of block, easy
        BEQ     %FT17                           ; just move start pointer forwards
        MOV     r11, r0                         ; else set end pointer to start address
        B       %FT18                           ; next time round, will be at start address

16      LDR     r1, [r8, #InitUsedEnd]
17      CMP     r1, r10                         ; advance until we find block with end pointer in
        CMPHS   r11, r1                         ; intersection if r10 <= r1 <= r11
        ADDLS   r7, r7, #8                      ; get next block if r1 = r11 or r1 outside [r10,r11]
        LDRLS   r5, [r6], #4
        LDMLSIA r7, {r10, r11}                  ; (if r1 = r11, will just move straight to next block)
        ADDLS   r11, r10, r11
        BLO     %BT17                           ; check next block if r1 outside [r10,r11]
        MOVHI   r10, r1                         ; if r11 > r1, advance r10 to r1 (do partial block)

18      MOV     r0, #L1_B                       ; map in the appropriate megabyte,
        MOV     r1, r10                         ; making it bufferable
        MOV     r2, #0
        BL      RISCOS_AccessPhysicalAddress
        SUB     lr, r0, r10                     ; lr = offset from phys to log
        ADD     r1, r11, lr
        MOV     r2, r0, LSR #20
        TEQ     r2, r1, LSR #20                 ; if end in different megabyte to start
        MOVNE   r1, r2, LSL #20                 ; then stop at end of megabyte
        ADDNE   r1, r1, #&00100000

        MSR     CPSR_c, #F32_bit+FIQ32_mode     ; switch to our bank o'zeros
        MOV     r2, #0
19      ADD     r5, r5, r0
20      TEQ     r0, r1                          ; *this* is the speed critical bit - executed
        TEQNE   r0, r5                          ; 32768 times per outer loop
        STMNEIA r0!, {r2,r8-r10}
        STMNEIA r0!, {r2,r8-r10}
        BNE     %BT20
        TEQ     r0, r1
        BEQ     %FT30
        LDR     r5, [r6], #4                    ; load skip amount
        ADD     r0, r0, r5                      ; and skip it
        LDR     r5, [r6], #4                    ; load next skip offset (NB relative to end of last skip)
        B       %BT19

30      MSR     CPSR_c, #F32_bit+SVC32_mode     ; back to supervisor mode
        MOV     r10, r0
        MOV     r11, r1
        LDMIA   r7, {r0, r1}
        SUB     r11, r11, lr                    ; turn r11 back into physical address
        SUB     r5, r5, r0                      ; r5 back into offset
        ADD     r0, r0, r1                      ; r0 = physical end address of block
        TEQ     r0, r11                         ; did we reach the real end?
        MOVNE   r10, r11                        ; if not, carry on from where we stopped
        SUBNE   r11, r0, r11                    ; (this also deals with avoiding InitUsed area)
        BNE     %BT15

40      ADD     r7, r7, #8
        TEQ     r7, r4                          ; have we done all areas?
        GBLA    lastaddr
lastaddr SETA   0
        GBLA    lastregion
lastregion SETA 0

        MACRO
        MakeSkipTable $region, $addr, $size
[ ($region)<>lastregion
        &       -1
lastaddr SETA   0
]
        &       ($addr)-lastaddr, $size
lastaddr SETA   ($addr)+($size)
lastregion SETA $region
        MEND

        MACRO
        EndSkipTables
        WHILE   lastregion < (PhysRamTableEnd-PhysRamTable)/8
        &       -1
lastregion SETA lastregion + 1
        WEND
        MEND

RamSkipTable
        MakeSkipTable 1, DRAMOffset_PageZero + 0, InitWsEnd
        MakeSkipTable 1, DRAMOffset_PageZero + SkippedTables, SkippedTablesEnd-SkippedTables
        EndSkipTables

InitProcVecs
        BKPT    &C000                           ; Reset
        BKPT    &C004                           ; Undefined Instruction
        BKPT    &C008                           ; SWI
        BKPT    &C00C                           ; Prefetch Abort
        SUBS    pc, lr, #4                      ; ignore data aborts
        BKPT    &C014                           ; Address Exception
        LDR     pc, InitProcVecs + InitIRQHandler ; IRQ
        BKPT    &C01C                           ; FIQ
InitProcVec_FIQ
        DCD     0
InitProcVecsEnd

; In:  a1 = flags (L1_B,L1_C,L1_AP,L1_APX)
;      a2 = physical address
;      a3 = size
; Out: a1 = assigned logical address, or 0 if failed (no room)
;
; Will detect and return I/O space already mapped appropriately, or map and return new space.
; For simplicity and speed of search, works on a section (1Mb) granularity.
;
        ASSERT  L1_B = 1:SHL:2
        ASSERT  L1_C = 1:SHL:3
[ MEMM_Type = "VMSAv6"
        ASSERT  L1_AP = 2_100011 :SHL: 10
|
        ASSERT  L1_AP = 3:SHL:10
]

MapInFlag_DoublyMapped  * 1:SHL:20
MapInFlag_APSpecified   * 1:SHL:21

RISCOS_MapInIO ROUT
        Entry   "v1-v5,v7"
        MOV     v7, #L1_B:OR:L1_C
        ORR     v7, v7, #L1_AP                  ; v7 = user-specifiable flags
        MOV     v5, a1                          ; v5 = original flags
        MOV     v4, a2                          ; v4 = original requested address
        ADD     a3, a2, a3                      ; a3 -> end (exclusive)
        MOV     a2, a2, LSR #20
        MOV     a2, a2, LSL #20                 ; round a2 down to a section boundary
        SUB     v4, v4, a2                      ; v4 = offset of original within section-aligned area
        MOV     lr, a3, LSR #20
        TEQ     a3, lr, LSL #20
        ADDNE   lr, lr, #1
        MOV     a3, lr, LSL #20                 ; round a3 up to a section boundary
        TST     v5, #MapInFlag_APSpecified
        BICEQ   a1, a1, #L1_AP                  ; for VMSAv6, assume the HAL knows what it's doing
        ORREQ   a1, a1, #L1_APMult * AP_None    ; and requests correct settings for AP_ROM
        ANDS    v5, v5, #MapInFlag_DoublyMapped
        SUBNE   v5, a3, a2                      ; v5 = offset of second mapping, or 0
        LDR     ip, =ZeroPage
        LDR     a4, =L1PT
        AND     a1, a1, v7                      ; only allow bufferable as flags option
        LDR     v2, =IO                         ; logical end (exclusive) of currently mapped IO
        LDR     v1, [ip, #IOAllocPtr]           ; logical start (inclusive)
        SUB     v1, v1, #&100000
10
        ADD     v1, v1, #&100000                ; next mapped IO section
        CMP     v1, v2
        BHS     %FT32                           ; no more currently mapped IO
        LDR     v3, [a4, v1, LSR #(20-2)]       ; L1PT entry (must be for mapped IO)
        MOV     lr, v3, LSR #20                 ; physical address bits
        CMP     lr, a2, LSR #20
        BNE     %BT10                           ; no address match
        AND     lr, v3, v7
        TEQ     lr, a1
        BNE     %BT10                           ; no flags match
        TEQ     v5, #0                          ; doubly mapped?
        BEQ     %FT19
        ADD     lr, v1, v5                      ; address of second copy
        CMP     lr, v2
        BHS     %FT32
        LDR     v3, [a4, lr, LSR #(20-2)]
        MOV     lr, v3, LSR #20                 ; physical address bits
        CMP     lr, a2, LSR #20
        BNE     %BT10                           ; no address match
        AND     lr, v3, v7
        TEQ     lr, a1
        BNE     %BT10                           ; no flags match
19
; Alright: found the start of the requested IO already mapped, and with the required flags.
        Push    "a2, v1"
20
        ADD     a2, a2, #&100000
        CMP     a2, a3
        Pull    "a2, v1", HS
        BHS     %FT40                           ; it's all there already!
        ADD     v1, v1, #&100000                ; next mapped IO section
        CMP     v1, v2
        BHS     %FT30                           ; no more currently mapped IO
        LDR     v3, [a4, v1, LSR #(20-2)]       ; L1PT entry
        MOV     lr, v3, LSR #20                 ; physical address bits
        CMP     lr, a2, LSR #20
        BNE     %FT29                           ; address match failed
        AND     lr, v3, v7
        TEQ     lr, a1
        TEQEQ   v5, #0                          ; doubly mapped?
        BEQ     %BT20                           ; address and flags match so far
        ADD     lr, v1, v5                      ; where the duplicate should be
        CMP     lr, v2
        BHS     %FT30                           ; no more currently mapped IO
        LDR     v3, [a4, lr, LSR #(20-2)]
        MOV     lr, v3, LSR #20                 ; physical address bits
        CMP     lr, a2, LSR #20
        BNE     %FT29                           ; address match failed
        AND     lr, v3, v7
        TEQ     lr, a1
        BEQ     %BT20
29
        Pull    "a2, v1"
        B       %BT10
30
        Pull    "a2, v1"
; Request not currently mapped, only partially mapped, or mapped with the wrong flags.
32
        LDR     ip, =ZeroPage
        LDR     v2, [ip, #IOAllocPtr]
        ADD     v1, v2, a2
        SUB     v1, v1, a3                      ; attempt to allocate size of a3-a2
        SUB     v1, v1, v5                      ; doubled if doubly mapped
        LDR     v3, [ip, #IOAllocLimit]         ; can't extend down below the limit
        CMP     v1, v3
        MOVLS   a1, #0
        BLS     %FT90
        STR     v1, [ip, #IOAllocPtr]
        ORR     a2, a2, a1
        ORR     a2, a2, #L1_Section             ; first L1PT value
34
        STR     a2, [a4, v1, LSR #(20-2)]
        TEQ     v5, #0
        ADDNE   v2, v1, v5
        STRNE   a2, [a4, v2, LSR #(20-2)]
        ADD     a2, a2, #&100000
        ADD     v1, v1, #&100000                ; next section
        CMP     a2, a3
        BLO     %BT34
        LDR     v1, [ip, #IOAllocPtr]
40
        ADD     a1, v1, v4                      ; logical address for the request
90
        EXIT
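; A hedged example of the interface above, as it might be called from C
; (sketch only: a C binding matching the register interface is assumed, and
; the device address is illustrative). Mapping &1000 bytes of a device at
; physical &80005000 returns a logical pointer with the sub-section offset
; (&5000) already applied, reusing any existing section whose flags match:
;
;   volatile uint32_t *regs =
;       (volatile uint32_t *)RISCOS_MapInIO(0, (void *)0x80005000, 0x1000);
;   if (regs == NULL) { /* no room left above IOAllocLimit */ }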
; void RISCOS_AddDevice(unsigned int flags, struct device *d)
RISCOS_AddDevice
        ADDS    a1, a2, #0                      ; also clears V
        B       HardwareDeviceAdd_Common

; uint32_t RISCOS_LogToPhys(const void *log)
RISCOS_LogToPhys ROUT
        Push    "r4,r5,r8,r9,lr"
        MOV     r4, a1
        LDR     r8, =L2PT
        BL      logical_to_physical
        MOVCC   a1, r5
        BCC     %FT10
; Try checking the L1PT for any section mappings (logical_to_physical only
; deals with regular 4K page mappings).
; TODO - Add large page support
        LDR     r9, =L1PT
        MOV     r5, r4, LSR #20
        LDR     a1, [r9, r5, LSL #2]
        ASSERT  L1_Section = 2
        EOR     a1, a1, #2
        TST     a1, #3                          ; EQ iff the entry is a section mapping
        MOVNE   a1, #-1
        BNE     %FT10
; Apply the offset from bits 0-19 of the logical address.
[ NoARMT2
        MOV     a1, a1, LSR #20
        ORR     a1, a1, r4, LSL #12
        MOV     a1, a1, ROR #12
|
        BFI     a1, r4, #0, #20
]
10
        Pull    "r4,r5,r8,r9,pc"

; kernel_oserror *RISCOS_IICOpV(IICDesc *descs, uint32_t ndesc_and_bus)
RISCOS_IICOpV
        Push    "lr"
        BL      IIC_OpV
        MOVVC   a1, #0
        Pull    "pc"

SetUpHALEntryTable
        LDR     a1, =ZeroPage
        LDR     a2, [a1, #HAL_Descriptor]
        LDR     a3, [a1, #HAL_Workspace]
        LDR     a4, [a2, #HALDesc_Entries]
        LDR     ip, [a2, #HALDesc_NumEntries]
        ADD     a4, a2, a4                      ; a4 -> entry table
        MOV     a2, a4                          ; a2 -> entry table (increments)
10
        SUBS    ip, ip, #1                      ; decrement counter
        LDRCS   a1, [a2], #4
        BCC     %FT20
        TEQ     a1, #0
        ADREQ   a1, NullHALEntry
        ADDNE   a1, a4, a1                      ; convert offset to absolute
        STR     a1, [a3, #-4]!                  ; store backwards below HAL workspace
        B       %BT10
20
        LDR     a1, =ZeroPage                   ; pad the table with NullHALEntries
        LDR     a4, =HALWorkspace               ; in case the HAL didn't supply enough
        ADR     a1, NullHALEntry
30
        CMP     a3, a4
        STRHI   a1, [a3, #-4]!
        BHI     %BT30
        MOV     pc, lr

NullHALEntry
        MOV     pc, lr
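; A rough C model of the table construction above (sketch only; the names
; desc, hal_workspace, hal_workspace_base and null_hal_entry are assumed, and
; the descriptor layout mirrors the HALDesc_* offsets):
;
;   uint32_t *offsets = (uint32_t *)((char *)desc + desc->entries);
;   void **slot = hal_workspace;                 /* table grows downwards  */
;   for (unsigned i = 0; i < desc->num_entries; i++)
;       *--slot = offsets[i] ? (void *)((char *)offsets + offsets[i])
;                            : (void *)null_hal_entry;
;   while (slot > hal_workspace_base)            /* pad a short table      */
;       *--slot = (void *)null_hal_entry;
;
; Entry N is then fetched as slot[-(N+1)] relative to the workspace pointer,
; which is what HardwareCall below does with LDR ip, [sb, -v8, LSL #2].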
; Can freely corrupt r10-r12 (v7,v8,ip).
HardwareSWI
        AND     ip, v5, #&FF
        CMP     ip, #1
        BLO     HardwareCall
        BEQ     HardwareLookup
        CMP     ip, #3
        BLO     HardwareDeviceAdd
        BEQ     HardwareDeviceRemove
        CMP     ip, #5
        BLO     HardwareDeviceEnumerate
        BHS     HardwareBadReason

HardwareCall
        Push    "v1-v4,sb,lr"
        ADD     v8, sb, #1                      ; v8 = entry no + 1
        LDR     ip, =ZeroPage
        LDR     v7, [ip, #HAL_Descriptor]
        AddressHAL ip                           ; sb set up
        LDR     v7, [v7, #HALDesc_NumEntries]   ; v7 = number of entries
        CMP     v8, v7                          ; entryno + 1 must be <= number of entries
        BHI     HardwareBadEntry2
        LDR     ip, [sb, -v8, LSL #2]
        ADR     v7, NullHALEntry
        TEQ     ip, v7
        BEQ     HardwareBadEntry2
[ NoARMv5
        MOV     lr, pc
        MOV     pc, ip
|
        BLX     ip
]
        ADD     sp, sp, #4*4
        Pull    "sb,lr"
        ExitSWIHandler

HardwareLookup
        ADD     v8, sb, #1                      ; v8 = entry no + 1
        LDR     ip, =ZeroPage
        LDR     v7, [ip, #HAL_Descriptor]
        AddressHAL ip
        LDR     v7, [v7, #HALDesc_NumEntries]
        CMP     v8, v7                          ; entryno + 1 must be <= number of entries
        BHI     HardwareBadEntry
        LDR     a1, [sb, -v8, LSL #2]
        ADR     v7, NullHALEntry
        TEQ     a1, v7
        BEQ     HardwareBadEntry
        MOV     a2, sb
        ExitSWIHandler

HardwareDeviceAdd
        Push    "r1-r3,lr"
        BL      HardwareDeviceAdd_Common
        Pull    "r1-r3,lr"
        B       SLVK_TestV

HardwareDeviceRemove
        Push    "r1-r3,lr"
        BL      HardwareDeviceRemove_Common
        Pull    "r1-r3,lr"
        B       SLVK_TestV

HardwareDeviceAdd_Common Entry
        BL      HardwareDeviceRemove_Common     ; first try to remove any device already at the same address
        EXIT    VS
        LDR     lr, =ZeroPage
        LDR     r1, [lr, #DeviceCount]
        LDR     r2, [lr, #DeviceTable]
        TEQ     r2, #0
        BEQ     %FT80
        ADD     r1, r1, #1                      ; increment DeviceCount
        LDR     lr, [r2, #-4]                   ; word before heap block is length including length word
        TEQ     r1, lr, LSR #2                  ; block already full?
        BEQ     %FT81
        LDR     lr, =ZeroPage
10
        STR     r1, [lr, #DeviceCount]
        ADD     lr, r2, r1, LSL #2
        SUB     lr, lr, #4
11
        LDR     r1, [lr, #-4]!                  ; copy existing devices up, so new ones get enumerated first
        STR     r1, [lr, #4]
        CMP     lr, r2
        BHI     %BT11
        STR     r0, [r2]
        MOV     r2, r0
        MOV     r1, #Service_Hardware
        MOV     r0, #0
        BL      Issue_Service
        ADDS    r0, r2, #0                      ; exit with V clear
        EXIT
80
; Claim a system heap block for the device table.
        Push    "r0"
        MOV     r3, #16
        BL      ClaimSysHeapNode
        ADDVS   sp, sp, #4
        EXIT    VS
        Pull    "r0"
        LDR     lr, =ZeroPage
        MOV     r1, #1
        STR     r2, [lr, #DeviceTable]
        B       %BT10
81
; Extend the system heap block.
        Push    "r0"
        MOV     r0, #HeapReason_ExtendBlock
        MOV     r3, #16
        BL      DoSysHeapOpWithExtension
        ADDVS   sp, sp, #4
        EXIT    VS
        Pull    "r0"
        LDR     lr, =ZeroPage
        LDR     r1, [lr, #DeviceCount]
        STR     r2, [lr, #DeviceTable]
        ADD     r1, r1, #1
        B       %BT10

HardwareDeviceRemove_Common Entry "r4"
        LDR     lr, =ZeroPage
        LDR     r3, [lr, #DeviceCount]
        LDR     r4, [lr, #DeviceTable]
        TEQ     r3, #0
        EXIT    EQ                              ; no devices registered
01
        LDR     r2, [r4], #4
        SUBS    r3, r3, #1
        TEQNE   r2, r0
        BNE     %BT01
        TEQ     r2, r0
        EXIT    NE                              ; this device not registered
        MOV     r0, #1
        MOV     r1, #Service_Hardware
        BL      Issue_Service
        CMP     r1, #0                          ; if the service call was claimed
        CMPEQ   r1, #1:SHL:31                   ; then set V (r0 already points to an error block)
        EXIT    VS                              ; and exit
        MOV     r0, r2
        SUBS    r3, r3, #1
02
        LDRCS   r2, [r4], #4                    ; copy down the remaining devices
        STRCS   r2, [r4, #-8]
        SUBCSS  r3, r3, #1
        BCS     %BT02
        LDR     lr, =ZeroPage
        LDR     r3, [lr, #DeviceCount]
        SUB     r3, r3, #1
        STR     r3, [lr, #DeviceCount]
        EXIT

HardwareDeviceEnumerate
        Push    "r3-r4,lr"
        LDR     lr, =ZeroPage
        LDR     r2, [lr, #DeviceCount]
        LDR     r3, [lr, #DeviceTable]
        SUBS    r4, r2, r1
        MOVLS   r1, #-1
        BLS     %FT90                           ; if r1 is out of range then exit
        ADD     r3, r3, r1, LSL #2
10
        ADD     r1, r1, #1
        LDR     r2, [r3], #4
        LDR     lr, [r2, #HALDevice_Type]
        EOR     lr, lr, r0
        MOVS    lr, lr, LSL #16                 ; EQ if types match
        SUBNES  r4, r4, #1
        BNE     %BT10
        TEQ     lr, #0
        MOVNE   r1, #-1
        BNE     %FT90
        LDR     lr, [r2, #HALDevice_Version]
        MOV     lr, lr, LSR #16
        CMP     lr, r0, LSR #16                 ; newer than our client understands?
        BLS     %FT90
        SUBS    r4, r4, #1
        BHI     %BT10
        MOV     r1, #-1
90
        Pull    "r3-r4,lr"
        ExitSWIHandler
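; The matching rule above, modelled in C for clarity (sketch only; r0 is the
; caller's type word, and device_count/device_table stand in for the
; DeviceCount/DeviceTable workspace values):
;
;   /* low 16 bits of r0: device type; high 16 bits: newest version the
;      caller understands */
;   struct device *enumerate(uint32_t r0, unsigned *index)
;   {
;       for (unsigned i = *index; i < device_count; i++) {
;           struct device *d = device_table[i];
;           if ((d->type & 0xFFFFu) == (r0 & 0xFFFFu) &&
;               (d->version >> 16) <= (r0 >> 16)) {
;               *index = i + 1;             /* R1 on exit: next index      */
;               return d;                   /* R2 on exit: matching device */
;           }
;       }
;       *index = (unsigned)-1;              /* R1 = -1: no more matches    */
;       return NULL;
;   }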
HardwareBadReason
        ADR     r0, ErrorBlock_HardwareBadReason
[ International
        Push    "lr"
        BL      TranslateError
        Pull    "lr"
]
        B       SLVK_SetV

HardwareBadEntry2
        ADD     sp, sp, #4*4
        Pull    "sb,lr"
HardwareBadEntry
        ADR     r0, ErrorBlock_HardwareBadEntry
[ International
        Push    "lr"
        BL      TranslateError
        Pull    "lr"
]
        B       SLVK_SetV

        MakeErrorBlock HardwareBadReason
        MakeErrorBlock HardwareBadEntry

[ DebugTerminal
DebugTerminal_Rdch
        Push    "a2-a4,sb,ip"
        WritePSRc SVC_mode, r1
        MOV     sb, ip
20
        CallHAL HAL_DebugRX
        CMP     a1, #27
        BNE     %FT25
        LDR     a2, =ZeroPage + OsbyteVars + :INDEX: RS423mode
        LDRB    a2, [a2]
        TEQ     a2, #0                          ; is RS423 raw data, or a keyboard emulator?
        BNE     %FT25
        LDR     a2, =ZeroPage
        LDRB    a1, [a2, #ESC_Status]
        ORR     a1, a1, #&40
        STRB    a1, [a2, #ESC_Status]           ; mark the escape flag
        MOV     a1, #27
        SEC                                     ; tell the caller to look carefully at R0
        Pull    "a2-a4,sb,ip,pc"
25
        CMP     a1, #-1
        Pull    "a2-a4,sb,ip,pc",NE             ; claim it
        LDR     R0, =ZeroPage
        LDRB    R14, [R0, #CallBack_Flag]
        TST     R14, #CBack_VectorReq
        BLNE    process_callback_chain
        B       %BT20

DebugTerminal_Wrch
        Push    "a1-a4,sb,ip,lr"
        MOV     sb, ip
        CallHAL HAL_DebugTX
        Pull    "a1-a4,sb,ip,pc"                ; don't claim it
]

[ DebugHALTX
DebugHALPrint
        Push    "a1-a4,v1,sb,ip"
        AddressHAL
        MOV     v1, lr
10
        LDRB    a1, [v1], #1
        TEQ     a1, #0
        BEQ     %FT20
        CallHAL HAL_DebugTX
        B       %BT10
20
        MOV     a1, #13
        CallHAL HAL_DebugTX
        MOV     a1, #10
        CallHAL HAL_DebugTX
        ADD     v1, v1, #3
        BIC     lr, v1, #3
        Pull    "a1-a4,v1,sb,ip"
        MOV     pc, lr
]

Reset_IRQ_Handler
        SUB     lr, lr, #4
        Push    "a1-a4,v1-v2,sb,ip,lr"
        MRS     a1, SPSR
        MRS     a2, CPSR
        ORR     a3, a2, #SVC32_mode
        MSR     CPSR_c, a3
        Push    "a1-a2,lr"
; If it's not an IIC interrupt, pass it on to the keyboard scan code.
        LDR     v2, =ZeroPage
        AddressHAL v2
        CallHAL HAL_IRQSource
        ADD     v1, v2, #IICBus_Base
        MOV     ip, #0
10
        LDR     a2, [v1, #IICBus_Type]
        TST     a2, #IICFlag_Background
        BEQ     %FT20
        LDR     a2, [v1, #IICBus_Device]
        CMP     a2, a1
        ADREQ   lr, Reset_IRQ_Exit
        BEQ     IICIRQ
20
        ADD     ip, ip, #1
        ADD     v1, v1, #IICBus_Size
        CMP     ip, #IICBus_Count
        BNE     %BT10
        LDRB    a2, [v2, #InitIRQWs+KbdScanActive]
        TEQ     a2, #0
        CallHAL HAL_KbdScanInterrupt,NE
; The keyboard scan code will have returned -1 if it handled the IRQ.
; If it didn't handle it, or keyboard scanning is inactive, something
; bad has happened.
        CMP     a1, #-1
        CallHAL HAL_IRQDisable,NE               ; stop the rogue device from killing us completely
Reset_IRQ_Exit
        Pull    "a1-a2,lr"
        MSR     CPSR_c, a2
        MSR     SPSR_cxsf, a1
        Pull    "a1-a4,v1-v2,sb,ip,pc",,^

        END