; Copyright 2000 Pace Micro Technology plc ; ; Licensed under the Apache License, Version 2.0 (the "License"); ; you may not use this file except in compliance with the License. ; You may obtain a copy of the License at ; ; http://www.apache.org/licenses/LICENSE-2.0 ; ; Unless required by applicable law or agreed to in writing, software ; distributed under the License is distributed on an "AS IS" BASIS, ; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ; See the License for the specific language governing permissions and ; limitations under the License. ; GBLL MinorL2PThack MinorL2PThack SETL {TRUE} ; Fixed page allocation is as follows ^ 0 DRAMOffset_FirstFixed # 0 DRAMOffset_ScratchSpace # 16*1024 DRAMOffset_PageZero # 16*1024 DRAMOffset_PageTables # 0 ; Base of the page tables used during init; just a useful symbol we can use for relative offsets [ LongDesc DRAMOffset_LL2PT # 16*1024 ; Each page needs to be page-aligned DRAMOffset_LL1PT # 4096 ; Only needs to be 32 byte aligned | DRAMOffset_L1PT # 16*1024 ; L1PT must be 16K-aligned ] DRAMOffset_LastFixed # 0 ; IMPORT Init_ARMarch ; IMPORT ARM_Analyse [ MEMM_Type = "VMSAv6" mmuc_init_new ; MMUC initialisation flags for ARMv6/ARMv7 ; This tries to leave the reserved/unpredictable bits untouched, while initialising everything else to what we want ; ARMv7MP (probably) wants SW. ARMv6 wants U+XP (which should both be fixed at 1 on ARMv7) DCD MMUC_BEN+MMUC_SW+MMUC_U+MMUC_XP ; M+C+W+Z+I+L2 clear to keep MMU/caches off. ; A to keep alignment exceptions off (for now at least) ; B+EE clear for little-endian ; S+R+RR clear to match mmuc_init_old ; V+VE clear to keep processor vectors at &0 ; FI clear to disable fast FIQs (interruptible LDM/STM) ; TRE+AFE clear for our VMSAv6 implementation ; TE clear for processor vectors to run in ARM mode DCD MMUC_M+MMUC_A+MMUC_C+MMUC_W+MMUC_B+MMUC_S+MMUC_R+MMUC_Z+MMUC_I+MMUC_V+MMUC_RR+MMUC_FI+MMUC_VE+MMUC_EE+MMUC_L2+MMUC_TRE+MMUC_AFE+MMUC_TE mmuc_init_old ; MMUC initialisation flags for ARMv5 and below, as per ARM600 MMU code ; Late abort (ARM6 only), 32-bit Data and Program space. No Write buffer (ARM920T ; spec says W bit should be set, but I reckon they're bluffing). ; ; The F bit's tricky. (1 => CPCLK=FCLK, 0=>CPCLK=FCLK/2). The only chip using it was the ; ARM700, it never really reached the customer, and it's always been programmed with ; CPCLK=FCLK. Therefore we'll keep it that way, and ignore the layering violation. DCD MMUC_F+MMUC_L+MMUC_D+MMUC_P ; All of these bits should be off already, but just in case... DCD MMUC_B+MMUC_W+MMUC_C+MMUC_A+MMUC_M+MMUC_RR+MMUC_V+MMUC_I+MMUC_Z+MMUC_R+MMUC_S ] ; void RISCOS_InitARM(unsigned int flags) ; RISCOS_InitARM MOV a4, lr ; Check if we're architecture 3. If so, don't read the control register. BL Init_ARMarch MOVEQ a3, #0 ARM_read_control a3, NE [ MEMM_Type = "VMSAv6" CMP a1, #ARMv6 CMPNE a1, #ARMvF ADREQ a2, mmuc_init_new ADRNE a2, mmuc_init_old LDMIA a2, {a2, lr} ORR a3, a3, a2 BIC a3, a3, lr | ; Late abort (ARM6 only), 32-bit Data and Program space. No Write buffer (ARM920T ; spec says W bit should be set, but I reckon they're bluffing). ; ; The F bit's tricky. (1 => CPCLK=FCLK, 0=>CPCLK=FCLK/2). The only chip using it was the ; ARM700, it never really reached the customer, and it's always been programmed with ; CPCLK=FCLK. Therefore we'll keep it that way, and ignore the layering violation. ORR a3, a3, #MMUC_F+MMUC_L+MMUC_D+MMUC_P ; All of these bits should be off already, but just in case... 
        BIC     a3, a3, #MMUC_B+MMUC_W+MMUC_C+MMUC_A+MMUC_M
        BIC     a3, a3, #MMUC_RR+MMUC_V+MMUC_I+MMUC_Z+MMUC_R+MMUC_S
 ]
; Off we go.
        ARM_write_control a3
        MOV     a2, #0
 [ MEMM_Type = "VMSAv6"
        myISB   ,a2,,y                  ; Ensure the update went through
 ]
; In case it wasn't a hard reset
 [ MEMM_Type = "VMSAv6"
        CMP     a1, #ARMvF
; Assume that all ARMvF ARMs have multi-level caches and thus no single MCR op for invalidating all the caches
        ADREQ   lr, %FT01
        BEQ     HAL_InvalidateCache_ARMvF
        MCRNE   ARM_config_cp,0,a2,ARMv4_cache_reg,C7 ; invalidate I+D caches
01
 ]
        CMP     a1, #ARMv3
        BNE     %FT01
        MCREQ   ARM_config_cp,0,a2,ARMv3_TLBflush_reg,C0 ; flush TLBs
        B       %FT02
01
        MCRNE   ARM_config_cp,0,a2,ARMv4_TLB_reg,C7 ; flush TLBs
02
 [ MEMM_Type = "VMSAv6"
        myDSB   ,a2,,y
        myISB   ,a2,,y
 ]
; We assume that ARMs with an I cache can have it enabled while the MMU is off.
 [ :LNOT:CacheOff
        ORRNE   a3, a3, #MMUC_I
        ARM_write_control a3, NE        ; whoosh
 [ MEMM_Type = "VMSAv6"
        myISB   ,a2,,y                  ; Ensure the update went through
 ]
 ]
; Check if we are in a 26-bit mode.
        MRS     a2, CPSR
; Keep a soft copy of the CR in a banked register (R13_und)
        MSR     CPSR_c, #F32_bit+I32_bit+UND32_mode
        MOV     sp, a3
; Switch into SVC32 mode (we may have been in SVC26 before).
        MSR     CPSR_c, #F32_bit+I32_bit+SVC32_mode
; If we were in a 26-bit mode, the lr value given to us would have had PSR flags in.
        TST     a2, #2_11100
        MOVNE   pc, a4
        BICEQ   pc, a4, #ARM_CC_Mask

; void *RISCOS_AddRAM(unsigned int flags, uintptr_t start, uintptr_t end, uintptr_t sigbits, void *ref)
; Entry:
;   flags   bit 0: video memory (currently only one block permitted)
;           bit 1: video memory is not suitable for general use
;           bit 2: memory can't be used for DMA (sound, video, or other)
;           bits 8-11: speed indicator (arbitrary, higher => faster)
;           bit 12: start, end, sigbits are shifted right 12 bits (for supporting large physical addresses)
;           other bits reserved (SBZ)
;   start   = start address of RAM (inclusive) (no alignment requirements)
;   end     = end address of RAM (exclusive) (no alignment requirements, but must be >= start)
;   sigbits = significant address bit mask (1 => this bit of addr decoded, 0 => this bit ignored)
;   ref     = reference handle (NULL for first call)
;
; The first registered block must be in the low 4GB, and blocks must not cross
; 4GB thresholds.
; The (unshifted) sigbits is assumed to be the same across all calls.
; A table is built up at the head of the first block of memory.
; The table consists of (addr, len, flags) pairs, terminated by a count of
; those pairs; ref points to that counter.
; The table stores addresses in terms of 4K pages, allowing us to
; theoretically support a 44-bit physical space.
; Twelve bits of flags are stored at the bottom of the length word.
; The remaining 20 length bits are the length of the block in 4K pages, minus
; one (so a full 4GB region will have length &FFFFF).

        ROUT
RISCOS_AddRAM
        Entry   "v1-v4"
        LDR     v4, [sp, #5*4]          ; Get ref

; Round to pages. If we were extra sneaky we could not do this and chuck out incomplete
; pages after concatenation, but it would be a weird HAL that gave us pages split across
; calls.
        TST     a1, #OSAddRAM_LargeAddresses
        ADDEQ   a2, a2, #4096           ; round start address up
        SUBEQ   a2, a2, #1
        MOVEQ   a2, a2, LSR #12
        MOVEQ   a3, a3, LSR #12         ; round end address down
        MOVEQ   a4, a4, LSR #12
        ORREQ   a4, a4, #&FF000000
        ORREQ   a4, a4, #&00F00000
        BIC     a1, a1, #OSAddRAM_LargeAddresses ; Flag no longer relevant
 [ :LNOT: LongDesc
; Ignore any RAM above the 32bit barrier if we've been built with
; long-descriptor support disabled (HAL may not know our build settings)
        CMP     a2, #1:SHL:20
        BHS     %FT90
 ]
        CMP     a3, a2
        BLS     %FT90                   ; check we aren't now null
        CMP     v4, #0
        BEQ     %FT20

; We are not dealing with the first block since v4 != 0. Make an attempt to merge this block
; with the previous block.
        LDMDB   v4, {v1, v2}            ; Get details of the previous block
        EOR     ip, v1, a2
        CMP     ip, #1:SHL:20
        BHS     %FT20                   ; Don't merge if they're in different 4GB blocks
        MOV     v3, v2, LSL #20         ; Isolate flags
        MOV     v2, v2, LSR #12         ; And get length in pages
        ADD     v2, v1, v2              ; Get the end address
        ADD     v2, v2, #1
        EOR     v2, v2, a2              ; Compare with the current block start address...
        TST     v2, a4                  ; ... but only check the decoded bits.
        EOR     v2, v2, a2              ; Restore the previous block end address.
        TEQEQ   v3, a1, LSL #20         ; And are the page flags the same?
        BNE     %FT10                   ; We can't merge it after the previous block

; v1 = previous start
; v2 = previous end
; The block is just after the previous block. That means the start address is unchanged, but
; the length is increased.
        LDR     v2, [v4, #-4]           ; Reload length+flags
        SUB     a3, a3, a2              ; Find the length of the new block.
; a3 = length of block
        ADD     v2, v2, a3, LSL #12     ; Add it to the previous length.
        STR     v2, [v4, #-4]           ; Update the block size in memory.
        MOV     a1, v4
        EXIT

; The block is not just after the previous block, but it may be just before. This may be the
; case if we are softloaded.
10
        SUB     v1, v1, #1              ; Compare the address before the previous block start ...
        SUB     a3, a3, #1              ; ... with the address of the last page in this block ...
        EOR     v1, v1, a3
        TST     v1, a4                  ; ... but check only the decoded bits.
        ADD     a3, a3, #1              ; Restore the end address.
        TEQEQ   v3, a1, LSL #20         ; And are the page flags the same?
        BNE     %FT20                   ; Skip if we cannot merge the block.

; The block is just before the previous block. The start address and length both change.
        LDMDB   v4, {v1, v2}            ; Get the previous block again
        SUB     a3, a3, a2              ; Calculate the current block size.
        SUB     v1, v1, a3              ; Subtract from the previous block start address.
        ADD     v2, v2, a3, LSL #12     ; Calculate the new length+flags
        STMDB   v4, {v1, v2}            ; Update the block info in memory.
        MOV     a1, v4
        EXIT

; We now have a region which does not merge with a previous region. We move it up to the
; highest address we can in the hope that this block will merge with the next block.
20
        SUB     a3, a3, a2              ; Calculate the block size
        SUB     a3, a3, #1
        ORR     a3, a3, a1, LSL #20     ; Put the flags in
        MOV     a3, a3, ROR #20         ; And rotate so they're at the bottom
        MVN     v1, a4                  ; Get the non-decoded address lines.
        ORR     a2, v1, a2              ; Set the non-decoded address bits in the start address.
30
        CMP     v4, #0                  ; If the workspace has not been allocated...
        MOVEQ   v4, a2, LSL #12         ; ... use this block.
        MOVEQ   v1, #0                  ; Initialise the counter.
; The block/fragment to be added is between a2 and a2+a3.
        LDRNE   v1, [v4]                ; Get the old counter if there was one.
        STMIA   v4!, {a2, a3}           ; Store address and size.
        ADD     v1, v1, #1              ; Increment the counter.
        STR     v1, [v4]                ; Store the counter.
90
        MOV     a1, v4
        EXIT                            ; We're done with this block now.
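; As an aside, the table format described above can be summarised by this
; minimal C sketch (illustrative only; the struct and helper names are
; invented here and are not part of the kernel sources):
;
;   #include <stdint.h>
;
;   typedef struct {
;       uint32_t addr;      /* start of block, in 4K page units */
;       uint32_t lenflags;  /* bits 12-31: length in pages, minus one;
;                              bits 0-11: the flags passed to RISCOS_AddRAM */
;   } RAMBlock;
;
;   /* 'ref' returned by RISCOS_AddRAM points at the terminating count word,
;      so the N entries sit immediately below it in memory */
;   static uint64_t block_base(const RAMBlock *b)  { return (uint64_t)b->addr << 12; }
;   static uint64_t block_size(const RAMBlock *b)  { return ((uint64_t)(b->lenflags >> 12) + 1) << 12; }
;   static uint32_t block_flags(const RAMBlock *b) { return b->lenflags & 0xFFF; }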
; Subtractv1v2fromRAMtable ; ; On entry: v1 = base of memory area ; v2 = size of memory area ; a4 = RAM table handle (ie pointer to terminator word containing number of entries) ; ; On exit: a1-a3 preserved ; a4 and RAM table updated ; other registers corrupted Subtractv1v2fromRAMtable ADD v2, v1, v2 ; v2 = end address MOV v1, v1, LSR #12 ; round base down ADD v2, v2, #4096 SUB v2, v2, #1 MOV v2, v2, LSR #12 ; round end up LDR v5, [a4] SUB v8, a4, v5, LSL #3 10 TEQ v8, a4 MOVEQ pc, lr LDMIA v8!, {v3, v4} ADD v6, v3, v4, LSR #12 ADD v6, v6, #1 ; v6 = end of RAM block CMP v2, v3 ; if our end <= RAM block start CMPHI v6, v1 ; or RAM block end <= our start BLS %BT10 ; then no intersection MOV v4, v4, LSL #20 ; extract flags CMP v1, v3 BHI not_bottom ; our area is at the bottom CMP v2, v6 BHS remove_block SUB v6, v6, v2 ; v6 = new size SUB v6, v6, #1 MOV v6, v6, LSL #12 ORR v6, v6, v4, LSR #20 ; + flags STMDB v8, {v2, v6} ; store new base (= our end) and size B %BT10 ; we've completely covered a block. Remove it. remove_block MOV v7, v8 20 TEQ v7, a4 ; shuffle down subsequent blocks in table LDMNEIA v7, {v3, v4} STMNEDB v7, {v3, v4} ADDNE v7, v7, #8 BNE %BT20 SUB v5, v5, #1 SUB a4, a4, #8 STR v5, [a4] SUB v8, v8, #8 B %BT10 ; our area is not at the bottom. not_bottom CMP v2, v6 BLO split_block ; our area is at the top SUB v6, v1, v3 ; v6 = new size SUB v6, v6, #1 MOV v6, v6, LSL #12 ORR v6, v6, v4, LSR #20 ; + flags STMDB v8, {v3, v6} ; store original base and new size B %BT10 split_block MOV v7, a4 30 TEQ v7, v8 ; shuffle up subsequent blocks in table LDMDB v7, {v3, v4} STMNEIA v7, {v3, v4} SUBNE v7, v7, #8 BNE %BT30 ADD v5, v5, #1 ADD a4, a4, #8 STR v5, [a4] MOV v4, v4, LSL #20 ; (re)extract flags SUB v7, v1, v3 ; v7 = size of first half SUB v6, v6, v2 ; v6 = size of second half SUB v7, v7, #1 SUB v6, v6, #1 MOV v7, v7, LSL #12 MOV v6, v6, LSL #12 ORR v7, v7, v4, LSR #20 ORR v6, v6, v4, LSR #20 ; + flags STMDB v8, {v3, v7} STMIA v8!, {v2, v6} B %BT10 ;void RISCOS_Start(unsigned int flags, int *riscos_header, int *hal_header, void *ref) ; ; We don't return, so no need to obey ATPCS, except for parameter passing. ; register usage: v4 = location of VRAM ; v6 = amount of VRAM ROUT RISCOS_Start TEQ a4, #0 01 BEQ %BT01 ; Stop here if no RAM ; subtract the HAL and OS from the list of RAM areas MOV v1, a2 LDR v2, [a2, #OSHdr_ImageSize] BL Subtractv1v2fromRAMtable LDR v1, [a3, #HALDesc_Start] ADD v1, a3, v1 LDR v2, [a3, #HALDesc_Size] BL Subtractv1v2fromRAMtable LDR v5, [a4] ; v5 = the number of RAM blocks SUB v8, a4, v5, LSL #3 ; Jump back to the start of the list. ; Search for some VRAM 05 LDMIA v8!, {v1, v2} ; Get a block from the list. (v1,v2)=(addr,size+flags) TST v2, #OSAddRAM_IsVRAM ; Is it VRAM? BNE %FT20 ; If so, deal with it below TEQ v8, a4 ; Carry on until end of list or we find some. BNE %BT05 ; Extract some pseudo-VRAM from first DMA-capable RAM block SUB v8, a4, v5, LSL #3 ; Rewind again. 06 LDMIA v8!, {v1, v2} TEQ v8, a4 ; End of list? TSTNE v2, #OSAddRAM_NoDMA ; DMA capable? BNE %BT06 MOV v2, v2, LSR #12 ; Remove flags ADD v2, v2, #1 ; Is this the only DMA-capable block? MOV v4, v8 MOV v6, #OSAddRAM_NoDMA 07 TEQ v4, a4 BEQ %FT08 LDR v6, [v4, #4] ADD v4, v4, #8 TST v6, #OSAddRAM_NoDMA BNE %BT07 08 ; v6 has NoDMA set if v8 was the only block TST v6, #OSAddRAM_NoDMA MOV v4, v1 ; Allocate block as video memory MOV v6, v2 BEQ %FT09 SUBS v6, v6, #(16*1024*1024):SHR:12 ; Leave 16M if it was the only DMA-capable block MOVLS v6, v2, LSR #1 ; If that overflowed, take half the bank. 
09 CMP v6, #(32*1024*1024):SHR:12 MOVHS v6, #(32*1024*1024):SHR:12 ; Limit allocation to 32M (arbitrary) ADD v1, v1, v6 ; Adjust the RAM block base... SUBS v2, v2, v6 ; ... and the size LDREQ v6, [v8, #-4] BEQ %FT21 ; pack array tighter if this block is all gone STR v1, [v8, #-8] ; update base LDR v1, [v8, #-4] MOV v1, v1, LSL #20 ; original flags ORR v6, v6, v1 ; merged into VRAM size ORR v1, v1, v2 ; and into new size MOV v1, v1, ROR #20 MOV v6, v6, ROR #20 SUB v1, v1, #4096 STR v1, [v8, #-4] ; update size B %FT30 ; Note real VRAM parameters 20 MOV v6, v2 ; Remember the size+flags and MOV v4, v1 ; address of the VRAM 21 ADD v6, v6, #4096 ; Assume <4GB of VRAM! 22 TEQ v8, a4 ; if not at the end of the array LDMNEIA v8, {v1, v2} ; pack the array tighter STMNEDB v8, {v1, v2} ADDNE v8, v8, #8 BNE %BT22 25 SUB v5, v5, #1 ; decrease the counter STR v5, [a4, #-8]! ; and move the end marker down 30 SUB v8, a4, v5, LSL #3 ; Rewind to start of list ; Scan forwards to find the fastest block of non-DMAable memory which is at least DRAMOffset_LastFixed size, at least 16KB aligned, and located in the first 4GB LDMIA v8!, {v1, v2} 31 TEQ v8, a4 MOVEQ v1, v1, LSL #12 BEQ %FT32 LDMIA v8!, {v7, ip} MOV sp, v7, ROR #2 ; Bottom two bits (for 16KB alignment) rotated high CMP sp, #1:SHL:18 ; Ignore if not aligned, or base address >= 4GB BHS %BT31 CMP ip, #DRAMOffset_LastFixed-4096 ANDHS sp, ip, #&F*OSAddRAM_Speed+OSAddRAM_NoDMA ANDHS lr, v2, #&F*OSAddRAM_Speed+OSAddRAM_NoDMA ASSERT OSAddRAM_Speed = 1:SHL:8 ASSERT OSAddRAM_NoDMA < OSAddRAM_Speed MOVHS sp, sp, ROR #8 ; Give NoDMA flag priority over speed when sorting CMPHS sp, lr, ROR #8 MOVHI v1, v7 MOVHI v2, ip B %BT31 32 ; Fill in the Kernel's permanent memory table, sorting by address, speed and DMA ability. ; * Address: All memory that falls in the low 4GB of the physical map ; comes first. 
This makes it easier for our initial memory allocation ; (no danger of allocating pages which can't be accessed with the MMU ; off), but may also help with wider software compatibility (all low- ; RAM pages occupy the lowest physical page numbers) ; * Non-DMAable RAM is preferred over DMAable, as the kernel requires ; very little DMAable RAM, and we don't want to permanently claim ; DMAable RAM if we're not actually using it for DMA (in case machine ; only has a tiny amount available) ; * Speed: Fastest RAM is listed first, so that we'll prefer to allocate ; it for these important kernel/system areas ADD ip, v1, #DRAMOffset_PageZero ASSERT DRAMOffset_PageZero > 0 ; If the workspace block is the block containing the OS_AddRAM list, make sure the two don't overlap otherwise we might corrupt it while we copy it ADD sp, v1, #DRAMOffset_ScratchSpace + ScratchSpaceSize Push "a1,a2,a3" ; Remember our arguments SUB v8, a4, v5, LSL #3 ; Rewind to start of list CMP v5, #DRAMPhysTableSize ; Don't overflow our table ADDHI a4, v8, #DRAMPhysTableSize*8 - 8 ; First put the VRAM information in to free up some regs ADD v7, ip, #VideoPhysAddr STMIA v7!, {v4, v6} ; Now fill in the rest ASSERT DRAMPhysAddrA = VideoPhysAddr+8 MOV v1, v1, LSR #12 ADDS v2, v2, #4096 ; Store true length ADDCS v2, v2, #1:SHL:31 ; If it overflowed, must have been 4GB block, so clamp at 2GB (loop below will add the second 2GB) STMIA v7!, {v1, v2} ; workspace block must be first 33 TEQ v8, a4 BEQ %FT39 LDMIA v8!, {v1, v2} ADDS v2, v2, #4096 ; Get true length ADDCS v2, v2, #1:SHL:31 ; If it overflowed, must have been 4GB block, so split into two 2GB blocks SUBCS v2, v2, #4096 ADDCS v1, v1, #1:SHL:(31-12) STMCSDB v8!, {v1, v2} ADDCS v2, v2, #4096 SUBCS v1, v1, #1:SHL:(31-12) ADD a1, ip, #DRAMPhysAddrA LDMIA a1!, {a2, a3} TEQ v1, a2 BEQ %BT33 ; don't duplicate the initial block ; Perform insertion sort ; a1-a3, v3-v6, ip, lr free AND v3, v2, #&F*OSAddRAM_Speed CMP v1, #1:SHL:20 ORRLO v3, v3, #1:SHL:31 ; Low RAM takes priority TST v2, #OSAddRAM_NoDMA ORRNE v3, v3, #1:SHL:30 ; Followed by non-DMA 34 AND v4, a3, #&F*OSAddRAM_Speed CMP a2, #1:SHL:20 ORRLO v4, v4, #1:SHL:31 ; Low RAM takes priority TST a3, #OSAddRAM_NoDMA ORRNE v4, v4, #1:SHL:30 ; Followed by non-DMA CMP v3, v4 ; Compare priority value BHI %FT35 TEQ a1, v7 LDMNEIA a1!, {a2, a3} BNE %BT34 ADD a1, a1, #8 35 ADD v7, v7, #8 ; Insert at a1-8, overwriting {a2, a3} 36 STMDB a1, {v1, v2} ; store new entry TEQ a1, v7 MOVNE v1, a2 ; if not at end, shuffle MOVNE v2, a3 ; overwritten entry down one, LDMNEIA a1!, {a2, a3} ; load next to be overwritten, BNE %BT36 ; and loop B %BT33 39 ; Now we have to work out the total RAM size MOV a2, #0 ADD v6, ip, #PhysRamTable MOV a3, v6 40 LDMIA v6!, {v1, v2} ; get address, size ADD a2, a2, v2, LSR #12 ; add on size TEQ v6, v7 BNE %BT40 ; Work out how much DMAable RAM the HAL/kernel needs LDR a1, [sp, #8] LDR a1, [a1, #HALDesc_Flags] TST a1, #HALFlag_NCNBWorkspace ; do they want uncacheable workspace? 
LDRNE a1, =SoundDMABuffers-CursorChunkAddress + ?SoundDMABuffers + 32*1024 + DRAMOffset_LastFixed LDREQ a1, =SoundDMABuffers-CursorChunkAddress + ?SoundDMABuffers + DRAMOffset_LastFixed ; Scan PhysRamTable for a DMAable block of at least this size, extract it, and stash it in InitDMABlock ; Once the initial memory claiming is done we can re-insert it ADD a4, a3, #DRAMPhysAddrA-VideoPhysAddr ; don't claim VRAM ; First block needs special treatment as we've already claimed some of it LDMIA a4!, {v1, v2} TST v2, #OSAddRAM_NoDMA BNE %FT41 CMP v2, a1 BLO %FT41 ; Oh crumbs, the first block is a match for our DMA block ; Claim it as normal, but set InitDMAEnd to v1+DRAMOffset_LastFixed so ; that the already used bit won't get used for DMA ; We also need to be careful later on when picking the initial v2 value MOV lr, v1, LSL #12 ADD lr, lr, #DRAMOffset_LastFixed STR lr, [ip, #InitDMAEnd] B %FT43 41 ; Go on to check the rest of PhysRamTable SUB a1, a1, #DRAMOffset_LastFixed 42 LDMIA a4!, {v1, v2} TST v2, #OSAddRAM_NoDMA BNE %BT42 CMP v2, a1 BLO %BT42 CMP v1, #1:SHL:20 ; <4GB only for now BHS %BT42 ; Make a note of this block MOV lr, v1, LSL #12 STR lr, [ip, #InitDMAEnd] 43 STR v1, [ip, #InitDMABlock] STR v2, [ip, #InitDMABlock+4] SUB lr, a4, a3 STR lr, [ip, #InitDMAOffset] ; Now shrink/remove this memory from PhysRamTable SUB v2, v2, a1 ADD v1, v1, a1, LSR #12 CMP v2, #4096 ; Block all gone? STMHSDB a4, {v1, v2} ; no, just shrink it BHS %FT55 45 CMP a4, v7 LDMNEIA a4, {v1, v2} STMNEDB a4, {v1, v2} ADDNE a4, a4, #8 BNE %BT45 SUB v7, v7, #8 ; a2 = Total memory size (pages) ; a3 = PhysRamTable ; v7 = After last used entry in PhysRamTable ; ip -> ZeroPage ; now store zeros to fill out table 55 ADD v2, a3, #PhysRamTableEnd-PhysRamTable MOV v3, #0 MOV v4, #0 57 CMP v7, v2 STMLOIA v7!, {v3, v4} BLO %BT57 ; Calculate PhysIllegalMask before anything tries to use it BL DeterminePhysIllegalMask ; Time to set up the L1PT. Just zero it out for now. [ LongDesc ASSERT DRAMOffset_LL1PT = DRAMOffset_LL2PT+16*1024 LDR a4, =DRAMOffset_LL1PT+4096-(PhysRamTable+DRAMOffset_PageZero) ; offset from a3 to L1PT end ADD a3, a3, a4 MOV a4, #16*1024+4096 ; Clear the full L1PT page, even though we only need/use a small part of it | LDR a4, =DRAMOffset_L1PT+16*1024-(PhysRamTable+DRAMOffset_PageZero) ; offset from a3 to L1PT end ADD a3, a3, a4 MOV a4, #16*1024 ] MOV v2, #0 MOV v3, #0 MOV v4, #0 MOV v5, #0 MOV v6, #0 MOV v7, #0 MOV v8, #0 MOV ip, #0 60 STMDB a3!, {v2-v8,ip} ; start at end and work back SUBS a4, a4, #8*4 BNE %BT60 [ LongDesc ; a3 -> L2PT ; Set up L1PT ADD lr, a3, #DRAMOffset_LL1PT - DRAMOffset_PageTables ASSERT :INDEX: DRAMOffset_PageTables = :INDEX: DRAMOffset_LL2PT ORR v1, a3, #LL12_Table MOV v2, #0 STRD v1, [lr], #8 ADD v1, v1, #4096 STRD v1, [lr], #8 ADD v1, v1, #4096 STRD v1, [lr], #8 ADD v1, v1, #4096 STRD v1, [lr], #8 ] ADD v1, a3, #DRAMOffset_PageZero - DRAMOffset_PageTables ADD v2, a3, #DRAMOffset_LastFixed - DRAMOffset_PageTables STR a2, [v1, #RAMLIMIT] ; remember the RAM size SUB lr, a2, #1 STR lr, [v1, #MaxCamEntry] MOV lr, a2, LSR #12-CAM_EntrySizeLog2 ; no. 
of pages needed for CAM CMP a2, lr, LSL #12-CAM_EntrySizeLog2 ADDNE lr, lr, #1 ; round up MOV lr, lr, LSL #12 STR lr, [v1, #SoftCamMapSize] STR a3, [v1, #InitUsedStart] ; store start of L1PT ADD v1, v1, #DRAMPhysAddrA MOV v2, v2, LSR #12 MOV v3, a3 ; Detect if the DMA claiming adjusted the first block ; If so, we'll need to reset v2 to the start of the block at v1 LDR a1, [v1] ADD lr, a1, #DRAMOffset_LastFixed:SHR:12 TEQ lr, v2 MOVNE v2, a1 ; For the next batch of allocation routines, v1-v3 are treated as globals. ; v1 -> current entry in PhysRamTable ; v2 -> next address to allocate in v1 (may point at end of v1), in units of pages ; v3 -> L1PT (or 0 if MMU on - not yet) ; Set up some temporary PCBTrans and PPLTrans pointers, and the initial page flags used by the page tables ADD a1, v3, #DRAMOffset_PageZero - DRAMOffset_PageTables BL Init_PCBTrans ; Allocate the L2PT backing store for the logical L2PT space, to ; prevent recursion. [ LongDesc ; L3PT is 8MB in size, and each 4KB of L3PT only covers 2MB of RAM ; This means we'll need 4 pages of L3PT to create the full logical ; mapping of L3PT. So to avoid recursion, we need to start by explicitly ; allocating the area that will be used for the logical mapping of L3PT. LDR a1, =LL3PT+(LL3PT:SHR:9) MOV a2, #&00800000:SHR:9 BL AllocateL2PT ; Now allocate the rest of the L3PT logical mapping LDR a1, =LL3PT MOV a2, #&00800000 BL AllocateL2PT | ; Each 4KB of L2PT covers 4MB of RAM, and L2PT is 4MB in size, so the ; first page we allocate here will allow us to create the full logical ; mapping of L2PT (protecting against any recursion in the allocation ; routines) LDR a1, =L2PT MOV a2, #&00400000 BL AllocateL2PT ] ; Allocate workspace for the HAL ADD a4, v3, #DRAMOffset_PageZero - DRAMOffset_PageTables LDR a3, [sp, #8] ; recover pushed HAL header LDR a1, =HALWorkspace LDR a2, =AreaFlags_HALWorkspace LDR lr, [a3, #HALDesc_Workspace] ; their workspace LDR ip, [a3, #HALDesc_NumEntries] ; plus 1 word per entry CMP ip, #KnownHALEntries MOVLO ip, #KnownHALEntries ADD lr, lr, ip, LSL #2 MOV a3, lr, LSR #12 ; round workspace up to whole MOV a3, a3, LSL #12 ; number of pages CMP a3, lr ADDNE a3, a3, #&1000 STR a3, [a4, #HAL_WsSize] ; Make a note of allocated space ADD ip, a1, ip, LSL #2 ; Their workspace starts STR ip, [a4, #HAL_Workspace] ; after our table of entries BL Init_MapInRAM LDR a3, [sp, #8] ; recover pushed HAL header LDR lr, [a3, #HALDesc_Flags] TST lr, #HALFlag_NCNBWorkspace ; do they want uncacheable LDRNE a1, =HALWorkspaceNCNB ; workspace? LDRNE a2, =AreaFlags_HALWorkspaceNCNB LDRNE a3, =32*1024 BLNE Init_MapInRAM_DMA ; Bootstrap time. We want to get the MMU on ASAP. We also don't want to have to ; clear up too much mess later. So what we'll do is map in the three fixed areas ; (L1PT, scratch space and page zero), the CAM, ourselves, and the HAL, ; then turn on the MMU. The CAM will be filled in once the MMU is on, by ; reverse-engineering the page tables? 
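; In outline, the bootstrap below plus the MMU switch-on that follows it
; behaves like this hedged C-style pseudocode (helper names invented purely
; for illustration; sizes per the fixed allocation at the top of this file):
;
;   map(pagezero_phys,   ZeroPage,     16*1024);  /* fixed kernel workspace    */
;   map(scratch_phys,    ScratchSpace, 16*1024);  /* fixed scratch area        */
;   map(pagetables_phys, L1PT_or_LL2PT, ...);     /* the page tables themselves */
;   map(rom_phys,        ROM,          ...);      /* HAL and RISC OS image     */
;   identity_map(mmu_switch_code);                /* phys == log over the switch */
;   enable_mmu();
;   branch_to_logical_copy();
;   remove_identity_mapping();                    /* then rebuild the CAM      */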
; Map in page zero ADD a1, v3, #DRAMOffset_PageZero - DRAMOffset_PageTables LDR a2, =ZeroPage LDR a3, =AreaFlags_ZeroPage MOV a4, #16*1024 BL Init_MapIn ; Map in scratch space ADD a1, v3, #DRAMOffset_ScratchSpace - DRAMOffset_PageTables MOV a2, #ScratchSpace LDR a3, =AreaFlags_ScratchSpace MOV a4, #16*1024 BL Init_MapIn [ LongDesc ; Map in L1PT+L2PT MOV a1, v3 LDR a2, =LL2PT ADD a3, v3, #DRAMOffset_PageZero - DRAMOffset_PageTables LDR a3, [a3, #PageTable_PageFlags] ORR a3, a3, #PageFlags_Unavailable ASSERT LL1PT=LL2PT+16*1024 MOV a4, #16*1024+4096 BL Init_MapIn | ; Map in L1PT MOV a1, v3 LDR a2, =L1PT ADD a3, v3, #DRAMOffset_PageZero - DRAMOffset_PageTables LDR a3, [a3, #PageTable_PageFlags] ORR a3, a3, #PageFlags_Unavailable MOV a4, #16*1024 BL Init_MapIn ] ; Map in L1PT again in PhysicalAccess (see below) [ LongDesc MOV a1, v3, LSR #21 MOV a1, a1, LSL #21 ; 2MB containing L1PT MOV a4, #2*1024*1024 | MOV a1, v3, LSR #20 MOV a1, a1, LSL #20 ; megabyte containing L1PT MOV a4, #1024*1024 ] LDR a2, =PhysicalAccess ADD a3, v3, #DRAMOffset_PageZero - DRAMOffset_PageTables LDR a3, [a3, #PageTable_PageFlags] ORR a3, a3, #PageFlags_Unavailable BL Init_MapIn ; Examine HAL and RISC OS locations LDMFD sp, {v4,v5,v6} ; v4 = flags, v5 = RO desc, v6 = HAL desc LDR lr, [v6, #HALDesc_Size] LDR v7, [v6, #HALDesc_Start] ADD v6, v6, v7 ; (v6,v8)=(start,end) of HAL ADD v8, v6, lr LDR v7, [v5, #OSHdr_ImageSize] ADD v7, v5, v7 ; (v5,v7)=(start,end) of RISC OS TEQ v8, v5 ; check contiguity (as in a ROM image) BNE %FT70 ; HAL and RISC OS are contiguous. Yum. MOV a1, v6 LDR a2, =RISCOS_Header SUB a2, a2, lr SUB ip, a2, a1 ; change physical addresses passed in LDMIB sp, {a3, a4} ; into logical addresses ADD a3, a3, ip ADD a4, a4, ip STMIB sp, {a3, a4} LDR a3, [v5, #OSHdr_DecompressHdr] ; check if ROM is compressed, and if so, make writeable CMP a3, #0 MOVNE a3, #OSAP_None MOVEQ a3, #OSAP_ROM SUB a4, v7, v6 BL Init_MapIn MOV a3, v6 B %FT75 70 ; HAL is separate. (We should cope with larger images) LDR a2, =ROM MOV a1, v6 SUB ip, a2, a1 ; change physical address passed in LDR a3, [sp, #8] ; into logical address ADD a3, a3, ip STR a3, [sp, #8] SUB a4, v8, v6 MOV a3, #OSAP_ROM BL Init_MapIn ; And now map in RISC OS LDR a2, =RISCOS_Header ; Hmm - what if position independent? MOV a1, v5 SUB ip, a2, a1 ; change physical address passed in LDR a3, [sp, #4] ; into logical address ADD a3, a3, ip STR a3, [sp, #4] SUB a4, v7, v5 LDR a3, [v5, #OSHdr_DecompressHdr] CMP a3, #0 MOVNE a3, #OSAP_None MOVEQ a3, #OSAP_ROM BL Init_MapIn MOV a3, v5 75 ; We've now allocated all the pages we're going to before the MMU comes on. ; Note the end address (for RAM clear) ADD a1, v3, #DRAMOffset_PageZero - DRAMOffset_PageTables STR v1, [a1, #InitUsedBlock] STR v2, [a1, #InitUsedEnd] STR a3, [a1, #ROMPhysAddr] ; Note the HAL flags passed in. LDR a2, [sp, #0] STR a2, [a1, #HAL_StartFlags] ; Set up a reset IRQ handler (for IIC CMOS access) MSR CPSR_c, #IRQ32_mode + I32_bit + F32_bit LDR sp_irq, =ScratchSpace + 1024 ; 1K is plenty since Reset_IRQ_Handler now runs in SVC mode MSR CPSR_c, #SVC32_mode + I32_bit + F32_bit LDR a2, =Reset_IRQ_Handler STR a2, [a1, #InitIRQHandler] ; Fill in some initial processor vectors. These will be used during ARM ; analysis, once the MMU is on. We do it here before the data cache is ; activated to save any IMB issues. ADRL a2, InitProcVecs ADD a3, a2, #InitProcVecsEnd - InitProcVecs 76 LDR a4, [a2], #4 CMP a2, a3 STR a4, [a1], #4 BLO %BT76 MMU_activation_zone ; The time has come to activate the MMU. Steady now... 
Due to unpredictability of MMU ; activation, need to ensure that mapped and unmapped addresses are equivalent. To ; do this, we temporarily make the section containing virtual address MMUon_instr map ; to the same physical address. In case the code crosses a section boundary, do the ; next section as well. ; MOV a1, #4_0000000000000001 ; domain 0 client only ARM_MMU_domain a1 ADR a1, MMU_activation_zone [ LongDesc ; To avoid refactoring things too much, only touch one page table ; entry instead of two. However because each L2PT entry is 2MB we end ; up covering the same area (just 2MB aligned instead of the 1MB ; alignment that you'd get with short descriptor L1PT). MOV a3, a1, LSR #21 ; a3 = 2MB number (stays there till end) ADD lr, v3, a3, LSL #3 ; lr -> L2PT entry LDRD a1, [lr] ; remember old mapping Push "a1,a2" MOV a1, a3, LSL #21 ORR a1, a1, #LL12_Block + LLAttr_Nrm_NC + LL_Page_LowAttr_AP2 ; Non-cacheable, read-only ORR a1, a1, #LL_Page_LowAttr_AF MOV a2, #0 STRD a1, [lr] Pull "a1,a2" | MOV a1, a1, LSR #20 ; a1 = megabyte number (stays there till end) ADD lr, v3, a1, LSL #2 ; lr -> L1PT entry LDMIA lr, {a2, a3} ; remember old mappings [ MEMM_Type = "VMSAv6" LDR ip, =(AP_ROM * L1_APMult) + L1_Section | [ ARM6support LDR ip, =(AP_None * L1_APMult) + L1_U + L1_Section | LDR ip, =(AP_ROM * L1_APMult) + L1_U + L1_Section ] ] ORR a4, ip, a1, LSL #20 ; not cacheable, as we don't want ADD v4, a4, #1024*1024 ; to fill the cache with rubbish STMIA lr, {a4, v4} ] MOV a4, a1 Push "a2,lr" [ LongDesc ADD a1, v3, #DRAMOffset_LL1PT-DRAMOffset_PageTables | MOV a1, v3 ] ADD a2, v3, #DRAMOffset_PageZero-DRAMOffset_PageTables BL SetTTBR Pull "a2,lr" BL Init_ARMarch ; corrupts a1 and ip MOV ip, a1 ; Remember architecture for later MOV a1, a4 MSREQ CPSR_c, #F32_bit+I32_bit+UND32_mode ; Recover the soft copy of the CR MOVEQ v5, sp ARM_read_control v5, NE [ CacheOff ORR v5, v5, #MMUC_M ; MMU on ORR v5, v5, #MMUC_R ; ROM mode enable | ORR v5, v5, #MMUC_W+MMUC_C+MMUC_M ; Write buffer, data cache, MMU on ORR v5, v5, #MMUC_R+MMUC_Z ; ROM mode enable, branch predict enable ] [ MEMM_Type = "VMSAv6" ORR v5, v5, #MMUC_XP ; Extended pages enabled (v6) BIC v5, v5, #MMUC_TRE+MMUC_AFE ; TEX remap, Access Flag disabled BIC v5, v5, #MMUC_EE+MMUC_TE+MMUC_VE ; Exceptions = nonvectored LE ARM [ SupportARMv6 :LAND: NoARMv7 ; Deal with a couple of ARM11 errata ARM_read_ID lr LDR a4, =&FFF0 AND lr, lr, a4 LDR a4, =&B760 TEQ lr, a4 BNE %FT01 ORR v5, v5, #MMUC_FI ; Erratum 716151: Disable hit-under-miss (enable fast interrupt mode) to prevent D-cache corruption from D-cache cleaning (the other workaround, ensuring a DSB exists inbetween the clean op and the next store access to that cache line, feels a bit heavy-handed since we'd probably have to disable IRQs to make it fully safe) ; Update the aux control register MRC p15, 0, lr, c1, c0, 1 ; Bit 28: Erratum 714068: Set PHD bit to prevent deadlock from PLI or I-cache invalidate by MVA ; Bit 31: Erratum 716151: Set FIO bit to override some of the behaviour implied by FI bit ORR lr, lr, #(1:SHL:28)+(1:SHL:31) MCR p15, 0, lr, c1, c0, 1 myISB ,lr TEQ pc, #0 ; Restore NE condition from Init_ARMarch 01 ] ] [ NoUnaligned ORR v5, v5, #MMUC_A ; Alignment exceptions on ] [ HiProcVecs ORR v5, v5, #MMUC_V ; High processor vectors enabled ] MMUon_instr ; Note, no RAM access until we've reached MMUon_nol1ptoverlap and the flat ; logical-physical mapping of the ROM has been removed (we can't guarantee that ; the RAM mapping hasn't been clobbered, and SP is currently bogus). 
ARM_write_control v5 [ MEMM_Type = "VMSAv6" MOV lr, #0 myISB ,lr,,y ; Just in case ] MOVEQ sp, v5 MSREQ CPSR_c, #F32_bit+I32_bit+SVC32_mode [ MEMM_Type = "VMSAv6" CMP ip, #ARMvF BEQ %FT01 MCRNE ARM_config_cp,0,lr,ARMv4_cache_reg,C7 ; junk MMU-off contents of I-cache (works on ARMv3) B %FT02 01 MCREQ p15, 0, lr, c7, c5, 0 ; invalidate instruction cache MCREQ p15, 0, lr, c8, c7, 0 ; invalidate TLBs MCREQ p15, 0, lr, c7, c5, 6 ; invalidate branch predictor myISB ,lr,,y ; Ensure below branch works BLEQ HAL_InvalidateCache_ARMvF ; invalidate data cache (and instruction+TLBs again!) 02 | MOV lr, #0 ; junk MMU-off contents of I-cache MCR ARM_config_cp,0,lr,ARMv4_cache_reg,C7 ; (works on ARMv3) ] ; MMU now on. Need to jump to logical copy of ourselves. Complication arises if our ; physical address overlaps our logical address - in that case we need to map ; in another disjoint copy of ourselves and branch to that first, then restore the ; original two sections. ADRL a4, RISCOS_Header LDR ip, =RISCOS_Header SUB ip, ip, a4 ADR a4, MMU_activation_zone [ LongDesc MOV a4, a4, LSR #21 MOV a4, a4, LSL #21 ; a4 = base of scrambled region | MOV a4, a4, LSR #20 MOV a4, a4, LSL #20 ; a4 = base of scrambled region ] ADD v4, a4, #2*1024*1024 ; v4 = top of scrambled region SUB v4, v4, #1 ; (inclusive, in case wrapped to 0) ADR v5, MMUon_resume ADD v5, v5, ip ; v5 = virtual address of MMUon_resume CMP v5, a4 BLO MMUon_nooverlap CMP v5, v4 BHI MMUon_nooverlap [ LongDesc ASSERT ROM > 4*1024*1024 ; Oh dear. We know the ROM lives high up, so we'll mangle 00200000-003FFFFF. ; But as we're overlapping the ROM, we know we're not overlapping the page tables. LDR lr, =LL2PT ; accessing the L2PT virtually now ; Use IP+SP as temp ORR ip, a4, #LL12_Block + LLAttr_Nrm_NC + LL_Page_LowAttr_AP2 ; Non-cacheable, read-only ORR ip, ip, #LL_Page_LowAttr_AF MOV sp, #0 LDRD v7, [lr, #8] STRD ip, [lr, #8] myDSB ,ip ; there shouldn't have been anything at the target, so DSB+ISB is all that's needed for synchronisation myISB ,ip,,y RSB ip, a4, #&00200000 ADD pc, pc, ip NOP MMUon_overlapresume ; now executing from 00200000 ADD ip, lr, a4, LSR #18 STRD a1, [ip] ; restore original set of mappings BL Init_PageTablesChanged MOV a1, v7 ; arrange for code below MOV a2, v8 ; to restore this area instead MOV a3, #1 | ASSERT ROM > 3*1024*1024 ; Oh dear. We know the ROM lives high up, so we'll mangle 00100000-002FFFFF. ; But as we're overlapping the ROM, we know we're not overlapping the page tables. LDR lr, =L1PT ; accessing the L1PT virtually now [ MEMM_Type = "VMSAv6" LDR ip, =(AP_ROM * L1_APMult) + L1_Section | [ ARM6support LDR ip, =(AP_None * L1_APMult) + L1_U + L1_Section | LDR ip, =(AP_ROM * L1_APMult) + L1_U + L1_Section ] ] ORR v6, a4, ip ADD ip, v6, #1024*1024 LDMIB lr, {v7, v8} ; sections 1 and 2 STMIB lr, {v6, ip} [ MEMM_Type = "VMSAv6" myDSB ,ip ; there shouldn't have been anything at the target, so DSB+ISB is all that's needed for synchronisation myISB ,ip,,y ] RSB ip, a4, #&00100000 ADD pc, pc, ip NOP MMUon_overlapresume ; now executing from 00100000 ADD ip, lr, a4, LSR #18 STMIA ip, {a2, a3} ; restore original set of mappings BL Init_PageTablesChanged MOV a2, v7 ; arrange for code below MOV a3, v8 ; to restore section 1+2 instead MOV a1, #1 ] ; Jump to the final logical mapping of ROM MMUon_nooverlap ADRL lr, RISCOS_Header LDR ip, =RISCOS_Header SUB ip, ip, lr ADD pc, pc, ip NOP MMUon_resume ; Repair the page table mapping that we temporarily modified. 
; But what if the logical address of the page tables is at the physical address of the code? (i.e. our temporary modification is blocking access to the page tables) ; Then we have to access it via PhysicalAccess instead. [ LongDesc LDR lr, =LL2PT | LDR lr, =L1PT ] CMP lr, a4 BLO MMUon_nol1ptoverlap CMP lr, v4 BHI MMUon_nol1ptoverlap ; PhysicalAccess points to the megabyte containing the L1PT. Find the L1PT within it. LDR lr, =PhysicalAccess [ LongDesc MOV v6, v3, LSL #11 ORR lr, lr, v6, LSR #11 MMUon_nol1ptoverlap ADD lr, lr, a3, LSL #3 STRD a1, [lr] | MOV v6, v3, LSL #12 ORR lr, lr, v6, LSR #12 MMUon_nol1ptoverlap ADD lr, lr, a1, LSL #2 STMIA lr, {a2, a3} ] BL Init_PageTablesChanged ; The MMU is now on. Wahey. Let's get allocating. ; ; Input: ; v3 = phys addr of DRAMOffset_PageTables ; R13_und = MMU control register soft copy (for ARMv3) ; ; All other registers are set up from scratch LDR sp, =ScratchSpace + ScratchSpaceSize - 4*3 ; 3 items already on stack :) LDR a1, =ZeroPage ADD lr, v3, #DRAMOffset_PageZero-DRAMOffset_PageTables ; lr = PhysAddr of zero page LDR v1, [a1, #InitUsedBlock] ; turn this from Phys to Log SUB v1, v1, lr ADD v1, v1, a1 STR v1, [a1, #InitUsedBlock] LDR v2, [a1, #InitUsedEnd] ; Store the logical address of the HAL descriptor LDR a2, [sp, #8] STR a2, [a1, #HAL_Descriptor] MOV v3, #0 ; "MMU is on" signal BL ARM_Analyse ChangedProcVecs a1 [ LongDesc ASSERT L1_Fault = LL_Fault ] MOV a1, #L1_Fault BL ReleasePhysicalAddress LDR a1, =HALWorkspace LDR a2, =ZeroPage LDR a3, [a2, #HAL_WsSize] [ ZeroPage <> 0 MOV a2, #0 ] BL memset LDR a2, =ZeroPage LDR a1, =IOLimit STR a1, [a2, #IOAllocLimit] LDR a1, [a2, #SoftCamMapSize] RSB a1, a1, #CAMTop ; Start of CAM [ LongDesc BFC a1, #0, #21 ; Round down to 2MB for IO start | MOV a1, a1, LSR #20 ; Round down to 1MB for IO start MOV a1, a1, LSL #20 ] STR a1, [a2, #IOAllocPtr] STR a1, [a2, #IOAllocTop] BL SetUpHALEntryTable ; Initialise the HAL. Due to its memory claiming we need to get our v1 and v2 values ; into workspace and out again around it. LDR a1, =ZeroPage STR v1, [a1, #InitUsedBlock] STR v2, [a1, #InitUsedEnd] LDR a1, =RISCOS_Header LDR a2, =HALWorkspaceNCNB AddressHAL CallHAL HAL_Init DebugTX "HAL initialised" MOV a1, #64 ; Old limit prior to OMAP3 port CallHAL HAL_IRQMax CMP a1, #MaxInterrupts MOVHI a1, #MaxInterrupts ; Avoid catastrophic failure if someone forgot to increase MaxInterrupts LDR a2, =ZeroPage STR a1, [a2, #IRQMax] LDR v1, [a2, #InitUsedBlock] LDR v2, [a2, #InitUsedEnd] ; Start timer zero, at 100 ticks per second MOV a1, #0 CallHAL HAL_TimerGranularity MOV a2, a1 MOV a1, #100 BL __rt_udiv MOV a2, a1 MOV a1, #0 CallHAL HAL_TimerSetPeriod DebugTX "IICInit" BL IICInit ; Remember some stuff that's about to get zapped LDR ip, =ZeroPage LDR v4, [ip, #ROMPhysAddr] LDR v5, [ip, #RAMLIMIT] LDR v7, [ip, #MaxCamEntry] LDR v8, [ip, #IRQMax] LDR a1, [ip, #HAL_StartFlags] TST a1, #OSStartFlag_RAMCleared BLEQ ClearWkspRAM ; Only clear the memory if the HAL didn't ; Put it back LDR a1, =ZeroPage STR v4, [a1, #ROMPhysAddr] STR v5, [a1, #RAMLIMIT] STR v7, [a1, #MaxCamEntry] STR v8, [a1, #IRQMax] MSR CPSR_c, #F32_bit + UND32_mode ; retrieve the MMU control register soft copy STR sp, [a1, #MMUControlSoftCopy] MSR CPSR_c, #F32_bit + SVC32_mode MOV v8, a1 ; Calculate CPU feature flags BL ReadCPUFeatures DebugTX "HAL_CleanerSpace" ; Set up the data cache cleaner space if necessary (eg. for StrongARM core) MOV a1, #-1 CallHAL HAL_CleanerSpace CMP a1, #-1 ;-1 means none needed (HAL only knows this if for specific ARM core eg. 
system-on-chip) BEQ %FT20 LDR a2, =DCacheCleanAddress LDR a3, =AreaFlags_DCacheClean ASSERT DCacheCleanSize = 4*&10000 ; 64k of physical space used 4 times (allows large page mapping) MOV a4, #&10000 MOV ip, #4 SUB sp, sp, #5*4 ;room for a1-a4,ip 10 STMIA sp, {a1-a4, ip} BL Init_MapIn LDMIA sp, {a1-a4, ip} SUBS ip, ip, #1 ADD a2, a2, #&10000 BNE %BT10 ADD sp, sp, #5*4 20 ; Decompress the ROM LDR a1, =RISCOS_Header LDR a2, [a1, #OSHdr_DecompressHdr] CMP a2, #0 BEQ %FT30 ADD ip, a1, a2 ASSERT OSDecompHdr_WSSize = 0 ASSERT OSDecompHdr_Code = 4 LDMIA ip, {a3-a4} ADRL a2, SyncCodeAreas CMP a3, #0 ; Any workspace required? ADD a4, a4, ip [ DebugHALTX BNE %FT25 DebugTX "Decompressing ROM, no workspace required" [ NoARMv5 MOV lr, pc MOV pc, a4 | BLX a4 ] DebugTX "Decompression complete" B %FT27 25 | ADREQ lr, %FT27 MOVEQ pc, a4 ] Push "a1-a4,v1-v2,v5-v7" ; Allocate workspace for decompression code ; Workspace is located at a 4MB-aligned log addr, and is a multiple of 1MB in ; size. This greatly simplifies the code required to free the workspace, since ; we can guarantee it will have been section-mapped, and won't hit any ; partially-allocated L2PT blocks (where 4 L1PT entries point to subsections of ; the same L2PT page) ; This means all we need to do to free the workspace is zap the L1PT entries ; and rollback v1 & v2 ; Note: This is effectively a MB-aligned version of Init_MapInRAM ROMDecompWSAddr * 4<<20 [ LongDesc ROMDecompAlign * 21 ; 2MB aligned for long descriptor format | ROMDecompAlign * 20 ] DebugTX "Allocating decompression workspace" LDR v5, =(1<>18) MOV a4, v1, LSR #18 265 STRD a1, [a3], #8 SUBS a4, a4, #8 BGE %BT265 | LDR a1, =L1PT+(ROMDecompWSAddr>>18) MOV a2, #0 MOV a3, v1, LSR #18 BL memset ] ; Pop our registers and we're done Pull "v1-v2,v5-v7" DebugTX "ROM decompression workspace freed" 27 ; Now that the ROM is decompressed we need to change the ROM page mapping to ; read-only. The easiest way to do this is to make another call to Init_MapIn. ; But before we can do that we need to work out if the HAL+OS are contiguous in ; physical space, so that we remap the correct area. 
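; The contiguity test performed below amounts to this hedged C sketch
; (RISCOS_LogToPhys and Init_MapIn are the routines actually used; the other
; names are invented for illustration):
;
;   uint32_t hal_phys = RISCOS_LogToPhys(ROM);
;   uint32_t os_phys  = RISCOS_LogToPhys(RISCOS_Header);
;   if (hal_phys + hal_size == os_phys)
;       remap(hal_phys, ROM, OSROM_ImageSize*1024, OSAP_ROM);    /* HAL+OS in one go */
;   else
;       remap(os_phys, RISCOS_Header, os_image_size, OSAP_ROM);  /* just the OS */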
LDR a1, =RISCOS_Header BL RISCOS_LogToPhys Push "a1" LDR a1, =ROM BL RISCOS_LogToPhys Pull "a2" ; a1 = HAL phys, a2 = OS phys LDR a3, =ZeroPage LDR a3, [a3, #HAL_Descriptor] LDR a3, [a3, #HALDesc_Size] ADD a3, a3, a1 ; a3 = phys addr of OS if contiguous CMP a2, a3 ; Contiguous mapping, remap combined HAL+OS MOVEQ a2, #ROM MOVEQ a4, #OSROM_ImageSize*1024 ; Discontiguous mapping, only remap OS MOVNE a1, a2 LDRNE a2, =RISCOS_Header LDRNE a4, [a2, #OSHdr_ImageSize] MOV a3, #OSAP_ROM BL Init_MapIn ; Flush & invalidate cache/TLB to ensure everything respects the new page access ; Putting a flush here also means the decompression code doesn't have to worry ; about IMB'ing the decompressed ROM ARMop MMU_Changing ; Perform full clean+invalidate to ensure any lingering cache lines for the decompression workspace are gone DebugTX "ROM access changed to read-only" 30 ; Allocate the CAM LDR a3, [v8, #SoftCamMapSize] LDR a2, =AreaFlags_CAM RSB a1, a3, #CAMTop STR a1, [v8, #CamEntriesPointer] BL Init_MapInRAM ; Allocate the supervisor stack LDR a1, =SVCStackAddress LDR a2, =AreaFlags_SVCStack LDR a3, =SVCStackSize BL Init_MapInRAM ; Allocate the interrupt stack LDR a1, =IRQStackAddress LDR a2, =AreaFlags_IRQStack LDR a3, =IRQStackSize BL Init_MapInRAM ; Allocate the abort stack LDR a1, =ABTStackAddress LDR a2, =AreaFlags_ABTStack LDR a3, =ABTStackSize BL Init_MapInRAM ; Allocate the undefined stack LDR a1, =UNDStackAddress LDR a2, =AreaFlags_UNDStack LDR a3, =UNDStackSize BL Init_MapInRAM ; Allocate the system heap (just 32K for now - will grow as needed) LDR a1, =SysHeapAddress LDR a2, =AreaFlags_SysHeap LDR a3, =32*1024 BL Init_MapInRAM_Clear ; Allocate the cursor/system/sound block - first the cached bit LDR a1, =CursorChunkAddress LDR a2, =AreaFlags_CursorChunkCacheable LDR a3, =SoundDMABuffers - CursorChunkAddress BL Init_MapInRAM_DMA ; then the uncached bit LDR a1, =SoundDMABuffers LDR a2, =AreaFlags_CursorChunk LDR a3, =?SoundDMABuffers BL Init_MapInRAM_DMA LDR a1, =KbuffsBaseAddress LDR a2, =AreaFlags_Kbuffs LDR a3, =(KbuffsSize + &FFF) :AND: &FFFFF000 ;(round to 4k) BL Init_MapInRAM_Clear [ HiProcVecs ; Map in DebuggerSpace LDR a1, =DebuggerSpace LDR a2, =AreaFlags_DebuggerSpace LDR a3, =(DebuggerSpace_Size + &FFF) :AND: &FFFFF000 BL Init_MapInRAM_Clear ] [ MinorL2PThack ; Allocate backing L2PT for application space ; Note that ranges must be 4M aligned, as AllocateL2PT only does individual ; (1M) sections, rather than 4 at a time, corresponding to a L2PT page. The ; following space is available for dynamic areas, and ChangeDyn.s will get ; upset if it sees only some out of a set of 4 section entries pointing to the ; L2PT page. MOV a1, #0 MOV a2, #AplWorkMaxSize ; Not quite right, but the whole thing's wrong anyway ASSERT AplWorkMaxSize :MOD: (4*1024*1024) = 0 BL AllocateL2PT ; And for the system heap. 
Sigh LDR a1, =SysHeapAddress LDR a2, =SysHeapMaxSize ASSERT SysHeapAddress :MOD: (4*1024*1024) = 0 ASSERT SysHeapMaxSize :MOD: (4*1024*1024) = 0 BL AllocateL2PT ] STR v2, [v8, #InitUsedEnd] ; Put InitDMABlock back into PhysRamTable Push "v1-v7" ASSERT InitDMAOffset = InitDMABlock+8 ADD v1, v8, #InitDMABlock LDMIA v1, {v1-v3} ADD v3, v3, #PhysRamTable ADD v3, v3, v8 ; Work out whether the block was removed or merely shrunk LDMDB v3, {v4-v5} ADD v6, v1, v2, LSR #12 ADD v7, v4, v5, LSR #12 STMDB v3, {v1-v2} TEQ v6, v7 BEQ %FT40 ; End addresses match, it was shrunk 35 LDMIA v3, {v1-v2} ; Shuffle following entries down STMIA v3!, {v4-v5} MOV v4, v1 MOVS v5, v2 BNE %BT35 40 Pull "v1-v7" MSR CPSR_c, #F32_bit+I32_bit+IRQ32_mode LDR sp, =IRQSTK MSR CPSR_c, #F32_bit+I32_bit+ABT32_mode LDR sp, =ABTSTK MSR CPSR_c, #F32_bit+I32_bit+UND32_mode LDR sp, =UNDSTK MSR CPSR_c, #F32_bit+SVC2632 LDR sp, =SVCSTK BL ConstructCAMfromPageTables MOV a1, #4096 STR a1, [v8, #Page_Size] BL CountPageTablePages BL Count32bitPages B Continue_after_HALInit LTORG [ MEMM_Type = "VMSAv6" HAL_InvalidateCache_ARMvF ; Cache invalidation for ARMs with multiple cache levels, used before ARMop initialisation ; This function gets called before we have a stack set up, so we've got to preserve as many registers as possible ; The only register we can safely change is ip, but we can switch into FIQ mode with interrupts disabled and use the banked registers there MRS ip, CPSR MSR CPSR_c, #F32_bit+I32_bit+FIQ32_mode MOV r9, #0 MCR p15, 0, r9, c7, c5, 0 ; invalidate instruction cache MCR p15, 0, r9, c8, c7, 0 ; invalidate TLBs MCR p15, 0, r9, c7, c5, 6 ; invalidate branch target predictor myDSB ,r9,,y ; Wait for completion myISB ,r9,,y ; Check whether we're ARMv7 (and thus multi-level cache) or ARMv6 (and thus single-level cache) MRC p15, 0, r8, c0, c0, 1 TST r8, #&80000000 ; EQ=ARMv6, NE=ARMv7 BEQ %FT80 ; This is basically the same algorithm as the MaintainDataCache_WB_CR7_Lx macro, but tweaked to use less registers and to read from CP15 directly MRC p15, 1, r8, c0, c0, 1 ; Read CLIDR to r8 TST r8, #&07000000 BEQ %FT50 MOV r11, #0 ; Current cache level 10 ; Loop1 ADD r10, r11, r11, LSR #1 ; Work out 3 x cachelevel MOV r9, r8, LSR r10 ; bottom 3 bits are the Cache type for this level AND r9, r9, #7 ; get those 3 bits alone CMP r9, #2 BLT %FT40 ; no cache or only instruction cache at this level MCR p15, 2, r11, c0, c0, 0 ; write CSSELR from r11 ISB MRC p15, 1, r9, c0, c0, 0 ; read current CSSIDR to r9 AND r10, r9, #CCSIDR_LineSize_mask ; extract the line length field ADD r10, r10, #4 ; add 4 for the line length offset (log2 16 bytes) LDR r8, =CCSIDR_Associativity_mask:SHR:CCSIDR_Associativity_pos AND r8, r8, r9, LSR #CCSIDR_Associativity_pos ; r8 is the max number on the way size (right aligned) CLZ r13, r8 ; r13 is the bit position of the way size increment LDR r12, =CCSIDR_NumSets_mask:SHR:CCSIDR_NumSets_pos AND r12, r12, r9, LSR #CCSIDR_NumSets_pos ; r12 is the max number of the index size (right aligned) 20 ; Loop2 MOV r9, r12 ; r9 working copy of the max index size (right aligned) 30 ; Loop3 ORR r14, r11, r8, LSL r13 ; factor in the way number and cache number into r14 ORR r14, r14, r9, LSL r10 ; factor in the index number DCISW r14 ; Invalidate SUBS r9, r9, #1 ; decrement the index BGE %BT30 SUBS r8, r8, #1 ; decrement the way number BGE %BT20 DSB ; Cortex-A7 errata 814220: DSB required when changing cache levels when using set/way operations. This also counts as our end-of-maintenance DSB. 
        MRC     p15, 1, r8, c0, c0, 1
40 ; Skip
        ADD     r11, r11, #2
        AND     r14, r8, #&07000000
        CMP     r14, r11, LSL #23
        BGT     %BT10
50 ; Finished
; Wait for clean to complete
        MOV     r8, #0
        MCR     p15, 0, r8, c7, c5, 0   ; invalidate instruction cache
        MCR     p15, 0, r8, c8, c7, 0   ; invalidate TLBs
        MCR     p15, 0, r8, c7, c5, 6   ; invalidate branch target predictor
        myDSB   ,r8,,y                  ; Wait for completion
        myISB   ,r8,,y
; All caches clean; switch back to SVC, then recover the stored PSR from ip
; (although we can be fairly certain we started in SVC anyway)
        MSR     CPSR_c, #F32_bit+I32_bit+SVC32_mode
        MSR     CPSR_cxsf, ip
        MOV     pc, lr
80 ; ARMv6 case
        MCR     ARM_config_cp,0,r9,ARMv4_cache_reg,C7 ; ARMv3-ARMv6 I+D cache flush
        B       %BT50
 ] ; MEMM_Type = "VMSAv6"

CountPageTablePages ROUT
        Entry
        LDR     a1, =ZeroPage
        LDR     a2, [a1, #CamEntriesPointer]
        LDR     a3, [a1, #MaxCamEntry]
 [ ZeroPage <> 0
        MOV     a1, #0
 ]
        ADD     a3, a3, #1
        ADD     a4, a2, a3, LSL #CAM_EntrySizeLog2
 [ LongDesc
        LDR     lr, =LL3PT
10
        LDR     ip, [a4, #CAM_LogAddr-CAM_EntrySize]!
        SUB     ip, ip, lr
        CMP     ip, #8:SHL:20
        ADDLO   a1, a1, #4096
        TEQ     a4, a2
        BNE     %BT10
 |
        ASSERT  (L2PT :AND: &3FFFFF) = 0
        LDR     lr, =L2PT :SHR: 22
10
        LDR     ip, [a4, #CAM_LogAddr-CAM_EntrySize]!
        TEQ     lr, ip, LSR #22
        ADDEQ   a1, a1, #4096
        TEQ     a4, a2
        BNE     %BT10
 ]
        LDR     a2, =ZeroPage
        STR     a1, [a2, #LxPTUsed]
        EXIT

Count32bitPages ROUT
        LDR     a1, =ZeroPage
        LDR     a2, [a1, #MaxCamEntry]
 [ LongDesc
; ~64bit RAM addresses supported, examine PhysRamTable to determine
; the last page number with a 32bit address
        Entry
        MOV     a3, #-1
        ADD     a4, a1, #PhysRamTable
10
        LDMIA   a4!, {ip, lr}
        CMP     ip, #1:SHL:20           ; Address below 4G?
        ADDLO   a3, a3, lr, LSR #12     ; Count it up
        CMPLO   a3, a2                  ; Don't overrun the table
        BLO     %BT10
        STR     a3, [a1, #MaxCamEntry32]
; Update ProcessorFlags
        CMP     a2, a3
        LDRNE   a2, [a1, #ProcessorFlags]
        ORRNE   a2, a2, #CPUFlag_HighRAM
        STRNE   a2, [a1, #ProcessorFlags]
        EXIT
 |
; No 64bit support, so all pages must have 32bit addresses
        STR     a2, [a1, #MaxCamEntry32]
        MOV     pc, lr
 ]

; int PhysAddrToPageNo(uint64_t addr)
;
; Converts a physical address to the page number of the page containing it.
; Returns -1 if address is not in RAM.

PhysAddrToPageNo
; Convert address to 4K addressing
        MOV     a1, a1, LSR #12
        ORR     a1, a1, a2, LSL #20
        MOV     a4, #0
        LDR     ip, =ZeroPage + PhysRamTable
10
        LDMIA   ip!, {a2, a3}           ; get phys addr, size
        MOVS    a3, a3, LSR #12         ; end of list? (size=0)
        BEQ     %FT90                   ; then it ain't RAM
        SUB     a2, a1, a2              ; a2 = amount into this bank
        CMP     a2, a3                  ; if more than size
        ADDHS   a4, a4, a3              ; increase counter by size of bank
        BHS     %BT10                   ; and move to next
        ADD     a1, a4, a2              ; add offset to counter
        MOV     pc, lr
90
        MOV     a1, #-1
        MOV     pc, lr

; A routine to construct the soft CAM from the page tables. This is used
; after a soft reset, and also on a hard reset as it's an easy way of
; clearing up after the recursive page table allocation.

        ROUT
ConstructCAMfromPageTables
        Push    "v1-v8, lr"
        LDR     a1, =ZeroPage
        LDR     a2, [a1, #MaxCamEntry]
        LDR     v1, [a1, #CamEntriesPointer] ; v1 -> CAM (for whole routine)
        ADD     a2, a2, #1
        ADD     a2, v1, a2, LSL #CAM_EntrySizeLog2
        LDR     a3, =DuffEntry          ; Clear the whole CAM, from
        MOV     a4, #AreaFlags_Duff     ; the top down.
        ASSERT  CAM_LogAddr=0
        ASSERT  CAM_PageFlags=4
        ASSERT  CAM_PMP=8
        ASSERT  CAM_PMPIndex=12
        ASSERT  CAM_EntrySize=16
        MOV     v2, #0
        MOV     v3, #-1
10
        STMDB   a2!, {a3, a4, v2, v3}
        CMP     a2, v1
        BHI     %BT10

        MOV     v2, #0                  ; v2 = logical address
30
        MOV     a1, v2
        BL      LoadAndDecodeL1Entry    ; a1,a2 = phys addr, a3 = page flags/type, a4 = page size (bytes)
        CMP     a3, #-2                 ; Only care about page table pointers
        BEQ     %FT40
        ADDS    v2, v2, #&00100000
        BCC     %BT30
        Pull    "v1-v8, pc"
40
        MOV     a1, v2
        BL      LoadAndDecodeL2Entry    ; a1,a2 = phys addr, a3 = flags (-1 if fault), a4 = page size (bytes)
        CMP     a3, #-1                 ; move to next page if fault
        BEQ     %FT80
        SUBS    a4, a4, #4096           ; large pages get bits 12-15 from the virtual address
        ANDNE   lr, v2, a4
        ORR     v6, a3, #PageFlags_Unavailable
        ORRNE   a1, a1, lr
        BL      PhysAddrToPageNo        ; -1 if unknown page
        ADDS    a1, v1, a1, LSL #CAM_EntrySizeLog2 ; a1 -> CAM entry
        ASSERT  CAM_LogAddr=0
        ASSERT  CAM_PageFlags=4
        STMCCIA a1, {v2, v6}            ; store logical address, PPL
80
        ADD     v2, v2, #&00001000
        TST     v2, #&000FF000
        BNE     %BT40
        TEQ     v2, #0                  ; yuck (could use C from ADDS but TST corrupts C
        BNE     %BT30                   ; because of big constant)
        Pull    "v1-v8, pc"

; Allocate a physical page from DRAM
;
; On entry:
;    v1 -> current entry in PhysRamTable
;    v2 -> end of last used physical page (page units)
; On exit:
;    a1 -> next free page (assumed 32bit address)
;    v1, v2 updated
;
; No out of memory check...

Init_ClaimPhysicalPage
        MOV     a1, v2
        LDMIA   v1, {a2, a3}
        ADD     a2, a2, a3, LSR #12     ; a2 = end of this bank
        CMP     v2, a2                  ; advance v2 to next bank if
        LDRHS   a1, [v1, #8]!           ; this bank is fully used
        ADD     v2, a1, #1
        MOV     a1, a1, LSL #12         ; Convert to byte address
        MOV     pc, lr

; Allocate and map in some RAM.
;
; On entry:
;    a1 = logical address
;    a2 = access permissions (see Init_MapIn)
;    a3 = length (4K multiple)
;    v1 -> current entry in PhysRamTable
;    v2 = next physical address (page units)
;    v3 -> L1PT
;
; On exit:
;    a1 -> physical address of start of RAM (deduce the rest from PhysRamTable)
;
; No out of memory check...

Init_MapInRAM ROUT
        Push    "v4-v8,lr"
        MOV     v8, #-1
        MOV     v5, a3                  ; v5 = amount of memory required
        MOV     v6, a1                  ; v6 = logical address
        MOV     v7, a2                  ; v7 = access permissions
10
        LDMIA   v1, {v4, ip}            ; v4 = addr of bank, ip = len+flags
        MOV     ip, ip, LSR #12
        SUB     v4, v2, v4              ; v4 = amount of bank used
        RSBS    v4, v4, ip              ; v4 = amount of bank left (pages)
        LDREQ   v2, [v1, #8]!           ; move to next bank if 0 left
        BEQ     %BT10
        CMP     v8, #-1                 ; is this the first bank?
        MOVEQ   v8, v2                  ; remember it
        CMP     v4, v5, LSR #12         ; sufficient in this bank?
        MOVHS   a4, v5
        MOVLO   a4, v4, LSL #12         ; a4 = amount to take
        MOV     a1, v2, LSL #12         ; set up parameters for MapIn call
        MOV     a2, v6                  ; then move globals (in case MapIn
        MOV     a3, v7                  ; needs to allocate for L2PT)
        ADD     v2, v2, a4, LSR #12     ; advance physaddr
        SUB     v5, v5, a4              ; decrease wanted
        ADD     v6, v6, a4              ; advance log address pointer
        BL      Init_MapIn              ; map in the RAM
        TEQ     v5, #0                  ; more memory still required?
        BNE     %BT10
        MOV     a1, v8
        Pull    "v4-v8,pc"

Init_MapInRAM_Clear ROUT                ; same as Init_MapInRAM but also
        Push    "a1,a3,v5,lr"           ; clears the mapped in result
        BL      Init_MapInRAM
        MOV     v5, a1
        Pull    "a1,a3"
        MOV     a2, #0
        BL      memset
        MOV     a1, v5
        Pull    "v5,pc"

; Allocate and map a physically contiguous chunk of some DMAable RAM.
; ; On entry: ; a1 = logical address ; a2 = access permissions (see Init_MapIn) ; a3 = length (4K multiple) ; v1 -> current entry in PhysRamTable ; v2 = next physical address (page units) ; v3 -> L1PT ; ; On exit: ; a1 -> physical address of start of RAM (deduce the rest from PhysRamTable) ; ; Use this routine with caution - correct total amount of required DMA memory ; must have been calculated beforehand and stashed in InitDMABlock Init_MapInRAM_DMA ROUT Push "a1,a3,v4-v5,ip,lr" TEQ v3, #0 ; MMU on? LDREQ v4, =ZeroPage ; get workspace directly ADDNE v4, v3, #DRAMOffset_PageZero-DRAMOffset_PageTables ; deduce from L1PT LDR v5, [v4, #InitDMAEnd] ADD lr, v5, a3 ; claim the RAM STR lr, [v4, #InitDMAEnd] MOV a4, a3 MOV a3, a2 MOV a2, a1 MOV a1, v5 BL Init_MapIn ; map it in ; DMA regions won't get cleared by ClearWkspRam, so do it manually ; Could potentially skip this if the HAL says RAM is already clear, but ; for now do it anyway (especially since startup flags haven't been set ; when we're first called) Pull "a1,a3" TEQ v3, #0 MOVNE a1, v5 MOV a2, #0 BL memset MOV a1, v5 Pull "v4-v5,ip,pc" ; Map a range of physical addresses to a range of logical addresses. ; ; On entry: ; a1 = physical address (32bit) ; a2 = logical address ; a3 = DA flags ; a4 = area size (4K multiple) ; v1 -> current entry in PhysRamTable ; v2 = last used physical address (page units) ; v3 -> L1PT (or 0 if MMU on) Init_MapIn ROUT Entry "v4-v7" MOV v4, a1 ; v4 = physaddr MOV v5, a2 ; v5 = logaddr MOV v6, a3 ; v6 = page flags MOV v7, a4 ; v7 = area size ; Set up a2-a4 for the Get*PTE functions TEQ v3, #0 LDREQ a3, =ZeroPage ADDNE a3, v3, #DRAMOffset_PageZero-DRAMOffset_PageTables MOV a2, v6 LDR a4, [a3, #MMU_PCBTrans] LDR a3, [a3, #MMU_PPLTrans] ORR lr, v4, v5 ; OR together, physaddr, logaddr ORR lr, lr, v7 ; and size. [ LongDesc MOVS ip, lr, LSL #11 ; If all bottom 21 bits 0 BEQ %FT50 ; it's L2PT block mapped | MOVS ip, lr, LSL #12 ; If all bottom 20 bits 0 BEQ %FT50 ; it's section mapped ] MOV a1, #0 ; We don't want the address in the result MOVS ip, lr, LSL #16 ; If bottom 16 bits not all 0 ADR lr, %FT10 BNE Get4KPTE ; then small pages (4K) BL Get64KPTE ; else large pages (64K) 10 [ LongDesc ORR v6, a1, a2 ; v6 = high & low attributes | MOV v6, a1 ; v6 = access permissions ] 20 MOV a1, v4 MOV a2, v5 MOV a3, v6 BL Init_MapInPage ; Loop through mapping in each ADD v4, v4, #4096 ; page in turn ADD v5, v5, #4096 SUBS v7, v7, #4096 BNE %BT20 EXIT [ LongDesc 50 BL Get2MPTE MOVS ip, v3 ; is MMU on? LDREQ ip, =LL2PT ; then use virtual address ADD a3, ip, v5, LSR #18 ; a2 -> L2PT entry 70 STRD a1, [a3], #8 ; And store in L2PT ADD a1, a1, #2*1024*1024 ; Advance two megabytes SUBS v7, v7, #2*1024*1024 ; and loop BNE %BT70 EXIT | 50 BL Get1MPTE MOVS ip, v3 ; is MMU on? LDREQ ip, =L1PT ; then use virtual address ADD a2, ip, v5, LSR #18 ; a2 -> L1PT entry 70 STR a1, [a2], #4 ; And store in L1PT ADD a1, a1, #1024*1024 ; Advance one megabyte SUBS v7, v7, #1024*1024 ; and loop BNE %BT70 EXIT ] ; Map a logical page to a physical page, allocating L2PT as necessary. 
; ; On entry: ; a1 = physical address (32bit) ; a2 = logical address [ LongDesc ; a3 = high & low page attributes merged into one word | ; a3 = access permissions + C + B bits + size (all non-address bits, of appropriate type) ] ; v1 -> current entry in PhysRamTable ; v2 = last used physical address (page units) ; v3 -> L1PT (or 0 if MMU on) ; On exit: ; a1 = logical address ; a2-a4, ip corrupt ; v1, v2 updated ; Init_MapInPage ROUT Entry "v4-v6" MOV v4, a1 ; v4 = physical address MOV v5, a2 ; v5 = logical address MOV v6, a3 ; v6 = access permissions MOV a1, v5 MOV a2, #4096 BL AllocateL2PT TEQ v3, #0 ; if MMU on, access L2PT virtually... [ LongDesc LDREQ a1, =LL3PT MOVEQ ip, v5 ; index using whole address BEQ %FT40 MOV ip, v5, LSR #21 ADD a1, v3, ip, LSL #3 LDRD a1, [a1] ; a1 = level two descriptor BFC a1, #0, #LL_LowAddr_Start ; a1 -> L3PT tables for this section MOV ip, v5, LSL #32-(12+9) MOV ip, ip, LSR #32-(12+9) ; extract L3 table index bits 40 MOV a3, v4 BFI a3, v6, #0, #LL_LowAddr_Start ; Add low attributes MOV a4, v6 BFC a4, #0, #LL_HighAttr_Start ; High attributes ADD a1, a1, ip, LSR #9 STRD a3, [a1] ; update L3PT entry MOV a1, v5 EXIT | LDREQ a1, =L2PT ; a1 -> L2PT virtual address MOVEQ ip, v5 ; index using whole address BEQ %FT40 MOV ip, v5, LSR #20 LDR a1, [v3, ip, LSL #2] ; a1 = level one descriptor MOV a1, a1, LSR #10 MOV a1, a1, LSL #10 ; a1 -> L2PT tables for this section AND ip, v5, #&000FF000 ; extract L2 table index bits 40 AND lr, v6, #3 TEQ lr, #L2_LargePage ; strip out surplus address bits from BICEQ v4, v4, #&0000F000 ; large page descriptors ORR lr, v4, v6 ; lr = value for L2PT entry STR lr, [a1, ip, LSR #10] ; update L2PT entry MOV a1, v5 EXIT ] ; On entry: ; a1 = virtual address L2PT required for ; a2 = number of bytes of virtual space ; v1 -> current entry in PhysRamTable ; v2 = last used physical address (page units) ; v3 -> L1PT (or 0 if MMU on) ; On exit ; a1-a4,ip corrupt ; v1, v2 updated AllocateL2PT ROUT [ LongDesc Entry "v4-v8" MOV v8, a1, LSR #21 ; round base address down to 2M ADD lr, a1, a2 MOV v7, lr, LSR #21 TEQ lr, v7, LSL #21 ADDNE v7, v7, #1 ; round end address up to 2M MOVS v6, v3 LDREQ v6, =LL2PT ; v6->L2PT (whole routine) 05 ADD a3, v6, v8, LSL #3 ; L2PT contains 1 entry per 2MB LDRD a1, [a3] TEQ a1, #0 ; if non-zero, the L3PT has BNE %FT40 ; already been allocated ; With 8 bytes per entry, every 4KB L3PT covers 2MB of RAM, the same ; as the single L2PT entry covers. So no need to deal with partially ; used pages like the short descriptor case. BL Init_ClaimPhysicalPage ; Claim a page to put L3PT in MOV v4, a1 ; Need to zero the L3PT. Must do it before calling in MapInPage, as that may well ; want to put something in the thing we are clearing. If the MMU is off, no problem, ; but if the MMU is on, then the L3PT isn't accessible until we've called MapInPage. ; Solution is to use the AccessPhysicalAddress call. TEQ v3, #0 ; MMU on? 
; On entry:
;    a1 = virtual address L2PT required for
;    a2 = number of bytes of virtual space
;    v1 -> current entry in PhysRamTable
;    v2 = last used physical address (page units)
;    v3 -> L1PT (or 0 if MMU on)
; On exit:
;    a1-a4, ip corrupt
;    v1, v2 updated
AllocateL2PT ROUT
[ LongDesc
        Entry   "v4-v8"
        MOV     v8, a1, LSR #21         ; round base address down to 2M
        ADD     lr, a1, a2
        MOV     v7, lr, LSR #21
        TEQ     lr, v7, LSL #21
        ADDNE   v7, v7, #1              ; round end address up to 2M
        MOVS    v6, v3
        LDREQ   v6, =LL2PT              ; v6 -> L2PT (whole routine)
05
        ADD     a3, v6, v8, LSL #3      ; L2PT contains 1 entry per 2MB
        LDRD    a1, [a3]
        TEQ     a1, #0                  ; if non-zero, the L3PT has
        BNE     %FT40                   ; already been allocated
        ; With 8 bytes per entry, every 4KB L3PT covers 2MB of RAM, the same
        ; as the single L2PT entry covers. So no need to deal with partially
        ; used pages like the short descriptor case.
        BL      Init_ClaimPhysicalPage  ; Claim a page to put the L3PT in
        MOV     v4, a1
; Need to zero the L3PT. Must do it before calling Init_MapInPage, as that may
; well want to put something in the thing we are clearing. If the MMU is off,
; no problem, but if the MMU is on, then the L3PT isn't accessible until we've
; called Init_MapInPage. Solution is to use the AccessPhysicalAddress call.
        TEQ     v3, #0                  ; MMU on?
        MOVNE   a1, v4                  ; if not, just access v4
        LDREQ   a1, =OSAP_None+DynAreaFlags_NotCacheable ; if so, map in v4
        MOVEQ   a2, v4
        SUBEQ   sp, sp, #4
        MOVEQ   a3, #0
        MOVEQ   a4, sp
        BLEQ    AccessPhysicalAddress
        MOV     a2, #0
        MOV     a3, #4*1024
        BL      memset
        TEQ     v3, #0
        LDREQ   a1, [sp], #4
        BLEQ    ReleasePhysicalAddress
        ; Fill in the L2PT entry
        ORR     a1, v4, #LL12_Table
        MOV     a2, #0
        ADD     a3, v6, v8, LSL #3
        STRD    a1, [a3]
        ; Get the correct page table entry flags for Init_MapInPage
        TEQ     v3, #0
        LDREQ   a3, =ZeroPage
        ADDNE   a3, v3, #DRAMOffset_PageZero-DRAMOffset_PageTables
        LDR     a2, [a3, #PageTable_PageFlags]
        LDR     a4, [a3, #MMU_PCBTrans]
        LDR     a3, [a3, #MMU_PPLTrans]
        MOV     a1, #0
        BL      Get4KPTE
        ORR     a3, a1, a2
        MOV     a1, v4                  ; Map in the L3PT page itself
        LDR     a2, =LL3PT              ; (can't recurse, because L3PT
        ADD     a2, a2, v8, LSL #12     ; backing for L3PT is preallocated)
        BL      Init_MapInPage
40
        ADD     v8, v8, #1              ; go back until all
        CMP     v8, v7                  ; pages allocated
        BLO     %BT05
        EXIT
|
        Entry   "v4-v8"
        MOV     v8, a1, LSR #20         ; round base address down to 1M
        ADD     lr, a1, a2
        MOV     v7, lr, LSR #20
        TEQ     lr, v7, LSL #20
        ADDNE   v7, v7, #1              ; round end address up to 1M
        MOVS    v6, v3
        LDREQ   v6, =L1PT               ; v6 -> L1PT (whole routine)
05
        LDR     v5, [v6, v8, LSL #2]    ; L1PT contains 1 word per MB
        TEQ     v5, #0                  ; if non-zero, the L2PT has
                                        ; already been allocated
        BNE     %FT40
        BIC     lr, v8, #3              ; round down to 4M - each page
        ADD     lr, v6, lr, LSL #2      ; of L2PT maps to 4 sections
        LDMIA   lr, {a3,a4,v5,ip}       ; check if any are page mapped
        ASSERT  L1_Fault = 2_00 :LAND: L1_Page = 2_01 :LAND: L1_Section = 2_10
        TST     a3, #1
        TSTEQ   a4, #1
        TSTEQ   v5, #1
        TSTEQ   ip, #1
        BEQ     %FT20                   ; nothing page mapped - claim a page
        TST     a4, #1                  ; at least one of the sections is page mapped
        SUBNE   a3, a4, #1*1024         ; find out where it's pointing to and
        TST     v5, #1                  ; derive the corresponding address for our
        SUBNE   a3, v5, #2*1024         ; section
        TST     ip, #1
        SUBNE   a3, ip, #3*1024
        AND     lr, v8, #3
        ORR     a3, a3, lr, LSL #10
        STR     a3, [v6, v8, LSL #2]    ; fill in the L1PT entry
        B       %FT40                   ; no more to do
20
        BL      Init_ClaimPhysicalPage  ; Claim a page to put the L2PT in
        MOV     v4, a1
; Need to zero the L2PT. Must do it before calling Init_MapInPage, as that may
; well want to put something in the thing we are clearing. If the MMU is off,
; no problem, but if the MMU is on, then the L2PT isn't accessible until we've
; called Init_MapInPage. Solution is to use the AccessPhysicalAddress call.
        TEQ     v3, #0                  ; MMU on?
        MOVNE   a1, v4                  ; if not, just access v4
        LDREQ   a1, =OSAP_None+DynAreaFlags_NotCacheable ; if so, map in v4
        MOVEQ   a2, v4
        SUBEQ   sp, sp, #4
        MOVEQ   a3, #0
        MOVEQ   a4, sp
        BLEQ    AccessPhysicalAddress
        MOV     a2, #0
        MOV     a3, #4*1024
        BL      memset
        TEQ     v3, #0
        LDREQ   a1, [sp], #4
        BLEQ    ReleasePhysicalAddress
[ MEMM_Type = "VMSAv6"
        ORR     a3, v4, #L1_Page
|
        ORR     a3, v4, #L1_Page + L1_U ; Set the U bit for ARM6 (assume L2 pages will generally be cacheable)
]
        AND     lr, v8, #3
        ORR     a3, a3, lr, LSL #10
        STR     a3, [v6, v8, LSL #2]    ; fill in the L1PT entry
        ; Get the correct page table entry flags for Init_MapInPage
        TEQ     v3, #0
        LDREQ   a3, =ZeroPage
        ADDNE   a3, v3, #DRAMOffset_PageZero-DRAMOffset_PageTables
        LDR     a2, [a3, #PageTable_PageFlags]
        LDR     a4, [a3, #MMU_PCBTrans]
        LDR     a3, [a3, #MMU_PPLTrans]
        MOV     a1, #0
        BL      Get4KPTE
        MOV     a3, a1
        MOV     a1, v4                  ; Map in the L2PT page itself
        LDR     a2, =L2PT               ; (can't recurse, because L2PT
        ADD     a2, a2, v8, LSL #10     ; backing for L2PT is preallocated)
        BIC     a2, a2, #&C00
        BL      Init_MapInPage
40
        ADD     v8, v8, #1              ; go back until all
        CMP     v8, v7                  ; pages allocated
        BLO     %BT05
        EXIT
]

; in:  ip = ZeroPage
; out: v3 = PhysIllegalMask
DeterminePhysIllegalMask
        Push    "lr"
        MOV     v3, #&FFFFFFFF          ; By default any upper bits are illegal
        ARM_read_ID lr
        AND     lr, lr, #&F :SHL: 16
        CMP     lr, #ARMvF :SHL: 16     ; Check that feature registers are implemented
        BNE     %FT01                   ; Some ARMv5 chips supported supersections, but let's not worry about them
        MRC     p15, 0, lr, c0, c1, 7   ; ID_MMFR3
        TST     lr, #&F :SHL: 28
        MOVEQ   v3, #&FFFFFF00
01
        STR     v3, [ip, #PhysIllegalMask]
        Pull    "pc"

[ :LNOT: LongDesc
        MACRO
$lab    ConstructIOPTE $pte, $phys_mb, $flags, $tmp
        ; $pte     (output) L1 page table entry word
        ; $phys_mb (input, preserved) physical address, in megabytes
        ;          for vanilla sections:
        ;            bits 0..11 go in bits 20..31
        ;          for supersections:
        ;            bits 0..3 assumed zero
        ;            bits 4..11 go in bits 24..31
        ;            bits 12..15 go in bits 20..23
        ;            bits 16..19 go in bits 5..8
        ; $flags   (input, preserved) page table attribute bits
        ;
        ; UBFXNE should be safe pre v6T2, since we won't attempt to use
        ; supersections on such CPUs and they won't trap untaken undefined
        ; instructions
        ASSERT  $pte <> $phys_mb
        ASSERT  $pte <> $flags
        ASSERT  $pte <> $tmp
        ASSERT  $tmp <> $phys_mb
        ASSERT  $tmp <> $flags
$lab    ANDS    $tmp, $flags, #L1_SS
        UBFXNE  $tmp, $phys_mb, #32-20, #L1_SSb32Width
        ORR     $pte, $phys_mb, $tmp
        UBFXNE  $tmp, $phys_mb, #36-20, #L1_SSb36Width
        ASSERT  L1_SSb32Shift = 20
        ORR     $pte, $flags, $pte, LSL #L1_SSb32Shift
        ORRNE   $pte, $pte, $tmp, LSL #L1_SSb36Shift
        MEND
]
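; The supersection address packing performed by ConstructIOPTE, restated as C
; (illustrative only; the function, and the 1<<18 value assumed for L1_SS,
; are not taken from these sources). phys_mb bit n corresponds to physical
; address bit n+20, and bits 0..3 are assumed zero for supersections:
;
;   uint32_t construct_io_pte(uint32_t phys_mb, uint32_t flags)
;   {
;       uint32_t pte = phys_mb;
;       if (flags & (1u << 18)) {                /* L1_SS assumed to be bit 18 */
;           pte |= (phys_mb >> 12) & 0xF;        /* PA[35:32] -> entry bits 23:20 */
;           pte = flags | (pte << 20);
;           pte |= ((phys_mb >> 16) & 0xF) << 5; /* PA[39:36] -> entry bits 8:5 */
;       } else {
;           pte = flags | (pte << 20);           /* PA[31:20] -> entry bits 31:20 */
;       }
;       return pte;
;   }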
; void *AccessPhysicalAddress(unsigned int flags, uint64_t phys, void **oldp)
;
; APCS calling convention.
;
; flags: RISC OS page flags
; phys:  Physical address to access
; oldp:  Pointer to word to store old state (or NULL)
;
; On exit: Returns logical address corresponding to 'phys'.
;
; Arranges for the physical address 'phys' to be (temporarily) mapped into
; logical memory. In fact, at least the whole megabyte containing 'phys' is
; mapped in. All mappings use the same shared logical window; the current state
; of the window will be returned in 'oldp', to allow it to be restored (via
; ReleasePhysicalAddress) once you've finished with it.
;
; Note: No cache maintenance performed. Assumption is that mappings will be
; non-cacheable.
AccessPhysicalAddress ROUT
        ; Check physical address is valid on current CPU
        LDR     ip, =ZeroPage
        Push    "a1,v3,lr"
        LDR     v3, [ip, #PhysIllegalMask]
        TST     a3, v3
        BNE     %FT90
[ LongDesc
        UBFX    v3, a2, #0, #21         ; v3 = offset for result
        ; Use Get2MPTE to convert into a page table entry
        MOV     ip, a2
        BFI     ip, a3, #0, #21         ; Get2MPTE packs the high address bits into the low bits
        GetPTE  a1, 2M, ip, a1
        ; Force XN (easier to do afterwards since PPL mapping is non-trivial)
        ORR     a2, a2, #LL_Page_HighAttr_XN
        MOV     lr, a4
        LDR     ip, =LL2PT + (PhysicalAccess:SHR:18) ; ip -> L2PT entry
        LDRD    a3, [ip]                ; Get old entry
        STRD    a1, [ip]                ; Put new entry
        ; Compact old entry into a single word if necessary
        CMP     lr, #0
        BFINE   a3, a4, #12, #8
        STRNE   a3, [lr]
        LDR     a1, =PhysicalAccess
        ORR     a1, a1, v3
|
        ; Use Get1MPTE to convert DA flags into L1PT section-mapping flags
        MOV     ip, #0
        GetPTE  a1, 1M, ip, a1
        ; Mapping size (section or supersection) depends on address
        MOV     lr, a2, LSR #20
        ORR     lr, lr, a3, LSL #12     ; lr = physical megabyte number
        TEQ     a3, #0
        ORRNE   a1, a1, #L1_SS          ; need to use a supersection for such addresses
        BIC     a2, a2, #&FF000000      ; at most, bits 0-23 are used as offsets into section/supersection
        BICNE   lr, lr, #&F             ; if address >4GB, round mapped address to 16MB (supersection)
        BICEQ   a2, a2, #&00F00000      ; else no further rounding needed (section) and bits 20-23 are not used as an offset either
        ConstructIOPTE v3, lr, a1, ip
        LDR     ip, =L1PT + (PhysicalAccess:SHR:18) ; ip -> L1PT entry
[ MEMM_Type = "VMSAv6"
        ORR     v3, v3, #L1_XN          ; force non-executable to prevent speculative instruction fetches
]
        TEQ     a4, #0
        LDRNE   lr, [ip]                ; read old value (if necessary)
        STRNE   lr, [a4]                ; put old one in [oldp]
        MOV     a4, #15
        STR     v3, [ip], #4            ; store first of 16 new L1PT entries
        TST     v3, #L1_SS
        MOVEQ   v3, #0                  ; if supersection mapped then use 16 duplicate entries, else remaining entries unmapped
10
        SUBS    a4, a4, #1
        STR     v3, [ip], #4
        BNE     %BT10
        LDR     a1, =PhysicalAccess
        ORR     a1, a1, a2
]
        STR     a1, [sp]
        ARMop   MMU_ChangingUncached    ; sufficient, because not cacheable
        Pull    "a1,v3,pc"
90
        ; Invalid physical address
        ADD     sp, sp, #1*4
        MOV     a1, #0
        Pull    "v3,pc"

; void ReleasePhysicalAddress(void *old)
;
; APCS calling convention.
;
; Call with the 'oldp' value from a previous AccessPhysicalAddress call to
; restore the previous physical access window state.
ReleasePhysicalAddress
[ LongDesc
        LDR     ip, =LL2PT + (PhysicalAccess:SHR:18) ; ip -> L2PT entry
        ; The 8 byte page table entry is packed into the 4 byte token as follows:
        ; * Bits 0-11 give the low 12 bits of the page table entry (page type + low attributes)
        ; * Bits 21-31 give bits 21-31 of the page table entry (low PA)
        ; * Bits 12-19 give bits 32-39 of the page table entry (high PA)
        ; * Bit 20 is spare (kept at zero)
        ; * The upper attributes are fixed (always XN)
        UBFX    a2, a1, #12, #8         ; Get high word
        BICS    a1, a1, #&FF000         ; Get low word
        ORRNE   a2, a2, #LL_Page_HighAttr_XN ; Always XN
        STRD    a1, [ip]
|
        LDR     ip, =L1PT + (PhysicalAccess:SHR:18) ; ip -> L1PT entry
        MOV     a4, #15
        STR     a1, [ip], #4            ; restore first of 16 L1PT entries
        TST     a1, #L1_SS
        MOVEQ   a1, #0                  ; if supersection mapped then use 16 duplicate entries, else remaining entries unmapped
10
        SUBS    a4, a4, #1
        STR     a1, [ip], #4
        BNE     %BT10
]
        ARMop   MMU_ChangingUncached,,tailcall ; sufficient, because not cacheable
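; For reference, unpacking the 32-bit token back into the 8 byte
; long-descriptor entry looks like this in C (sketch only; the function name
; and HIGHATTR_XN are invented stand-ins for the LL_Page_HighAttr_XN value
; used above):
;
;   void token_to_pte(uint32_t token, uint32_t *low, uint32_t *high)
;   {
;       *low  = token & ~0xFF000u;     /* entry type + low attrs + PA[31:21] */
;       *high = (token >> 12) & 0xFF;  /* PA[39:32] */
;       if (*low != 0)                 /* a zero token means "unmapped" */
;           *high |= HIGHATTR_XN;      /* window is always mapped XN */
;   }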
; void Init_PageTablesChanged(void)
;
; A TLB+cache invalidation that works on all known ARMs. Invalidate all I+D TLB
; is the _only_ TLB op that works on ARM720T, ARM920T and SA110. Ditto
; invalidate all I+D cache.
;
; DOES NOT CLEAN THE DATA CACHE. This is a helpful simplification, but requires
; that we don't use this routine after we've started using normal RAM.
;
Init_PageTablesChanged
        MOV     a3, lr
        BL      Init_ARMarch
        MOV     ip, #0
        BNE     %FT01
        MCREQ   ARM_config_cp,0,ip,ARMv3_TLBflush_reg,C0
        B       %FT02
01
        MCRNE   ARM_config_cp,0,ip,ARMv4_TLB_reg,C7
02
[ MEMM_Type = "VMSAv6"
        CMP     a1, #ARMvF
        ADREQ   lr, %FT01
        BEQ     HAL_InvalidateCache_ARMvF
        MCRNE   ARM_config_cp,0,ip,ARMv4_cache_reg,C7 ; works on ARMv3
01
|
        MCR     ARM_config_cp,0,ip,ARMv4_cache_reg,C7 ; works on ARMv3
]
        MOV     pc, a3

;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
;
; ClearWkspRAM - Routine to clear "all" workspace
;
; We have to avoid anything between InitUsedStart and InitUsedEnd - i.e.
; the page tables, HAL workspace, etc.
;
; Note that zero page workspace isn't included in InitUsedStart/InitUsedEnd.
; Sensitive areas of it (e.g. PhysRamTable, IRQ vector) are skipped via the
; help of RamSkipTable.
;
; The bulk of RAM is cleared during the keyboard scan (ClearFreePoolSection).
;
; out: r4-r11, r13 preserved
;
ClearWkspRAM ROUT
        MSR     CPSR_c, #F32_bit+FIQ32_mode ; get some extra registers
        MOV     r8, #0
        MOV     r9, #0
        MOV     r10, #0
        MOV     r11, #0
        MOV     r12, #0
        MOV     r13, #0
        MOV     r14, #0
        MSR     CPSR_c, #F32_bit+SVC32_mode
        LDR     r0, =ZeroPage+InitClearRamWs ; we can preserve r4-r11,lr in one of the skipped regions
        STMIA   r0, {r4-r11,lr}
        DebugTX "ClearWkspRAM"
        ; Start off by clearing zero page + scratch space, as these:
        ; (a) are already mapped in and
        ; (b) may require the use of the skip table
        LDR     r0, =ZeroPage
        ADD     r1, r0, #16*1024
        ADR     r6, RamSkipTable
        MSR     CPSR_c, #F32_bit+FIQ32_mode ; switch to our bank o'zeros
        LDR     r5, [r6], #4            ; load first skip addr
10
        TEQ     r0, r1
        TEQNE   r0, r5
        STMNEIA r0!, {r8-r11}
        BNE     %BT10
        TEQ     r0, r1
        BEQ     %FT20
        LDR     r5, [r6], #4            ; load skip amount
        ADD     r0, r0, r5              ; and skip it
        LDR     r5, [r6], #4            ; load next skip addr
        B       %BT10
20
        LDR     r0, =ScratchSpace
        ADD     r1, r0, #ScratchSpaceSize
30
        TEQ     r0, r1
        STMNEIA r0!, {r8-r11}
        STMNEIA r0!, {r8-r11}
        BNE     %BT30
        MSR     CPSR_c, #F32_bit+SVC32_mode
        LDR     r0, =ZeroPage+InitClearRamWs
        LDMIA   r0, {r4-r11,r14}        ; restore
[ {FALSE}
        ; NewReset sets this later
        LDR     r0, =ZeroPage+OsbyteVars + :INDEX: LastBREAK
        MOV     r1, #&80
        STRB    r1, [r0]                ; flag the fact that RAM cleared
]
        MOV     pc, lr

        LTORG

        MACRO
        MakeSkipTable $addr, $size
        ASSERT  ($addr :AND: 15) = 0
        ASSERT  ($size :AND: 15) = 0
        ASSERT  ($addr-ZeroPage) < 16*1024
        &       $addr, $size
        MEND

        MACRO
        EndSkipTables
        &       -1
        MEND

RamSkipTable
        MakeSkipTable ZeroPage, InitWsEnd
        MakeSkipTable ZeroPage+SkippedTables, SkippedTablesEnd-SkippedTables
        EndSkipTables
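; In C terms, the skip-table-driven clear in ClearWkspRAM above works roughly
; like this (sketch only; the function and names are invented). The table is
; a sequence of (skip address, skip length) word pairs terminated by -1, and
; everything is 16-byte aligned so the clear can go four words at a time:
;
;   void clear_with_skips(uint32_t *p, uint32_t *end, const uint32_t *skip)
;   {
;       uint32_t next_skip = *skip++;                 /* first skip address */
;       while (p != end) {
;           if ((uintptr_t)p == next_skip) {
;               p = (uint32_t *)((uintptr_t)p + *skip++); /* hop over the hole */
;               next_skip = *skip++;                  /* next skip address */
;           } else {
;               *p++ = 0; *p++ = 0; *p++ = 0; *p++ = 0;
;           }
;       }
;   }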
;++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
;
; ClearFreePoolSection - Routine to clear a section of the free pool
;
; During keyboard scanning we soak up slack time clearing the bulk of RAM
; by picking a section of the free pool, mapping it in, clearing & flushing.
;
; In:  r0 = CAM entry to continue from
; Out: r0 = updated
;
ClearFreePoolSection ROUT
        Push    "r1-r3, lr"
        LDR     r3, =ZeroPage
        LDR     r1, [r3, #MaxCamEntry]
        LDR     r2, =ZeroPage+FreePoolDANode
        CMP     r0, r1
        BHI     %FT30
        LDR     r3, [r3, #CamEntriesPointer]
        ADD     r1, r3, r1, LSL #CAM_EntrySizeLog2 ; top entry (inc)
        ADD     r3, r3, r0, LSL #CAM_EntrySizeLog2 ; starting entry
10
        LDR     r14, [r3, #CAM_PageFlags]
        TST     r14, #DynAreaFlags_PMP
        BEQ     %FT20
        LDR     r14, [r3, #CAM_PMP]
        TEQ     r14, r2
        BEQ     %FT40
20
        ADD     r3, r3, #CAM_EntrySize  ; next
        CMP     r3, r1
        BLS     %BT10
30
        MOV     r0, #-1
        Pull    "r1-r3, pc"
40
        Push    "r0-r12"
        ; This is a PMP entry in the free pool
        LDR     r14, [r3, #CAM_PMPIndex] ; list index
        LDR     r9, [r2, #DANode_PMP]   ; PMP list base
        LDR     r3, [r9, r14, LSL #2]   ; ppn
        BL      ppn_to_physical         ; => r8,r9 = PA
[ MEMM_Type = "ARM600"
        ; Map in this section, cacheable + bufferable to ensure burst writes
        ; are performed (StrongARM will only perform burst writes to CB areas)
        MOV     a1, #OSAP_None
|
        ; Map in this section with default NCB cache policy. Making it cacheable
        ; is liable to slow things down significantly on some platforms (e.g.
        ; PL310 L2 cache)
        LDR     a1, =OSAP_None + DynAreaFlags_NotCacheable
]
        MOV     a2, r8
        MOV     a3, r9
        MOV     a4, #0
        BL      AccessPhysicalAddress
        MOV     r5, r8, LSR #20
        ORR     r5, r5, r9, LSL #12     ; r5 = physical MB
        MOV     r4, #0                  ; clear to this value
        MOV     r6, r4
        MOV     r7, r4
        MOV     r12, r4
45
        MOV     r8, r4
        MOV     r9, r4
        MOV     r10, r4
        MOV     r11, r4
        ; Fill that page
        ADD     r2, r0, #4096
50
        STMIA   r0!, {r4,r6-r12}
        STMIA   r0!, {r4,r6-r12}
        TEQ     r0, r2
        BNE     %BT50
        MOV     r10, r5                 ; previous phys MB
        ; Step the CAM until there are no more pages in that section
        LDR     r1, [sp, #1*4]
        LDR     r2, [sp, #2*4]
        LDR     r11, [sp, #3*4]
        B       %FT65
60
        LDR     r14, [r11, #CAM_PageFlags]
        TST     r14, #DynAreaFlags_PMP
        BEQ     %FT65
        LDR     r14, [r11, #CAM_PMP]
        TEQ     r14, r2
        BEQ     %FT70
65
        ADD     r11, r11, #CAM_EntrySize ; next
        CMP     r11, r1
        BLS     %BT60
        MOV     r14, #-1                ; CAM top, no more
        B       %FT80
70
        ; Next PMP entry in the free pool
        LDR     r14, [r11, #CAM_PMPIndex] ; list index
        LDR     r9, [r2, #DANode_PMP]   ; PMP list base
        LDR     r3, [r9, r14, LSL #2]   ; ppn
        BL      ppn_to_physical         ; => r8,r9 = PA
        MOV     r5, r8, LSR #20
        ORR     r5, r5, r9, LSL #12
        TEQ     r10, r5                 ; same MB as previous?
        LDRNE   r14, =ZeroPage
        LDRNE   r14, [r14, #CamEntriesPointer]
        SUBNE   r14, r11, r14
        MOVNE   r14, r14, LSR #CAM_EntrySizeLog2 ; no, so compute continuation point
        SUBEQ   r0, r0, #4              ; wind back to make sure we stay in the correct megabyte of PhysicalAccess
[ NoARMT2
        MOVEQ   r0, r0, LSR #20
        ORREQ   r0, r0, r8, LSL #12
        MOVEQ   r0, r0, ROR #12
|
        BFIEQ   r0, r8, #0, #20
]
        STREQ   r11, [sp, #3*4]
        BEQ     %BT45                   ; yes, so clear it
80
        STR     r14, [sp, #0*4]         ; return value for continuation
[ MEMM_Type = "ARM600"
        ; VMSAv6 maps as non-cacheable, so no flush required
        ; Make the page uncacheable so the following is safe
        MOV     r4, r0
        LDR     r0, =OSAP_None+DynAreaFlags_NotCacheable
        MOV     r1, r10, LSL #20
        MOV     r2, r10, LSR #12
        MOV     r3, #0
        BL      AccessPhysicalAddress
        MOV     r0, r4
        ; Clean & invalidate the cache before the 1MB window closes
[ CacheCleanerHack
        ; StrongARM requires special clean code, because we haven't mapped in
        ; DCacheCleanAddress yet. Cheat and only perform a clean, not a full
        ; clean + invalidate (should be safe as we've only been writing)
        ARM_read_ID r2
        AND     r2, r2, #&F000
        CMP     r2, #&A000
        BNE     %FT90
85
        SUB     r0, r0, #32             ; rewind 1 cache line
        ARMA_clean_DCentry r0
        MOVS    r1, r0, LSL #12         ; start of the MB?
        BNE     %BT85
        B       %FT91
90
]
        ARMop   Cache_CleanInvalidateAll
]
91
        MOV     a1, #L1_Fault
        BL      ReleasePhysicalAddress  ; reset to default
        Pull    "r0-r12"
        Pull    "r1-r3, pc"
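; One step in ClearFreePoolSection above deserves a note: the NoARMT2 and
; BFIEQ variants compute the same thing. Both keep the logical megabyte (the
; PhysicalAccess window) in the top 12 bits of r0 and replace the low 20 bits
; with the new page's offset within its physical megabyte. In C (sketch only):
;
;   uint32_t rewind_to_page(uint32_t log, uint32_t phys)
;   {
;       /* equivalent to BFI r0,r8,#0,#20, or LSR #20 / ORR LSL #12 / ROR #12 */
;       return (log & 0xFFF00000u) | (phys & 0x000FFFFFu);
;   }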
InitProcVecs
        BKPT    &C000                   ; Reset
        BKPT    &C004                   ; Undefined Instruction
        BKPT    &C008                   ; SWI
        BKPT    &C00C                   ; Prefetch Abort
        SUBS    pc, lr, #4              ; ignore data aborts
        BKPT    &C014                   ; Address Exception
        LDR     pc, InitProcVecs + InitIRQHandler ; IRQ
        BKPT    &C01C                   ; FIQ
InitProcVec_FIQ
        DCD     0
InitProcVecsEnd

;
; In:  a1 = flags (L1_B,L1_C,L1_TEX)
;           bit 20 set if doubly mapped
;           bit 21 set if L1_AP specified (else default to AP_None)
;      a2 = physical address
;      a3 = size
; Out: a1 = assigned logical address, or 0 if failed (no room)
;
RISCOS_MapInIO ROUT
        MOV     a4, a3
        MOV     a3, #0
        ; drop through...
;
; In:  a1 = flags (L1_B,L1_C,L1_TEX)
;           bit 20 set if doubly mapped
;           bit 21 set if L1_AP specified (else default to AP_None)
;      a2,a3 = physical address
;      a4 = size
; Out: a1 = assigned logical address, or 0 if failed (no room)
;
RISCOS_MapInIO64 ROUT
; Will detect and return I/O space already mapped appropriately, or map and
; return new space.
; For simplicity and speed of search, works on a section (1MB) granularity.
;
        ASSERT  L1_B = 1:SHL:2
        ASSERT  L1_C = 1:SHL:3
[ MEMM_Type = "VMSAv6"
        ASSERT  L1_AP = 2_100011 :SHL: 10
        ASSERT  L1_TEX = 2_111 :SHL: 12
|
        ASSERT  L1_AP = 3:SHL:10
        ASSERT  L1_TEX = 2_1111 :SHL: 12
]
MapInFlag_DoublyMapped * 1:SHL:20
MapInFlag_APSpecified * 1:SHL:21
[ LongDesc
        ! 0, "LongDescTODO Decide how to handle this"
        AND     a1, a1, #MapInFlag_DoublyMapped
        ORR     a1, a1, #LL12_Block+LLAttr_SO ; privileged device mapping
        ORR     a1, a1, #LL_Page_LowAttr_AF+LL_Page_LowAttr_SH1+LL_Page_LowAttr_SH0
RISCOS_MapInIO_LowAttr ; a1 bits 0-11 = low attributes, bits 20+ = our extra flags
|
        TST     a1, #MapInFlag_APSpecified
        BICEQ   a1, a1, #L1_AP
        ; For VMSAv6, assume the HAL knows what it's doing and requests correct settings for AP_ROM
        ORREQ   a1, a1, #L1_APMult * AP_None
        BIC     a1, a1, #3
[ MEMM_Type = "VMSAv6"
        ORR     a1, a1, #L1_Section+L1_XN ; force non-executable to prevent speculative instruction fetches
|
        ORR     a1, a1, #L1_Section
]
RISCOS_MapInIO_PTE ; a1 bits 0-19 = L1 section entry flags, bits 20+ = our extra flags
]
        Entry   "a2,v1-v8"
        LDR     ip, =ZeroPage
        SUB     a4, a4, #1              ; reduce by 1 so end physical address is inclusive
        ADDS    v1, a2, a4
        ADC     v2, a3, #0              ; v1,v2 = end physical address
        LDR     v3, [ip, #PhysIllegalMask]
        TST     v2, v3
        MOVNE   a1, #0
        BNE     %FT90                   ; can't map in physical addresses in this range
[ LongDesc
        MOV     v4, a2, LSR #21
        ORR     v4, v4, a3, LSL #11     ; v4 = physical start 2MB to map
        MOV     v5, v1, LSR #21
        ORR     v5, v5, v2, LSL #11
        ADD     v5, v5, #1              ; v5 = exclusive physical end 2MB to map
        ANDS    v8, a1, #MapInFlag_DoublyMapped
        SUBNE   v8, v5, v4              ; v8 = offset of second mapping (in 2MB) or 0
        UBFX    a1, a1, #0, #LL_LowAttr_Start+LL_LowAttr_Size ; mask out our extra flags
        ORR     a1, a1, v4, LSL #21
        MOV     a2, v4, LSR #11
        ORR     a2, a2, #LL_Page_HighAttr_XN ; a1,a2 = first PT entry to match
        LDR     v7, [ip, #IOAllocPtr]
        MOV     v7, v7, LSR #18         ; v7 = logical 2MB*8 that we're checking for a match
        LDR     v1, =LL2PT
        LDR     v2, [ip, #IOAllocTop]
        MOV     v2, v2, LSR #18         ; v2 = last logical 2MB*8 to check (exclusive)
10
        ADD     ip, v7, v8, LSL #3      ; logical 2MB*8 of base mapping, or second mapping if there is one
        CMP     ip, v2
        BHS     %FT50                   ; run out of logical addresses to check
        LDRD    a3, [v1, v7]            ; check only or first entry
        TEQ     a1, a3
        TEQEQ   a2, a4
        LDREQD  a3, [v1, ip]            ; check only or second entry
        TEQEQ   a1, a3
        TEQEQ   a2, a4
        ADD     v7, v7, #8              ; next logical 2MB to check
        BNE     %BT10
        ; Found start of requested IO already mapped, and with required flags.
        ; Now check that the remaining sections are all there too.
        MOV     v6, v4                  ; current 2MB being checked
        MOV     v3, v7, LSL #18
        SUB     v3, v3, #1:SHL:21       ; start logical address
20
        ADD     v6, v6, #1              ; next physical 2MB
        ADDS    a1, a1, #1:SHL:21       ; next PTE
        ADC     a2, a2, #0
        ADD     ip, v7, v8, LSL #3
        CMP     v6, v5
        BHS     %FT80
        CMP     ip, v2
        BHS     %FT45                   ; run out of logical addresses to check
        LDRD    a3, [v1, v7]            ; check only or first entry
        TEQ     a1, a3
        TEQEQ   a2, a4
        LDREQD  a3, [v1, ip]            ; check only or second entry
        TEQEQ   a1, a3
        TEQEQ   a2, a4
        ADDEQ   v7, v7, #8              ; next logical 2MB*8
        BEQ     %BT20                   ; good so far, try next entry
        ; Mismatch, rewind PTE and continue outer loop
        SUB     v6, v6, v4
        SUBS    a1, a1, v6, LSL #21
        SBC     a2, a2, v6, LSR #11
        B       %BT10
45
        ; Rewind PTE
        SUB     v6, v6, v4
        SUBS    a1, a1, v6, LSL #21
        SBC     a2, a2, v6, LSR #11
50
        ; Request not currently mapped, only partially mapped, or mapped with the wrong flags
        LDR     ip, =ZeroPage
        LDR     a3, [ip, #IOAllocPtr]
        MOV     a3, a3, LSR #21
        SUB     v7, v5, v4              ; v7 = number of 2MB required
        SUB     a3, a3, v7
        MOV     v3, a3, LSL #21
        LDR     v6, [ip, #IOAllocLimit]
        CMP     v3, v6                  ; run out of room to allocate IO?
        MOVLS   a1, #0                  ; LS is to match previous version of the code - perhaps should be LO?
        BLS     %FT90
        STR     v3, [ip, #IOAllocPtr]
        MOV     a3, a3, LSL #3
60
        ADD     ip, a3, v8, LSL #3
        STRD    a1, [v1, a3]            ; write only or first entry
        STRD    a1, [v1, ip]            ; write only or second entry
        ADDS    a1, a1, #1:SHL:21
        ADC     a2, a2, #0
        ADD     a3, a3, #8
        SUBS    v7, v7, #1
        BNE     %BT60
        PageTableSync                   ; corrupts a1
80
        LDR     a2, [sp]                ; retrieve original physical address from stack
        BFI     v3, a2, #0, #21         ; apply sub-2MB offset
        MOV     a1, v3
90
        EXIT
|
        MOVS    v3, v2
        MOVNE   v3, #&F                 ; v3 = number of MB to use in rounding (0 for sections, 15 for supersections)
        MOV     v4, a2, LSR #20
        ORR     v4, v4, a3, LSL #12
        BIC     v4, v4, v3              ; v4 = physical start MB to map
        MOV     v5, v1, LSR #20
        ORR     v5, v5, v2, LSL #12
        ADD     v5, v5, #1              ; make exclusive
        ADD     v5, v5, v3
        BIC     v5, v5, v3              ; v5 = physical end MB to map
        ANDS    a2, a1, #MapInFlag_DoublyMapped
        SUBNE   a2, v5, v4              ; a2 = offset of second mapping (in MB) or 0
        LDR     v6, =&FFFFF
        AND     a1, a1, v6              ; mask out our extra flags
        CMP     v5, #&1000
        ORRHI   a1, a1, #L1_SS          ; set supersection flag if necessary
        LDR     a3, [ip, #IOAllocPtr]
        MOV     a3, a3, LSR #20
        ADD     a3, a3, v3
        BIC     a3, a3, v3              ; a3 = logical MB that we're checking for a match
        ConstructIOPTE a4, v4, a1, lr   ; a4 = first PT entry to match
        ADD     v3, v3, #1              ; v3 = number of MB to step between sections or supersections
        LDR     v1, =L1PT
        LDR     v2, [ip, #IOAllocTop]
        MOV     v2, v2, LSR #20         ; v2 = last logical MB to check (exclusive)
        SUB     a3, a3, v3              ; no increment on first iteration
10
        ADD     a3, a3, v3              ; next logical MB to check
        ADD     ip, a3, a2              ; logical MB of base mapping, or second mapping if there is one
        CMP     ip, v2
        BHS     %FT50                   ; run out of logical addresses to check
        LDR     lr, [v1, a3, LSL #2]    ; check only or first entry
        TEQ     lr, a4
        LDREQ   lr, [v1, ip, LSL #2]    ; check only or second entry
        TEQEQ   lr, a4
        BNE     %BT10
        ; Found start of requested IO already mapped, and with required flags.
        ; Now check that the remaining sections are all there too.
        MOV     v6, v4                  ; v6 = expected physical MB
        MOV     v7, a3                  ; v7 = logical MB we expect to find it at
20
        ADD     v6, v6, v3              ; next physical MB
        ADD     v7, v7, v3              ; next logical MB
        ADD     ip, v7, a2              ; logical MB of base mapping, or second mapping if there is one
        CMP     v6, v5
        MOVHS   a4, a3, LSL #20
        BHS     %FT80                   ; reached end and everything matched
        CMP     ip, v2
        BHS     %FT50                   ; run out of logical addresses to check
        ConstructIOPTE v8, v6, a1, lr
        LDR     lr, [v1, v7, LSL #2]    ; check only or first entry
        TEQ     lr, v8
        LDREQ   lr, [v1, ip, LSL #2]    ; check only or second entry
        TEQEQ   lr, v8
        BEQ     %BT20                   ; good so far, try next entry
        B       %BT10                   ; mismatch, continue outer loop
50
        ; Request not currently mapped, only partially mapped, or mapped with the wrong flags
        LDR     ip, =ZeroPage
        SUB     v8, v3, #1              ; v8 = number of MB to use in rounding (0 for sections, 15 for supersections)
        LDR     a3, [ip, #IOAllocPtr]
        MOV     a3, a3, LSR #20
        BIC     a3, a3, v8              ; round down to 1MB or 16MB boundary (some memory may remain unmapped above when we map in a supersection)
        SUB     a4, v5, v4
        ADD     a4, a4, a2              ; a4 = number of MB required
        SUB     a3, a3, a4
        MOV     a4, a3, LSL #20
        LDR     v6, [ip, #IOAllocLimit]
        CMP     a4, v6                  ; run out of room to allocate IO?
        MOVLS   a1, #0                  ; LS is to match previous version of the code - perhaps should be LO?
        BLS     %FT90
        STR     a4, [ip, #IOAllocPtr]
60
        ConstructIOPTE v8, v4, a1, lr   ; v8 = page table entry for this (super)section
        MOV     v7, v3                  ; number of consecutive entries to program the same
70
        ADD     v6, a3, a2
        STR     v8, [v1, a3, LSL #2]    ; write only or first entry
        ADD     a3, a3, #1
        STR     v8, [v1, v6, LSL #2]    ; write only or second entry
        SUBS    v7, v7, #1
        BNE     %BT70
        ADD     v4, v4, v3
        CMP     v4, v5
        BLO     %BT60
        MOV     a2, a1
        PageTableSync                   ; corrupts a1
        MOV     a1, a2
80
        LDR     a2, [sp]                ; retrieve original physical address from stack
        BIC     a2, a2, #&FF000000      ; distance from 16MB boundary for supersections
        TST     a1, #L1_SS
        BICEQ   a2, a2, #&00F00000      ; distance from 1MB boundary for sections
        ADD     a1, a4, a2
90
        EXIT
]

; void RISCOS_AddDevice(unsigned int flags, struct device *d)
RISCOS_AddDevice
        ADDS    a1, a2, #0              ; also clears V
        B       HardwareDeviceAdd_Common

; uint64_t RISCOS_LogToPhys(const void *log)
RISCOS_LogToPhys ROUT
        Push    "r4,r5,r8,r9,lr"
        MOV     r4, a1
        BL      logical_to_physical
        MOVCC   a1, r8
        MOVCC   a2, r9
        BCC     %FT10
        ; Try checking L1PT for any section mappings (logical_to_physical only
        ; deals with regular 4K page mappings)
        ; TODO - Add large page support
        MOV     r0, a1, LSR #20
        MOV     r0, r0, LSL #20
        BL      LoadAndDecodeL1Entry
        CMP     r2, #-2
        BHS     %FT10
        MOVHS   a1, #-1                 ; No L1 page
        MOVHS   a2, #-1
        SUBLO   r3, r3, #1              ; Valid L1 page, apply sub-page offset
        ANDLO   r4, r4, r3
        ADDLO   a1, r0, r4
10
        Pull    "r4,r5,r8,r9,pc"

; int RISCOS_IICOpV(IICDesc *descs, uint32_t ndesc_and_bus)
RISCOS_IICOpV ROUT
        Push    "lr"
        BL      IIC_OpV
        MOVVC   a1, #IICStatus_Completed
        Pull    "pc", VC
        ; Map from RISC OS error numbers to abstract IICStatus return values
        LDR     a1, [a1]
        LDR     lr, =ErrorNumber_IIC_NoAcknowledge
        SUB     a1, a1, lr              ; 0/1/2 = NoAck/Error/Busy
        CMP     a1, #3
        MOVCS   a1, #3                  ; 3+ => unknown, either way it's an Error
        ADR     lr, %FT10
        LDRB    a1, [lr, a1]
        Pull    "pc"
10
        ASSERT  (ErrorNumber_IIC_Error - ErrorNumber_IIC_NoAcknowledge) = 1
        ASSERT  (ErrorNumber_IIC_Busy - ErrorNumber_IIC_NoAcknowledge) = 2
        DCB     IICStatus_NoACK, IICStatus_Error, IICStatus_Busy, IICStatus_Error
        ALIGN

SetUpHALEntryTable ROUT
        LDR     a1, =ZeroPage
        LDR     a2, [a1, #HAL_Descriptor]
        LDR     a3, [a1, #HAL_Workspace]
        LDR     a4, [a2, #HALDesc_Entries]
        LDR     ip, [a2, #HALDesc_NumEntries]
        ADD     a4, a2, a4              ; a4 -> entry table
        MOV     a2, a4                  ; a2 -> entry table (increments)
10
        SUBS    ip, ip, #1              ; decrement counter
        LDRCS   a1, [a2], #4
        BCC     %FT20
        TEQ     a1, #0
        ADREQ   a1, NullHALEntry
        ADDNE   a1, a4, a1              ; convert offset to absolute
        STR     a1, [a3, #-4]!          ; store backwards below HAL workspace
        B       %BT10
20
        LDR     a1, =ZeroPage           ; pad table with NullHALEntries
        LDR     a4, =HALWorkspace       ; in case the HAL didn't supply enough
        ADR     a1, NullHALEntry
30
        CMP     a3, a4
        STRHI   a1, [a3, #-4]!
        BHI     %BT30
        MOV     pc, lr

NullHALEntry
        MOV     pc, lr
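; In C terms, SetUpHALEntryTable above builds a table of absolute entry points
; growing downwards from the HAL workspace pointer (sketch only; the function
; and names are invented stand-ins for the HALDesc_* offsets used above):
;
;   void setup_hal_entry_table(const uint32_t *desc, uint32_t *ws,
;                              uint32_t *ws_limit, uint32_t null_entry)
;   {
;       const uint32_t *tab = (const uint32_t *)((uintptr_t)desc + desc[ENTRIES]);
;       uint32_t n = desc[NUM_ENTRIES];
;       for (uint32_t i = 0; i < n; i++)     /* offset 0 => NullHALEntry,     */
;           *--ws = tab[i] ? (uint32_t)((uintptr_t)tab + tab[i]) /* else it   */
;                          : null_entry;     /* is relative to the table      */
;       while (ws > ws_limit)                /* pad with NullHALEntry in case */
;           *--ws = null_entry;              /* the HAL supplied too few      */
;   }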
; Can freely corrupt r10-r12 (v7,v8,ip).
HardwareSWI
        AND     ip, v5, #&FF
        CMP     ip, #OSHW_LookupRoutine
        ASSERT  OSHW_CallHAL < OSHW_LookupRoutine
        BLO     HardwareCallHAL
        BEQ     HardwareLookupRoutine
        CMP     ip, #OSHW_DeviceRemove
        ASSERT  OSHW_DeviceAdd < OSHW_DeviceRemove
        BLO     HardwareDeviceAdd
        BEQ     HardwareDeviceRemove
        CMP     ip, #OSHW_DeviceEnumerateChrono
        ASSERT  OSHW_DeviceEnumerate < OSHW_DeviceEnumerateChrono
        ASSERT  OSHW_DeviceEnumerateChrono < OSHW_MaxSubreason
        BLO     HardwareDeviceEnumerate
        BEQ     HardwareDeviceEnumerateChrono
        BHI     HardwareBadReason

HardwareCallHAL
        Push    "v1-v4,sb,lr"
        ADD     v8, sb, #1              ; v8 = entry no + 1
        LDR     ip, =ZeroPage
        LDR     v7, [ip, #HAL_Descriptor]
        AddressHAL ip                   ; sb set up
        LDR     v7, [v7, #HALDesc_NumEntries] ; v7 = number of entries
        CMP     v8, v7                  ; entryno + 1 must be <= number of entries
        BHI     HardwareBadEntry2
        LDR     ip, [sb, -v8, LSL #2]
        ADR     v7, NullHALEntry
        TEQ     ip, v7
        BEQ     HardwareBadEntry2
[ NoARMv5
        MOV     lr, pc
        MOV     pc, ip
|
        BLX     ip
]
        ADD     sp, sp, #4*4
        Pull    "sb,lr"
        ExitSWIHandler

HardwareLookupRoutine
        ADD     v8, sb, #1              ; v8 = entry no + 1
        LDR     ip, =ZeroPage
        LDR     v7, [ip, #HAL_Descriptor]
        AddressHAL ip
        LDR     v7, [v7, #HALDesc_NumEntries]
        CMP     v8, v7                  ; entryno + 1 must be <= number of entries
        BHI     HardwareBadEntry
        LDR     a1, [sb, -v8, LSL #2]
        ADR     v7, NullHALEntry
        TEQ     a1, v7
        BEQ     HardwareBadEntry
        MOV     a2, sb
        ExitSWIHandler

HardwareDeviceAdd
        Push    "r1-r3,lr"
        BL      HardwareDeviceAdd_Common
        Pull    "r1-r3,lr"
        B       SLVK_TestV

HardwareDeviceRemove
        Push    "r1-r3,lr"
        BL      HardwareDeviceRemove_Common
        Pull    "r1-r3,lr"
        B       SLVK_TestV

HardwareDeviceAdd_Common
        Entry
        BL      HardwareDeviceRemove_Common ; first try to remove any device already at the same address
        EXIT    VS
        LDR     lr, =ZeroPage
        LDR     r1, [lr, #DeviceCount]
        LDR     r2, [lr, #DeviceTable]
        TEQ     r2, #0
        BEQ     %FT80
        ADD     r1, r1, #1              ; increment DeviceCount
        LDR     lr, [r2, #-4]           ; word before heap block is length including length word
        TEQ     r1, lr, LSR #2          ; block already full?
        BEQ     %FT81
        LDR     lr, =ZeroPage
10
        STR     r1, [lr, #DeviceCount]
        ADD     lr, r2, r1, LSL #2
        SUB     lr, lr, #4
11
        LDR     r1, [lr, #-4]!
        ; copy existing devices up, so new ones get enumerated first
        STR     r1, [lr, #4]
        CMP     lr, r2
        BHI     %BT11
        STR     r0, [r2]
        MOV     r2, r0
        MOV     r1, #Service_Hardware
        MOV     r0, #0
        BL      Issue_Service
        ADDS    r0, r2, #0              ; exit with V clear
        EXIT
80
        ; Claim a system heap block for the device table
        Push    "r0"
        MOV     r3, #16
        BL      ClaimSysHeapNode
        ADDVS   sp, sp, #4
        EXIT    VS
        Pull    "r0"
        LDR     lr, =ZeroPage
        MOV     r1, #1
        STR     r2, [lr, #DeviceTable]
        B       %BT10
81
        ; Extend the system heap block
        Push    "r0"
        MOV     r0, #HeapReason_ExtendBlock
        MOV     r3, #16
        BL      DoSysHeapOpWithExtension
        ADDVS   sp, sp, #4
        EXIT    VS
        Pull    "r0"
        LDR     lr, =ZeroPage
        LDR     r1, [lr, #DeviceCount]
        STR     r2, [lr, #DeviceTable]
        ADD     r1, r1, #1
        B       %BT10

HardwareDeviceRemove_Common ROUT
        Entry   "r4"
        LDR     lr, =ZeroPage
        LDR     r3, [lr, #DeviceCount]
        LDR     r4, [lr, #DeviceTable]
        TEQ     r3, #0
        EXIT    EQ                      ; no devices registered
10
        LDR     r2, [r4], #4
        SUBS    r3, r3, #1
        TEQNE   r2, r0
        BNE     %BT10
        TEQ     r2, r0
        EXIT    NE                      ; this device not registered
        MOV     r0, #1
        MOV     r1, #Service_Hardware
        BL      Issue_Service
        CMP     r1, #0                  ; if service call claimed
        CMPEQ   r1, #1:SHL:31           ; then set V (r0 already points to error block)
        EXIT    VS                      ; and exit
        ; Search for the device again - we may have been re-entered
        MOV     r0, r2
        LDR     lr, =ZeroPage
        LDR     r3, [lr, #DeviceCount]
        LDR     r4, [lr, #DeviceTable]
        TEQ     r3, #0
        EXIT    EQ                      ; no devices registered
20
        LDR     r2, [r4], #4
        SUBS    r3, r3, #1
        TEQNE   r2, r0
        BNE     %BT20
        TEQ     r2, r0
        EXIT    NE                      ; this device not registered
        SUBS    r3, r3, #1
30
        LDRCS   r2, [r4], #4            ; copy down remaining devices
        STRCS   r2, [r4, #-8]
        SUBCSS  r3, r3, #1
        BCS     %BT30
        LDR     lr, =ZeroPage
        LDR     r3, [lr, #DeviceCount]
        SUB     r3, r3, #1
        STR     r3, [lr, #DeviceCount]
        EXIT

HardwareDeviceEnumerate
        Push    "r3-r4,lr"
        LDR     lr, =ZeroPage
        LDR     r2, [lr, #DeviceCount]
        LDR     r3, [lr, #DeviceTable]
        SUBS    r4, r2, r1
        MOVLS   r1, #-1
        BLS     %FT90                   ; if r1 is out of range then exit
        ADD     r3, r3, r1, LSL #2
10
        ADD     r1, r1, #1
        LDR     r2, [r3], #4
        LDR     lr, [r2, #HALDevice_Type]
        EOR     lr, lr, r0
        MOVS    lr, lr, LSL #16         ; EQ if types match
        SUBNES  r4, r4, #1
        BNE     %BT10
        TEQ     lr, #0
        MOVNE   r1, #-1
        BNE     %FT90
        LDR     lr, [r2, #HALDevice_Version]
        MOV     lr, lr, LSR #16
        CMP     lr, r0, LSR #16         ; newer than our client understands?
        BLS     %FT90
        SUBS    r4, r4, #1
        BHI     %BT10
        MOV     r1, #-1
90
        Pull    "r3-r4,lr"
        ExitSWIHandler

HardwareDeviceEnumerateChrono
        Push    "r3-r4,lr"
        LDR     lr, =ZeroPage
        LDR     r2, [lr, #DeviceCount]
        LDR     r3, [lr, #DeviceTable]
        SUBS    r4, r2, r1
        MOVLS   r1, #-1
        BLS     %FT90                   ; if r1 is out of range then exit
        ADD     r3, r3, r4, LSL #2
10
        ADD     r1, r1, #1
        LDR     r2, [r3, #-4]!
        LDR     lr, [r2, #HALDevice_Type]
        EOR     lr, lr, r0
        MOVS    lr, lr, LSL #16         ; EQ if types match
        SUBNES  r4, r4, #1
        BNE     %BT10
        TEQ     lr, #0
        MOVNE   r1, #-1
        BNE     %FT90
        LDR     lr, [r2, #HALDevice_Version]
        MOV     lr, lr, LSR #16
        CMP     lr, r0, LSR #16         ; newer than our client understands?
        BLS     %FT90
        SUBS    r4, r4, #1
        BHI     %BT10
        MOV     r1, #-1
90
        Pull    "r3-r4,lr"
        ExitSWIHandler

HardwareBadReason
        ADR     r0, ErrorBlock_HardwareBadReason
[ International
        Push    "lr"
        BL      TranslateError
        Pull    "lr"
]
        B       SLVK_SetV

HardwareBadEntry2
        ADD     sp, sp, #4*4
        Pull    "sb,lr"
HardwareBadEntry
        ADR     r0, ErrorBlock_HardwareBadEntry
[ International
        Push    "lr"
        BL      TranslateError
        Pull    "lr"
]
        B       SLVK_SetV

        MakeErrorBlock HardwareBadReason
        MakeErrorBlock HardwareBadEntry

[ DebugTerminal
DebugTerminal_Rdch
        Push    "a2-a4,sb,ip"
        MOV     sb, ip
20
        WritePSRc SVC_mode, r1
        CallHAL HAL_DebugRX
        CMP     a1, #27
        BNE     %FT25
        LDR     a2, =ZeroPage + OsbyteVars + :INDEX: RS423mode
        LDRB    a2, [a2]
        TEQ     a2, #0                  ; is RS423 raw data, or keyboard emulator?
        BNE     %FT25
        LDR     a2, =ZeroPage
        LDRB    a1, [a2, #ESC_Status]
        ORR     a1, a1, #&40
        STRB    a1, [a2, #ESC_Status]   ; mark escape flag
        MOV     a1, #27
        SEC                             ; tell caller to look carefully at R0
        Pull    "a2-a4,sb,ip,pc"
25
        CMP     a1, #-1
        Pull    "a2-a4,sb,ip,pc",NE     ; claim it
        LDR     R0, =ZeroPage
        LDRB    R14, [R0, #CallBack_Flag]
        TST     R14, #CBack_VectorReq
        BLNE    process_callbacks_disableIRQ
        B       %BT20

DebugTerminal_Wrch
        Push    "a1-a4,sb,ip,lr"
        MOV     sb, ip
        CallHAL HAL_DebugTX
        Pull    "a1-a4,sb,ip,pc"        ; don't claim it
]

Reset_IRQ_Handler
        SUB     lr, lr, #4
        Push    "a1-a4,v1-v2,sb,ip,lr"
        MRS     a1, SPSR
        MRS     a2, CPSR
        ORR     a3, a2, #SVC32_mode
        MSR     CPSR_c, a3
        Push    "a1-a2,lr"
        ; If it's not an IIC interrupt, mute it
        LDR     v2, =ZeroPage
        AddressHAL v2
        CallHAL HAL_IRQSource
        ADD     v1, v2, #IICBus_Base
        MOV     ip, #0
10
        LDR     a2, [v1, #IICBus_Type]
        TST     a2, #IICFlag_Background
        BEQ     %FT20
        LDR     a2, [v1, #IICBus_Device]
        CMP     a2, a1
        ADREQ   lr, Reset_IRQ_Exit
        BEQ     IICIRQ
20
        ADD     ip, ip, #1
        ADD     v1, v1, #IICBus_Size
        CMP     ip, #IICBus_Count
        BNE     %BT10
        CallHAL HAL_IRQDisable          ; Stop the rogue device from killing us completely
Reset_IRQ_Exit
        MyCLREX a1, a2
        Pull    "a1-a2,lr"
        MSR     CPSR_c, a2
        MSR     SPSR_cxsf, a1
        Pull    "a1-a4,v1-v2,sb,ip,pc",,^

[ DebugHALTX
DebugHALPrint
        Push    "a1-a4,v1,v2,sb,ip"
        MRS     v2, CPSR
        AddressHAL
        MOV     v1, lr
10
        LDRB    a1, [v1], #1
        TEQ     a1, #0
        BEQ     %FT20
        CallHAL HAL_DebugTX
        B       %BT10
20
        MOV     a1, #13
;       CallHAL HAL_DebugTX
        MOV     a1, #10
;       CallHAL HAL_DebugTX
        ADD     v1, v1, #3
        BIC     lr, v1, #3
        MSR     CPSR_sf, v2
        Pull    "a1-a4,v1,v2,sb,ip"
        MOV     pc, lr
]

[ DebugHALTX
DebugHALPrintReg
        ; Output the number on top of the stack to the serial port
        Push    "a1-a4,v1-v4,sb,ip,lr"  ; this is 11 regs
        MRS     v1, CPSR
        LDR     v2, [sp,#11*4]          ; find TOS value on stack
        ADR     v3, hextab
        MOV     v4, #8
05
        AddressHAL
10
        LDRB    a1, [v3, v2, LSR #28]
        CallHAL HAL_DebugTX
        MOV     v2, v2, LSL #4
        SUBS    v4, v4, #1
        BNE     %BT10
        MOV     a1, #13
        CallHAL HAL_DebugTX
        MOV     a1, #10
        CallHAL HAL_DebugTX
        MSR     CPSR_sf, v1
        Pull    "a1-a4,v1-v4,sb,ip,lr"
        ADD     sp, sp, #4
        MOV     pc, lr

hextab  DCB     "0123456789abcdef"
]

;
;
;
; [ DebugHALTX
;HALDebugHexTX
;       stmfd   r13!, {r0-r3,sb,ip,lr}
;       AddressHAL
;       b       jbdt1
;HALDebugHexTX2
;       stmfd   r13!, {r0-r3,sb,ip,lr}
;       AddressHAL
;       mov     r0,r0,lsl #16
;       b       jbdt2
;HALDebugHexTX4
;       stmfd   r13!, {r0-r3,sb,ip,lr}
;       AddressHAL
;       mov     r0,r0,ror #24           ; hi byte
;       bl      jbdtxh
;       mov     r0,r0,ror #24
;       bl      jbdtxh
;jbdt2
;       mov     r0,r0,ror #24
;       bl      jbdtxh
;       mov     r0,r0,ror #24
;jbdt1
;       bl      jbdtxh
;       mov     r0,#' '
;       CallHAL HAL_DebugTX
;       ldmfd   r13!, {r0-r3,sb,ip,pc}
;
;jbdtxh stmfd   r13!,{a1,v1,lr}         ; print byte as hex. corrupts a2-a4, ip, assumes sb already AddressHAL'd
;       and     v1,a1,#&f               ; get low nibble
;       and     a1,a1,#&f0              ; get hi nibble
;       mov     a1,a1,lsr #4            ; shift to low nibble
;       cmp     a1,#&9                  ; 9?
;       addle   a1,a1,#&30
;       addgt   a1,a1,#&37              ; convert letter if needed
;       CallHAL HAL_DebugTX
;       cmp     v1,#9
;       addle   a1,v1,#&30
;       addgt   a1,v1,#&37
;       CallHAL HAL_DebugTX
;       ldmfd   r13!,{a1,v1,pc}
;
; ]
;
        END