From 2dfd92c1b25050fb6a2a96f921e4d56da61f90a5 Mon Sep 17 00:00:00 2001 From: Jeffrey Lee <jlee@gitlab.riscosopen.org> Date: Mon, 12 Sep 2011 20:31:39 +0000 Subject: [PATCH] ARMv7 fixes Detail: hdr/Copro15ops: - Fixed incorrect encodings of ISH/ISHST variants of DMB/DSB instructions s/ARMops, s/HAL, hdr/KernelWS: - Replace the ARMv7 cache maintenance code with the example code from the ARMv7 ARM. This allows it to deal with caches with non power-of-two set/way counts, and caches with only one way. - Fixed Analyse_WB_CR7_Lx to use the cache level ID register to work out how many caches to query instead of just looking for a 0 result from CSSIDR. - Also only look for 7 cache levels, since level 8 doesn't exist according to the ARMv7 ARM. s/NewReset: - Removed some incorrect/misleading debug output Admin: Tested on rev A2 BB-xM Version 5.35, 4.79.2.98.2.51. Tagged as 'Kernel-5_35-4_79_2_98_2_51' --- VersionASM | 6 +- VersionNum | 8 +- hdr/Copro15ops | 8 +- hdr/KernelWS | 4 +- s/ARMops | 303 ++++++++++++++++++++----------------------------- s/HAL | 94 +++++++-------- s/NewReset | 4 +- 7 files changed, 187 insertions(+), 240 deletions(-) diff --git a/VersionASM b/VersionASM index 4a78938..bee0d80 100644 --- a/VersionASM +++ b/VersionASM @@ -13,11 +13,11 @@ GBLS Module_ComponentPath Module_MajorVersion SETS "5.35" Module_Version SETA 535 -Module_MinorVersion SETS "4.79.2.98.2.50" +Module_MinorVersion SETS "4.79.2.98.2.51" Module_Date SETS "12 Sep 2011" Module_ApplicationDate SETS "12-Sep-11" Module_ComponentName SETS "Kernel" Module_ComponentPath SETS "castle/RiscOS/Sources/Kernel" -Module_FullVersion SETS "5.35 (4.79.2.98.2.50)" -Module_HelpVersion SETS "5.35 (12 Sep 2011) 4.79.2.98.2.50" +Module_FullVersion SETS "5.35 (4.79.2.98.2.51)" +Module_HelpVersion SETS "5.35 (12 Sep 2011) 4.79.2.98.2.51" END diff --git a/VersionNum b/VersionNum index ef1b274..bd8426e 100644 --- a/VersionNum +++ b/VersionNum @@ -5,12 +5,12 @@ * */ #define Module_MajorVersion_CMHG 5.35 -#define 
Module_MinorVersion_CMHG 4.79.2.98.2.50 +#define Module_MinorVersion_CMHG 4.79.2.98.2.51 #define Module_Date_CMHG 12 Sep 2011 #define Module_MajorVersion "5.35" #define Module_Version 535 -#define Module_MinorVersion "4.79.2.98.2.50" +#define Module_MinorVersion "4.79.2.98.2.51" #define Module_Date "12 Sep 2011" #define Module_ApplicationDate "12-Sep-11" @@ -18,6 +18,6 @@ #define Module_ComponentName "Kernel" #define Module_ComponentPath "castle/RiscOS/Sources/Kernel" -#define Module_FullVersion "5.35 (4.79.2.98.2.50)" -#define Module_HelpVersion "5.35 (12 Sep 2011) 4.79.2.98.2.50" +#define Module_FullVersion "5.35 (4.79.2.98.2.51)" +#define Module_HelpVersion "5.35 (12 Sep 2011) 4.79.2.98.2.51" #define Module_LibraryVersionInfo "5:35" diff --git a/hdr/Copro15ops b/hdr/Copro15ops index 5e7ddef..ea94c34 100644 --- a/hdr/Copro15ops +++ b/hdr/Copro15ops @@ -605,10 +605,10 @@ C15 CN 15 DCI &F57FF04E ; DSB ST | [ "$option"="ISH" - DCI &F57FF04D ; DSB ISH + DCI &F57FF04B ; DSB ISH | [ "$option"="ISHST" - DCI &F57FF04C ; DSB ISHST + DCI &F57FF04A ; DSB ISHST | [ "$option"="NSH" DCI &F57FF047 ; DSB NSH @@ -656,10 +656,10 @@ C15 CN 15 DCI &F57FF05E ; DMB ST | [ "$option"="ISH" - DCI &F57FF05D ; DMB ISH + DCI &F57FF05B ; DMB ISH | [ "$option"="ISHST" - DCI &F57FF05C ; DMB ISHST + DCI &F57FF05A ; DMB ISHST | [ "$option"="NSH" DCI &F57FF057 ; DMB NSH diff --git a/hdr/KernelWS b/hdr/KernelWS index b7a33c7..f32be4f 100644 --- a/hdr/KernelWS +++ b/hdr/KernelWS @@ -1323,8 +1323,8 @@ MMUControlSoftCopy # 4 ; Soft copy of ARM control register DeviceCount # 4 ; size of our table of devices in the system heap DeviceTable # 4 ; pointer to table Cache_Lx_Info # 4 ; Cache level ID register -Cache_Lx_DTable # 4*8 ; Data/unified cache layout for all 8 levels -Cache_Lx_ITable # 4*8 ; Instruction cache layout for all 8 levels +Cache_Lx_DTable # 4*7 ; Data/unified cache layout for all 7 levels +Cache_Lx_ITable # 4*7 ; Instruction cache layout for all 7 levels ] AplWorkSize * AppSpaceDANode + 
DANode_Size diff --git a/s/ARMops b/s/ARMops index 46dea8f..426a5de 100644 --- a/s/ARMops +++ b/s/ARMops @@ -529,29 +529,42 @@ Analyse_WB_CR7_Lx MRC p15, 1, a1, c0, c0, 1 ; Cache level ID register MOV v2, v6 ; Work around DTable/ITable alignment issues STR a1, [v2, #Cache_Lx_Info]! - ADD a1, v2, #Cache_Lx_DTable-Cache_Lx_Info - ADD a2, v2, #Cache_Lx_ITable-Cache_Lx_Info + ADD a2, v2, #Cache_Lx_DTable-Cache_Lx_Info MOV a3, #0 MOV a4, #256 ; Smallest instruction cache line length MOV v2, #256 ; Smallest data/unified cache line length (although atm we only need this to be the smallest data cache line length) 10 - MCR p15, 2, a3, c0, c0, 0 ; Program cache size selection register - MRC p15, 1, v1, c0, c0, 0 ; Get size info (data/unified) - STR v1, [a1],#4 - CMP v1, #0 ; Does the cache exist? + ANDS v1, a1, #6 ; Data or unified cache at this level? + MCRNE p15, 2, a3, c0, c0, 0 ; Program cache size selection register + myISB ,v1 + MRCNE p15, 1, v1, c0, c0, 0 ; Get size info (data/unified) + STR v1, [a2] AND v1, v1, #7 ; Get line size - CMPNE v1, v2 - MOVLT v2, v1 ; Earlier CMP will not set LE flags if v1=0 + CMPNE v2, v1 ; Compare only if a cache exists here (NE from the ANDS above; relies on myISB preserving flags, as the MRCNE already does) + MOVGT v2, v1 ; GT needs Z clear, so an absent cache (v1=0, Z still set) cannot drag the minimum down to 0 ADD a3, a3, #1 - MCR p15, 2, a3, c0, c0, 0 ; Program cache size selection register - MRC p15, 1, v1, c0, c0, 0 ; Get size info (instruction) - STR v1, [a2],#4 - CMP v1, #0 ; Does the cache exist? + ANDS v1, a1, #1 ; Instruction cache at this level?
+ MCRNE p15, 2, a3, c0, c0, 0 ; Program cache size selection register + myISB ,v1 + MRCNE p15, 1, v1, c0, c0, 0 ; Get size info (instruction) + STR v1, [a2, #Cache_Lx_ITable-Cache_Lx_DTable] AND v1, v1, #7 ; Get line size - CMPNE v1, a4 - MOVLT a4, v1 ; Earlier CMP will not set LE flags if v1=0 + CMPNE a4, v1 ; Compare only if an instruction cache exists here (NE from the ANDS above) + MOVGT a4, v1 ; GT needs Z clear, so an absent cache (v1=0, Z still set) leaves the minimum untouched + ; Shift the cache level ID register along to get the type of the next + ; cache level + ; However, we need to stop once we reach the first blank entry, because + ; ARM have been sneaky and started to reuse some of the bits from the + ; high end of the register (the Cortex-A8 TRM lists bits 21-23 as being + ; for cache level 8, but the ARMv7 ARM lists them as being for the level + ; of unification for inner shareable memory). The ARMv7 ARM does warn + ; about making sure you stop once you find the first blank entry, but + ; it doesn't say why! + TST a1, #7 ADD a3, a3, #1 + MOVNE a1, a1, LSR #3 + CMP a3, #14 ; Stop after level 7 (even though an 8th level might exist on some CPUs?)
+ ADD a2, a2, #4 BLT %BT10 STRB a4, [v6, #ICache_LineLen] ; Store log2(line size)-2 STRB v2, [v6, #DCache_LineLen] ; log2(line size)-2 @@ -1878,150 +1891,122 @@ MMU_ChangingUncachedEntries_WB_Cal_LD ROUT ; ICache_LineLen = log2(line len)-2 for smallest instruction cache line length ; DCache_RangeThreshold = clean threshold for data cache ; Cache_Lx_Info = Cache level ID register -; Cache_Lx_DTable = Cache size identification register for all 8 data/unified caches -; Cache_Lx_ITable = Cache size identification register for all 8 instruction caches +; Cache_Lx_DTable = Cache size identification register for all 7 data/unified caches +; Cache_Lx_ITable = Cache size identification register for all 7 instruction caches -Cache_CleanAll_WB_CR7_Lx ROUT -; Clean cache by traversing all sets and ways for all data caches - Push "a2,a3,a4,v1,v2,v3,v4,v5,lr" +; ARMv7 cache maintenance routines are a bit long-winded, so we use this macro +; to reduce the risk of mistakes creeping in due to code duplication +; +; $op: Operation to perform ('clean', 'invalidate', 'cleaninvalidate') +; $levels: Which levels to apply to ('lou', 'loc', 'louis') +; Uses r0-r8 & lr as temp +; Performs the indicated op on the indicated data & unified caches +; +; Code based around the alternate/faster code given in the ARMv7 ARM (section +; B2.2.4, alternate/faster code only in doc revision 9), but tightened up a bit +; +; Note that HAL_InvalidateCache_ARMvF uses its own implementation of this +; algorithm, since it must cope with different temporary registers and it needs +; to read the cache info straight from the CP15 registers +; + MACRO + MaintainDataCache_WB_CR7_Lx $op, $levels LDR lr, =ZeroPage - LDR a1, [lr, #Cache_Lx_Info]! + LDR r0, [lr, #Cache_Lx_Info]! 
ADD lr, lr, #Cache_Lx_DTable-Cache_Lx_Info - BIC a1, a1, #&FF000000 ; Discard unification/coherency bits - MOV a2, #0 ; Current cache level -20 - TST a1, #7 ; Get flags - BEQ %FT10 ; Cache clean complete - LDR a3, [lr], #4 ; Get size info - AND v1, a3, #&7 ; log2(Line size)-2 - BIC a3, a3, #&F0000007 ; Clear flags & line size - MOV v2, a3, LSL #19 ; Number of ways-1 in upper 10 bits - MOV v3, a3, LSR #13 ; Number of sets-1 in lower 15 bits - ; Way number needs to be packed right up at the high end of the data word; shift it up - CLZ a4, v2 - MOV v2, v2, LSL a4 - ; Set number needs to start at log2(Line size)+2 - MOV v3, v3, LSL #4 ; Start at bit 4 - MOV v3, v3, LSL v1 ; Start at log2(Line size)+2 - ; Now calculate the offset numbers we will use to increment sets & ways - BIC v4, v2, v2, LSL #1 ; Way increment - BIC v5, v3, v3, LSL #1 ; Set increment - ; Now we can finally clean this cache! - ORR a3, a2, v3 ; Current way (0), set (max), and level -30 - MCR p15, 0, a3, c7, c10, 2 ; Clean - ADDS a3, a3, v4 ; Increment way - BCC %BT30 ; Overflow will occur once ways are enumerated - TST a3, v3 ; Are set bits all zero? - SUBNE a3, a3, v5 ; No, so decrement set and loop around again - BNE %BT30 - ; This cache is now clean. Move on to the next level. - ADD a2, a2, #2 - MOVS a1, a1, LSR #3 - BNE %BT20 -10 - myDSB ,a1 ; Wait for cache cleaning to complete - Pull "a2,a3,a4,v1,v2,v3,v4,v5,pc" + [ "$levels"="lou" + ANDS r3, r0, #&38000000 + MOV r3, r3, LSR #26 ; Cache level value (naturally aligned) + | + [ "$levels"="loc" + ANDS r3, r0, #&07000000 + MOV r3, r3, LSR #23 ; Cache level value (naturally aligned) + | + [ "$levels"="louis" + ANDS r3, r0, #&00E00000 + MOV r3, r3, LSR #20 ; Cache level value (naturally aligned) + | + ! 
1, "Unrecognised levels" + ] + ] + ] + BEQ %FT50 + MOV r8, #0 ; Current cache level +10 ; Loop1 + ADD r2, r8, r8, LSR #1 ; Work out 3 x cachelevel + MOV r1, r0, LSR r2 ; bottom 3 bits are the Cache type for this level + AND r1, r1, #7 ; get those 3 bits alone + CMP r1, #2 + BLT %FT40 ; no cache or only instruction cache at this level + LDR r1, [lr, r8, LSL #1] ; read CCSIDR to r1 + AND r2, r1, #&7 ; extract the line length field + ADD r2, r2, #4 ; add 4 for the line length offset (log2 16 bytes) + LDR r7, =&3FF + AND r7, r7, r1, LSR #3 ; r7 is the max number on the way size (right aligned) + CLZ r5, r7 ; r5 is the bit position of the way size increment + LDR r4, =&7FFF + AND r4, r4, r1, LSR #13 ; r4 is the max number of the index size (right aligned) +20 ; Loop2 + MOV r1, r4 ; r1 working copy of the max index size (right aligned) +30 ; Loop3 + ORR r6, r8, r7, LSL r5 ; factor in the way number and cache number into r6 + ORR r6, r6, r1, LSL r2 ; factor in the index number + [ "$op"="clean" + MCR p15, 0, r6, c7, c10, 2 ; Clean + | + [ "$op"="invalidate" + MCR p15, 0, r6, c7, c6, 2 ; Invalidate + | + [ "$op"="cleaninvalidate" + MCR p15, 0, r6, c7, c14, 2 ; Clean & invalidate + | + ! 1, "Unrecognised op" + ] + ] + ] + SUBS r1, r1, #1 ; decrement the index + BGE %BT30 + SUBS r7, r7, #1 ; decrement the way number + BGE %BT20 +40 ; Skip + ADD r8, r8, #2 + CMP r3, r8 + BGT %BT10 + myDSB ,r0 +50 ; Finished + MEND + +Cache_CleanAll_WB_CR7_Lx ROUT +; Clean cache by traversing all sets and ways for all data caches + Push "r1-r8,lr" + MaintainDataCache_WB_CR7_Lx clean, loc + Pull "r1-r8,pc" Cache_CleanInvalidateAll_WB_CR7_Lx ROUT ; ; similar to Cache_CleanAll, but does clean&invalidate of Dcache, and invalidates ICache ; - Push "a2,a3,a4,v1,v2,v3,v4,v5,lr" - LDR lr, =ZeroPage - LDR a1, [lr, #Cache_Lx_Info]! 
- ADD lr, lr, #Cache_Lx_DTable-Cache_Lx_Info - BIC a1, a1, #&FF000000 ; Discard unification/coherency bits - MOV a2, #0 ; Current cache level -20 - TST a1, #7 ; Get flags - BEQ %FT10 ; Cache clean complete - LDR a3, [lr], #4 ; Get size info - AND v1, a3, #&7 ; log2(Line size)-2 - BIC a3, a3, #&F0000007 ; Clear flags & line size - MOV v2, a3, LSL #19 ; Number of ways-1 in upper 10 bits - MOV v3, a3, LSR #13 ; Number of sets-1 in lower 15 bits - ; Way number needs to be packed right up at the high end of the data word; shift it up - CLZ a4, v2 - MOV v2, v2, LSL a4 - ; Set number needs to start at log2(Line size)+2 - MOV v3, v3, LSL #4 ; Start at bit 4 - MOV v3, v3, LSL v1 ; Start at log2(Line size)+2 - ; Now calculate the offset numbers we will use to increment sets & ways - BIC v4, v2, v2, LSL #1 ; Way increment - BIC v5, v3, v3, LSL #1 ; Set increment - ; Now we can finally clean this cache! - ORR a3, a2, v3 ; Current way (0), set (max), and level -30 - MCR p15, 0, a3, c7, c14, 2 ; Clean & invalidate - ADDS a3, a3, v4 ; Increment way - BCC %BT30 ; Overflow will occur once ways are enumerated - TST a3, v3 ; Are set bits all zero? - SUBNE a3, a3, v5 ; No, so decrement set and loop around again - BNE %BT30 - ; This cache is now clean. Move on to the next level. - ADD a2, a2, #2 - MOVS a1, a1, LSR #3 - BNE %BT20 -10 - MOV a1, #0 - myDSB ,a1,,y ; Wait for cache clean to complete + Push "r1-r8,lr" + MaintainDataCache_WB_CR7_Lx cleaninvalidate, loc MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache MCR p15, 0, a1, c7, c5, 6 ; invalidate branch predictors myDSB ,a1,,y ; Wait for cache/branch invalidation to complete myISB ,a1,,y ; Ensure that the effects of the completed cache/branch invalidation are visible - Pull "a2,a3,a4,v1,v2,v3,v4,v5,pc" + Pull "r1-r8,pc" Cache_InvalidateAll_WB_CR7_Lx ROUT ; ; no clean, assume caller knows what's happening ; - Push "a2,a3,a4,v1,v2,v3,v4,v5,lr" - LDR lr, =ZeroPage - LDR a1, [lr, #Cache_Lx_Info]! 
- ADD lr, lr, #Cache_Lx_DTable-Cache_Lx_Info - BIC a1, a1, #&FF000000 ; Discard unification/coherency bits - MOV a2, #0 ; Current cache level -20 - TST a1, #7 ; Get flags - BEQ %FT10 ; Cache clean complete - LDR a3, [lr], #4 ; Get size info - AND v1, a3, #&7 ; log2(Line size)-2 - BIC a3, a3, #&F0000007 ; Clear flags & line size - MOV v2, a3, LSL #19 ; Number of ways-1 in upper 10 bits - MOV v3, a3, LSR #13 ; Number of sets-1 in lower 15 bits - ; Way number needs to be packed right up at the high end of the data word; shift it up - CLZ a4, v2 - MOV v2, v2, LSL a4 - ; Set number needs to start at log2(Line size)+2 - MOV v3, v3, LSL #4 ; Start at bit 4 - MOV v3, v3, LSL v1 ; Start at log2(Line size)+2 - ; Now calculate the offset numbers we will use to increment sets & ways - BIC v4, v2, v2, LSL #1 ; Way increment - BIC v5, v3, v3, LSL #1 ; Set increment - ; Now we can finally clean this cache! - ORR a3, a2, v3 ; Current way (0), set (max), and level -30 - MCR p15, 0, a3, c7, c6, 2 ; Invalidate - ADDS a3, a3, v4 ; Increment way - BCC %BT30 ; Overflow will occur once ways are enumerated - TST a3, v3 ; Are set bits all zero? - SUBNE a3, a3, v5 ; No, so decrement set and loop around again - BNE %BT30 - ; This cache is now clean. Move on to the next level. 
- ADD a2, a2, #2 - MOVS a1, a1, LSR #3 - BNE %BT20 -10 - MOV a1, #0 - myDSB ,a1,,y ; Wait for invalidation to complete + Push "r1-r8,lr" + MaintainDataCache_WB_CR7_Lx invalidate, loc ; Invalidate only (c7, c6, 2): this routine is documented as "no clean", so dirty lines must not be written back MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache MCR p15, 0, a1, c7, c5, 6 ; invalidate branch predictors myDSB ,a1,,y ; Wait for cache/branch invalidation to complete myISB ,a1,,y ; Ensure that the effects of the completed cache/branch invalidation are visible - Pull "a2,a3,a4,v1,v2,v3,v4,v5,pc" + Pull "r1-r8,pc" Cache_RangeThreshold_WB_CR7_Lx ROUT @@ -2073,51 +2058,13 @@ IMB_Full_WB_CR7_Lx ROUT ; do: clean DCache; drain WBuffer, invalidate ICache/branch predictor ; Luckily, we only need to clean as far as the level of unification ; - Push "a2,a3,a4,v1,v2,v3,v4,v5,lr" - LDR lr, =ZeroPage - LDR a1, [lr, #Cache_Lx_Info]! - ADD lr, lr, #Cache_Lx_DTable-Cache_Lx_Info - MOV a1, a1, LSR #27 - AND a1, a1, #&7 ; Get level of unification - MOV a2, #0 ; Current cache level - SUBS a1, a1, #1 - BLT %FT10 ; Cache clean complete -20 - LDR a3, [lr], #4 ; Get size info - AND v1, a3, #&7 ; log2(Line size)-2 - BIC a3, a3, #&F0000007 ; Clear flags & line size - MOV v2, a3, LSL #19 ; Number of ways-1 in upper 10 bits - MOV v3, a3, LSR #13 ; Number of sets-1 in lower 15 bits - ; Way number needs to be packed right up at the high end of the data word; shift it up - CLZ a4, v2 - MOV v2, v2, LSL a4 - ; Set number needs to start at log2(Line size)+2 - MOV v3, v3, LSL #4 ; Start at bit 4 - MOV v3, v3, LSL v1 ; Start at log2(Line size)+2 - ; Now calculate the offset numbers we will use to increment sets & ways - BIC v4, v2, v2, LSL #1 ; Way increment - BIC v5, v3, v3, LSL #1 ; Set increment - ; Now we can finally clean this cache! - ORR a3, a2, v3 ; Current way (0), set (max), and level -30 - MCR p15, 0, a3, c7, c10, 2 ; Clean - ADDS a3, a3, v4 ; Increment way - BCC %BT30 ; Overflow will occur once ways are enumerated - TST a3, v3 ; Are set bits all zero?
- SUBNE a3, a3, v5 ; No, so decrement set and loop around again - BNE %BT30 - ; This cache is now clean. Move on to the next level. - ADD a2, a2, #2 - SUBS a1, a1, #1 - BGE %BT20 -10 - MOV a1, #0 - myDSB ,a1,,y ; Wait for clean to complete + Push "r1-r8,lr" + MaintainDataCache_WB_CR7_Lx clean, lou MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache MCR p15, 0, a1, c7, c5, 6 ; invalidate branch predictors myDSB ,a1,,y ; Wait for cache/branch invalidation to complete myISB ,a1,,y ; Ensure that the effects of the completed cache/branch invalidation are visible - Pull "a2,a3,a4,v1,v2,v3,v4,v5,pc" + Pull "r1-r8,pc" ; a1 = start address (inclusive, cache line aligned) ; a2 = end address (exclusive, cache line aligned) @@ -2160,7 +2107,7 @@ MMU_Changing_WB_CR7_Lx ROUT BL Cache_CleanInvalidateAll_WB_CR7_Lx MOV a1, #0 MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB - myDSB ,a1,,y ; Wait TLB invalidation to complete + myDSB ,a1,,y ; Wait for TLB invalidation to complete myISB ,a1,,y ; Ensure that the effects are visible Pull "pc" diff --git a/s/HAL b/s/HAL index f0d8637..2a882db 100644 --- a/s/HAL +++ b/s/HAL @@ -1074,54 +1074,56 @@ HAL_InvalidateCache_ARMvF ; The only register we can safely change is ip, but we can switch into FIQ mode with interrupts disabled and use the banked registers there MRS ip, CPSR MSR CPSR_c, #F32_bit+I32_bit+FIQ32_mode - MOV r8, #0 - MCR p15, 0, r8, c7, c5, 0 ; invalidate instruction cache - MCR p15, 0, r8, c8, c7, 0 ; invalidate TLBs - MCR p15, 0, r8, c7, c5, 6 ; invalidate branch target predictor - myDSB ,r8,,y ; Wait for completion - myISB ,r8,,y + MOV r9, #0 + MCR p15, 0, r9, c7, c5, 0 ; invalidate instruction cache + MCR p15, 0, r9, c8, c7, 0 ; invalidate TLBs + MCR p15, 0, r9, c7, c5, 6 ; invalidate branch target predictor + myDSB ,r9,,y ; Wait for completion + myISB ,r9,,y ; Check whether we're ARMv7 (and thus multi-level cache) or ARMv6 (and thus single-level cache) - MRC p15, 0, r9, c0, c0, 1 - TST r9, #&80000000 ; EQ=ARMv6, NE=ARMv7 - 
MCREQ ARM_config_cp,0,r8,ARMv4_cache_reg,C7 ; ARMv3-ARMv6 I+D cache flush - BEQ %FT10 ; Skip to the end - - MRC p15, 1, r8, c0, c0, 1 ; Cache level ID register - BIC r8, r8, #&FF000000 ; Discard unification/coherency bits - MOV r9, #0 ; Current cache level -20 - TST r8, #7 ; Get flags - BEQ %FT10 ; Cache clean complete - MCR p15, 2, r9, c0, c0, 0 ; Program cache size selection register - myISB ,r8,,y - MRC p15, 1, r10, c0, c0, 0 ; Get size info - AND r11, r10, #&7 ; log2(Line size)-2 - BIC r10, r10, #&F0000007 ; Clear flags & line size - MOV r12, r10, LSL #19 ; Number of ways-1 in upper 10 bits - MOV r10, r10, LSR #13 ; Number of sets-1 in lower 15 bits - ; Way number needs to be packed right up at the high end of the data word; shift it up - CLZ r14, r12 - MOV r12, r12, LSL r14 - ; Set number needs to start at log2(Line size)+2 - MOV r10, r10, LSL #4 ; Start at bit 4 - MOV r10, r10, LSL r11 ; Start at log2(Line size)+2 - ; Now calculate the offset numbers we will use to increment sets & ways - BIC r12, r12, r12, LSL #1 ; Way increment - BIC r11, r10, r10, LSL #1 ; Set increment - ; Now we can finally clean this cache! 
- ORR r14, r9, r10 ; Current way (0), set (max), and level -30 + MRC p15, 0, r8, c0, c0, 1 + TST r8, #&80000000 ; EQ=ARMv6, NE=ARMv7 + MCREQ ARM_config_cp,0,r9,ARMv4_cache_reg,C7 ; ARMv3-ARMv6 I+D cache flush + BEQ %FT50 ; Skip to the end + ; This is basically the same algorithm as the MaintainDataCache_WB_CR7_Lx macro, but tweaked to use less registers and to read from CP15 directly + MRC p15, 1, r8, c0, c0, 1 ; read CLIDR to r8 (r8 held the cache type register until now, which has a different layout) + TST r8, #&07000000 ; level of coherency = 0 means there is nothing to invalidate + BEQ %FT50 + MOV r11, #0 ; Current cache level +10 ; Loop1 + ADD r10, r11, r11, LSR #1 ; Work out 3 x cachelevel + MOV r9, r8, LSR r10 ; bottom 3 bits are the Cache type for this level + AND r9, r9, #7 ; get those 3 bits alone + CMP r9, #2 + BLT %FT40 ; no cache or only instruction cache at this level + MCR p15, 2, r11, c0, c0, 0 ; write CSSELR from r11 + myISB ,r9 + MRC p15, 1, r9, c0, c0, 0 ; read current CCSIDR to r9 + AND r10, r9, #&7 ; extract the line length field + ADD r10, r10, #4 ; add 4 for the line length offset (log2 16 bytes) + LDR r8, =&3FF + AND r8, r8, r9, LSR #3 ; r8 is the max number on the way size (right aligned) + CLZ r13, r8 ; r13 is the bit position of the way size increment + LDR r12, =&7FFF + AND r12, r12, r9, LSR #13 ; r12 is the max number of the index size (right aligned) +20 ; Loop2 + MOV r9, r12 ; r9 working copy of the max index size (right aligned) +30 ; Loop3 + ORR r14, r11, r8, LSL r13 ; factor in the way number and cache number into r14 + ORR r14, r14, r9, LSL r10 ; factor in the index number MCR p15, 0, r14, c7, c6, 2 ; Invalidate - ADDS r14, r14, r12 ; Increment way - BCC %BT30 ; Overflow will occur once ways are enumerated - TST r14, r10 ; Are set bits all zero? - SUBNE r14, r14, r11 ; No, so decrement set and loop around again - BNE %BT30 - ; This cache is now clean. Move on to the next level.
- ADD r9, r9, #2 - MOVS r8, r8, LSR #3 - BNE %BT20 -10 + SUBS r9, r9, #1 ; decrement the index + BGE %BT30 + SUBS r8, r8, #1 ; decrement the way number + BGE %BT20 + MRC p15, 1, r8, c0, c0, 1 ; re-read CLIDR, since r8 was reused as the way counter above +40 ; Skip + ADD r11, r11, #2 + AND r14, r8, #&07000000 + CMP r14, r11, LSL #23 + BGT %BT10 + +50 ; Finished ; Wait for clean to complete MOV r8, #0 myDSB ,r8,,y diff --git a/s/NewReset b/s/NewReset index d8baa5d..f6cc34f 100644 --- a/s/NewReset +++ b/s/NewReset @@ -687,15 +687,13 @@ kbdwait SUBS r6, r6, #1 ; else wait a maximum of 5 seconds. BNE kbdwait kbddone - DebugTX "Keyboard scan complete" MSR CPSR_c, #I32_bit+SVC32_mode - DebugTX "FIQ enabled" CallHAL HAL_KbdScanFinish LDR r1, =ZeroPage+InitIRQWs MOV r0, #0 STRB r0, [r1, #KbdScanActive] MSR CPSR_c, #SVC32_mode - DebugTX "IRQ enabled" + DebugTX "Keyboard scan complete" | [ KeyWait <> 0 ; Check for keyboard there every 1/5 sec. but give up after 2 secs. -- GitLab