Commit de8e610e authored by Jeffrey Lee

Update Cortex kernel to use correct instruction/memory barriers and to perform branch target predictor maintenance. Plus tweak default CMOS settings.

Detail:
  hdr/Copro15ops - Added myISB, myDSB, myDMB macros to provide barrier functionality on ARMv6+ (see the usage sketch below)
  s/ARMops, s/HAL, s/VMSAv6, s/AMBControl/memmap - Correct barrier operations are now performed on ARMv6+ following CP15 writes. Branch predictors are now also maintained properly.
  s/NewReset - Change default CMOS settings so that the number of CDFS drives is 0 in Cortex builds. Fixes the rogue CDFS icon on the iconbar.
Admin:
  Tested on rev C2 beagleboard
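
For illustration only (an editorial sketch, not part of the commit): assuming Hdr:Copro15ops is GETted and the build targets ARMv7 (so the macros need no temporary register), the barrier/maintenance pattern the kernel now uses around a page table update is roughly:

    STR     r3, [r4]                ; write the updated page table entry
    myDSB                           ; ensure the write is visible to the table walk hardware
    myISB
    MCR     p15, 0, r5, c8, c7, 1   ; invalidate the stale ITLB/DTLB entry (r5 = page address)
    MOV     r0, #0
    MCR     p15, 0, r0, c7, c5, 6   ; invalidate branch predictors
    myDSB                           ; wait for the invalidations to complete
    myISB                           ; ...and make their effects visible to later instructions

On a pure ARMv6 build the macros fall back to the legacy CP15 MCR encodings and need a scratch register, e.g. myDSB AL,r0.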


Version 5.35, 4.79.2.98.2.27. Tagged as 'Kernel-5_35-4_79_2_98_2_27'
parent 55d55e10
@@ -13,11 +13,11 @@
GBLS Module_ComponentPath
Module_MajorVersion SETS "5.35"
Module_Version SETA 535
Module_MinorVersion SETS "4.79.2.98.2.26"
Module_Date SETS "22 Apr 2010"
Module_ApplicationDate SETS "22-Apr-10"
Module_MinorVersion SETS "4.79.2.98.2.27"
Module_Date SETS "23 Jun 2010"
Module_ApplicationDate SETS "23-Jun-10"
Module_ComponentName SETS "Kernel"
Module_ComponentPath SETS "castle/RiscOS/Sources/Kernel"
Module_FullVersion SETS "5.35 (4.79.2.98.2.26)"
Module_HelpVersion SETS "5.35 (22 Apr 2010) 4.79.2.98.2.26"
Module_FullVersion SETS "5.35 (4.79.2.98.2.27)"
Module_HelpVersion SETS "5.35 (23 Jun 2010) 4.79.2.98.2.27"
END
@@ -5,19 +5,19 @@
*
*/
#define Module_MajorVersion_CMHG 5.35
#define Module_MinorVersion_CMHG 4.79.2.98.2.26
#define Module_Date_CMHG 22 Apr 2010
#define Module_MinorVersion_CMHG 4.79.2.98.2.27
#define Module_Date_CMHG 23 Jun 2010
#define Module_MajorVersion "5.35"
#define Module_Version 535
#define Module_MinorVersion "4.79.2.98.2.26"
#define Module_Date "22 Apr 2010"
#define Module_MinorVersion "4.79.2.98.2.27"
#define Module_Date "23 Jun 2010"
#define Module_ApplicationDate "22-Apr-10"
#define Module_ApplicationDate "23-Jun-10"
#define Module_ComponentName "Kernel"
#define Module_ComponentPath "castle/RiscOS/Sources/Kernel"
#define Module_FullVersion "5.35 (4.79.2.98.2.26)"
#define Module_HelpVersion "5.35 (22 Apr 2010) 4.79.2.98.2.26"
#define Module_FullVersion "5.35 (4.79.2.98.2.27)"
#define Module_HelpVersion "5.35 (23 Jun 2010) 4.79.2.98.2.27"
#define Module_LibraryVersionInfo "5:35"
@@ -550,5 +550,131 @@ C15 CN 15
MCR$cond ARM_config_cp,0,R0,ARMA_TCI_reg,C2,2
MEND
;
; -------------- Additional ARMv7 stuff -----------------------------------
;
; Provided here are ISB, DSB and DMB macros suitable for ARMv6+
; Although ARMv4 & v5 do provide CP15 ops that are compatible with the ARMv6 ops, it's implementation defined whether each processor implements the ops or not (and the ops are unpredictable if unimplemented)
; So to play it safe these macros will complain if used on pre-ARMv6
; Instruction Synchronisation Barrier - required on ARMv6+ to ensure the effects of the following operations are visible to subsequent instructions:
; * Completed cache, TLB & branch predictor maintenance operations
; * CP14/CP15 writes
MACRO
myISB $cond,$temp
[ NoARMv6
! 1, "Don't know what to do on pre-ARMv6!"
|
[ NoARMv7
; ARMv6, use legacy MCR op
MOV$cond $temp,#0
MCR$cond p15,0,$temp,c7,c5,4
|
; ARMv7+, use ISB instruction (saves on temp register, but instruction is unconditional)
; Shouldn't hurt too much if we just ignore the condition code
DCI &F57FF06F ; ISB SY
]
]
MEND
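; Usage sketch (editorial note, not in the original source): on an ARMv6 build the
; macro needs a scratch register for the legacy MCR encoding and honours the condition;
; on ARMv7+ it assembles to a single unconditional ISB and the arguments are unused.
;   myISB   EQ, r3        ; ARMv6: MOVEQ r3,#0 + MCREQ p15,0,r3,c7,c5,4
;   myISB                 ; ARMv7+: DCI &F57FF06F (ISB SY)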
; Data Synchronisation Barrier - aka drain write buffer/data write barrier. Stalls the pipeline until all preceding memory accesses (including cache/TLB/BTC ops) complete.
MACRO
myDSB $cond,$temp,$option
[ NoARMv6
! 1, "Don't know what to do on pre-ARMv6!"
|
[ NoARMv7
; pre-ARMv7, use legacy MCR op
MOV$cond $temp,#0
MCR$cond p15,0,$temp,c7,c10,4
|
; ARMv7+, use DSB instruction
[ "$option"="SY" :LOR: "$option"=""
DCI &F57FF04F ; DSB SY
|
[ "$option"="ST" :LOR: "$option"="SYST"
DCI &F57FF04E ; DSB ST
|
[ "$option"="ISH"
DCI &F57FF04D ; DSB ISH
|
[ "$option"="ISHST"
DCI &F57FF04C ; DSB ISHST
|
[ "$option"="NSH"
DCI &F57FF047 ; DSB NSH
|
[ "$option"="NSHST"
DCI &F57FF046 ; DSB NSHST
|
[ "$option"="OSH"
DCI &F57FF043 ; DSB OSH
|
[ "$option"="OSHST"
DCI &F57FF042 ; DSB OSHST
|
! 1, "Unrecognised DSB option"
]
]
]
]
]
]
]
]
]
]
MEND
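; Usage sketch (editorial note): the third parameter selects the barrier type on ARMv7+
; only; the ARMv6 fallback is always the full c7,c10,4 drain.
;   myDSB   AL, r0, ST    ; ARMv7+: DSB ST; ARMv6: MOV r0,#0 + MCR p15,0,r0,c7,c10,4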
; Data Memory Barrier - a more lightweight DSB; ensures memory accesses behave correctly without stalling the pipeline to wait for preceding accesses to complete. I.e. it's only good for synchronising load/store instructions.
MACRO
myDMB $cond,$temp,$option
[ NoARMv6
! 1, "Don't know what to do on pre-ARMv6!"
|
[ NoARMv7
; ARMv6, use legacy MCR op
MOV$cond $temp,#0
MCR$cond p15,0,$temp,c7,c10,5
|
; ARMv7+, use DMB instruction
[ "$option"="SY" :LOR: "$option"=""
DCI &F57FF05F ; DMB SY
|
[ "$option"="ST" :LOR: "$option"="SYST"
DCI &F57FF05E ; DMB ST
|
[ "$option"="ISH"
DCI &F57FF05D ; DMB ISH
|
[ "$option"="ISHST"
DCI &F57FF05C ; DMB ISHST
|
[ "$option"="NSH"
DCI &F57FF057 ; DMB NSH
|
[ "$option"="NSHST"
DCI &F57FF056 ; DMB NSHST
|
[ "$option"="OSH"
DCI &F57FF053 ; DMB OSH
|
[ "$option"="OSHST"
DCI &F57FF052 ; DMB OSHST
|
! 1, "Unrecognised DMB option"
]
]
]
]
]
]
]
]
]
]
MEND
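; Usage sketch (editorial note): a DMB only orders memory accesses, so it suits
; producer/consumer code where no pipeline stall is needed, e.g. publishing a buffer:
;   STR     r1, [r2]      ; write the payload
;   myDMB   AL, r3, ST    ; order the payload store before the flag store
;   STR     r4, [r5]      ; then set the 'data ready' flag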
END
@@ -158,6 +158,9 @@ AMB_LazyFixUp ROUT
MOV lr,r4 ;restore return address
ARM_write_FAR r5 ;restore FAR
ARM_write_FSR r6 ;restore FSR
[ MEMM_Type = "VMSAv6"
myISB ; Not sure if this is necessary or not; do it just in case
]
;
90
MOVS r0,#0
@@ -1835,9 +1835,6 @@ MMU_ChangingUncachedEntries_WB_Cal_LD ROUT
; Cache_Lx_ITable = Cache size identification register for all 8 instruction caches
Cache_CleanAll_WB_CR7_Lx ROUT
;
; Currently disables interrupts to allow safe programming and reading of cache size selection register
;
; Clean cache by traversing all sets and ways for all data caches
Push "a2,a3,a4,v1,v2,v3,v4,v5,lr"
MOV lr, #ZeroPage
@@ -1877,7 +1874,7 @@ Cache_CleanAll_WB_CR7_Lx ROUT
BNE %BT20
10
MOV a1, #0
MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer
myDSB ; Wait for cache cleaning to complete
Pull "a2,a3,a4,v1,v2,v3,v4,v5,pc"
@@ -1923,8 +1920,11 @@ Cache_CleanInvalidateAll_WB_CR7_Lx ROUT
BNE %BT20
10
MOV a1, #0
MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer
MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache
myDSB ; Wait for cache clean to complete
MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache
MCR p15, 0, a1, c7, c5, 6 ; invalidate branch predictors
myDSB ; Wait for cache/branch invalidation to complete
myISB ; Ensure that the effects of the completed cache/branch invalidation are visible
Pull "a2,a3,a4,v1,v2,v3,v4,v5,pc"
@@ -1970,7 +1970,11 @@ Cache_InvalidateAll_WB_CR7_Lx ROUT
BNE %BT20
10
MOV a1, #0
MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache
myDSB ; Wait for invalidation to complete
MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache
MCR p15, 0, a1, c7, c5, 6 ; invalidate branch predictors
myDSB ; Wait for cache/branch invalidation to complete
myISB ; Ensure that the effects of the completed cache/branch invalidation are visible
Pull "a2,a3,a4,v1,v2,v3,v4,v5,pc"
@@ -1980,30 +1984,40 @@ Cache_RangeThreshold_WB_CR7_Lx ROUT
MOV pc, lr
TLB_InvalidateAll_WB_CR7_Lx ROUT
MMU_ChangingUncached_WB_CR7_Lx
myDSB ; Ensure the page table write has actually completed
myISB ; Also required
TLB_InvalidateAll_WB_CR7_Lx ROUT
MOV a1, #0
MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB
MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB
MCR p15, 0, a1, c7, c5, 6 ; invalidate branch predictors
myDSB ; Wait for cache/branch invalidation to complete
myISB ; Ensure that the effects of the completed cache/branch invalidation are visible
MOV pc, lr
; a1 = page affected (page aligned address)
;
TLB_InvalidateEntry_WB_CR7_Lx ROUT
MMU_ChangingUncachedEntry_WB_CR7_Lx
MCR p15, 0, a1, c8, c7, 1 ; invalidate ITLB & DTLB entry
myDSB ; Ensure the page table write has actually completed
myISB ; Also required
TLB_InvalidateEntry_WB_CR7_Lx ROUT
MCR p15, 0, a1, c8, c7, 1 ; invalidate ITLB & DTLB entry
MCR p15, 0, a1, c7, c5, 7 ; invalidate branch predictor entry
myDSB ; Wait for cache/branch invalidation to complete
myISB ; Ensure that the effects of the completed cache/branch invalidation are visible
MOV pc, lr
WriteBuffer_Drain_WB_CR7_Lx ROUT
MOV a1, #0
MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer
myDSB ; DSB is the new name for write buffer draining
myISB ; Also do ISB for extra paranoia
MOV pc, lr
IMB_Full_WB_CR7_Lx ROUT
;
; do: clean DCache; drain WBuffer, invalidate ICache
; do: clean DCache; drain WBuffer, invalidate ICache/branch predictor
; Luckily, we only need to clean as far as the level of unification
;
Push "a2,a3,a4,v1,v2,v3,v4,v5,lr"
@@ -2045,8 +2059,11 @@ IMB_Full_WB_CR7_Lx ROUT
BGE %BT20
10
MOV a1, #0
MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer (is this required?)
MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache
myDSB ; Wait for clean to complete
MCR p15, 0, a1, c7, c5, 0 ; invalidate ICache
MCR p15, 0, a1, c7, c5, 6 ; invalidate branch predictors
myDSB ; Wait for cache/branch invalidation to complete
myISB ; Ensure that the effects of the completed cache/branch invalidation are visible
Pull "a2,a3,a4,v1,v2,v3,v4,v5,pc"
; a1 = start address (inclusive, cache line aligned)
@@ -2056,33 +2073,50 @@ IMB_Range_WB_CR7_Lx ROUT
SUB a2, a2, a1
CMP a2, #32*1024 ; Maximum L1 cache size on Cortex-A8 is 32K, use that to guess what approach to take
ADD a2, a2, a1
CMPLO a1, a2 ; The routine below will fail if the end address wraps around, so just IMB_Full instead
BHS IMB_Full_WB_CR7_Lx
Push "a3,lr"
Push "a1,a3,lr"
MOV lr, #0
LDRB lr, [lr, #DCache_LineLen] ; log2(line len)-2
MOV a3, #4
MOV lr, a3, LSL lr
10
MCR p15, 0, a1, c7, c11, 1 ; clean DCache entry by VA to PoU
MCR p15, 0, a1, c7, c5, 1 ; invalidate ICache entry (to PoC - is this bad?)
ADD a1, a1, lr
CMP a1, a2
BLO %BT10
MOV a1, #0
MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer (required?)
myDSB ; Wait for clean to complete
Pull "a1" ; Get start address back
MOV lr, #0
LDRB lr, [lr, #ICache_LineLen] ; Use ICache line length, just in case D&I length differ
MOV lr, a3, LSL lr
10
MCR p15, 0, a1, c7, c5, 1 ; invalidate ICache entry
ADD a1, a1, lr
CMP a1, a2
BLO %BT10
MCR p15, 0, a1, c7, c5, 6 ; invalidate branch predictors
myDSB ; Wait for cache/branch invalidation to complete
myISB ; Ensure that the effects of the completed cache/branch invalidation are visible
Pull "a3,pc"
MMU_Changing_WB_CR7_Lx ROUT
Push "lr"
myDSB ; Ensure the page table write has actually completed
myISB ; Also required
BL Cache_CleanInvalidateAll_WB_CR7_Lx
MOV a1, #0
MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB
MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB
myDSB ; Wait for TLB invalidation to complete
myISB ; Ensure that the effects are visible
Pull "pc"
; a1 = page affected (page aligned address)
;
MMU_ChangingEntry_WB_CR7_Lx ROUT
Push "a2, lr"
myDSB ; Ensure the page table write has actually completed
myISB ; Also required
MOV lr, #0
LDRB lr, [lr, #DCache_LineLen] ; log2(line len)-2
MOV a2, #4
@@ -2090,14 +2124,25 @@ MMU_ChangingEntry_WB_CR7_Lx ROUT
ADD a2, a1, #PageSize
10
MCR p15, 0, a1, c7, c14, 1 ; clean&invalidate DCache entry to PoC
MCR p15, 0, a1, c7, c5, 1 ; invalidate ICache entry to PoC
ADD a1, a1, lr
CMP a1, a2
BLO %BT10
BNE %BT10
myDSB ; Wait for clean to complete
MOV lr, #0
MCR p15, 0, lr, c7, c10, 4 ; drain WBuffer
LDRB lr, [lr, #ICache_LineLen] ; Use ICache line length, just in case D&I length differ
MOV a1, #4
MOV lr, a1, LSL lr
SUB a1, a2, #PageSize ; Get start address back
10
MCR p15, 0, a1, c7, c5, 1 ; invalidate ICache entry to PoC
ADD a1, a1, lr
CMP a1, a2
BNE %BT10
SUB a1, a1, #PageSize
MCR p15, 0, a1, c8, c7, 1 ; invalidate DTLB and ITLB
MCR p15, 0, a1, c7, c5, 6 ; invalidate branch predictors
myDSB
myISB
Pull "a2, pc"
; a1 = first page affected (page aligned address)
@@ -2105,6 +2150,8 @@ MMU_ChangingEntry_WB_CR7_Lx ROUT
;
MMU_ChangingEntries_WB_CR7_Lx ROUT
Push "a2, a3, lr"
myDSB ; Ensure the page table write has actually completed
myISB ; Also required
MOV a2, a2, LSL #Log2PageSize
MOV a3, #0
LDR a3, [a3, #DCache_RangeThreshold] ;check whether cheaper to do global clean
@@ -2117,32 +2164,47 @@ MMU_ChangingEntries_WB_CR7_Lx ROUT
MOV a3, lr, LSL a3
MOV lr, a1
10
MCR p15, 0, a1, c7, c14, 1 ; clean&invalidate DCache entry to PoC
MCR p15, 0, a1, c7, c5, 1 ; invalidate ICache entry to PoC
MCR p15, 0, a1, c7, c14, 1 ; clean&invalidate DCache entry to PoC
ADD a1, a1, a3
CMP a1, a2
BLO %BT10
MOV a1, #0
MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer
MOV a1, lr ; restore start address
20
MCR p15, 0, a1, c8, c7, 1 ; invalidate DTLB & ITLB entry
ADD a1, a1, #PageSize
BNE %BT10
myDSB ; Wait for clean to complete
MOV a3, #0
LDRB a3, [a3, #ICache_LineLen] ; Use ICache line length, just in case D&I length differ
MOV a1, #4
MOV a3, a1, LSL a3
MOV a1, lr ; Get start address back
10
MCR p15, 0, a1, c7, c5, 1 ; invalidate ICache entry to PoC
ADD a1, a1, a3
CMP a1, a2
BNE %BT10
20
MCR p15, 0, lr, c8, c7, 1 ; invalidate DTLB & ITLB entry
ADD lr, lr, #PageSize
CMP lr, a2
BLO %BT20
MCR p15, 0, a1, c7, c5, 6 ; invalidate branch predictors
myDSB
myISB
Pull "a2, a3, pc"
;
30
BL Cache_CleanInvalidateAll_WB_CR7_Lx
MOV a1, #0
MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB
myDSB ; Wait for TLB invalidation to complete
myISB ; Ensure that the effects are visible
Pull "a2, a3, pc"
; a1 = first page affected (page aligned address)
; a2 = number of pages
;
MMU_ChangingUncachedEntries_WB_CR7_Lx ROUT
myDSB ; Ensure the page table write has actually completed
myISB ; Also required
CMP a2, #32 ; arbitrary-ish threshold
MCRHS p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB
BHS %FT20
Push "a2"
10
@@ -2151,10 +2213,10 @@ MMU_ChangingUncachedEntries_WB_CR7_Lx ROUT
SUBS a2, a2, #1
BNE %BT10
Pull "a2"
MOV pc, lr
;
20
MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB
MCR p15, 0, a1, c7, c5, 6 ; invalidate branch predictors
myDSB
myISB
MOV pc, lr
; --------------------------------------------------------------------------
@@ -76,6 +76,9 @@ RISCOS_InitARM
; Off we go.
ARM_write_control a3
[ MEMM_Type = "VMSAv6"
myISB ; Ensure the update went through
]
; In case it wasn't a hard reset
MOV a2, #0
@@ -86,11 +89,18 @@ RISCOS_InitARM
CMP a1, #ARMv3
MCREQ ARM_config_cp,0,a2,ARMv3_TLBflush_reg,C0 ; flush TLBs
MCRNE ARM_config_cp,0,a2,ARMv4_TLB_reg,C7 ; flush TLBs
[ MEMM_Type = "VMSAv6"
myDSB
myISB
]
; We assume that ARMs with an I cache can have it enabled while the MMU is off.
[ :LNOT:CacheOff
ORRNE a3, a3, #MMUC_I
ARM_write_control a3, NE ; whoosh
[ MEMM_Type = "VMSAv6"
myISB ; Ensure the update went through
]
]
; Check if we are in a 26-bit mode.
@@ -679,6 +689,7 @@ MMU_activation_zone
CMP ip, #ARMv6
MCRGE p15, 0, lr, c2, c0, 2 ; Ensure only TTBR0 is used (v6)
MCRGT p15, 0, lr, c12, c0, 0 ; Ensure exception vector base is 0 (Cortex)
myISB
ORRGE v5, v5, #MMUC_XP ; Extended pages enabled (v6)
BICGT v5, v5, #MMUC_TRE+MMUC_AFE ; TEX remap, Access Flag disabled (Cortex)
BICGE v5, v5, #MMUC_EE+MMUC_TE+MMUC_VE ; Exceptions = nonvectored LE ARM
@@ -689,6 +700,9 @@ MMU_activation_zone
]
MMUon_instr
ARM_write_control v5
[ MEMM_Type = "VMSAv6"
myISB ; Just in case
]
MOVEQ sp, v5
MSREQ CPSR_c, #F32_bit+I32_bit+SVC32_mode
@@ -697,6 +711,10 @@ MMUon_instr
MCRNE ARM_config_cp,0,lr,ARMv4_cache_reg,C7 ; (works on ARMv3)
MCREQ p15, 0, lr, c7, c5, 0 ; invalidate instruction cache
MCREQ p15, 0, lr, c8, c7, 0 ; invalidate TLBs
MCREQ p15, 0, lr, c7, c5, 6 ; invalidate branch predictor
[ MEMM_Type = "VMSAv6"
myISB ; Ensure below branch works
]
BLEQ HAL_InvalidateCache_ARMvF ; invalidate data cache (and instruction+TLBs again!)
MOV ip, #4_0000000000000001 ; domain 0 client only
@@ -1032,6 +1050,9 @@ HAL_InvalidateCache_ARMvF
MOV r8, #0
MCR p15, 0, r8, c7, c5, 0 ; invalidate instruction cache
MCR p15, 0, r8, c8, c7, 0 ; invalidate TLBs
MCR p15, 0, r8, c7, c5, 6 ; invalidate branch target predictor
myDSB ; Wait for completion
myISB
MRC p15, 1, r8, c0, c0, 1 ; Cache level ID register
BIC r8, r8, #&FF000000 ; Discard unification/coherency bits
MOV r9, #0 ; Current cache level
@@ -1039,6 +1060,7 @@ HAL_InvalidateCache_ARMvF
TST r8, #7 ; Get flags
BEQ %FT10 ; Cache clean complete
MCR p15, 2, r9, c0, c0, 0 ; Program cache size selection register
myISB
MRC p15, 1, r10, c0, c0, 0 ; Get size info
AND r11, r10, #&7 ; log2(Line size)-2
BIC r10, r10, #&F0000007 ; Clear flags & line size
@@ -1067,6 +1089,14 @@ HAL_InvalidateCache_ARMvF
MOVS r8, r8, LSR #3
BNE %BT20
10
; Wait for clean to complete
myDSB
MOV r8, #0
MCR p15, 0, r8, c7, c5, 0 ; invalidate instruction cache
MCR p15, 0, r8, c8, c7, 0 ; invalidate TLBs
MCR p15, 0, r8, c7, c5, 6 ; invalidate branch target predictor
myDSB ; Wait for completion
myISB
; All caches clean; switch back to SVC, then recover the stored PSR from ip (although we can be fairly certain we started in SVC anyway)
MSR CPSR_c, #F32_bit+I32_bit+SVC32_mode
MSR CPSR_cxsf, ip
@@ -1157,7 +1157,11 @@ DefaultCMOSTable ; list of non-zero options wanted :
]
= AlarmAndTimeCMOS,2_00010000 ; !Alarm autosave on
= FSLockCMOS+5, &EA ; Checksum for no password
[ M_CortexA8
= CDROMFSCMOS, &C0 ; drives = 0, buffer size = 256K
|
= CDROMFSCMOS, &C1 ; drives = 1, buffer size = 256K
]
]
= &FF
ALIGN
@@ -435,6 +435,7 @@ MMUControl_ModifyControl ROUT
Pull "r0"
15
ARM_write_control r2
myISB ; Must be running on >=ARMv6, so perform ISB to ensure CP15 write is complete
BIC lr, r1, r2 ; lr = bits going from 1->0
TST lr, #MMUC_C ; if cache turning off then flush cache afterwards
TSTNE lr, #MMUC_I