From de8e610e2e7f9b5c9f274e64111f1351b9fbc7b3 Mon Sep 17 00:00:00 2001
From: Jeffrey Lee <jlee@gitlab.riscosopen.org>
Date: Wed, 23 Jun 2010 22:34:28 +0000
Subject: [PATCH] Update Cortex kernel to use correct instruction/memory
 barriers and to perform branch target predictor maintenance. Plus tweak
 default CMOS settings.

Detail:
  hdr/Copro15ops - Added myISB, myDSB, myDMB macros to provide barrier functionality on ARMv6+
  s/ARMops, s/HAL, s/VMSAv6, s/AMBControl/memmap - Correct barrier operations are now performed on ARMv6+ following CP15 writes. Branch predictors are now also maintained properly.
  s/NewReset - Change default CMOS settings so number of CDFS drives is 0 in Cortex builds. Fixes rogue CDFS icon on iconbar.
Admin:
  Tested on rev C2 beagleboard
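
  For reference, the usage pattern these changes introduce around a page
  table write is roughly the following (a minimal sketch only - the STR
  and the register choices are illustrative, not code from this patch):

        STR     r0, [r1]                ; update the page table entry
        myDSB                           ; ensure the write has completed
        myISB                           ;   before starting maintenance
        MCR     p15, 0, r2, c8, c7, 1   ; invalidate ITLB & DTLB entry
        MCR     p15, 0, r2, c7, c5, 6   ; invalidate branch predictors
        myDSB                           ; wait for maintenance to complete
        myISB                           ; make the effects visible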


Version 5.35, 4.79.2.98.2.27. Tagged as 'Kernel-5_35-4_79_2_98_2_27'
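
For reviewers checking the hand-assembled DCI words in hdr/Copro15ops: they
follow the regular ARMv7 barrier encoding (my reading of the ARM ARM, given
here as a reference only, not text from the original log):

        ; &F57FF0xy - x selects the barrier (4 = DSB, 5 = DMB, 6 = ISB),
        ; y is the option field: SY = &F, ST = &E, ISH = &B, ISHST = &A,
        ; NSH = &7, NSHST = &6, OSH = &3, OSHST = &2 (ISB allows only SY)
        DCI     &F57FF04F               ; DSB SY
        DCI     &F57FF05B               ; DMB ISH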
---
 VersionASM          |  10 ++--
 VersionNum          |  14 ++---
 hdr/Copro15ops      | 126 +++++++++++++++++++++++++++++++++++++++++
 s/AMBControl/memmap |   3 +
 s/ARMops            | 134 ++++++++++++++++++++++++++++++++------------
 s/HAL               |  30 ++++++++++
 s/NewReset          |   4 ++
 s/VMSAv6            |   1 +
 8 files changed, 274 insertions(+), 48 deletions(-)

diff --git a/VersionASM b/VersionASM
index 6e445ad..b722a2d 100644
--- a/VersionASM
+++ b/VersionASM
@@ -13,11 +13,11 @@
                         GBLS    Module_ComponentPath
 Module_MajorVersion     SETS    "5.35"
 Module_Version          SETA    535
-Module_MinorVersion     SETS    "4.79.2.98.2.26"
-Module_Date             SETS    "22 Apr 2010"
-Module_ApplicationDate  SETS    "22-Apr-10"
+Module_MinorVersion     SETS    "4.79.2.98.2.27"
+Module_Date             SETS    "23 Jun 2010"
+Module_ApplicationDate  SETS    "23-Jun-10"
 Module_ComponentName    SETS    "Kernel"
 Module_ComponentPath    SETS    "castle/RiscOS/Sources/Kernel"
-Module_FullVersion      SETS    "5.35 (4.79.2.98.2.26)"
-Module_HelpVersion      SETS    "5.35 (22 Apr 2010) 4.79.2.98.2.26"
+Module_FullVersion      SETS    "5.35 (4.79.2.98.2.27)"
+Module_HelpVersion      SETS    "5.35 (23 Jun 2010) 4.79.2.98.2.27"
                         END
diff --git a/VersionNum b/VersionNum
index 447acaa..5c7d06c 100644
--- a/VersionNum
+++ b/VersionNum
@@ -5,19 +5,19 @@
  *
  */
 #define Module_MajorVersion_CMHG        5.35
-#define Module_MinorVersion_CMHG        4.79.2.98.2.26
-#define Module_Date_CMHG                22 Apr 2010
+#define Module_MinorVersion_CMHG        4.79.2.98.2.27
+#define Module_Date_CMHG                23 Jun 2010
 
 #define Module_MajorVersion             "5.35"
 #define Module_Version                  535
-#define Module_MinorVersion             "4.79.2.98.2.26"
-#define Module_Date                     "22 Apr 2010"
+#define Module_MinorVersion             "4.79.2.98.2.27"
+#define Module_Date                     "23 Jun 2010"
 
-#define Module_ApplicationDate          "22-Apr-10"
+#define Module_ApplicationDate          "23-Jun-10"
 
 #define Module_ComponentName            "Kernel"
 #define Module_ComponentPath            "castle/RiscOS/Sources/Kernel"
 
-#define Module_FullVersion              "5.35 (4.79.2.98.2.26)"
-#define Module_HelpVersion              "5.35 (22 Apr 2010) 4.79.2.98.2.26"
+#define Module_FullVersion              "5.35 (4.79.2.98.2.27)"
+#define Module_HelpVersion              "5.35 (23 Jun 2010) 4.79.2.98.2.27"
 #define Module_LibraryVersionInfo       "5:35"
diff --git a/hdr/Copro15ops b/hdr/Copro15ops
index 6a0c7e8..84dc83d 100644
--- a/hdr/Copro15ops
+++ b/hdr/Copro15ops
@@ -550,5 +550,131 @@ C15 CN 15
         MCR$cond ARM_config_cp,0,R0,ARMA_TCI_reg,C2,2
         MEND
 
+;
+; -------------- Additional ARMv7 stuff -----------------------------------
+;
+
+; Provided here are ISB, DSB and DMB macros suitable for ARMv6+
+; Although ARMv4 & v5 do provide CP15 ops that are compatible with the ARMv6 ops, it's implementation-defined whether each processor implements them or not (and the ops are unpredictable if unimplemented)
+; So to play it safe these macros will complain if used on pre-ARMv6
+
+; Instruction Synchronisation Barrier - required on ARMv6+ to ensure the effects of the following operations are visible to subsequent instructions:
+; * Completed cache, TLB & branch predictor maintenance operations
+; * CP14/CP15 writes
+        MACRO
+        myISB $cond,$temp
+      [ NoARMv6
+        ! 1, "Don't know what to do on pre-ARMv6!"
+      |
+       [ NoARMv7
+        ; ARMv6, use legacy MCR op
+        MOV$cond $temp,#0
+        MCR$cond p15,0,$temp,c7,c5,4
+       |
+        ; ARMv7+, use ISB instruction (saves on temp register, but instruction is unconditional)
+        ; Shouldn't hurt too much if we just ignore the condition code
+        DCI &F57FF06F ; ISB SY
+       ]
+      ]
+        MEND
+
+; Data Synchronisation Barrier - aka drain write buffer/data write barrier. Stalls the pipeline until all preceding memory accesses (including cache/TLB/BTC ops) complete.
+        MACRO
+        myDSB $cond,$temp,$option
+     [ NoARMv6
+        ! 1, "Don't know what to do on pre-ARMv6!"
+     |
+      [ NoARMv7
+        ; pre-ARMv7, use legacy MCR op
+        MOV$cond $temp,#0
+        MCR$cond p15,0,$temp,c7,c10,4
+      |
+        ; ARMv7+, use DSB instruction
+       [ "$option"="SY" :LOR: "$option"=""
+        DCI &F57FF04F ; DSB SY
+       |
+       [ "$option"="ST" :LOR: "$option"="SYST"
+        DCI &F57FF04E ; DSB ST
+       |
+       [ "$option"="ISH"
+        DCI &F57FF04B ; DSB ISH
+       |
+       [ "$option"="ISHST"
+        DCI &F57FF04A ; DSB ISHST
+       |
+       [ "$option"="NSH"
+        DCI &F57FF047 ; DSB NSH
+       |
+       [ "$option"="NSHST"
+        DCI &F57FF046 ; DSB NSHST
+       |
+       [ "$option"="OSH"
+        DCI &F57FF043 ; DSB OSH
+       |
+       [ "$option"="OSHST"
+        DCI &F57FF042 ; DSB OSHST
+       |
+        ! 1, "Unrecognised DSB option"
+       ]
+       ]
+       ]
+       ]
+       ]
+       ]
+       ]
+       ]
+      ]
+     ]
+        MEND
+
+; Data Memory Barrier - A more lightweight DSB; ensures memory accesses behave correctly without stalling the pipeline to wait for preceding accesses to complete. I.e. it's only good for synchronising load/store instructions.
+        MACRO
+        myDMB $cond,$temp,$option
+     [ NoARMv6
+        ! 1, "Don't know what to do on pre-ARMv6!"
+     |
+      [ NoARMv7
+        ; ARMv6, use legacy MCR op
+        MOV$cond $temp,#0
+        MCR$cond p15,0,$temp,c7,c10,5
+      |
+        ; ARMv7+, use DMB instruction
+       [ "$option"="SY" :LOR: "$option"=""
+        DCI &F57FF05F ; DMB SY
+       |
+       [ "$option"="ST" :LOR: "$option"="SYST"
+        DCI &F57FF05E ; DMB ST
+       |
+       [ "$option"="ISH"
+        DCI &F57FF05B ; DMB ISH
+       |
+       [ "$option"="ISHST"
+        DCI &F57FF05A ; DMB ISHST
+       |
+       [ "$option"="NSH"
+        DCI &F57FF057 ; DMB NSH
+       |
+       [ "$option"="NSHST"
+        DCI &F57FF056 ; DMB NSHST
+       |
+       [ "$option"="OSH"
+        DCI &F57FF053 ; DMB OSH
+       |
+       [ "$option"="OSHST"
+        DCI &F57FF052 ; DMB OSHST
+       |
+        ! 1, "Unrecognised DMB option"
+       ]
+       ]
+       ]
+       ]
+       ]
+       ]
+       ]
+       ]
+      ]
+     ]
+        MEND
+
 
     END
diff --git a/s/AMBControl/memmap b/s/AMBControl/memmap
index 3a256d2..701b556 100644
--- a/s/AMBControl/memmap
+++ b/s/AMBControl/memmap
@@ -158,6 +158,9 @@ AMB_LazyFixUp ROUT
         MOV     lr,r4                                    ;restore return address
         ARM_write_FAR r5                                 ;restore FAR
         ARM_write_FSR r6                                 ;restore FSR
+      [ MEMM_Type = "VMSAv6"
+        myISB ; Not sure if this is necessary or not; do it just in case
+      ]
 ;
 90
         MOVS    r0,#0
diff --git a/s/ARMops b/s/ARMops
index 5d84755..8973890 100644
--- a/s/ARMops
+++ b/s/ARMops
@@ -1835,9 +1835,6 @@ MMU_ChangingUncachedEntries_WB_Cal_LD ROUT
 ; Cache_Lx_ITable = Cache size identification register for all 8 instruction caches
 
 Cache_CleanAll_WB_CR7_Lx ROUT
-;
-; Currently disables interrupts to allow safe programming and reading of cache size selection register
-;
 ; Clean cache by traversing all sets and ways for all data caches
         Push    "a2,a3,a4,v1,v2,v3,v4,v5,lr"
         MOV     lr, #ZeroPage
@@ -1877,7 +1874,7 @@ Cache_CleanAll_WB_CR7_Lx ROUT
         BNE     %BT20
 10
         MOV     a1, #0
-        MCR     p15, 0, a1, c7, c10, 4           ; drain WBuffer
+        myDSB ; Wait for cache cleaning to complete
         Pull    "a2,a3,a4,v1,v2,v3,v4,v5,pc"
 
 
@@ -1923,8 +1920,11 @@ Cache_CleanInvalidateAll_WB_CR7_Lx ROUT
         BNE     %BT20
 10
         MOV     a1, #0
-        MCR     p15, 0, a1, c7, c10, 4           ; drain WBuffer
-        MCR     p15, 0, a1, c7, c5, 0            ; invalidate ICache
+        myDSB                         ; Wait for cache clean to complete
+        MCR     p15, 0, a1, c7, c5, 0 ; invalidate ICache
+        MCR     p15, 0, a1, c7, c5, 6 ; invalidate branch predictors
+        myDSB                         ; Wait for cache/branch invalidation to complete
+        myISB                         ; Ensure that the effects of the completed cache/branch invalidation are visible
         Pull    "a2,a3,a4,v1,v2,v3,v4,v5,pc"
 
 
@@ -1970,7 +1970,11 @@ Cache_InvalidateAll_WB_CR7_Lx ROUT
         BNE     %BT20
 10
         MOV     a1, #0
-        MCR     p15, 0, a1, c7, c5, 0            ; invalidate ICache
+        myDSB                         ; Wait for invalidation to complete
+        MCR     p15, 0, a1, c7, c5, 0 ; invalidate ICache
+        MCR     p15, 0, a1, c7, c5, 6 ; invalidate branch predictors
+        myDSB                         ; Wait for cache/branch invalidation to complete
+        myISB                         ; Ensure that the effects of the completed cache/branch invalidation are visible
         Pull    "a2,a3,a4,v1,v2,v3,v4,v5,pc"
 
 
@@ -1980,30 +1984,40 @@ Cache_RangeThreshold_WB_CR7_Lx ROUT
         MOV     pc, lr
 
 
-TLB_InvalidateAll_WB_CR7_Lx ROUT
 MMU_ChangingUncached_WB_CR7_Lx
+        myDSB ; Ensure the page table write has actually completed
+        myISB ; Also required
+TLB_InvalidateAll_WB_CR7_Lx ROUT
         MOV     a1, #0
-        MCR     p15, 0, a1, c8, c7, 0           ; invalidate ITLB and DTLB
+        MCR     p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB
+        MCR     p15, 0, a1, c7, c5, 6 ; invalidate branch predictors
+        myDSB                         ; Wait for cache/branch invalidation to complete
+        myISB                         ; Ensure that the effects of the completed cache/branch invalidation are visible
         MOV     pc, lr
 
 
 ; a1 = page affected (page aligned address)
 ;
-TLB_InvalidateEntry_WB_CR7_Lx ROUT
 MMU_ChangingUncachedEntry_WB_CR7_Lx
-        MCR     p15, 0, a1, c8, c7, 1           ; invalidate ITLB & DTLB entry
+        myDSB ; Ensure the page table write has actually completed
+        myISB ; Also required
+TLB_InvalidateEntry_WB_CR7_Lx ROUT
+        MCR     p15, 0, a1, c8, c7, 1 ; invalidate ITLB & DTLB entry
+        MCR     p15, 0, a1, c7, c5, 7 ; invalidate branch predictor entry
+        myDSB                         ; Wait for cache/branch invalidation to complete
+        myISB                         ; Ensure that the effects of the completed cache/branch invalidation are visible
         MOV     pc, lr
 
 
 WriteBuffer_Drain_WB_CR7_Lx ROUT
-        MOV     a1, #0
-        MCR     p15, 0, a1, c7, c10, 4          ; drain WBuffer
+        myDSB ; DSB is the new name for write buffer draining
+        myISB ; Also do ISB for extra paranoia
         MOV     pc, lr
 
 
 IMB_Full_WB_CR7_Lx ROUT
 ;
-; do: clean DCache; drain WBuffer, invalidate ICache
+; do: clean DCache; drain WBuffer, invalidate ICache/branch predictor
 ; Luckily, we only need to clean as far as the level of unification
 ;
         Push    "a2,a3,a4,v1,v2,v3,v4,v5,lr"
@@ -2045,8 +2059,11 @@ IMB_Full_WB_CR7_Lx ROUT
         BGE     %BT20
 10
         MOV     a1, #0
-        MCR     p15, 0, a1, c7, c10, 4           ; drain WBuffer (is this required?)
-        MCR     p15, 0, a1, c7, c5, 0           ; invalidate ICache
+        myDSB                         ; Wait for clean to complete
+        MCR     p15, 0, a1, c7, c5, 0 ; invalidate ICache
+        MCR     p15, 0, a1, c7, c5, 6 ; invalidate branch predictors
+        myDSB                         ; Wait for cache/branch invalidation to complete
+        myISB                         ; Ensure that the effects of the completed cache/branch invalidation are visible
         Pull    "a2,a3,a4,v1,v2,v3,v4,v5,pc"
 
 ;  a1 = start address (inclusive, cache line aligned)
@@ -2056,33 +2073,50 @@ IMB_Range_WB_CR7_Lx ROUT
         SUB     a2, a2, a1
         CMP     a2, #32*1024 ; Maximum L1 cache size on Cortex-A8 is 32K, use that to guess what approach to take
         ADD     a2, a2, a1
+        CMPLO   a1, a2 ; The routine below will fail if the end address wraps around, so just do IMB_Full instead
         BHS     IMB_Full_WB_CR7_Lx
-        Push    "a3,lr"
+        Push    "a1,a3,lr"
         MOV     lr, #0
         LDRB    lr, [lr, #DCache_LineLen] ; log2(line len)-2
         MOV     a3, #4
         MOV     lr, a3, LSL lr
 10
         MCR     p15, 0, a1, c7, c11, 1           ; clean DCache entry by VA to PoU
-        MCR     p15, 0, a1, c7, c5, 1            ; invalidate ICache entry (to PoC - is this bad?)
         ADD     a1, a1, lr
         CMP     a1, a2
         BLO     %BT10
-        MOV     a1, #0
-        MCR     p15, 0, a1, c7, c10, 4           ; drain WBuffer (required?)
+        myDSB ; Wait for clean to complete
+        Pull    "a1" ; Get start address back
+        MOV     lr, #0
+        LDRB    lr, [lr, #ICache_LineLen] ; Use ICache line length, in case the D & I line lengths differ
+        MOV     lr, a3, LSL lr
+10
+        MCR     p15, 0, a1, c7, c5, 1            ; invalidate ICache entry
+        ADD     a1, a1, lr
+        CMP     a1, a2
+        BLO     %BT10
+        MCR     p15, 0, a1, c7, c5, 6 ; invalidate branch predictors
+        myDSB                         ; Wait for cache/branch invalidation to complete
+        myISB                         ; Ensure that the effects of the completed cache/branch invalidation are visible
         Pull    "a3,pc"
 
 MMU_Changing_WB_CR7_Lx ROUT
         Push    "lr"
+        myDSB ; Ensure the page table write has actually completed
+        myISB ; Also required
         BL      Cache_CleanInvalidateAll_WB_CR7_Lx
         MOV     a1, #0
-        MCR     p15, 0, a1, c8, c7, 0           ; invalidate ITLB and DTLB
+        MCR     p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB
+        myDSB                         ; Wait for TLB invalidation to complete
+        myISB                         ; Ensure that the effects are visible
         Pull    "pc"
 
 ; a1 = page affected (page aligned address)
 ;
 MMU_ChangingEntry_WB_CR7_Lx ROUT
         Push    "a2, lr"
+        myDSB ; Ensure the page table write has actually completed
+        myISB ; Also required
         MOV     lr, #0
         LDRB    lr, [lr, #DCache_LineLen] ; log2(line len)-2
         MOV     a2, #4
@@ -2090,14 +2124,25 @@ MMU_ChangingEntry_WB_CR7_Lx ROUT
         ADD     a2, a1, #PageSize
 10
         MCR     p15, 0, a1, c7, c14, 1          ; clean&invalidate DCache entry to PoC
-        MCR     p15, 0, a1, c7, c5, 1           ; invalidate ICache entry to PoC
         ADD     a1, a1, lr
         CMP     a1, a2
-        BLO     %BT10
+        BNE     %BT10
+        myDSB ; Wait for clean to complete
         MOV     lr, #0
-        MCR     p15, 0, lr, c7, c10, 4          ; drain WBuffer
+        LDRB    lr, [lr, #ICache_LineLen] ; Use ICache line length, in case the D & I line lengths differ
+        MOV     a1, #4
+        MOV     lr, a1, LSL lr
+        SUB     a1, a2, #PageSize ; Get start address back
+10
+        MCR     p15, 0, a1, c7, c5, 1           ; invalidate ICache entry to PoC
+        ADD     a1, a1, lr
+        CMP     a1, a2
+        BNE     %BT10
         SUB     a1, a1, #PageSize
         MCR     p15, 0, a1, c8, c7, 1           ; invalidate DTLB and ITLB
+        MCR     p15, 0, a1, c7, c5, 6           ; invalidate branch predictors
+        myDSB
+        myISB
         Pull    "a2, pc"
 
 ; a1 = first page affected (page aligned address)
@@ -2105,6 +2150,8 @@ MMU_ChangingEntry_WB_CR7_Lx ROUT
 ;
 MMU_ChangingEntries_WB_CR7_Lx ROUT
         Push    "a2, a3, lr"
+        myDSB ; Ensure the page table write has actually completed
+        myISB ; Also required
         MOV     a2, a2, LSL #Log2PageSize
         MOV     a3, #0
         LDR     a3, [a3, #DCache_RangeThreshold]   ;check whether cheaper to do global clean
@@ -2117,32 +2164,47 @@ MMU_ChangingEntries_WB_CR7_Lx ROUT
         MOV     a3, lr, LSL a3
         MOV     lr, a1
 10
-        MCR     p15, 0, a1, c7, c14, 1             ; clean&invalidate DCache entry to PoC
-        MCR     p15, 0, a1, c7, c5, 1              ; invalidate ICache entry to PoC
+        MCR     p15, 0, a1, c7, c14, 1          ; clean&invalidate DCache entry to PoC
         ADD     a1, a1, a3
         CMP     a1, a2
-        BLO     %BT10
-        MOV     a1, #0
-        MCR     p15, 0, a1, c7, c10, 4             ; drain WBuffer
-        MOV     a1, lr                             ; restore start address
-20
-        MCR     p15, 0, a1, c8, c7, 1              ; invalidate DTLB & ITLB entry
-        ADD     a1, a1, #PageSize
+        BNE     %BT10
+        myDSB ; Wait for clean to complete
+        MOV     a3, #0
+        LDRB    a3, [a3, #ICache_LineLen] ; Use ICache line length, in case the D & I line lengths differ
+        MOV     a1, #4
+        MOV     a3, a1, LSL a3
+        MOV     a1, lr ; Get start address back
+10
+        MCR     p15, 0, a1, c7, c5, 1           ; invalidate ICache entry to PoC
+        ADD     a1, a1, a3
         CMP     a1, a2
+        BNE     %BT10
+20
+        MCR     p15, 0, lr, c8, c7, 1              ; invalidate DTLB & ITLB entry
+        ADD     lr, lr, #PageSize
+        CMP     lr, a2
         BLO     %BT20
+        MCR     p15, 0, a1, c7, c5, 6           ; invalidate branch predictors
+        myDSB
+        myISB
         Pull    "a2, a3, pc"
 ;
 30
         BL      Cache_CleanInvalidateAll_WB_CR7_Lx
         MOV     a1, #0
         MCR     p15, 0, a1, c8, c7, 0              ; invalidate ITLB and DTLB
+        myDSB                         ; Wait for TLB invalidation to complete
+        myISB                         ; Ensure that the effects are visible
         Pull    "a2, a3, pc"
 
 ; a1 = first page affected (page aligned address)
 ; a2 = number of pages
 ;
 MMU_ChangingUncachedEntries_WB_CR7_Lx ROUT
+        myDSB ; Ensure the page table write has actually completed
+        myISB ; Also required
         CMP     a2, #32                            ; arbitrary-ish threshold
+        MCRHS   p15, 0, a1, c8, c7, 0              ; invalidate ITLB and DTLB
         BHS     %FT20
         Push    "a2"
 10
@@ -2151,10 +2213,10 @@ MMU_ChangingUncachedEntries_WB_CR7_Lx ROUT
         SUBS    a2, a2, #1
         BNE     %BT10
         Pull    "a2"
-        MOV     pc, lr
-;
 20
-        MCR     p15, 0, a1, c8, c7, 0              ; invalidate ITLB and DTLB
+        MCR     p15, 0, a1, c7, c5, 6           ; invalidate branch predictors
+        myDSB
+        myISB
         MOV     pc, lr
 
 ; --------------------------------------------------------------------------
diff --git a/s/HAL b/s/HAL
index 7fa6eee..3f9e172 100644
--- a/s/HAL
+++ b/s/HAL
@@ -76,6 +76,9 @@ RISCOS_InitARM
 
         ; Off we go.
         ARM_write_control a3
+ [ MEMM_Type = "VMSAv6"
+        myISB ; Ensure the update went through
+ ]
 
         ; In case it wasn't a hard reset
         MOV     a2, #0
@@ -86,11 +89,18 @@ RISCOS_InitARM
         CMP     a1, #ARMv3
         MCREQ   ARM_config_cp,0,a2,ARMv3_TLBflush_reg,C0        ; flush TLBs
         MCRNE   ARM_config_cp,0,a2,ARMv4_TLB_reg,C7             ; flush TLBs
+ [ MEMM_Type = "VMSAv6"
+        myDSB
+        myISB
+ ]
 
         ; We assume that ARMs with an I cache can have it enabled while the MMU is off.
         [ :LNOT:CacheOff
         ORRNE   a3, a3, #MMUC_I
         ARM_write_control a3, NE                                ; whoosh
+ [ MEMM_Type = "VMSAv6"
+        myISB ; Ensure the update went through
+ ]
         ]
 
         ; Check if we are in a 26-bit mode.
@@ -679,6 +689,7 @@ MMU_activation_zone
         CMP     ip, #ARMv6
         MCRGE   p15, 0, lr, c2, c0, 2           ; Ensure only TTBR0 is used (v6)
         MCRGT   p15, 0, lr, c12, c0, 0          ; Ensure exception vector base is 0 (Cortex)
+        myISB
         ORRGE   v5, v5, #MMUC_XP ; Extended pages enabled (v6)
         BICGT   v5, v5, #MMUC_TRE+MMUC_AFE ; TEX remap, Access Flag disabled (Cortex)
         BICGE   v5, v5, #MMUC_EE+MMUC_TE+MMUC_VE ; Exceptions = nonvectored LE ARM
@@ -689,6 +700,9 @@ MMU_activation_zone
   ]
 MMUon_instr
         ARM_write_control v5
+  [ MEMM_Type = "VMSAv6"
+        myISB ; Just in case
+  ]
         MOVEQ   sp, v5
         MSREQ   CPSR_c, #F32_bit+I32_bit+SVC32_mode
 
@@ -697,6 +711,10 @@ MMUon_instr
         MCRNE   ARM_config_cp,0,lr,ARMv4_cache_reg,C7           ; (works on ARMv3)
         MCREQ   p15, 0, lr, c7, c5, 0           ; invalidate instruction cache
         MCREQ   p15, 0, lr, c8, c7, 0           ; invalidate TLBs
+        MCREQ   p15, 0, lr, c7, c5, 6           ; invalidate branch predictor
+  [ MEMM_Type = "VMSAv6"
+        myISB ; Ensure the branch below works
+  ]
         BLEQ    HAL_InvalidateCache_ARMvF       ; invalidate data cache (and instruction+TLBs again!)
 
         MOV     ip, #4_0000000000000001                         ; domain 0 client only
@@ -1032,6 +1050,9 @@ HAL_InvalidateCache_ARMvF
         MOV     r8, #0
         MCR     p15, 0, r8, c7, c5, 0           ; invalidate instruction cache
         MCR     p15, 0, r8, c8, c7, 0           ; invalidate TLBs
+        MCR     p15, 0, r8, c7, c5, 6           ; invalidate branch target predictor
+        myDSB                                   ; Wait for completion
+        myISB
         MRC     p15, 1, r8, c0, c0, 1 ; Cache level ID register
         BIC     r8, r8, #&FF000000 ; Discard unification/coherency bits
         MOV     r9, #0 ; Current cache level
@@ -1039,6 +1060,7 @@ HAL_InvalidateCache_ARMvF
         TST     r8, #7 ; Get flags
         BEQ     %FT10 ; Cache clean complete
         MCR     p15, 2, r9, c0, c0, 0 ; Program cache size selection register
+        myISB
         MRC     p15, 1, r10, c0, c0, 0 ; Get size info
         AND     r11, r10, #&7 ; log2(Line size)-2
         BIC     r10, r10, #&F0000007 ; Clear flags & line size
@@ -1067,6 +1089,14 @@ HAL_InvalidateCache_ARMvF
         MOVS    r8, r8, LSR #3
         BNE     %BT20
 10
+        ; Wait for clean to complete
+        myDSB
+        MOV     r8, #0
+        MCR     p15, 0, r8, c7, c5, 0           ; invalidate instruction cache
+        MCR     p15, 0, r8, c8, c7, 0           ; invalidate TLBs
+        MCR     p15, 0, r8, c7, c5, 6           ; invalidate branch target predictor
+        myDSB                                   ; Wait for completion
+        myISB
         ; All caches clean; switch back to SVC, then recover the stored PSR from ip (although we can be fairly certain we started in SVC anyway)
         MSR     CPSR_c, #F32_bit+I32_bit+SVC32_mode
         MSR     CPSR_cxsf, ip
diff --git a/s/NewReset b/s/NewReset
index e7c5e1c..7ee725a 100644
--- a/s/NewReset
+++ b/s/NewReset
@@ -1157,7 +1157,11 @@ DefaultCMOSTable ; list of non-zero options wanted :
         ]
         =       AlarmAndTimeCMOS,2_00010000 ; !Alarm autosave on
         =       FSLockCMOS+5,   &EA     ; Checksum for no password
+   [ M_CortexA8
+        =       CDROMFSCMOS,    &C0     ; drives = 0, buffer size = 256K
+   |
         =       CDROMFSCMOS,    &C1     ; drives = 1, buffer size = 256K
+   ]
   ]
         =       &FF
         ALIGN
diff --git a/s/VMSAv6 b/s/VMSAv6
index 42d17f3..adba860 100644
--- a/s/VMSAv6
+++ b/s/VMSAv6
@@ -435,6 +435,7 @@ MMUControl_ModifyControl ROUT
         Pull    "r0"
 15
         ARM_write_control r2
+        myISB ; Must be running on >=ARMv6, so perform ISB to ensure CP15 write is complete
         BIC     lr, r1, r2              ; lr = bits going from 1->0
         TST     lr, #MMUC_C             ; if cache turning off then flush cache afterwards
         TSTNE   lr, #MMUC_I
-- 
GitLab