diff --git a/Makefile b/Makefile
index cf6ddf16ce3012df51785f10ca7e5db0cad37427..c533e6f260c0f530fb5ceb897832a0019c9ac079 100644
--- a/Makefile
+++ b/Makefile
@@ -70,7 +70,8 @@ EXPORTS   = ${EXP_HDR}.EnvNumbers \
             ${C_EXP_HDR}.RISCOS \
             ${C_EXP_HDR}.HALEntries \
             ${C_EXP_HDR}.HALDevice \
-            ${C_EXP_HDR}.OSEntries
+            ${C_EXP_HDR}.OSEntries \
+            ${C_EXP_HDR}.Variables
 
 #
 # Generic rules:
@@ -191,6 +192,10 @@ ${C_EXP_HDR}.OSEntries: hdr.OSEntries
 	${MKDIR} ${C_EXP_HDR}
 	perl Build:Hdr2H hdr.OSEntries $@
 
+${C_EXP_HDR}.Variables: hdr.Variables
+	${MKDIR} ${C_EXP_HDR}
+	perl Build:Hdr2H hdr.Variables $@
+
 o.Global.h.HALDevice: hdr.HALDevice
 	${MKDIR} o.Global.h
 	dir o
diff --git a/VersionASM b/VersionASM
index 678be64696ae7820e8b740bf9ba48b060b8259eb..3249b4238922df0243e53c2510bfbaaa45acca93 100644
--- a/VersionASM
+++ b/VersionASM
@@ -13,11 +13,11 @@
                         GBLS    Module_ComponentPath
 Module_MajorVersion     SETS    "5.35"
 Module_Version          SETA    535
-Module_MinorVersion     SETS    "4.79.2.98.2.37"
-Module_Date             SETS    "22 May 2011"
-Module_ApplicationDate  SETS    "22-May-11"
+Module_MinorVersion     SETS    "4.79.2.98.2.38"
+Module_Date             SETS    "04 Jun 2011"
+Module_ApplicationDate  SETS    "04-Jun-11"
 Module_ComponentName    SETS    "Kernel"
 Module_ComponentPath    SETS    "castle/RiscOS/Sources/Kernel"
-Module_FullVersion      SETS    "5.35 (4.79.2.98.2.37)"
-Module_HelpVersion      SETS    "5.35 (22 May 2011) 4.79.2.98.2.37"
+Module_FullVersion      SETS    "5.35 (4.79.2.98.2.38)"
+Module_HelpVersion      SETS    "5.35 (04 Jun 2011) 4.79.2.98.2.38"
                         END
diff --git a/VersionNum b/VersionNum
index ec4317c99f9461f12fab5444e4e288c1f2b33f92..b4d2e264d901fb1fd356fdf250465ce5bfcf44e4 100644
--- a/VersionNum
+++ b/VersionNum
@@ -5,19 +5,19 @@
  *
  */
 #define Module_MajorVersion_CMHG        5.35
-#define Module_MinorVersion_CMHG        4.79.2.98.2.37
-#define Module_Date_CMHG                22 May 2011
+#define Module_MinorVersion_CMHG        4.79.2.98.2.38
+#define Module_Date_CMHG                04 Jun 2011
 
 #define Module_MajorVersion             "5.35"
 #define Module_Version                  535
-#define Module_MinorVersion             "4.79.2.98.2.37"
-#define Module_Date                     "22 May 2011"
+#define Module_MinorVersion             "4.79.2.98.2.38"
+#define Module_Date                     "04 Jun 2011"
 
-#define Module_ApplicationDate          "22-May-11"
+#define Module_ApplicationDate          "04-Jun-11"
 
 #define Module_ComponentName            "Kernel"
 #define Module_ComponentPath            "castle/RiscOS/Sources/Kernel"
 
-#define Module_FullVersion              "5.35 (4.79.2.98.2.37)"
-#define Module_HelpVersion              "5.35 (22 May 2011) 4.79.2.98.2.37"
+#define Module_FullVersion              "5.35 (4.79.2.98.2.38)"
+#define Module_HelpVersion              "5.35 (04 Jun 2011) 4.79.2.98.2.38"
 #define Module_LibraryVersionInfo       "5:35"
diff --git a/hdr/ARMops b/hdr/ARMops
index 502ccd43555d39f3b73b4bbeabcfdb563db3557c..214e5dd1d35fa4467929745eaa439b69e5dc416f 100644
--- a/hdr/ARMops
+++ b/hdr/ARMops
@@ -41,6 +41,7 @@ ARM922T         #       1
 X80200          #       1
 X80321          #       1
 Cortex_A8       #       1
+ARM1176JZF_S    #       1
 ARMunk          *       255
 
 ; These flags are stored in ProcessorFlags and returned by OS_PlatformFeatures 0 (Read code features)
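+; e.g. a minimal sketch of reading them back from user code:
+;       MOV     r0, #0                  ; reason 0 = read code features
+;       SWI     XOS_PlatformFeatures    ; exit: r0 = flag word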
diff --git a/hdr/Copro15ops b/hdr/Copro15ops
index 124703b30bb383218f8d3904c54083543bb2a07b..7d26a82dd98261483559a8d8c89b19b15b5c9fad 100644
--- a/hdr/Copro15ops
+++ b/hdr/Copro15ops
@@ -559,36 +559,42 @@ C15 CN 15
 ; Provided here are ISB, DSB and DMB macros suitable for ARMv6+
 ; Although ARMv4 & v5 do provide CP15 ops that are compatible with the ARMv6 ops, it's implementation defined whether each processor implements the ops or not (and the ops are unpredictable if unimplemented)
 ; So to play it safe these macros will complain if used on pre-ARMv6
+; For all of these macros, set $quick to something (the call sites here use
+; "y") if the value in $temp is already zero; this cuts out a pointless MOV
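+; For example (an illustrative sketch; a1 is any corruptible register):
+;       myDSB   ,a1             ; pre-ARMv7: emits MOV a1,#0 then the MCR op
+;       myDSB   ,a1,,y          ; a1 already zero: emits just the MCR op
+; (on ARMv7+ both forms emit a single DSB instruction and no MOV at all)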
 
; Instruction Synchronisation Barrier - required on ARMv6+ to ensure the effects of the following are visible to subsequent instructions:
 ; * Completed cache, TLB & branch predictor maintenance operations
 ; * CP14/CP15 writes
         MACRO
-        myISB $cond,$temp
-      [ NoARMv6
+        myISB $cond,$temp,$option,$quick
+     [ NoARMv6
         ! 1, "Don't know what to do on pre-ARMv6!"
-      |
-       [ NoARMv7
+     |
+      [ NoARMv7
         ; ARMv6, use legacy MCR op
+       [ "$quick"="q"
         MOV$cond $temp,#0
+       ]
         MCR$cond p15,0,$temp,c7,c5,4
-       |
+      |
         ; ARMv7+, use ISB instruction (saves on temp register, but instruction is unconditional)
         ; Shouldn't hurt too much if we just ignore the condition code
         DCI &F57FF06F ; ISB SY
-       ]
       ]
+     ]
         MEND
 
; Data Synchronisation Barrier - aka drain write buffer/data write barrier. Stalls the pipeline until all preceding memory accesses (including cache/TLB/BTC ops) complete.
         MACRO
-        myDSB $cond,$temp,$option
+        myDSB $cond,$temp,$option,$quick
      [ NoARMv6
         ! 1, "Don't know what to do on pre-ARMv6!"
      |
       [ NoARMv7
         ; pre-ARMv7, use legacy MCR op
+       [ "$quick"=""
         MOV$cond $temp,#0
+       ]
         MCR$cond p15,0,$temp,c7,c10,4
       |
         ; ARMv7+, use DSB instruction
@@ -631,13 +637,15 @@ C15 CN 15
 
; Data Memory Barrier - a more lightweight DSB: ensures memory accesses behave correctly without stalling the pipeline to wait for preceding accesses to complete. I.e. it's only good for synchronising load/store instructions.
         MACRO
-        myDMB $cond,$temp,$option
+        myDMB $cond,$temp,$option,$quick
      [ NoARMv6
         ! 1, "Don't know what to do on pre-ARMv6!"
      |
       [ NoARMv7
         ; ARMv6, use legacy MCR op
+       [ "$quick"=""
         MOV$cond $temp,#0
+       ]
         MCR$cond p15,0,$temp,c7,c10,5
       |
         ; ARMv7+, use DMB instruction
diff --git a/s/AMBControl/memmap b/s/AMBControl/memmap
index 87b36e71ea067d4f384bfe0c8cf2f074c84e9de5..38d9a7003046939bf95c948b7a9e461319635832 100644
--- a/s/AMBControl/memmap
+++ b/s/AMBControl/memmap
@@ -160,7 +160,7 @@ AMB_LazyFixUp ROUT
         ARM_write_FAR r5                                 ;restore FAR
         ARM_write_FSR r6                                 ;restore FSR
       [ MEMM_Type = "VMSAv6"
-        myISB ; Not sure if this is necessary or not; do it just in case
+        myISB   ,r0 ; Not sure if this is necessary; do it just in case
       ]
 ;
 90
diff --git a/s/ARMops b/s/ARMops
index 9b3d91a647e52e98eaa22fa44eb3f15bcd1c8d88..8a8a723501286642edca4ee190a53edb0c132e4b 100644
--- a/s/ARMops
+++ b/s/ARMops
@@ -695,6 +695,7 @@ $var    SETA    $var+(CT_M_$sz:SHL:CT_M_pos)
         MEND
 
 
+; CPUDesc table for ARMv3-ARMv6
 KnownCPUTable
 ;                                                        /------Cache Type register fields-----\ 
 ;                              ID reg   Mask     Arch    Type         S  Dsz Das Dln Isz Ias Iln
@@ -717,9 +718,10 @@ KnownCPUTable
         CPUDesc ARM922T,       &029220, &0FFFF0, ARMv4T,  WB_CR7_LDa, 1,  8K, 64, 8,  8K, 64, 8
         CPUDesc X80200,        &052000, &0FFFF0, ARMv5TE, WB_Cal_LD,  1, 32K, 32, 8, 32K, 32, 8
         CPUDesc X80321,    &69052400, &FFFFF700, ARMv5TE, WB_Cal_LD,  1, 32K, 32, 8, 32K, 32, 8
+        CPUDesc ARM1176JZF_S,  &00B760, &00FFF0, ARMv6,   WB_CR7_LDa, 1, 16K, 32, 16, 16K, 32, 16
         DCD     -1
 
-; Simplified CPUDesc table for Fancy ARMs
+; Simplified CPUDesc table for ARMv7 only
 ; The cache size data is ignored
 KnownCPUTable_Fancy
         CPUDesc Cortex_A8,     &00C080, &00FFF0, ARMvF, WB_CR7_Lx, 1, 16K, 32, 16, 16K, 32, 16
@@ -744,7 +746,8 @@ KnownCPUFlags
         DCD     0,                            0    ; ARM 922T
         DCD     CPUFlag_ExtendedPages+CPUFlag_XScale,  0    ; X80200
         DCD     CPUFlag_XScale,               0    ; X80321
-        DCD     CPUFlag_ExtendedPages,        0    ; Cortex_A8
+        DCD     0,                            0    ; Cortex_A8
+        DCD     0,                            0    ; ARM1176JZF_S
 
  [ MEMM_Type = "VMSAv6"
 ; --------------------------------------------------------------------------
@@ -1878,8 +1881,7 @@ Cache_CleanAll_WB_CR7_Lx ROUT
         MOVS    a1, a1, LSR #3
         BNE     %BT20
 10
-        MOV     a1, #0
-        myDSB ; Wait for cache cleaning to complete
+        myDSB   ,a1 ; Wait for cache cleaning to complete
         Pull    "a2,a3,a4,v1,v2,v3,v4,v5,pc"
 
 
@@ -1925,11 +1927,11 @@ Cache_CleanInvalidateAll_WB_CR7_Lx ROUT
         BNE     %BT20
 10
         MOV     a1, #0
-        myDSB                         ; Wait for cache clean to complete
+        myDSB   ,a1,,y                ; Wait for cache clean to complete
         MCR     p15, 0, a1, c7, c5, 0 ; invalidate ICache
         MCR     p15, 0, a1, c7, c5, 6 ; invalidate branch predictors
-        myDSB                         ; Wait for cache/branch invalidation to complete
-        myISB                         ; Ensure that the effects of the completed cache/branch invalidation are visible
+        myDSB   ,a1,,y                ; Wait for cache/branch invalidation to complete
+        myISB   ,a1,,y                ; Ensure that the effects of the completed cache/branch invalidation are visible
         Pull    "a2,a3,a4,v1,v2,v3,v4,v5,pc"
 
 
@@ -1975,11 +1977,11 @@ Cache_InvalidateAll_WB_CR7_Lx ROUT
         BNE     %BT20
 10
         MOV     a1, #0
-        myDSB                         ; Wait for invalidation to complete
+        myDSB   ,a1,,y                ; Wait for invalidation to complete
         MCR     p15, 0, a1, c7, c5, 0 ; invalidate ICache
         MCR     p15, 0, a1, c7, c5, 6 ; invalidate branch predictors
-        myDSB                         ; Wait for cache/branch invalidation to complete
-        myISB                         ; Ensure that the effects of the completed cache/branch invalidation are visible
+        myDSB   ,a1,,y                ; Wait for cache/branch invalidation to complete
+        myISB   ,a1,,y                ; Ensure that the effects of the completed cache/branch invalidation are visible
         Pull    "a2,a3,a4,v1,v2,v3,v4,v5,pc"
 
 
@@ -1990,33 +1992,40 @@ Cache_RangeThreshold_WB_CR7_Lx ROUT
 
 
 MMU_ChangingUncached_WB_CR7_Lx
-        myDSB ; Ensure the page table write has actually completed
-        myISB ; Also required
+        myDSB   ,a1    ; Ensure the page table write has actually completed
+        myISB   ,a1,,y ; Also required
 TLB_InvalidateAll_WB_CR7_Lx ROUT
         MOV     a1, #0
         MCR     p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB
         MCR     p15, 0, a1, c7, c5, 6 ; invalidate branch predictors
-        myDSB                         ; Wait for cache/branch invalidation to complete
-        myISB                         ; Ensure that the effects of the completed cache/branch invalidation are visible
+        myDSB   ,a1,,y                ; Wait for cache/branch invalidation to complete
+        myISB   ,a1,,y                ; Ensure that the effects of the completed cache/branch invalidation are visible
         MOV     pc, lr
 
 
 ; a1 = page affected (page aligned address)
 ;
 MMU_ChangingUncachedEntry_WB_CR7_Lx
-        myDSB ; Ensure the page table write has actually completed
-        myISB ; Also required
+      [ NoARMv7
+        Push    "a2"
+        myDSB   ,a2    ; Ensure the page table write has actually completed
+        myISB   ,a2,,y ; Also required
+        Pull    "a2"
+      |
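+        ; ARMv7+: myDSB/myISB emit real DSB/ISB instructions, no temp register needed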
+        myDSB
+        myISB
+      ]
 TLB_InvalidateEntry_WB_CR7_Lx ROUT
         MCR     p15, 0, a1, c8, c7, 1 ; invalidate ITLB & DTLB entry
         MCR     p15, 0, a1, c7, c5, 6 ; invalidate branch predictors
-        myDSB                         ; Wait for cache/branch invalidation to complete
-        myISB                         ; Ensure that the effects of the completed cache/branch invalidation are visible
+        myDSB   ,a1                   ; Wait for cache/branch invalidation to complete
+        myISB   ,a1,,y                ; Ensure that the effects of the completed cache/branch invalidation are visible
         MOV     pc, lr
 
 
 WriteBuffer_Drain_WB_CR7_Lx ROUT
-        myDSB ; DSB is the new name for write buffer draining
-        myISB ; Also do ISB for extra paranoia
+        myDSB   ,a1    ; DSB is the new name for write buffer draining
+        myISB   ,a1,,y ; Also do ISB for extra paranoia
         MOV     pc, lr
 
 
@@ -2064,11 +2073,11 @@ IMB_Full_WB_CR7_Lx ROUT
         BGE     %BT20
 10
         MOV     a1, #0
-        myDSB                         ; Wait for clean to complete
+        myDSB   ,a1,,y                ; Wait for clean to complete
         MCR     p15, 0, a1, c7, c5, 0 ; invalidate ICache
         MCR     p15, 0, a1, c7, c5, 6 ; invalidate branch predictors
-        myDSB                         ; Wait for cache/branch invalidation to complete
-        myISB                         ; Ensure that the effects of the completed cache/branch invalidation are visible
+        myDSB   ,a1,,y                ; Wait for cache/branch invalidation to complete
+        myISB   ,a1,,y                ; Ensure that the effects of the completed cache/branch invalidation are visible
         Pull    "a2,a3,a4,v1,v2,v3,v4,v5,pc"
 
 ;  a1 = start address (inclusive, cache line aligned)
@@ -2090,7 +2099,7 @@ IMB_Range_WB_CR7_Lx ROUT
         ADD     a1, a1, lr
         CMP     a1, a2
         BLO     %BT10
-        myDSB ; Wait for clean to complete
+        myDSB   ,a1  ; Wait for clean to complete
         Pull    "a1" ; Get start address back
         MOV     lr, #0
         LDRB    lr, [lr, #ICache_LineLen] ; Use ICache line length, just in case D&I length differ
@@ -2101,28 +2110,28 @@ IMB_Range_WB_CR7_Lx ROUT
         CMP     a1, a2
         BLO     %BT10
         MCR     p15, 0, a1, c7, c5, 6 ; invalidate branch predictors
-        myDSB                         ; Wait for cache/branch invalidation to complete
-        myISB                         ; Ensure that the effects of the completed cache/branch invalidation are visible
+        myDSB   ,a1                   ; Wait for cache/branch invalidation to complete
+        myISB   ,a1,,y                ; Ensure that the effects of the completed cache/branch invalidation are visible
         Pull    "a3,pc"
 
 MMU_Changing_WB_CR7_Lx ROUT
         Push    "lr"
-        myDSB ; Ensure the page table write has actually completed
-        myISB ; Also required
+        myDSB   ,a1    ; Ensure the page table write has actually completed
+        myISB   ,a1,,y ; Also required
         BL      Cache_CleanInvalidateAll_WB_CR7_Lx
         MOV     a1, #0
         MCR     p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB
-        myDSB                         ; Wait TLB invalidation to complete
-        myISB                         ; Ensure that the effects are visible
+        myDSB   ,a1,,y                ; Wait for TLB invalidation to complete
+        myISB   ,a1,,y                ; Ensure that the effects are visible
         Pull    "pc"
 
 ; a1 = page affected (page aligned address)
 ;
 MMU_ChangingEntry_WB_CR7_Lx ROUT
         Push    "a2, lr"
-        myDSB ; Ensure the page table write has actually completed
-        myISB ; Also required
         MOV     lr, #0
+        myDSB   ,lr,,y ; Ensure the page table write has actually completed
+        myISB   ,lr,,y ; Also required
         LDRB    lr, [lr, #DCache_LineLen] ; log2(line len)-2
         MOV     a2, #4
         MOV     lr, a2, LSL lr
@@ -2132,8 +2141,8 @@ MMU_ChangingEntry_WB_CR7_Lx ROUT
         ADD     a1, a1, lr
         CMP     a1, a2
         BNE     %BT10
-        myDSB ; Wait for clean to complete
         MOV     lr, #0
+        myDSB   ,lr,,y ; Wait for clean to complete
         LDRB    lr, [lr, #ICache_LineLen] ; Use ICache line length, just in case D&I length differ
         MOV     a1, #4
         MOV     lr, a1, LSL lr
@@ -2146,8 +2155,8 @@ MMU_ChangingEntry_WB_CR7_Lx ROUT
         SUB     a1, a1, #PageSize
         MCR     p15, 0, a1, c8, c7, 1           ; invalidate DTLB and ITLB
         MCR     p15, 0, a1, c7, c5, 6           ; invalidate branch predictors
-        myDSB
-        myISB
+        myDSB   ,a1
+        myISB   ,a1,,y
         Pull    "a2, pc"
 
 ; a1 = first page affected (page aligned address)
@@ -2155,8 +2164,8 @@ MMU_ChangingEntry_WB_CR7_Lx ROUT
 ;
 MMU_ChangingEntries_WB_CR7_Lx ROUT
         Push    "a2, a3, lr"
-        myDSB ; Ensure the page table write has actually completed
-        myISB ; Also required
+        myDSB   ,lr    ; Ensure the page table write has actually completed
+        myISB   ,lr,,y ; Also required
         MOV     a2, a2, LSL #Log2PageSize
         MOV     a3, #0
         LDR     a3, [a3, #DCache_RangeThreshold]   ;check whether cheaper to do global clean
@@ -2173,8 +2182,8 @@ MMU_ChangingEntries_WB_CR7_Lx ROUT
         ADD     a1, a1, a3
         CMP     a1, a2
         BNE     %BT10
-        myDSB ; Wait for clean to complete
         MOV     a3, #0
+        myDSB   ,a3,,y ; Wait for clean to complete
         LDRB    a3, [a3, #ICache_LineLen] ; Use ICache line length, just in case D&I length differ
         MOV     a1, #4
         MOV     a3, a1, LSL a3
@@ -2190,39 +2199,38 @@ MMU_ChangingEntries_WB_CR7_Lx ROUT
         CMP     lr, a2
         BNE     %BT20
         MCR     p15, 0, a1, c7, c5, 6           ; invalidate branch predictors
-        myDSB
-        myISB
+        myDSB   ,a1
+        myISB   ,a1,,y
         Pull    "a2, a3, pc"
 ;
 30
         BL      Cache_CleanInvalidateAll_WB_CR7_Lx
         MOV     a1, #0
         MCR     p15, 0, a1, c8, c7, 0              ; invalidate ITLB and DTLB
-        myDSB                         ; Wait TLB invalidation to complete
-        myISB                         ; Ensure that the effects are visible
+        myDSB   ,a1,,y                ; Wait for TLB invalidation to complete
+        myISB   ,a1,,y                ; Ensure that the effects are visible
         Pull    "a2, a3, pc"
 
 ; a1 = first page affected (page aligned address)
 ; a2 = number of pages
 ;
 MMU_ChangingUncachedEntries_WB_CR7_Lx ROUT
-        myDSB ; Ensure the page table write has actually completed
-        myISB ; Also required
+        Push    "a2,lr"
+        myDSB   ,lr    ; Ensure the page table write has actually completed
+        myISB   ,lr,,y ; Also required
         CMP     a2, #32                            ; arbitrary-ish threshold
         MCRHS   p15, 0, a1, c8, c7, 0              ; invalidate ITLB and DTLB
         BHS     %FT20
-        Push    "a2"
 10
         MCR     p15, 0, a1, c8, c7, 1              ; invalidate DTLB & ITLB entry
         ADD     a1, a1, #PageSize
         SUBS    a2, a2, #1
         BNE     %BT10
-        Pull    "a2"
 20
         MCR     p15, 0, a1, c7, c5, 6           ; invalidate branch predictors
-        myDSB
-        myISB
-        MOV     pc, lr
+        myDSB   ,lr,,y
+        myISB   ,lr,,y
+        Pull    "a2,pc"
 
  ] ; MEMM_Type = "VMSAv6"
 
@@ -2239,9 +2247,8 @@ ARM_PrintProcessorType
 
         Push    "lr"
         ADR     a2, PNameTable
-        LDR     a1, [a2, a1, LSL #1]
-        MOV     a1, a1, LSL #16
-        ADD     a1, a2, a1, LSR #16
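+        ; fetch halfword offset from PNameTable (LDHA macro; a3 = temp)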
+        LDHA    a1, a2, a1, a3
+        ADD     a1, a2, a1
         BL      Write0_Translated
         SWI     XOS_NewLine
         SWI     XOS_NewLine
@@ -2265,6 +2272,7 @@ PNameTable
         DCW     PName_X80200    - PNameTable
         DCW     PName_X80321    - PNameTable
         DCW     PName_Cortex_A8 - PNameTable
+        DCW     PName_ARM1176JZF_S - PNameTable
 
 PName_ARM600
         =       "600:ARM 600 Processor",0
@@ -2298,6 +2306,8 @@ PName_X80321
         =       "X80321:80321 Processor",0
 PName_Cortex_A8
         =       "CortexA8:Cortex-A8 Processor",0
+PName_ARM1176JZF_S
+        =       "ARM1176JZF_S:ARM1176JZF-S Processor",0
         ALIGN
 
 
diff --git a/s/HAL b/s/HAL
index 3b79e5fc6de09c42c4ba760e6940edbbfed440a4..0e766b5134ad64f7c8575ad73bb513dd840d722e 100644
--- a/s/HAL
+++ b/s/HAL
@@ -76,12 +76,12 @@ RISCOS_InitARM
 
         ; Off we go.
         ARM_write_control a3
+        MOV     a2, #0 ; a2 = 0 for the barrier and flush ops below
  [ MEMM_Type = "VMSAv6"
-        myISB ; Ensure the update went through
+        myISB   ,a2,,y ; Ensure the update went through
  ]
 
         ; In case it wasn't a hard reset
-        MOV     a2, #0
  [ MEMM_Type = "VMSAv6"
         CMP     a1, #ARMvF
         ; Assume that all ARMvF ARMs have multi-level caches and thus no single MCR op for invalidating all the caches
@@ -92,8 +92,8 @@ RISCOS_InitARM
         MCREQ   ARM_config_cp,0,a2,ARMv3_TLBflush_reg,C0        ; flush TLBs
         MCRNE   ARM_config_cp,0,a2,ARMv4_TLB_reg,C7             ; flush TLBs
  [ MEMM_Type = "VMSAv6"
-        myDSB
-        myISB
+        myDSB   ,a2,,y
+        myISB   ,a2,,y
  ]
 
         ; We assume that ARMs with an I cache can have it enabled while the MMU is off.
@@ -101,7 +101,7 @@ RISCOS_InitARM
         ORRNE   a3, a3, #MMUC_I
         ARM_write_control a3, NE                                ; whoosh
  [ MEMM_Type = "VMSAv6"
-        myISB ; Ensure the update went through
+        myISB   ,a2,,y ; Ensure the update went through
  ]
         ]
 
@@ -691,7 +691,7 @@ MMU_activation_zone
         CMP     ip, #ARMv6
         MCRGE   p15, 0, lr, c2, c0, 2           ; Ensure only TTBR0 is used (v6)
         MCRGT   p15, 0, lr, c12, c0, 0          ; Ensure exception vector base is 0 (Cortex)
-        myISB
+        myISB   ,lr,,y
         ORRGE   v5, v5, #MMUC_XP ; Extended pages enabled (v6)
         BICGT   v5, v5, #MMUC_TRE+MMUC_AFE ; TEX remap, Access Flag disabled (Cortex)
         BICGE   v5, v5, #MMUC_EE+MMUC_TE+MMUC_VE ; Exceptions = nonvectored LE ARM
@@ -703,19 +703,19 @@ MMU_activation_zone
 MMUon_instr
         ARM_write_control v5
   [ MEMM_Type = "VMSAv6"
-        myISB ; Just in case
+        MOV     lr, #0
+        myISB   ,lr,,y ; Just in case
   ]
         MOVEQ   sp, v5
         MSREQ   CPSR_c, #F32_bit+I32_bit+SVC32_mode
 
   [ MEMM_Type = "VMSAv6"
         CMP     ip, #ARMvF
-        MOV     lr, #0                                          ; junk MMU-off contents of I-cache
-        MCRNE   ARM_config_cp,0,lr,ARMv4_cache_reg,C7           ; (works on ARMv3)
+        MCRNE   ARM_config_cp,0,lr,ARMv4_cache_reg,C7           ; junk MMU-off contents of I-cache (works on ARMv3)
         MCREQ   p15, 0, lr, c7, c5, 0           ; invalidate instruction cache
         MCREQ   p15, 0, lr, c8, c7, 0           ; invalidate TLBs
         MCREQ   p15, 0, lr, c7, c5, 6           ; invalidate branch predictor
-        myISB ; Ensure below branch works
+        myISB   ,lr,,y ; Ensure below branch works
         BLEQ    HAL_InvalidateCache_ARMvF       ; invalidate data cache (and instruction+TLBs again!)
   |
         MOV     lr, #0                                          ; junk MMU-off contents of I-cache
@@ -1057,8 +1057,8 @@ HAL_InvalidateCache_ARMvF
         MCR     p15, 0, r8, c7, c5, 0           ; invalidate instruction cache
         MCR     p15, 0, r8, c8, c7, 0           ; invalidate TLBs
         MCR     p15, 0, r8, c7, c5, 6           ; invalidate branch target predictor
-        myDSB                                   ; Wait for completion
-        myISB
+        myDSB   ,r8,,y                          ; Wait for completion
+        myISB   ,r8,,y
         MRC     p15, 1, r8, c0, c0, 1 ; Cache level ID register
         BIC     r8, r8, #&FF000000 ; Discard unification/coherency bits
         MOV     r9, #0 ; Current cache level
@@ -1066,7 +1066,7 @@ HAL_InvalidateCache_ARMvF
         TST     r8, #7 ; Get flags
         BEQ     %FT10 ; Cache clean complete
         MCR     p15, 2, r9, c0, c0, 0 ; Program cache size selection register
-        myISB
+        myISB   ,r8,,y
         MRC     p15, 1, r10, c0, c0, 0 ; Get size info
         AND     r11, r10, #&7 ; log2(Line size)-2
         BIC     r10, r10, #&F0000007 ; Clear flags & line size
@@ -1096,13 +1096,13 @@ HAL_InvalidateCache_ARMvF
         BNE     %BT20
 10
         ; Wait for clean to complete
-        myDSB
         MOV     r8, #0
+        myDSB   ,r8,,y
         MCR     p15, 0, r8, c7, c5, 0           ; invalidate instruction cache
         MCR     p15, 0, r8, c8, c7, 0           ; invalidate TLBs
         MCR     p15, 0, r8, c7, c5, 6           ; invalidate branch target predictor
-        myDSB                                   ; Wait for completion
-        myISB
+        myDSB   ,r8,,y                          ; Wait for completion
+        myISB   ,r8,,y
         ; All caches clean; switch back to SVC, then recover the stored PSR from ip (although we can be fairly certain we started in SVC anyway)
         MSR     CPSR_c, #F32_bit+I32_bit+SVC32_mode
         MSR     CPSR_cxsf, ip
diff --git a/s/VMSAv6 b/s/VMSAv6
index 5ce61fc2002a8cf4ab9768022462e76dee2f4939..679fd0acb2772181055aa7d9cb678cca0a4012c9 100644
--- a/s/VMSAv6
+++ b/s/VMSAv6
@@ -376,15 +376,12 @@ MMUControl_ModifyControl ROUT
         CMP     r1,#0
         CMPEQ   r2,#&FFFFFFFF
         BEQ     MMUC_modcon_readonly
-        MOV     r3,#0
-        LDRB    r5,[r3, #ProcessorArch]
+        MOV     r3,#ZeroPage
+        ASSERT  ZeroPage = 0 ; r3 assumed to be zero for CP15 ops
         PHPSEI  r4                      ; disable IRQs while we modify soft copy (and possibly switch caches off/on)
 
-        CMP     r5,#ARMv4
-        LDRLO   lr, [r3, #MMUControlSoftCopy]
-        ARM_read_control lr,HS
-;        MOVHS   lr,lr,LSL #19
-;        MOVHS   lr,lr,LSR #19           ; if ARMv4 or later, we can read control reg. - trust this more than soft copy
+        ; We're ARMv6+, so just read the real control reg and ignore the soft copy
+        ARM_read_control lr
         AND     r2, r2, lr
         EOR     r2, r2, r1
         MOV     r1, lr
@@ -398,44 +395,30 @@ MMUControl_ModifyControl ROUT
         TSTEQ   lr, #MMUC_I
         BEQ     %FT10
 
-        Push    "r0"
-        MOV     r0, #0
-        ARMop   Cache_InvalidateAll,,,r0
-        Pull    "r0"
+        ARMop   Cache_InvalidateAll,,,r3
 10
         BIC     lr, r1, r2              ; lr = bits going from 1->0
         TST     lr, #MMUC_C             ; if cache turning off then clean data cache first
         BEQ     %FT15
-        Push    "r0"
-        MOV     r0, #0
-        ARMop   Cache_CleanAll,,,r0
-        Pull    "r0"
+        ARMop   Cache_CleanAll,,,r3
 15
         ARM_write_control r2
-        myISB ; Must be running on >=ARMv6, so perform ISB to ensure CP15 write is complete
+        myISB   ,r3,,y ; Must be running on >=ARMv6, so perform ISB to ensure CP15 write is complete
         BIC     lr, r1, r2              ; lr = bits going from 1->0
         TST     lr, #MMUC_C             ; if cache turning off then flush cache afterwards
         TSTNE   lr, #MMUC_I
         BEQ     %FT20
-        Push    "r0"
-        MOV     r0, #0
-        ARMop   Cache_InvalidateAll,,,r0
-        Pull    "r0"
+        ARMop   Cache_InvalidateAll,,,r3
 20
         PLP     r4                      ; restore IRQ state
         Pull    "r3,r4,r5,pc"
 
 MMUC_modcon_readonly
         MOV     r3, #0
-        LDRB    r5, [r3, #ProcessorArch]
-        CMP     r5, #ARMv4
-        LDRLO   lr, [r3, #MMUControlSoftCopy]
-        ARM_read_control lr,HS
-;        MOVHS   lr,lr,LSL #19
-;        MOVHS   lr,lr,LSR #19           ; if ARMv4 or later, we can read control reg. - trust this more than soft copy
-        STRHS   lr, [r3, #MMUControlSoftCopy]
-        MOV     r1, lr
-        MOV     r2, lr
+        ; We're ARMv6+, so just read the real control reg and ignore the soft copy
+        ARM_read_control r1
+        STR     r1, [r3, #MMUControlSoftCopy]
+        MOV     r2, r1
         Pull    "r3,r4,r5,pc"
 
 MMUControl_Flush