From 14f111bc2c2d8a8606af444007b218de7b6f4f07 Mon Sep 17 00:00:00 2001
From: Mike Stephens <mstephen@gitlab.riscosopen.org>
Date: Fri, 20 Oct 2000 15:48:04 +0000
Subject: [PATCH] more use of ARMops in page manipulation; change register
 usage of ARMops. Tested by kernel boot to star prompt only

Version 5.35, 4.79.2.11. Tagged as 'Kernel-5_35-4_79_2_11'
---
 VersionASM           |  12 +--
 VersionNum           |  16 +--
 hdr/ARMops           |  17 ++-
 hdr/KernelWS         |   2 +
 s/AMBControl/Options |   3 -
 s/AMBControl/memmap  |   6 +-
 s/ARM600             | 241 ++++++++-----------------------------------
 s/ARMops             |  53 +++++++---
 s/ChangeDyn          |  64 +++++-------
 s/Kernel             |   3 +
 s/NewReset           |  12 ++-
 11 files changed, 153 insertions(+), 276 deletions(-)

diff --git a/VersionASM b/VersionASM
index e8fce6ff..88c7f791 100644
--- a/VersionASM
+++ b/VersionASM
@@ -11,10 +11,10 @@
                         GBLS    Module_HelpVersion
 Module_MajorVersion     SETS    "5.35"
 Module_Version          SETA    535
-Module_MinorVersion     SETS    "4.79.2.10"
-Module_Date             SETS    "16 Oct 2000"
-Module_ApplicationDate2 SETS    "16-Oct-00"
-Module_ApplicationDate4 SETS    "16-Oct-2000"
-Module_FullVersion      SETS    "5.35 (4.79.2.10)"
-Module_HelpVersion      SETS    "5.35 (16 Oct 2000) 4.79.2.10"
+Module_MinorVersion     SETS    "4.79.2.11"
+Module_Date             SETS    "20 Oct 2000"
+Module_ApplicationDate2 SETS    "20-Oct-00"
+Module_ApplicationDate4 SETS    "20-Oct-2000"
+Module_FullVersion      SETS    "5.35 (4.79.2.11)"
+Module_HelpVersion      SETS    "5.35 (20 Oct 2000) 4.79.2.11"
                         END
diff --git a/VersionNum b/VersionNum
index 4dfb336a..f55412af 100644
--- a/VersionNum
+++ b/VersionNum
@@ -4,16 +4,16 @@
  *
  */
 #define Module_MajorVersion_CMHG        5.35
-#define Module_MinorVersion_CMHG        4.79.2.10
-#define Module_Date_CMHG                16 Oct 2000
+#define Module_MinorVersion_CMHG        4.79.2.11
+#define Module_Date_CMHG                20 Oct 2000
 
 #define Module_MajorVersion             "5.35"
 #define Module_Version                  535
-#define Module_MinorVersion             "4.79.2.10"
-#define Module_Date                     "16 Oct 2000"
+#define Module_MinorVersion             "4.79.2.11"
+#define Module_Date                     "20 Oct 2000"
 
-#define Module_ApplicationDate2         "16-Oct-00"
-#define Module_ApplicationDate4         "16-Oct-2000"
+#define Module_ApplicationDate2         "20-Oct-00"
+#define Module_ApplicationDate4         "20-Oct-2000"
 
-#define Module_FullVersion              "5.35 (4.79.2.10)"
-#define Module_HelpVersion              "5.35 (16 Oct 2000) (4.79.2.10)"
+#define Module_FullVersion              "5.35 (4.79.2.11)"
+#define Module_HelpVersion              "5.35 (20 Oct 2000) (4.79.2.11)"
diff --git a/hdr/ARMops b/hdr/ARMops
index 06c1383e..f9c1be5c 100644
--- a/hdr/ARMops
+++ b/hdr/ARMops
@@ -44,16 +44,23 @@ CPUFlag_VectorReadException     * 1:SHL:9
 CPUFlag_SplitCache              * 1:SHL:10
 CPUFlag_NoWBDrain               * 1:SHL:11
 
-; The macro to do an ARM operation. All ARM operations are ATCPS, except that they
-; preserve ip (to aid compatiblity with code using r12 as a workspace pointer).
+; The macro to do an ARM operation. All ARM operations are expected
+; to corrupt a1 only
+; This macro corrupts ip unless $zero reg is supplied
 
         MACRO
-        ARMop   $op, $cond, $tailcall
-        MOV$cond a4, #ZeroPage
+        ARMop   $op, $cond, $tailcall, $zero
+ [ "$zero" = ""
+        MOV$cond ip, #ZeroPage
+ ]
  [ "$tailcall" = ""
         MOV$cond lr, pc
  ]
-        LDR$cond pc, [a4, #Proc_$op]
+ [ "$zero" = ""
+        LDR$cond pc, [ip, #Proc_$op]
+ |
+        LDR$cond pc, [$zero, #Proc_$op]
+ ]
         MEND
 
         END
diff --git a/hdr/KernelWS b/hdr/KernelWS
index 96432ae7..bbff4ad6 100644
--- a/hdr/KernelWS
+++ b/hdr/KernelWS
@@ -1181,6 +1181,8 @@ Proc_IMB_Full                   #       4
 Proc_IMB_Range                  #       4
 Proc_MMU_Changing               #       4
 Proc_MMU_ChangingEntry          #       4
+Proc_MMU_ChangingUncached       #       4
+Proc_MMU_ChangingUncachedEntry  #       4
 
  ]
 
diff --git a/s/AMBControl/Options b/s/AMBControl/Options
index 1405df89..185d552b 100644
--- a/s/AMBControl/Options
+++ b/s/AMBControl/Options
@@ -30,9 +30,6 @@ AMBPhysBinMask     * &7F
 
 
 ApplicationStart  * (32*1024)
-PageSize          * (4*1024)
-Log2PageSize      * 12             ;for shifts
-
 AbsMaxAppSize     * (28*1024*1024) ;28 Mb application space limit for RISC OS
 
 ;maximum logical space size cleaned by range strategy
diff --git a/s/AMBControl/memmap b/s/AMBControl/memmap
index c5576cbc..862e1461 100644
--- a/s/AMBControl/memmap
+++ b/s/AMBControl/memmap
@@ -352,7 +352,8 @@ AMB_smme_cachecleanflush
         CMP     r0,#&A000
         BEQ     AMB_smme_cachecleanflush_strongarm
 
-        ARMop   MMU_Changing
+        MOV     r11, #0
+        ARMop   MMU_Changing,,,r11
         Pull    "r0-r4,r7-r11, pc"
 
 AMB_smme_cachecleanflush_strongarm
@@ -430,7 +431,8 @@ AMB_smme_StrongARM_flushrange
         Pull    "r0-r4,r7-r11, pc"
 
 AMB_smme_TLBflush
-        ARMop   TLB_InvalidateAll
+        MOV     r11,#0
+        ARMop   TLB_InvalidateAll,,,r11
 AMB_smme_exit
         Pull    "r0-r4,r7-r11, pc"
 
diff --git a/s/ARM600 b/s/ARM600
index 1e78f2c0..1f18f8d0 100644
--- a/s/ARM600
+++ b/s/ARM600
@@ -194,9 +194,10 @@ SixteenMByte            EQU     (1024*1024 * 16)
 
 ; **************************************************************************************
 ;
-;       BangCamUpdate - Update CAM entry and soft copy
+;       BangCamUpdate - Update CAM, MMU for page move, coping with page currently mapped in
 ;
-; This part of the routine has to do more work on ARM600
+; mjs Oct 2000
+; reworked to use generic ARM ops (vectored to appropriate routines during boot)
 ;
 ; First look in the CamEntries table to find the logical address L this physical page is
 ; currently allocated to. Then check in the Level 2 page tables to see if page L is currently
@@ -263,12 +264,10 @@ BangCamUpdate ROUT
 
 ; **************************************************************************************
 ;
-;       BangCam - Update CAM entry, but not soft copy
+;       BangCam - Update CAM, MMU for page move, assuming page currently mapped out
 ;
 ; This routine maps a physical page to a given logical address
-; For ARM600, I assume that the physical page was previously not mapped
-; anywhere else - on MEMC1 it would automatically unmap any logical
-; address that the physical page was previously at, but on ARM600 it won't
+; It is assumed that the physical page is currently not mapped anywhere else
 ;
 ; in:   r2 = physical page number
 ;       r3 = logical address (2nd copy if doubly mapped)
@@ -284,17 +283,6 @@ BangCamUpdate ROUT
 
 ; This routine must work in 32-bit mode
 
-        GBLL    UsePPLCBBits
-UsePPLCBBits    SETL    {TRUE}
-
-;if we can assume no code above 64Mb (ie. 26bit code space), big optimise for StrongARM
-        GBLL    AssumeNoCodeAbove64Mb
-AssumeNoCodeAbove64Mb  SETL    No32bitCode
-
-;if we just use sledgehammer approach anyway
-        GBLL    AlwaysSledgehammer
-AlwaysSledgehammer SETL {FALSE}
-
 BangCam ROUT
         TST     r11, #DynAreaFlags_DoublyMapped ; if area doubly mapped
         SUBNE   r3, r3, r9              ; then move ptr to 1st copy
@@ -312,17 +300,19 @@ BangCamAltEntry
         ADR     r1, PPLTrans
         AND     r4, r11, #3             ; first use PPL bits
         LDR     r1, [r1, r4, LSL #2]    ; get PPL bits and SmallPage indicator
- [ UsePPLCBBits
+
         TST     r11, #DynAreaFlags_NotCacheable
         TSTEQ   r11, #PageFlags_TempUncacheableBits
         ORREQ   r1, r1, #L2_C           ; if cacheable (area bit CLEAR + temp count zero), then OR in C bit
         TST     r11, #DynAreaFlags_NotBufferable
         ORREQ   r1, r1, #L2_B           ; if bufferable (area bit CLEAR), then OR in B bit
- ]
+
         ORR     r0, r0, r1
 
         LDR     r1, =L2PT               ; point to level 2 page tables
 
+        ;fall through to BangL2PT
+
 ;internal entry point for updating L2PT entry
 ;
 ; entry: r0 = new L2PT value, r1 -> L2PT, r3 = logical address (4k aligned), r11 = PPL
@@ -330,188 +320,46 @@ BangCamAltEntry
 ; exit: r0,r1,r4,r6 corrupted
 ;
 BangL2PT                                        ; internal entry point used only by BangCamUpdate
-  [ AlwaysSledgehammer
-        B       BangL2PT_sledgehammer
-  |
+        Push    "lr"
+        MOV     r6, r0
+
         TST     r11, #DynAreaFlags_DoublyMapped
         BNE     BangL2PT_sledgehammer           ;if doubly mapped, don't try to be clever
-  ]
-  [ ARM810support
-    ;if we are mapping out a cacheable page on an ARM810, must clean+flush cache _before_
-    ;remapping, since ARM810 relies on virtual addresses for writebacks
-        ARM_read_ID r4
-        AND     r4,r4,#&F000                    ;ARM ID nibble now in r4
-        CMP     r0,#0                           ;EQ if map out
-        TSTEQ   r11,#DynAreaFlags_NotCacheable  ;EQ if also cacheable
-        CMPEQ   r4,#&8000                       ;EQ if also ARM 8
-        BNE     BangL2PT_noARM810flush
-    [ ARM810cleanflushbroken
-        ARM8_cleanflush_IDC r6,r4
-        MOV     r4,#&8000
-    |
-        ARM8_cleanflush_IDC r6
-    ]
-BangL2PT_noARM810flush
-  ]
-        STR     r0, [r1, r3, LSR #10]           ;update L2PT
-  [ :LNOT: ARM810support
-        ARM_read_ID r4
-        AND     r4,r4,#&F000                    ;ARM ID nibble in r4
-  ]
-        CMP     r0,#0
-        BEQ     BangL2PT_mapout                 ;the update is a map out => cache(s) may need clean/flush
-;else update is a map in (and nothing should be there at the moment) => no cache worries
-        CMP     r4,#&A000
-        BEQ     BangL2PT_mapin_StrongARM
-
-        MOV     r0, r3
-        Push    "r2,r3,lr"
-        ARMop   TLB_InvalidateEntry             ;invalidate TLB entry for this page
-        Pull    "r2,r3,pc"
-
-BangL2PT_mapin_StrongARM
-        ARMA_drain_WB                           ;in case L2PT entry itself is in a bufferable area
-        ARMA_flush_DTLBentry r3                 ;flush data TLB entry for this page
-  [ AssumeNoCodeAbove64Mb
-        CMP     r3,#64*1024*1024                ;if logical address above 64Mb, assume no code (26 bit)
-        MOVHS   pc,lr
-  ]
-        ARMA_flush_ITLB                         ;but if there is code, we must flush instruction TLB
-        MOV     pc,lr
 
-BangL2PT_mapout
-        CMP     r4,#&A000
-        BEQ     BangL2PT_mapout_StrongARM
-  [ ARM810support
-        CMP     r4,#&8000
-        ARM8_flush_TLBentry r3,EQ                 ;flush TLB entry for this page, ARM 8
-        MOVEQ   pc,lr                             ;ARM8 cache already flushed, if necessary
-  ]
-;else assume ARM 6,7
-        MOV     r0, r3
-        Push    "r2,r3,lr"
-        TST     r11,#DynAreaFlags_NotCacheable
-        ARMop   MMU_ChangingEntry, EQ             ;flush instruction/data cache if necessary
-        TST     r11,#DynAreaFlags_NotCacheable
-        ARMop   TLB_InvalidateEntry, NE           ;flush TLB entry for this page
-        Pull    "r2,r3,pc"
-
-BangL2PT_mapout_StrongARM
-        TST     r11,#DynAreaFlags_NotCacheable
-        BNE     BangL2PT_mapin_StrongARM        ;if NotCacheable, no flush needed (ie. same as mapin)
-;note that we are cleaning *after* remapping, so relying on StrongARM writebacks using physical address
-        MOV     r4,r3
-        ADD     r6,r3,#4*1024                   ;clean/flush data cache over 4k range of page
-
-  [ SAcleanflushbroken        ; ARMA_cleanflush_DCentry instruction seems to be ineffective
-01
-        ARMA_clean_DCentry r4
-        ARMA_flush_DCentry r4
-        ADD     r4,r4,#32
-        ARMA_clean_DCentry r4
-        ARMA_flush_DCentry r4
-        ADD     r4,r4,#32
-        ARMA_clean_DCentry r4
-        ARMA_flush_DCentry r4
-        ADD     r4,r4,#32
-        ARMA_clean_DCentry r4
-        ARMA_flush_DCentry r4
-        ADD     r4,r4,#32
-        CMP     r4,r6
-        BLO     %BT01
-  |
-01
-        ARMA_cleanflush_DCentry r4
-        ADD     r4,r4,#32
-        ARMA_cleanflush_DCentry r4
-        ADD     r4,r4,#32
-        ARMA_cleanflush_DCentry r4
-        ADD     r4,r4,#32
-        ARMA_cleanflush_DCentry r4
-        ADD     r4,r4,#32
-        CMP     r4,r6
-        BLO     %BT01
-  ]
+        ;we sort out cache coherency _before_ remapping, because some ARMs might insist on
+        ;that order (write back cache doing write backs to logical addresses)
+        ;we need to worry about cache only if mapping out a cacheable page
+        ;
+        TEQ     r6, #0                          ;EQ if mapping out
+        TSTEQ   r11, #DynAreaFlags_NotCacheable ;EQ if also cacheable
+        MOV     r0, r3                          ;MMU page entry address
+        ADR     lr, %FT20
+        MOV     r4, #0
+        ARMop   MMU_ChangingEntry, EQ, tailcall, r4
+        ARMop   MMU_ChangingUncachedEntry, NE, tailcall, r4
 
-        ARMA_drain_WB
-        ARMA_flush_DTLBentry r3                 ;flush data TLB entry for this page
-  [ AssumeNoCodeAbove64Mb
-        CMP     r3,#64*1024*1024                ;if logical address above 64Mb, assume no code (26 bit)
-        MOVHS   pc,lr
-  ]
-        ARMA_flush_IC WithoutNOPs
-        MOV     r0,r0                           ;NOPs to ensure 4 instructions before return, after IC flush
-        MOV     r0,r0
-        ARMA_flush_ITLB
-        MOV     pc,lr
+20      STR     r6, [r1, r3, LSR #10]           ;update L2PT entry
+
+        Pull    "pc"
 
 BangL2PT_sledgehammer
-  [ ARM810support
-        ;if necessary, clean+flush _before_ reamapping, since ARM810 writebacks use virtual addresses
-        ARM_read_ID r4
-        AND     r4,r4,#&F000
-        CMP     r4,#&8000
-        TSTEQ   r11,#DynAreaFlags_NotCacheable
-        BNE     BangL2PT_sledge_noARM810flush
-    [ ARM810cleanflushbroken
-        ARM8_cleanflush_IDC r4,r6
-    |
-        ARM8_cleanflush_IDC r4
-    ]
-BangL2PT_sledge_noARM810flush
-  ]
-        BICS    r4, r3, #(3 :SHL: 10)   ; ensure going to be on word boundary
- [ {FALSE}      ; this breaks too many things at the moment
-        BICEQ   r0, r0, #&30            ; if logical page zero, then make 1st 1K no user access
-        ORREQ   r0, r0, #&10
- ]
- [ :LNOT: UsePPLCBBits
-        LDR     r6, [r1, r4, LSR #10]   ; read current contents
-        AND     r6, r6, #L2_C :OR: L2_B ; preserve old CB bits (set up by soft loader)
-        ORR     r0, r0, r6              ; but OR in new address and PPL bits
- ]
-        STR     r0, [r1, r4, LSR #10]!  ; update level 2 page table (and update pointer so we can use bank-to-bank offset
-        TST     r11, #DynAreaFlags_DoublyMapped ; if area doubly mapped
-        STRNE   r0, [r1, r9, LSR #10]   ; then store entry for 2nd copy as well
-        ADDNE   r3, r3, r9              ; and point logical address back at 2nd copy
 
-        ARM_read_ID r4
-        AND     r4,r4,#&F000
-        CMP     r4,#&A000
-        BEQ     BangL2PT_sledgehammer_StrongARM
-  [ ARM810support
-        CMP     r4,#&8000
-        ARM8_flush_TLB EQ
-        MOVEQ   pc, lr      ;ARM8 cache already flushed if necessary
-  ]
-;else assume ARM 6,7
-        Push    "r2,r3,lr"
-        TST     r11,#DynAreaFlags_NotCacheable
-        ARMop   MMU_Changing, EQ             ;flush instruction/data cache+TLB if necessary
-        TST     r11,#DynAreaFlags_NotCacheable
-        ARMop   TLB_InvalidateAll, NE        ;flush TLB
-        Pull    "r2,r3,pc"
-BangL2PT_sledgehammer_StrongARM
-        TST     r11,#DynAreaFlags_NotCacheable
-        BNE     BangL2PT_sledgehammer_StrongARM_NotC
-
-        MOV     r4,#ARMA_Cleaner_flipflop
-        LDR     r0,[r4]                   ;last cleaner address
-        EOR     r0,r0,#16*1024            ;flip it (r0 -> cleaner address to use)
-        STR     r0,[r4]
-
-        ARMA_clean_DC r0,r4,r6            ;effectively, clean/flush DC fully with respect to non-interrupt stuff
-        ARMA_drain_WB
-        ARMA_flush_IC WithoutNOPs         ;do *not* flush DC - there may be some stuff from interrupt routines
-        MOV     r0,r0                     ;NOPs to ensure 4 instructions before return, after IC flush
-        MOV     r0,r0
-        ARMA_flush_TLBs
-        MOV     pc,lr
+        ;sledgehammer is super cautious and does cache/TLB coherency on a global basis
+        ;should only be used for awkward cases
+        ;
+        TEQ     r6, #0                          ;EQ if mapping out
+        TSTEQ   r11, #DynAreaFlags_NotCacheable ;EQ if also cacheable
+        ADR     lr, %FT30
+        MOV     r4, #0
+        ARMop   MMU_Changing, EQ, tailcall, r4
+        ARMop   MMU_ChangingUncached, NE, tailcall, r4
+
+30      STR     r6, [r1, r3, LSR #10]!          ; update level 2 page table (and update pointer so we can use bank-to-bank offset
+        TST     r11, #DynAreaFlags_DoublyMapped ; if area doubly mapped
+        STRNE   r6, [r1, r9, LSR #10]           ; then store entry for 2nd copy as well
+        ADDNE   r3, r3, r9                      ; and point logical address back at 2nd copy
 
-BangL2PT_sledgehammer_StrongARM_NotC    ;no flush necessary if NotCacheable
-        ARMA_drain_WB
-        ARMA_flush_TLBs
-        MOV     pc,lr
+        Pull    "pc"
 
 
 PPLTrans
@@ -2540,9 +2388,10 @@ MMUControl_ModifyControl ROUT
         TST     lr, #MMUC_C             ; if cache turning on then flush cache before we do it
         BEQ     %FT10
 
-        Push    "r0-r3"
-        ARMop   Cache_InvalidateAll
-        Pull    "r0-r3"
+        Push    "r0"
+        MOV     r0, #0
+        ARMop   Cache_InvalidateAll,,,r0
+        Pull    "r0"
 10
   [ ARM810support
         CMP     r5,#8
diff --git a/s/ARMops b/s/ARMops
index 970273d1..666e9683 100644
--- a/s/ARMops
+++ b/s/ARMops
@@ -181,8 +181,12 @@ Analyse_ARMv3
 
         ADR     a1, MMU_Changing_ARMv3
         ADR     a2, MMU_ChangingEntry_ARMv3
+        ADR     a3, MMU_ChangingUncached_ARMv3
+        ADR     a4, MMU_ChangingUncachedEntry_ARMv3
         STR     a1, [v6, #Proc_MMU_Changing]
         STR     a2, [v6, #Proc_MMU_ChangingEntry]
+        STR     a3, [v6, #Proc_MMU_ChangingUncached]
+        STR     a4, [v6, #Proc_MMU_ChangingUncachedEntry]
         B       %FT90
 
 Analyse_WriteThroughUnified
@@ -205,8 +209,12 @@ Analyse_WriteThroughUnified
 
         ADR     a1, MMU_Changing_Writethrough
         ADR     a2, MMU_ChangingEntry_Writethrough
+        ADR     a3, MMU_ChangingUncached
+        ADR     a4, MMU_ChangingUncachedEntry
         STR     a1, [v6, #Proc_MMU_Changing]
         STR     a2, [v6, #Proc_MMU_ChangingEntry]
+        STR     a3, [v6, #Proc_MMU_ChangingUncached]
+        STR     a4, [v6, #Proc_MMU_ChangingUncachedEntry]
         B       %FT90
 
 90
@@ -329,26 +337,16 @@ KnownCPUFlags
         DCD     0,                 0    ; X80200
 
 
-; THE RULES: These routines may corrupt a1-a4, NOT ip.
+; THE RULES: These routines may corrupt a1 only
 
 Cache_Invalidate_ARMv3
         MCR     p15, 0, a1, c7, c0
 NullOp  MOV     pc, lr
 
 WriteBuffer_Drain_ARMv3
-; Do it by turning the write buffer off and back on again, taking care to disable
-; interrupts to prevent soft copy corruption, and not to turn the write buffer on
-; if it was already off.
-        MRS     a1, CPSR
-        MOV     a4, #ZeroPage
-        ORR     a2, a1, #I32_bit
-        MSR     CPSR_c, a2
-
-        LDR     a4, [a4, #MMUControlSoftCopy]
-        BIC     a3, a4, #MMUC_W
-        ARM_write_control a3            ; WB off
-        ARM_write_control a4            ; and restore
-        MSR     CPSR_c, a1              ; restore interrupts
+        ;swap forces unbuffered write, stalling till WB empty
+        SUB     a1, sp, #4
+        SWP     a1, a1, [a1]
         MOV     pc, lr
 
 TLB_Invalidate_ARMv3
@@ -364,21 +362,31 @@ MMU_Changing_ARMv3
         MCR     p15, 0, a1, c5, c0      ; invalidate TLB
         MOV     pc, lr
 
+MMU_ChangingUncached_ARMv3
+        MCR     p15, 0, a1, c5, c0      ; invalidate TLB
+        MOV     pc, lr
+
 MMU_ChangingEntry_ARMv3
         MCR     p15, 0, a1, c7, c0      ; invalidate cache
         MCR     p15, 0, a1, c6, c0      ; invalidate TLB entry
         MOV     pc, lr
 
+MMU_ChangingUncachedEntry_ARMv3
+        MCR     p15, 0, a1, c6, c0      ; invalidate TLB entry
+        MOV     pc, lr
+
 Cache_InvalidateUnified
         MOV     a1, #0
         MCR     p15, 0, a1, c7, c7
         MOV     pc, lr
 
 WriteBuffer_Drain_OffOn
+        Push    "a2"
         ARM_read_control a1
         BIC     a2, a1, #MMUC_W
         ARM_write_control a2
         ARM_write_control a1
+        Pull    "a2"
         MOV     pc, lr
 
 WriteBuffer_Drain
@@ -396,14 +404,25 @@ TLB_InvalidateEntry_Unified
         MOV     pc, lr
 
 MMU_Changing_Writethrough
-        MOV     a4, #0
-        MCR     p15, 0, a4, c7, c7      ; invalidate cache
-        MCR     p15, 0, a4, c8, c7      ; invalidate TLB
+        MOV     a1, #0
+        MCR     p15, 0, a1, c7, c7      ; invalidate cache
+        MCR     p15, 0, a1, c8, c7      ; invalidate TLB
+        MOV     pc, lr
+
+MMU_ChangingUncached
+        MOV     a1, #0
+        MCR     p15, 0, a1, c8, c7      ; invalidate TLB
         MOV     pc, lr
 
 MMU_ChangingEntry_Writethrough
+        Push    "a4"
         MOV     a4, #0
         MCR     p15, 0, a4, c7, c7      ; invalidate cache
+        MCR     p15, 0, a1, c8, c7, 1   ; invalidate TLB entry
+        Pull    "a4"
+        MOV     pc, lr
+
+MMU_ChangingUncachedEntry
         MCR     p15, 0, a1, c8, c7, 1   ; invalidate TLB entry
         MOV     pc, lr
 
diff --git a/s/ChangeDyn b/s/ChangeDyn
index f4d95aff..ebcabf8a 100644
--- a/s/ChangeDyn
+++ b/s/ChangeDyn
@@ -1417,7 +1417,7 @@ AllocateBackingLevel2 Entry "r0-r8,r11"
         STR     lr, [r8, #8]                            ; store entry for 3rd MB
         ADD     lr, lr, #1024                           ; advance L2 pointer
         STR     lr, [r8, #12]                           ; store entry for 4th MB
-        ARM_flush_TLB r6                                ; junk TLB(s) (probably not needed)
+        ;;;ARM_flush_TLB r6                junk TLB(s) shouldn't be needed, would have to be ARMop now
 25
         ADD     r3, r3, #4096                           ; advance L2PT logical address
         ADD     r8, r8, #16                             ; move onto L1 for next 4M
@@ -2474,7 +2474,7 @@ DoTheGrowPagesSpecified
         ORR     lr, lr, #PageFlags_Required     ; set bit in flags to say page will be needed
         STR     lr, [r4, #4]                    ; and store back
 
-; work out physical address direct from physical page number, NOT from logical address, since log addr may be 01F08000 (multiply mapped)
+; work out physical address direct from physical page number, NOT from logical address, since log addr may be Nowhere (multiply mapped)
 
         MOV     r4, #PhysRamTable
 42
@@ -2496,30 +2496,6 @@ DoTheGrowPagesSpecified
         MOV     r1, #Service_PagesUnsafe
         BL      Issue_Service
 
-  [ StrongARM
-    ;
-    ;Ahem! The data is moved to new pages by reading old pages via 'physical address' (flat copy of
-    ;      physical space at virtual addr. 2G). This means data read may not be up to date wrt data in
-    ;      StrongARM's write-back cache, and is a potential cache incoherency anyway (two virtual mappings
-    ;      to one physical).
-    ;      So, clean/flush StrongARM data cache wrt to pages first. Incidentally, since StrongARM does
-    ;      not support burst read LDMs from uncacheable areas, the read is going to be a little slow anyway).
-    ;
-    ;
-    ARM_number r0
-    CMP     r0,#&A
-    BLEQ    dtgps_SAcleanflush
-  ]
-
-  [ ARM810support
-    ;
-    ; ARM810 has writeback cache too
-    ;
-    ARM_number r0
-    CMP     r0,#8
-    BLEQ    dtgps_810cleanflush
-  ]
-
 ; now move the pages
 
         LDR     r2, TotalAmount                 ; amount moving
@@ -2647,7 +2623,8 @@ DoTheGrowPagesSpecified
         MOV     r1, #Service_ClaimFIQ
         BL      Issue_Service
 
-        WritePSRc I_bit+SVC_mode, r6            ; disable IRQs round here
+        WritePSRc I_bit+SVC_mode, r6            ; disable IRQs round here (we don't want interrupt code to update
+                                                ; the old mapping behind us while we're trying to copy it)
 
         LDR     r6, [r8, #0]                    ; r6 = page number required
         LDR     lr, [r8, #PageBlockSize+0]      ; lr = page number of replacement page
@@ -2655,9 +2632,21 @@ DoTheGrowPagesSpecified
         Pull    "r0-r4,r7-r12", EQ              ; then restore registers
         BEQ     %FT76                           ; and skip copy and first page move
 
+        ;mjs
+        ; - if the old page is currently mapped in, copy normally
+        ; - if the old page is not mapped in, copy via temporary mapping
+        ; The old scheme, always copying from other mapping, had interrupt cache coherency hole, at least for
+        ; ARM with writeback cache (bug in 3.7, fixed in Ursula, then lost)
+
         LDR     r0, [r0, lr, LSL #3]            ; r0 = log. address for replacement page (NB use logical address to write to, for cache consistency)
- [ HAL
-        SUB     sp, sp, #4
+
+        LDR     r6, [r8, #4]                    ;logical address of src page
+        LDR     r3, =Nowhere
+        TEQ     r6, r3                          ;will be 'Nowhere' if not mapped in
+        BNE     %FT71
+
+        ASSERT  HAL
+        SUB     sp, sp, #4                      ; for oldp
         Push    "r0,r1"
         MOV     r0, #0
         LDR     r1, [r8, #8]                    ; r1 = physical address of src for copy
@@ -2665,20 +2654,21 @@ DoTheGrowPagesSpecified
         BL      RISCOS_AccessPhysicalAddress
         MOV     r6, r0                          ; r6 = logical address of src for copy
         Pull    "r0,r1"
- |
-        LDR     r6, [r8, #8]                    ; r6 = physical address of src for copy
-        ORR     r6, r6, #PhysSpace              ; must use physical address, as page may be mapped to 01F08000 along with others
- ]
+
+71
         ADD     lr, r6, r5                      ; lr = end src address
 72
         LDMIA   r6!, {r2, r3, r4, r7, r9, r10, r11, r12}
         STMIA   r0!, {r2, r3, r4, r7, r9, r10, r11, r12}
         TEQ     r6, lr
         BNE     %BT72
- [ HAL
-        Pull    "r0"
-        BL      RISCOS_ReleasePhysicalAddress
- ]
+
+        LDR     r0, [r8, #4]                    ;logical address of src page
+        LDR     r3, =Nowhere
+        TEQ     r0, r3
+
+        Pull    "r0", EQ                        ; oldp
+        BLEQ    RISCOS_ReleasePhysicalAddress
 
 ; now check if page we're replacing is in L2PT, and if so then adjust L1PT entries (4 of these)
 
diff --git a/s/Kernel b/s/Kernel
index 48a58cf3..cb0b9cf2 100644
--- a/s/Kernel
+++ b/s/Kernel
@@ -259,6 +259,9 @@ v8      RN      11
 ; Various constants
 ; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 
+PageSize          * (4*1024)       ;MMU page size (normal pages)
+Log2PageSize      * 12             ;for shifts
+
 MinAplWork * 40*1024         ; minimum size of AplWork
 
 ; Fixed addresses
diff --git a/s/NewReset b/s/NewReset
index 318047f5..7afcf7a3 100644
--- a/s/NewReset
+++ b/s/NewReset
@@ -14,12 +14,20 @@
 ;
         SUBT    => NewReset
 
+; DuffEntry is the address of "Nowhere"
+;  - there should never be any page actually mapped to this address (L2PT entry always 0)
+;  - a page that is not mapped in should always have this special address in its CAM entry,
+;    ie. should only be one Nowhere
+;
  [ HAL32
-DuffEntry *     &FAFF8000               ; Never any memory at this address
+DuffEntry *     &FAFF8000
  |
-DuffEntry *     &01F08000               ; Never any memory at this address
+DuffEntry *     &01F08000
  ]
 
+Nowhere * DuffEntry  ; synonym
+
+
 SoftReset       * 0                     ; Reset types
 PowerOnReset    * 1
 ControlReset    * 2
-- 
GitLab