From fe209e1091fe04415a4f1974c40c302357644829 Mon Sep 17 00:00:00 2001
From: Kevin Bracey <kbracey@gitlab.riscosopen.org>
Date: Mon, 17 Apr 2000 14:22:53 +0000
Subject: [PATCH] Had one of those weekend brainstorms - managed to speed up
 SWI despatcher _and_ add Thumb support to it.

Fixed OS_BreakPt - was confused by PC/PSR split.

Version 5.24. Not tagged
---
 VersionASM    |   2 +-
 VersionNum    |   4 +-
 hdr/KernelWS  |   4 ++
 s/ArthurSWIs  |   4 +-
 s/Kernel      | 103 +++++++++++++++++++++++++++++---------------------
 s/Middle      |  45 ++++++++++++----------
 s/vdu/vduswis |   8 ++++
 7 files changed, 103 insertions(+), 67 deletions(-)

diff --git a/VersionASM b/VersionASM
index d2b0fdf..9e60327 100644
--- a/VersionASM
+++ b/VersionASM
@@ -9,6 +9,6 @@
 Module_MajorVersion     SETS    "5.24"
 Module_Version          SETA    524
 Module_MinorVersion     SETS    ""
-Module_Date             SETS    "13 Apr 2000"
+Module_Date             SETS    "17 Apr 2000"
 Module_FullVersion      SETS    "5.24"
                         END
diff --git a/VersionNum b/VersionNum
index e01243b..d7a6f55 100644
--- a/VersionNum
+++ b/VersionNum
@@ -5,11 +5,11 @@
  */
 #define Module_MajorVersion_CMHG        5.24
 #define Module_MinorVersion_CMHG        
-#define Module_Date_CMHG                13 Apr 2000
+#define Module_Date_CMHG                17 Apr 2000
 
 #define Module_MajorVersion             "5.24"
 #define Module_Version                  524
 #define Module_MinorVersion             ""
-#define Module_Date                     "13 Apr 2000"
+#define Module_Date                     "17 Apr 2000"
 
 #define Module_FullVersion              "5.24"
diff --git a/hdr/KernelWS b/hdr/KernelWS
index d78ca37..08de84c 100644
--- a/hdr/KernelWS
+++ b/hdr/KernelWS
@@ -1505,7 +1505,11 @@ Export_SvcTable                |#|     &400
         ASSERT ?Export_SvcTable = ?SvcTable
 
  ASSERT SvcTable = &01F033FC ; Required for SVC table pokers, 1.20 compatible
+ [ No26bitCode
 SWIDespatch_Size        *       33*4
+ |
+SWIDespatch_Size        *       31*4    ; can save 2 instructions if 26-bit (no Thumb)
+ ]
 SWIDespatch             |#|     SWIDespatch_Size
 
 
diff --git a/s/ArthurSWIs b/s/ArthurSWIs
index d46b110..43dc404 100644
--- a/s/ArthurSWIs
+++ b/s/ArthurSWIs
@@ -988,9 +988,9 @@ checkmoshandlers
 CallAVector_SWI  ; R9 is the vector number (!!)
        STR       lr, [sp, #-4]!         ; save caller PSR on stack
        MOV       R10, R9
-       msr       AL, CPSR_f, R14        ; restore caller CCs
+       msr      ,CPSR_f, R12            ; restore caller CCs (including V)
        BL        CallVector
-       mrs       AL, r10, CPSR          ; restore CCs
+       mrs      ,r10, CPSR              ; restore CCs
        LDR       lr, [sp], #4
        AND       r10, r10, #&F0000000
        BIC       lr, lr, #&F0000000
diff --git a/s/Kernel b/s/Kernel
index 86feac1..c5e2ed8 100644
--- a/s/Kernel
+++ b/s/Kernel
@@ -72,12 +72,16 @@ $l      LDR     $reg1, =VIDC
  ]
         MEND
 
-; Fake a 26-bit pc, given a PSR currently in lr, and the 32-bit address on
+; Fake a 26-bit pc, given a PSR in lr (or flags/mode in $reg; I/F bits still come from lr), and the 32-bit address on
 ; the stack. The stacked address is pulled, and the result is left in lr.
 
         MACRO
-        FakeLR  $temp, $dontpull
+        FakeLR  $temp, $dontpull, $reg
+ [ "$reg" = ""
         AND     $temp,lr,#&F0000003
+ |
+        AND     $temp,$reg,#&F0000003
+ ]
         AND     lr,lr,#I32_bit+F32_bit
         ORR     $temp,$temp,lr,LSL #IF32_26Shift
  [ "$dontpull" = "dontpull"
@@ -394,6 +398,7 @@ EndData
 
 ; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 ; SWI return handler: checks callback
+; no branches or interlocks in the common case: V clear, no callback
 
 SVCDespatcher ROUT
 
@@ -411,31 +416,32 @@ SLVK_TestV * {PC}-SWIRelocation
 SLVK * {PC}-SWIRelocation
  ! 0,"SLVK       at ":CC:(:STR:SLVK)
 
-        LDR     r11, [sp], #4
+        LDR     r12, [sp], #4
+        MOV     r10, #0
+        LDRB    r11, [r10, #CallBack_Flag]
+
         TST     lr, #V_bit
-        BEQ     %FT40
+        BNE     %FT50
+
+SWIReturnWithCallBackFlag * {PC}-SWIRelocation
+ ! 0,"SWIReturnWithCallBackFlag at ":CC:(:STR:SWIReturnWithCallBackFlag)
+
+40      TEQ     r11, #0
+
+        msr EQ ,CPSR_c, #I32_bit + SVC32_mode           ; IRQs off for SPSR use
+        msr EQ ,SPSR_cxsf, lr
+        LDREQ   lr, [sp], #4
+        Pull    "r10-r12", EQ
+        MOVEQS  pc, lr
+
+        B       callback_checking + SWIRelocation
 
  ! 0,"VSetReturn at ":CC:(:STR:({PC}-SWIRelocation))
-        TST     r11, #Auto_Error_SWI_bit
-        BEQ     VSet_GenerateError + SWIRelocation
+50      TST     r12, #Auto_Error_SWI_bit
+        BNE     %BT40
 
-SWIReturn * {PC}-SWIRelocation
- ! 0,"SWIReturn  at ":CC:(:STR:SWIReturn)
+        B       VSet_GenerateError + SWIRelocation
 
-40      MOV     r10, #0
-        LDRB    r11, [r10, #CallBack_Flag]
-        CMP     r11, #0
-        BNE     callback_checking + SWIRelocation
-
-SWIReturnNoCallback * {PC}-SWIRelocation
- ! 0,"SWIReturnNoCallback at ":CC:(:STR:SWIReturnNoCallback)
-back_to_user * SWIReturnNoCallback
-        msr AL, CPSR_c, #I32_bit + SVC32_mode           ; IRQs off for SPSR use
-back_to_user_irqs_already_off * {PC}-SWIRelocation
-        msr     AL, SPSR_cxsf, lr
-        LDR     lr, [sp], #4
-        Pull    "r10-r12"
-        MOVS    pc, lr
 
 ; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 ; The SWI Despatch routine
@@ -443,28 +449,30 @@ back_to_user_irqs_already_off * {PC}-SWIRelocation
 SVC * {PC}-SWIRelocation
 
         Push    "r10-r12"
-        LDR     r11, [r14, #-4]         ; r11 = calling instruction
-        STR     r14, [r13, #-4]!        ; push return address
-        mrs     AL, r14, SPSR           ; r14 = saved PSR
+ [ No26bitCode
+        mrs    ,r12, SPSR               ; r12 = saved PSR
+        TST     r12, #T32_bit           ; depending on processor state (ARM/Thumb)
+        LDREQ   r11, [r14, #-4]         ; extract SWI number to r11
+        LDRNEB  r11, [r14, #-2]         ; (ordering to prevent interlocks)
+        BICEQ   r11, r11, #&FF000000
+ |
+        LDR     r11, [r14, #-4]         ; extract SWI number to r11
+        mrs    ,r12, SPSR               ; r12 = saved PSR
+        BIC     r11, r11, #&FF000000    ; (ordering to prevent interlocks)
+ ]
 
-SVCContinue * {PC}-SWIRelocation
+        Push    "r11,r14"               ; push SWI number and return address
 
-        BIC     r11, r11, #&FF000000
-        STR     r11, [r13, #-4]!        ; push SWI number
-        [ StrongARM
 SVC_CallASWI * {PC}-SWIRelocation       ; CallASWI,CallASWIR12 re-entry point
-        ]
 
         BICS    r11, r11, #Auto_Error_SWI_bit
         BEQ     SWIWriteC + SWIRelocation
 
-        AND     r10, r14, #I32_bit+F32_bit
+        AND     r10, r12, #I32_bit+F32_bit
         ORR     r10, r10, #SVC2632      ; set IFTMMMMM = IF0x0011
-        msr     AL, CPSR_c, r10         ; restore caller's IRQ state
+        msr    ,CPSR_c, r10             ; restore caller's IRQ state
 
-        CMP     r11, #OS_BreakPt
-        CMPNE   r11, #OS_CallAVector
-        BICNE   r14, r14, #V_bit        ; clear V unless BreakPoint/CallVector
+        BIC     r14, r12, #V_bit        ; clear V (some SWIs need original PSR in r12)
 
         CMP     r11, #OS_WriteI
         LDRLO   pc, [pc, r11, LSL #2]
@@ -681,7 +689,10 @@ VSet_GenerateError ROUT
 
         Pull    lr                      ; which raises error; otherwise just
         BIC     lr, lr, #V_bit          ; return with V clear: error claimed!
-        B       SWIReturn
+
+        MOV     r10, #0                 ; set up r10 and r11 as required
+        LDRB    r11, [r10, #CallBack_Flag]
+        B       SWIReturnWithCallBackFlag
 
         LTORG
 
@@ -755,12 +766,11 @@ ErrHandler ROUT
 callback_checking
 
         TST     lr, #I32_bit+&0F        ; user 26/32 mode, ints enabled?
- [ {FALSE} ; original code
+        msr NE ,CPSR_c, #I32_bit + SVC32_mode
+        msr NE ,SPSR_cxsf, lr
+        LDRNE   lr, [sp], #4
         Pull    "r10-r12", NE
         MOVNES  pc, lr                  ; Skip the branch for SVC code speed
- |
-        BNE     SWIReturnNoCallback     ; hey, it'll be in the cache (probably)
- ]
 
 ; Further checks: postpone callback if returning V set and R0->RAM
 
@@ -775,7 +785,13 @@ callback_checking
         LDRB    r11, [r10, #CallBack_Flag]
         ORR     r11, r11, #CBack_Postpone      ; signal to IRQs
         STRB    r11, [r10, #CallBack_Flag]
-        B       back_to_user_irqs_already_off
+back_to_user
+        msr    ,CPSR_c, #I32_bit + SVC32_mode
+back_to_user_irqs_already_off
+        msr    ,SPSR_cxsf, lr
+        LDR     lr, [sp], #4
+        Pull    "r10-r12"
+        MOVS    pc, lr
 
 Do_CallBack                                    ; CallBack allowed:
         TST     r11, #CBack_VectorReq          ; now process any vector entries
@@ -879,13 +895,14 @@ process_callback_chain ROUT
 ; SWI OS_WriteC
 
 ; In    r11 = 0 (look, up there ^) !
+;       r12 = caller's PSR as read from SPSR on SWI entry (V not yet cleared)
 
 SWIWriteC ROUT
 
         msr    ,CPSR_c, #SVC2632        ; enable interrupts
 
-        BIC     lr, lr, #V_bit          ; clear caller's V cos we didn't before
-        Push    lr
+        BIC     lr, r12, #V_bit         ; clear caller's V cos we didn't before
+        STR     lr, [sp, #-4]!
 
         LDR     r11, [r11, #VecPtrTab+WrchV*4] ; load top node pointer
         CMP     r11, #ROM
diff --git a/s/Middle b/s/Middle
index aa032fc..166b117 100644
--- a/s/Middle
+++ b/s/Middle
@@ -244,22 +244,25 @@ SENTERSWI
 
 SBRKPT ROUT
 
-        ADD     sp, sp, #4              ; discard stacked R11
-        MOV     r12, #0
-        LDR     r12, [r12, #BrkBf]
+        ADD     sp, sp, #4              ; discard stacked SWI number
+        MOV     r10, #0
+        LDR     r10, [r10, #BrkBf]
   [ No26bitCode
-        STR     r14, [r12, #16*4]       ; PSR
-        TST     r14, #&0F
-        Pull    R14
+        STR     r12, [r10, #16*4]       ; original PSR (with V)
+        Pull    r14
+        TST     r12, #T32_bit
+        SUBEQ   r14, r14, #4
+        SUBNE   r14, r14, #2            ; r14 = PC of the SWI
+        TST     r12, #2_01111
   |
-        FakeLR  r11
+        FakeLR  r11,,r12                ; r12+[sp] -> lr (r11 temp reg)
+        SUB     r14, r14, #4            ; r14 = PC of the SWI
         TST     r14, #SVC_mode
   ]
-        SUB     r14, R14, #4
-        STR     r14, [r12, #15*4]       ; PC of the SWI put in.
+        STR     r14, [r10, #15*4]       ; PC of the SWI put in.
         BNE     %FT01                   ; NE if not in user mode
-        STMIA   r12!, {r0}
-        MOV     r0, r12
+        STMIA   r10!, {r0}
+        MOV     r0, r10
         LDMFD   sp, {r10-r12}
 
   [ SASTMhatbroken
@@ -275,23 +278,27 @@ SBRKPT ROUT
         MOV     r12, #BrkAd_ws
         LDMIA   r12, {r12, pc}          ; call breakpoint handler
 
-
-01      AND     r11, r14, #&0F          ; SWI mode?
+; Non-user mode case
+01      AND     r11, r12, #2_01111      ; SVC26/SVC32 mode?
         TEQ     r11, #SVC_mode
         BEQ     %FT02                   ; [yes]
 
-        STMIA   r12!, {r0}
-        MOV     r0, r12
-        LDMFD   sp, {r10-r12}           ; Not banked if IRQ mode
-        msr    ,CPSR_c, R14             ; get at registers
+; Non-user, non-supervisor - must be IRQ, ABT, UND or SYS (no SWIs from FIQ)
+        STMIA   r10!, {r0}
+        MOV     r0, r10
+        BIC     r14, r12, #T32_bit      ; don't go into Thumb mode
+        LDMFD   sp, {r10-r12}           ; Not banked
+        msr    ,CPSR_c, R14             ; get at registers r13 and r14
         STMIA   r0, {r1-r14}
         WritePSRc SVC_mode, r12
         B       %BT10
 
-
+; Supervisor mode case: dump r0-r13 into the breakpoint buffer, mark the R14 slot dead
-02      MOV     r14, r12                ; supervisor mode. R14 in buffer dead
+02      MOV     r14, r10                ; r14 -> buffer (BrkBf now lives in r10; r12 holds the PSR)
         LDMFD   sp!, {r10-r12}
-        STMIA   r14, {r0-r14}
+        STMIA   r14, {r0-r13}
+        LDR     r12, =&DEADDEAD
+        STR     r12, [r14, #14*4]       ; mark R14 as dead
         B       %BT10
 
 ; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
diff --git a/s/vdu/vduswis b/s/vdu/vduswis
index 9af0b6e..7d27f8d 100644
--- a/s/vdu/vduswis
+++ b/s/vdu/vduswis
@@ -1303,8 +1303,16 @@ SWIWriteS ROUT
         BVS     %FT90
         Pull    "R0, R14"
 85
+  [ No26bitCode
+        TST     R14, #T32_bit
+        ADDNE   R10, R10, #1            ; if Thumb
+        BICNE   R10, R10, #1            ; round up to next halfword boundary
+        ADDEQ   R10, R10, #3            ; else
+        BICEQ   R10, R10, #3            ; round up to next word boundary
+  |
         ADD     R10, R10, #3
         BIC     R10, R10, #3            ; round up to next word boundary
+  ]
         STR     R10, [R13, #4]          ; Poke new address into stack
         ExitSWIHandler
 
-- 
GitLab