Commit bd0ce26f authored by Jeffrey Lee

Optimise DivRem and PSR manipulation macros

Detail:
  hdr/Macros - DivRem is now able to use UDIV when building for architectures that support it. A "norem" option is also available, to allow the remainder calculation to be skipped if it's not needed (since we don't get it for free when using UDIV).
  hdr/CPU/Generic32 - PHPSEI, WritePSRc and SetMode now use the ARMv6 CPS instruction where possible, delivering performance gains compared to the equivalent MSR. Where CPS isn't used, WritePSRc now attempts to emit a single MSR instead of the 4 instructions generated by the SCPSR macro.
Admin:
  Tested on Cortex-A15
  PSR manipulation macros trialed on a few different CPU architectures to arrive at the conclusion that CPS is as good as or better than MSR


Version 2.60. Tagged as 'HdrSrc-2_60'
parent e75ac298
/* (2.59)
/* (2.60)
*
* This file is automatically maintained by srccommit, do not edit manually.
* Last processed by srccommit version: 1.1.
*
*/
#define Module_MajorVersion_CMHG 2.59
#define Module_MajorVersion_CMHG 2.60
#define Module_MinorVersion_CMHG
#define Module_Date_CMHG 05 Apr 2016
#define Module_Date_CMHG 08 May 2016
#define Module_MajorVersion "2.59"
#define Module_Version 259
#define Module_MajorVersion "2.60"
#define Module_Version 260
#define Module_MinorVersion ""
#define Module_Date "05 Apr 2016"
#define Module_Date "08 May 2016"
#define Module_ApplicationDate "05-Apr-16"
#define Module_ApplicationDate "08-May-16"
#define Module_ComponentName "HdrSrc"
#define Module_ComponentPath "castle/RiscOS/Sources/Programmer/HdrSrc"
#define Module_FullVersion "2.59"
#define Module_HelpVersion "2.59 (05 Apr 2016)"
#define Module_LibraryVersionInfo "2:59"
#define Module_FullVersion "2.60"
#define Module_HelpVersion "2.60 (08 May 2016)"
#define Module_LibraryVersionInfo "2:60"
......@@ -229,7 +229,14 @@ $label
B %FT02
01
]
[ "$regtmp" = "" :LOR: StrongARM_MSR_bug
[ :LNOT: NoARMv6
; CPS is quicker than MSR
; If there's a high probability of IRQs already being disabled, we can
; save even more time by branching over the CPS. But for now assume IRQs
; will mostly be on.
MRS $usereg, CPSR
CPSID i
ELIF "$regtmp" = "" :LOR: StrongARM_MSR_bug
MRS $usereg, CPSR
TST $usereg, #I32_bit ; is I32_bit set?
ORREQ $usereg, $usereg, #I32_bit ; no, then set it
......@@ -698,7 +705,26 @@ $label WritePSRc $value, $regtmp, $cond, $oldpsr
[ ($value :AND::NOT: (I_bit+F_bit+SVC_mode)) <> 0
! 1, "Illegal flags for WritePSRc"
]
[ No26bitCode
; We only care about 32bit (non-thumb) processor modes
; Write the PSR directly to avoid unnecessary bloat from SCPSR
$label
CPU32_bits PSRto32 $value
[ "$oldpsr" <> ""
MRS$cond $oldpsr, CPSR
]
[ NoARMv6 :LOR: ((CPU32_bits :AND: (I32_bit+F32_bit)) <> 0) :LOR: (("$cond" <> "") :LAND: ("$cond" <> "AL"))
MSR$cond CPSR_c, #CPU32_bits :OR: USR32_mode
[ "$cond" <> "" :LAND: "$cond" <> "AL" :LAND: StrongARM_MSR_bug
NOP
]
|
; CPS is faster than MSR CPSR_c, but is limited in that if I+F are being set they must take the same value
CPSIE if, #(CPU32_bits :AND: M32_bits) :OR: USR32_mode
]
|
$label SCPSR $value, (I_bit+F_bit+SVC_mode):EOR:($value), $regtmp, $cond, $oldpsr
]
MEND
] ; :LNOT: No32bitCode
......@@ -874,6 +900,7 @@ op SETA ($op2a) :OR: (0:SHL:25)
; ****************************************************
MACRO
SetMode $newmode, $regtmp, $oldpsr
[ NoARMv6
[ "$oldpsr"=""
MRS $regtmp, CPSR
BIC $regtmp, $regtmp, #M32_bits
......@@ -885,6 +912,12 @@ op SETA ($op2a) :OR: (0:SHL:25)
ORR $regtmp, $regtmp, #$newmode
MSR CPSR_c, $regtmp
]
|
[ "$oldpsr"<>""
MRS $oldpsr, CPSR
]
CPS #$newmode
]
MEND
] ; :LNOT: :DEF: Included_Hdr_CPU_Generic32
......
......@@ -109,7 +109,7 @@ OldOpt SETA {OPT}
;$label ColourConv $in,$out,$tmpR,$tmpG,$tmpB,$red_shift,$red_bits,$green_shift,$green_bits,$blue_shift,$blue_bits,$alpha Convert &BBGGRR00 colours to various formats
;$label DEC $reg,$by Decrement a register (by a value)
;$label DECS $reg,$by Decrement a register (by a value) setting PSR
;$label DivRem $rc, $ra, $rb, $rtemp Get DIV and REM of two values
;$label DivRem $rc, $ra, $rb, $rtemp, $norem Get DIV and optional REM of two values
;$label DoCallTable $jumpreg, $tablename, $work Call a routine in a jump table
;$label DoFastJumpTable $jumpreg, $trash Call a routine in a jump table
;$label DoJumpTable $jumpreg, $tablename, $work1, $work2 Call a routine in a jump table
......@@ -693,14 +693,17 @@ $label SUBS $reg,$reg,#$by
]
MEND
; *************************************************
; *** DivRem - Integer division and remainder ***
; *** rc := ra DIV rb; ra := ra REM rb ***
; *** rb preserved, rtemp corrupt ***
; *************************************************
; **********************************************************
; *** DivRem - Unsigned integer division and remainder ***
; *** rc := ra DIV rb; ra := ra REM rb ***
; *** rb preserved, rtemp corrupt ***
; *** omits remainder (-> ra corrupt) if "$norem" <> "" ***
; *** rb can be a constant if it starts with '#' ***
; **********************************************************
MACRO
$label DivRem $rc, $ra, $rb, $rtemp
$label DivRem $rc, $ra, $rb, $rtemp, $norem
$label
[ NoARMVE
MOV $rtemp, $rb
CMP $rtemp, $ra, LSR #1
01
......@@ -715,6 +718,18 @@ $label
MOV $rtemp, $rtemp, LSR #1
CMP $rtemp, $rb
BCS %BT02
ELIF ("$rb" :LEFT: 1) == "#"
MOV $rtemp, $rb
UDIV $rc, $ra, $rtemp
[ "$norem" == ""
MLS $ra, $rtemp, $rc, $ra
]
|
UDIV $rc, $ra, $rb
[ "$norem" == ""
MLS $ra, $rb, $rc, $ra
]
]
MEND
; *********************
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment