Commit 70865f8b authored by Ben Avison
Browse files

Unaligned loads/stores optimised for ARMv6+

Detail:
  Having scanned the kernel source for unaligned load code fragments which
  would abort on ARMv6 and v7 and not having found any, I took the opportunity
  to give them build-time switches to use unaligned LDR((S)H)/STR(H)
  instructions if built for a new enough platform. Also added a couple of
  cases of LDRSB that will benefit v4 CPUs and a few instances of the v6
  SXTH instruction, but since objasm doesn't yet understand it (and when it
  does, not everyone will have upgraded) they are currently written as
  DCI statements.
  Most of the changes are to OS_Word handlers, which are notorious in that
  their input/output block is not word-aligned.
Admin:
  Not tested, but it should at least build.

Version 5.35, 4.79.2.98.2.6. Tagged as 'Kernel-5_35-4_79_2_98_2_6'
parent ca8f36f5
......@@ -13,11 +13,11 @@
GBLS Module_ComponentPath
Module_MajorVersion SETS "5.35"
Module_Version SETA 535
Module_MinorVersion SETS "4.79.2.98.2.5"
Module_MinorVersion SETS "4.79.2.98.2.6"
Module_Date SETS "10 May 2009"
Module_ApplicationDate SETS "10-May-09"
Module_ComponentName SETS "Kernel"
Module_ComponentPath SETS "castle/RiscOS/Sources/Kernel"
Module_FullVersion SETS "5.35 (4.79.2.98.2.5)"
Module_HelpVersion SETS "5.35 (10 May 2009) 4.79.2.98.2.5"
Module_FullVersion SETS "5.35 (4.79.2.98.2.6)"
Module_HelpVersion SETS "5.35 (10 May 2009) 4.79.2.98.2.6"
END
......@@ -5,12 +5,12 @@
*
*/
#define Module_MajorVersion_CMHG 5.35
#define Module_MinorVersion_CMHG 4.79.2.98.2.5
#define Module_MinorVersion_CMHG 4.79.2.98.2.6
#define Module_Date_CMHG 10 May 2009
#define Module_MajorVersion "5.35"
#define Module_Version 535
#define Module_MinorVersion "4.79.2.98.2.5"
#define Module_MinorVersion "4.79.2.98.2.6"
#define Module_Date "10 May 2009"
#define Module_ApplicationDate "10-May-09"
......@@ -18,6 +18,6 @@
#define Module_ComponentName "Kernel"
#define Module_ComponentPath "castle/RiscOS/Sources/Kernel"
#define Module_FullVersion "5.35 (4.79.2.98.2.5)"
#define Module_HelpVersion "5.35 (10 May 2009) 4.79.2.98.2.5"
#define Module_FullVersion "5.35 (4.79.2.98.2.6)"
#define Module_HelpVersion "5.35 (10 May 2009) 4.79.2.98.2.6"
#define Module_LibraryVersionInfo "5:35"
......@@ -17,6 +17,7 @@
GET Hdr:ListOpts
GET Hdr:Macros
GET Hdr:System
GET Hdr:CPU.Arch
GET Hdr:Machine.<Machine>
GET Hdr:ImageSize.<ImageSize>
GET Hdr:UserIF.<UserIF>
......
......@@ -102,9 +102,14 @@ DespatchWord
; Osword Zero : Input a line
; In: R1 -> parameter block (no alignment guarantee), laid out as read below:
; +0,+1 = buffer address (16 bits, little-endian)
; +2 = length of buffer
; +3 = lo limit
; +4 = hi limit
OsWord00 ROUT
[ NoARMv6
; Pre-ARMv6: assemble the 16-bit address from two byte loads
LDRB R0, [R1, #0] ; lo-byte of address
LDRB R2, [R1, #1] ; hi-byte of address
ORR R0, R0, R2, LSL #8 ; R0 := address
|
; Use unaligned load from ARMv6
; LDRH zero-extends, so R0 matches the value built by the byte-wise path
LDRH R0, [R1] ; 16-bit address(!)
]
LDRB R2, [R1, #3] ; lo limit
LDRB R3, [R1, #4] ; hi limit
; R1 is loaded last because it is the base register for the loads above
LDRB R1, [R1, #2] ; length of buffer
......@@ -133,12 +138,20 @@ OsWord02 ROUT
; Select one of the two timer copies, then copy 5 bytes from [R2] to [R1]
ADREQ R2, TimerAlpha
ADRNE R2, TimerBeta
Swap R1, R2, CS ; if writing then R2 is destination
[ NoARMv6
; Pre-ARMv6: copy the 5 bytes one at a time
MOV R3, #5
10
LDRB R4, [R2], #1
STRB R4, [R1], #1
SUBS R3, R3, #1
BNE %BT10
|
; Use unaligned load/store from ARMv6
; 4-byte word + 1 byte = the same 5 bytes as the loop above.
; Both loads are done before either store.
; NOTE(review): register end state differs from the byte loop (R1/R2 end
; up advanced by 4 rather than 5, R3 holds the word rather than 0, flags
; are not altered) - confirm nothing after this point relies on them.
LDR R3, [R2], #4
LDRB R4, [R2]
STR R3, [R1], #4
STRB R4, [R1]
]
STRB R0, TimerState ; if writing, switch state
; (if reading, write current state)
MyOsWord
......@@ -156,12 +169,20 @@ OsWord04 ROUT
; Set up R2 = source and R1 = destination for a 5-byte copy to or from
; IntervalTimer; the carry flag selects the direction.
MOVCS R2, R1 ; if writing then R1 is source
ADRCS R1, IntervalTimer
ADRCC R2, IntervalTimer ; else R2 is source
[ NoARMv6
; Pre-ARMv6: copy the 5 bytes one at a time
MOV R0, #5
10
LDRB R3, [R2], #1
STRB R3, [R1], #1
SUBS R0, R0, #1
BNE %BT10
|
; Use unaligned load/store from ARMv6
; 4-byte word + 1 byte = the same 5 bytes as the loop above.
; NOTE(review): unlike the byte loop, R0 is left holding the copied word
; (not 0), R1/R2 are advanced by 4 (not 5) and the flags are untouched -
; confirm nothing after this point relies on them.
LDR R0, [R2], #4
LDRB R3, [R2]
STR R0, [R1], #4
STRB R3, [R1]
]
MyOsWord
; *****************************************************************************
......@@ -175,6 +196,7 @@ OsWord07 ROUT
MyOsWord
05
[ NoARMv6
; Block not word aligned, so push it on the stack
SUB R13, R13, #8 ; create stack frame of 8 bytes
......@@ -186,6 +208,11 @@ OsWord07 ROUT
BCS %BT10
Pull "R0, R1" ; then pull stack frame into R0 and R1
|
; Use unaligned load from ARMv6
; Fetch the 8-byte block straight into R0 (bytes 0-3) and R1 (bytes 4-7)
; instead of copying it to an 8-byte stack frame first.
; The post-increment on the first load lets the second load use R1 as
; both base and destination, so the block pointer is overwritten here
; (the Pull "R0, R1" in the other branch overwrites it as well).
LDR R0, [R1], #4
LDR R1, [R1]
]
SWI XSound_ControlPacked
MyOsWord
......@@ -196,6 +223,7 @@ OsWord07 ROUT
OsWord09 ROUT
Push R1 ; save pointer
[ NoARMv6
LDRB R2, [R1, #0] ; X lo-byte
LDRB R0, [R1, #1] ; X hi-byte
ORR R0, R2, R0, LSL #8
......@@ -209,6 +237,11 @@ OsWord09 ROUT
MOV R1, R1, LSL #16 ; sign extend Y
MOV R1, R1, ASR #16
|
; Use unaligned load from ARMv6
; LDRSH loads and sign-extends each 16-bit coordinate in one instruction,
; replacing the byte loads plus LSL #16 / ASR #16 of the other branch.
; R0 := halfword at block+0, R1 := halfword at block+2; R1 (the block
; pointer) is overwritten last, having been pushed on entry.
LDRSH R0, [R1], #2
LDRSH R1, [R1]
]
SWI XOS_ReadPoint ; in: R0=X, R1=Y
; out: R2=colour, R3=tint, R4=0/-1 (on/off)
......@@ -468,6 +501,7 @@ OsWord0EGamma ROUT
OsWord0EDelta ROUT
LDR R1, RealTime +0
[ NoARMv6
STRB R1, [R4, #0]
MOV R1, R1, LSR #8
STRB R1, [R4, #1]
......@@ -475,6 +509,10 @@ OsWord0EDelta ROUT
STRB R1, [R4, #2]
MOV R1, R1, LSR #8
STRB R1, [R4, #3]
|
; Use unaligned store from ARMv6
; One word store writes the same four bytes ([R4, #0]..[R4, #3]) as the
; STRB / LSR #8 sequence in the other branch. R1 is left unshifted here,
; whereas the byte-wise path shifts it right as it goes.
STR R1, [R4, #0]
]
LDRB R1, RealTime +4
STRB R1, [R4, #4]
......
......@@ -1236,6 +1236,7 @@ DoReadPOSVPOSO
DoSetScreenStart ROUT
Push R14
LDRB R3, [R1, #0] ; R3 = bitmask
[ NoARMv6
LDRB R0, [R1, #1]
LDRB R2, [R1, #2]
ORR R0, R0, R2, LSL #8
......@@ -1243,6 +1244,10 @@ DoSetScreenStart ROUT
ORR R0, R0, R2, LSL #16
LDRB R2, [R1, #4]
ORR R0, R0, R2, LSL #24 ; R0 is now the offset
|
; Use unaligned load from ARMv6
; R0 := 32-bit little-endian offset held in block bytes 1-4 (byte 0 is the
; bitmask already loaded into R3). Unlike the byte-wise branch, R2 is not
; used as a scratch register here.
LDR R0, [R1, #1]
]
LDR R2, [WsPtr, #TotalScreenSize]
CMP R0, R2
BCS %FT10 ; offset too large
......
......@@ -224,6 +224,7 @@ Pal_Blocksize # 0
MACRO
LoadCoordPair $x, $y, $basereg, $offset
[ NoARMv6
ASSERT $x < $y
[ ($offset) :AND: 3 = 2
ADD $x, $basereg, #($offset)-2
......@@ -251,6 +252,11 @@ Pal_Blocksize # 0
MOV $x, $x, LSL #16 ; (Xh,Xl, 0, 0)
MOV $x, $x, ASR #16 ; (Xs,Xs,Xh,Xl)
]
|
; Use unaligned loads from ARMv6
; Loads two consecutive sign-extended 16-bit values: $x from
; [$basereg + $offset] and $y from [$basereg + $offset + 2].
; NOTE(review): $x is written before $basereg is used for the second
; load, so this form requires $x to be a different register from
; $basereg - confirm no caller passes the same register for both.
; NOTE(review): the ASSERT $x < $y of the other branch is not applied here.
LDRSH $x, [$basereg, #$offset]
LDRSH $y, [$basereg, #($offset)+2]
]
MEND
;
......
......@@ -116,12 +116,18 @@ SetPal EntryS
B %FT20
10
; Merge the red, green and blue bytes at [lr, #2..#4] into bits 8-31 of r2
[ NoARMv6
; Pre-ARMv6: OR each colour byte into place individually
LDRB r3, [lr, #2] ; r3 = red
ORR r2, r2, r3, LSL #8 ; r2 = &0000RRSS
LDRB r3, [lr, #3] ; r3 = green
ORR r2, r2, r3, LSL #16 ; r2 = &00GGRRSS
LDRB r3, [lr, #4] ; r3 = blue
ORR r2, r2, r3, LSL #24 ; r2 = &BBGGRRSS
|
; Use unaligned load from ARMv6
; The word load also fetches the byte at [lr, #5] (the ?? below); the
; LSL #8 shifts it out, so it never reaches r2.
; NOTE(review): that extra byte lies beyond offset 4, the last byte the
; byte-wise branch reads - confirm reading it is always safe.
LDR r3, [lr, #2] ; r3 = &??BBGGRR
ORR r2, r2, r3, LSL #8 ; r2 = &BBGGRRSS
]
20
BL CallSetPalette
EXITS
......@@ -188,12 +194,17 @@ DoReadPalette Entry
; Pick the palette word for the current flash state, then write it to the
; caller's block as 4 little-endian bytes at offsets 1..4 from r4.
; Byte 0 of the block is not written by this code.
LDROSB r0, FlashState
CMP r0, #1 ; CS => 1st state, CC => 2nd state
MOVCC r2, r3 ; r2 = current state
[ NoARMv6
; Pre-ARMv6: store a byte at a time. The pre-indexed writeback steps r4 on
; by 1 before each store, so the first byte lands at block+1.
MOV r1, #4
10
STRB r2, [r4, #1]! ; store 4 bytes of data in block, starting R1+1
MOV r2, r2, LSR #8
SUBS r1, r1, #1
BNE %BT10
|
; Use unaligned store from ARMv6
; The #1 offset is required: the data occupies block bytes 1..4, exactly
; as written by the byte-wise branch. Storing at [r4] would overwrite
; byte 0 and leave byte 4 unwritten.
; NOTE(review): the byte-wise branch exits with r4 advanced by 4, r1 = 0
; and r2 shifted out; this branch leaves all three unchanged - confirm no
; caller depends on those values after EXIT.
STR r2, [r4, #1]
]
EXIT
; *****************************************************************************
......
......@@ -1587,22 +1587,34 @@ DoOsWord13 ROUT
; Convert the pair in R0,R1 using the eig factors (R2,R3) and origin
; (R4,R5), then write it followed by the current cursor position as four
; consecutive 16-bit little-endian values through R6.
RSB R0, R4, R0, LSL R2 ; R0 = (X << XEigFactor)-OrgX
RSB R1, R5, R1, LSL R3 ; R1 = (Y << YEigFactor)-OrgY
[ NoARMv6
; Pre-ARMv6: store each 16-bit value as lo-byte then hi-byte
STRB R0, [R6], #1
MOV R0, R0, LSR #8
STRB R0, [R6], #1
STRB R1, [R6], #1
MOV R1, R1, LSR #8
STRB R1, [R6], #1
|
; Use unaligned halfword stores available from ARMv6
; STRH writes the low 16 bits of the register, the same two bytes as the
; STRB / LSR #8 / STRB sequence above.
STRH R0, [R6], #2
STRH R1, [R6], #2
]
ADD R0, WsPtr, #GCsX
LDMIA R0, {R0, R1} ; get current cursor
[ NoARMv6
STRB R0, [R6], #1
MOV R0, R0, LSR #8
STRB R0, [R6], #1
STRB R1, [R6], #1
MOV R1, R1, LSR #8
STRB R1, [R6], #1
|
; Use unaligned halfword stores available from ARMv6
; The final store needs no writeback: R6 is restored by the Pull below.
STRH R0, [R6], #2
STRH R1, [R6]
]
Pull "R0-R6"
MOV PC, R14
......
......@@ -240,6 +240,7 @@ SetMouseRectangle ROUT
DoMouseBox ROUT
Push "R1-R6, R14"
[ NoARMv6
LDRB R2, [R1, #1] ; R2 = left
LDRB R0, [R1, #2]
ORR R2, R2, R0, LSL #8
......@@ -255,6 +256,13 @@ DoMouseBox ROUT
LDRB R5, [R1, #7] ; R5 = top
LDRB R0, [R1, #8]
ORR R5, R5, R0, LSL #8
|
; Use unaligned loads from ARMv6
; The four 16-bit values sit at odd offsets 1, 3, 5 and 7 from R1.
; LDRH zero-extends, matching the byte-wise branch; sign extension is done
; later, under DoMouseBoxRegs. R0 is not used as scratch in this branch.
LDRH R2, [R1, #1] ; R2 = left
LDRH R3, [R1, #3] ; R3 = bottom
LDRH R4, [R1, #5] ; R4 = right
LDRH R5, [R1, #7] ; R5 = top
]
DoMouseBoxRegs
......@@ -269,6 +277,7 @@ DoMouseBoxRegs
; now sign extend all coords
[ NoARMv6
MOV R2, R2, LSL #16
MOV R2, R2, ASR #16
MOV R3, R3, LSL #16
......@@ -277,6 +286,13 @@ DoMouseBoxRegs
MOV R4, R4, ASR #16
MOV R5, R5, LSL #16
MOV R5, R5, ASR #16
|
; ARMv6 lets you do this using SXTH, but no support in objasm yet
; Each word below is a hand-assembled SXTH Rn, Rn (sign-extend bits 0-15
; to 32 bits), equivalent to the LSL #16 / ASR #16 pair in the other
; branch. Encoding: &E6BF0070 + (Rd << 12) + Rm.
DCI &E6BF2072 ; SXTH R2, R2
DCI &E6BF3073 ; SXTH R3, R3
DCI &E6BF4074 ; SXTH R4, R4
DCI &E6BF5075 ; SXTH R5, R5
]
; now check right >= left and top >= bottom
......@@ -396,14 +412,22 @@ SetMouseMult ROUT
; Read two signed bytes from the block at R1 (offsets 1 and 2), sign-extend
; each to 32 bits and store them in MouseXMult and MouseYMult.
Push "R11,R14"
MOV R11, #KeyWorkSpace
[ NoARMv4
; Pre-ARMv4: no LDRSB, so load unsigned and sign-extend with shifts
LDRB R0, [R1, #1]
MOV R0, R0, ASL #24 ; sign extend to 32 bits
MOV R0, R0, ASR #24
|
; LDRSB loads and sign-extends in one instruction
LDRSB R0, [R1, #1]
]
STR R0, MouseXMult
[ NoARMv4
LDRB R0, [R1, #2]
MOV R0, R0, ASL #24 ; sign extend to 32 bits
MOV R0, R0, ASR #24
|
LDRSB R0, [R1, #2]
]
STR R0, MouseYMult
Pull "R11,PC"
......@@ -416,6 +440,7 @@ SetMouseMult ROUT
;
GetCoordPair ROUT
[ NoARMv6
LDRB R0, [R1, #1] ; get X coordinate
LDRB R2, [R1, #2]
ORR R0, R0, R2, LSL #8
......@@ -435,6 +460,18 @@ GetCoordPair ROUT
MOV R0, R0, ASL #16 ; sign extend 16 to 32
MOV R3, R0, ASR #16
|
; Use unaligned loads and SXTH from ARMv6
; For each coordinate: load the unsigned 16-bit value, add the origin, then
; sign-extend the low 16 bits of the sum. X ends up in R2 and Y in R3;
; R0 is scratch.
; The DCI words are hand-assembled SXTH instructions
; (&E6BF0070 + (Rd << 12) + Rm).
LDRH R0, [R1, #1] ; get X coordinate
LDR R2, [WsPtr, #OrgX] ; add on origin
ADD R0, R0, R2
DCI &E6BF2070 ; SXTH R2, R0
LDRH R0, [R1, #3] ; get Y coordinate
LDR R3, [WsPtr, #OrgY] ; add on origin
ADD R0, R0, R3
DCI &E6BF3070 ; SXTH R3, R0
]
MOV PC, R14
......@@ -477,16 +514,26 @@ StoreCoordPair ROUT
; Subtract the origin from R2 (X) and R3 (Y) and store each result as a
; 16-bit little-endian value in the block at R1: X at offsets 1-2, Y at
; offsets 3-4. R0 is used as scratch.
LDR R0, [WsPtr, #OrgX] ; subtract off origin
SUB R2, R2, R0
[ NoARMv6
STRB R2, [R1, #1] ; store lo-byte of X
MOV R2, R2, LSR #8
STRB R2, [R1, #2] ; store hi-byte of X
|
; Use unaligned store from ARMv6
; (R2 is left unshifted here; the byte-wise branch shifts it right by 8)
STRH R2, [R1, #1] ; store X
]
LDR R0, [WsPtr, #OrgY] ; subtract off origin
SUB R3, R3, R0
[ NoARMv6
STRB R3, [R1, #3] ; store lo-byte of Y
MOV R3, R3, LSR #8
STRB R3, [R1, #4] ; store hi-byte of Y
|
; Use unaligned store from ARMv6
; (R3 is left unshifted here; the byte-wise branch shifts it right by 8)
STRH R3, [R1, #3] ; store Y
]
MOV PC, R14
......
......@@ -3175,6 +3175,7 @@ DoReadFont
ADDCC R0, R0, #(Ecf1-2*8)
LDMIA R0, {R2,R3}
[ NoARMv6
STRB R2, [R1, #1]
MOV R2, R2, LSR #8
STRB R2, [R1, #2]
......@@ -3190,6 +3191,11 @@ DoReadFont
STRB R3, [R1, #7]
MOV R3, R3, LSR #8
STRB R3, [R1, #8]
|
; Use unaligned stores from ARMv6
; R2 goes to block bytes 1-4 and R3 to block bytes 5-8. The byte-wise
; branch writes offsets #1..#8 from R1, so the word stores must start at
; #1 and #5. Storing at #0 and #4 would overwrite byte 0 of the block and
; leave byte 8 unwritten.
; Fixed offsets are used instead of post-indexing so that R1 is returned
; unmodified, as it is by the byte-wise branch.
STR R2, [R1, #1]
STR R3, [R1, #5]
]
MOV PC, R14
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment