Commit 8287e52c authored by Jeffrey Lee
Browse files

ARMv5E, ARMv6 optimisations

Detail:
  s/FillCode - Buffer fill & mix fragments optimised to use SMULWB/T, PKH, QADD16 and QDADD where possible, resulting in new variants optimised for ARMv5E and ARMv6
  s/Handler - Remove old, unoptimised callback buffer mixing code. Add new optimised code for ARMv6 (SHADD16)
Admin:
  Tested on Raspberry Pi, Iyonix


Version 1.20. Tagged as 'SharedSnd-1_20'
parent 65377222
......@@ -11,13 +11,13 @@
GBLS Module_HelpVersion
GBLS Module_ComponentName
GBLS Module_ComponentPath
Module_MajorVersion SETS "1.19"
Module_Version SETA 119
Module_MajorVersion SETS "1.20"
Module_Version SETA 120
Module_MinorVersion SETS ""
Module_Date SETS "18 Jun 2016"
Module_ApplicationDate SETS "18-Jun-16"
Module_ComponentName SETS "SharedSnd"
Module_ComponentPath SETS "bsd/RiscOS/Sources/Audio/SharedSnd"
Module_FullVersion SETS "1.19"
Module_HelpVersion SETS "1.19 (18 Jun 2016)"
Module_FullVersion SETS "1.20"
Module_HelpVersion SETS "1.20 (18 Jun 2016)"
END
/* (1.19)
/* (1.20)
*
* This file is automatically maintained by srccommit, do not edit manually.
* Last processed by srccommit version: 1.1.
*
*/
#define Module_MajorVersion_CMHG 1.19
#define Module_MajorVersion_CMHG 1.20
#define Module_MinorVersion_CMHG
#define Module_Date_CMHG 18 Jun 2016
#define Module_MajorVersion "1.19"
#define Module_Version 119
#define Module_MajorVersion "1.20"
#define Module_Version 120
#define Module_MinorVersion ""
#define Module_Date "18 Jun 2016"
......@@ -18,6 +18,6 @@
#define Module_ComponentName "SharedSnd"
#define Module_ComponentPath "bsd/RiscOS/Sources/Audio/SharedSnd"
#define Module_FullVersion "1.19"
#define Module_HelpVersion "1.19 (18 Jun 2016)"
#define Module_LibraryVersionInfo "1:19"
#define Module_FullVersion "1.20"
#define Module_HelpVersion "1.20 (18 Jun 2016)"
#define Module_LibraryVersionInfo "1:20"
......@@ -289,6 +289,7 @@ sshD_lp0_$name
AND r14,r9,#&FF000000 ; r14 = integer inc
LDR R0,[R4],r14,LSR#22
BIC r9,r9,#&FF000000 ; keep just the fractional bit
[ NoARME
MOV R14,R0,ASR#16
MUL R14,R10,R14 ; R14= LLLLxxxx = Scaled L volume
MOV R0,R0,LSL#16
......@@ -297,6 +298,17 @@ sshD_lp0_$name
MOV R14,R14,LSR#16
MOV R0,R0,LSR#16
ORR R0,R0,R14,LSL#16
|
SMULWT R14,R10,R0 ; R14 = ssssLLLL (signed)
SMULWB R0,R11,R0 ; R0 = ssssRRRR
[ NoARMv6
MOV R14,R14,LSL #16
MOV R0,R0,LSL #16
ORR R0,R14,R0,LSR #16
|
PKHBT R0,R0,R14,LSL #16 ; R0 = LLLLRRRR
]
]
STR R0,[R1],#4
MEND
......@@ -310,7 +322,9 @@ sshD_lp0_$name
; MakeSpaceML - register setup emitted ahead of the mixing fill fragment.
; When NoARMv6 is true, R10 is loaded with &80000000. The non-ARMv6 mixing
; sequence in this file uses R10 as its clip constant (SBCVS Rd,R10,#0) and
; as the source of the top-halfword mask (R10,ASR#15).
; When NoARMv6 is false nothing is emitted: that path mixes with QADD16,
; which saturates by itself and does not read R10.
MACRO
MakeSpaceML
[ NoARMv6
MOV R10,#1<<31 ; R10 = &80000000, clip constant for the ADDS/SBCVS mixing code
]
MEND
MACRO
......@@ -321,6 +335,7 @@ sshD_lp0_$name
LDR R0,[R4],r14,LSR#22 ; R0 = AAAABBBB = Our samples
LDR R14,[R1] ; R14= CCCCDDDD = Current samples
BIC R9,R9,#&FF000000 ; keep just the fractional bit
[ NoARMv6
MOV R0,R0,ROR#16 ; R0 = BBBBAAAA
ADDS R11,R14,R0,LSL#16 ; T2 = eeeexxxx (where eeee = AAAA+CCCC)
SBCVS R11,R10,#0 ; T2 = EEEExxxx (where EEEE = Clip[AAAA+CCCC])
......@@ -328,6 +343,9 @@ sshD_lp0_$name
SBCVS R0,R10,#0 ; R0 = FFFF0000 (where FFFF = Clip[DDDD+CCCC])
AND R11,R11,R10,ASR#15 ; T2 = EEEE0000
ORR R0,R11,R0,LSR#16 ; R0 = EEEEFFFF
|
QADD16 R0,R0,R14 ; Signed saturating halfword addition
]
STR R0,[R1],#4
MEND
......@@ -350,10 +368,23 @@ sshD_lp0_$name
MACRO
MakeSpaceVML
[ NoARME
MOV r10,R7,LSR#16 ; r10 = Left Vol
BIC r11,r7,r10,LSL#16 ; r11 = Right Vol
Push "R7,R12"
MOV R10,#1<<31
MOV R11,r7,LSR#16 ; R11= Left vol
BIC R7,r7,R11,LSL#16 ; R7 = Right vol
MOV R12,#1<<31
|
[ NoARMv6
Push "R7,R12"
LDR r12,=&ffff<<15
AND r10,r12,r7,LSR #1 ; r10 = Left Vol
AND r11,r12,r7,LSL #15 ; r11 = Right Vol
|
MOV r10,R7,LSR#16 ; r10 = Left Vol
BIC r11,r7,r10,LSL#16 ; r11 = Right Vol
Push "R12"
]
]
MEND
MACRO
......@@ -363,24 +394,48 @@ sshD_lp0_$name
AND R14,R9,#&FF000000 ; r14= integer inc
LDR R0,[R4],r14,LSR#22 ; R0 = AAAABBBB = Our samples
BIC R9,R9,#&FF000000 ; keep just the fractional bit
[ NoARME
MOV r14,R0,ASR#16
MUL R12,R11,R14 ; R12= AAAAxxxx
MUL R7,R10,R14 ; R7 = AAAAxxxx
LDR R14,[R1] ; R14= CCCCDDDD = Current samples
MOV R0,R0,LSL#16
MOV R0,R0,ASR#16
MUL R0,R7,R0 ; R0 = BBBBxxxx
ADDS R12,R14,R12 ; T2 = eeeexxxx (where eeee = AAAA+CCCC)
SBCVS R12,R10,#0 ; T2 = EEEExxxx (where EEEE = Clip[AAAA+CCCC])
MUL R0,R11,R0 ; R0 = BBBBxxxx
ADDS R7,R14,R7 ; R7 = eeeexxxx (where eeee = AAAA+CCCC)
SBCVS R7,R12,#0 ; R7 = EEEExxxx (where EEEE = Clip[AAAA+CCCC])
ADDS R0,R0,R14,LSL#16 ; R0 = DDDD0000+BBBB000
SBCVS R0,R10,#0 ; R0 = FFFF0000 (where FFFF = Clip[DDDD+CCCC])
AND R12,R12,R10,ASR#15 ; T2 = EEEE0000
ORR R0,R12,R0,LSR#16 ; R0 = EEEEFFFF
SBCVS R0,R12,#0 ; R0 = FFFF0000 (where FFFF = Clip[DDDD+BBBB])
AND R7,R7,R12,ASR#15 ; R7 = EEEE0000
ORR R0,R7,R0,LSR#16 ; R0 = EEEEFFFF
|
[ NoARMv6
SMULWT R14,R10,R0 ; R14= AAAAxxxx >> 1
LDR R7,[R1] ; R7 = CCCCDDDD = Current samples
SMULWB R0,R11,R0 ; R0 = BBBBxxxx >> 1
BIC R14,R14,R12,LSR #16 ; R14= AAAA0000 >> 1 (must mask off to avoid potential overflow from interaction with low bits of R7)
QDADD R14,R7,R14 ; R14= EEEExxxx (where EEEE = Clip[AAAA+CCCC])
MOV R7,R7,LSL #16
QDADD R0,R7,R0 ; R0 = FFFFxxxx (where FFFF = Clip[DDDD+BBBB])
AND R14,R14,R12,LSL #1 ; R14= EEEE0000
ORR R0,R14,R0,LSR #16 ; R0 = EEEEFFFF
|
SMULWT R14,R10,R0 ; R14= ssssAAAA
LDR R7,[R1] ; R7 = CCCCDDDD = Current samples
SMULWB R0,R11,R0 ; R0 = ssssBBBB
PKHBT R0,R0,R14,LSL #16 ; R0 = AAAABBBB
QADD16 R0,R0,R7 ; R0 = EEEEFFFF
]
]
STR R0,[R1],#4
MEND
; UnMakeSpaceVML - restore the registers that MakeSpaceVML stacked.
; The register list must mirror the Push in MakeSpaceVML for the same build:
;   NoARMv6 true  : MakeSpaceVML pushed R7 and R12
;   NoARMv6 false : MakeSpaceVML pushed only R12
; NOTE(review): the ARMv6 fill fragment in this file loads the current
; samples into R7 (LDR R7,[R1]) yet R7 is not saved or restored on that
; path, while R12 is saved without being used by the visible ARMv6 code.
; Confirm that the code using these macros does not need R7 preserved.
MACRO
UnMakeSpaceVML
[ NoARMv6
Pull "R7,R12" ; pairs with Push "R7,R12" in MakeSpaceVML
|
Pull "R12" ; pairs with Push "R12" in MakeSpaceVML
]
MEND
; Fmute = zero fill
......
......@@ -332,36 +332,19 @@ addCallBackBuffer ROUT
; r2 = End of SoundDMA buffer
; r6 = Length
[ NoARMv6
addCallBackBuffer_VolumeShiftLoop
LDMIA r1,{r3-r6}
MACRO
acbb_vsl $work1,$work2,$getReg
; Shift volumes
[ 0 = 1
; Old dumb way.
MOV $work1,$getReg,LSL #16 ; Put right into work1
EOR $getReg,$getReg,$work1,LSR #16 ; and left in getReg
MOV $work1,$work1,ASR #1 ; DIV 2
MOV $getReg,$getReg,ASR #1 ; DIV 2
;TST $getReg,#1<<15 ; Remove any low bit
;EORNE $getReg,$getReg,#1<<15
BIC $getReg,$getReg,#1<<15
ADD $getReg,$getReg,$work1,LSR #16
|
; New shiny way
MOV $getReg,$getReg,ASR#1
; Now getReg is perfect except possibly for bit 15 that should be the same as
; bit 14. Form a word of differences between bits.
EOR $work1,$getReg,$getReg,LSL#1
AND $work1,$work1,#1<<15
EOR $getReg,$getReg,$work1
]
MEND
acbb_vsl R11,R12,R3
......@@ -373,6 +356,18 @@ addCallBackBuffer_VolumeShiftLoop
CMP r1,r2
BLT addCallBackBuffer_VolumeShiftLoop
|
MOV r11, #0
addCallBackBuffer_VolumeShiftLoop
LDMIA r1,{r3-r6}
SHADD16 r3, r3, r11
SHADD16 r4, r4, r11
SHADD16 r5, r5, r11
SHADD16 r6, r6, r11
STMIA r1!,{r3-r6}
CMP r1,r2
BLT addCallBackBuffer_VolumeShiftLoop
]
B addCallBackBuffer_Exit ; No data available
......@@ -413,6 +408,7 @@ addCallBackBuffer_Loop
B addCallBackBuffer_Final
addCallBackBuffer_Mix
[ NoARMv6
Push "ws"
MVN r12,#0
......@@ -424,38 +420,6 @@ addCallBackBufferMix_Loop
MACRO
acbbm_l $work1,$work2,$getReg,$mixReg
[ 0 = 1
; Old dumb way
MOV $work1,$getReg,LSL #16 ; Put right into work1
EOR $getReg,$getReg,$work1,LSR #16 ; and left in getReg
MOV $work1,$work1,ASR #1 ; DIV 2
MOV $getReg,$getReg,ASR #1 ; DIV 2
;TST $getReg,#1<<15 ; Remove any low bit
;EORNE $getReg,$getReg,#1<<15
BIC $getReg,$getReg,#1<<15
MOV $work2,$mixReg,LSL #16 ; Put right into work2
EOR $mixReg,$mixReg,$work2,LSR #16 ; and left in mixReg
MOV $work2,$work2,ASR #1 ; DIV 2
MOV $mixReg,$mixReg,ASR #1 ; DIV 2
;TST $mixReg,#1<<15 ; Remove any low bit
;EORNE $mixReg,$mixReg,#1<<15
BIC $mixReg,$mixReg,#1<<15
; Now mix
ADD $getReg,$getReg,$mixReg
ADD $work1,$work1,$work2
ADD $getReg,$getReg,$work1,LSR #16
|
; New shiny way
; getReg = abcdefgh
; mixReg = ijklmnop
; target = qrstuvwx
......@@ -469,8 +433,6 @@ addCallBackBufferMix_Loop
ADD $getReg,$getReg,$mixReg,ASR#1 ; getReg= uvwxXXXX
AND $work1,$work1,$work2 ; work1 = qrst0000
ORR $getReg,$work1,$getReg,LSR#16 ; work1 = qrstuvwx
]
MEND
acbbm_l R11,R12,R3,R7
......@@ -484,6 +446,18 @@ addCallBackBufferMix_Loop
BLT addCallBackBufferMix_Loop
Pull "ws"
|
addCallBackBufferMix_Loop
LDMIA r0!,{r3-r6}
LDMIA r1,{r7-r10}
SHADD16 r3, r3, r7
SHADD16 r4, r4, r8
SHADD16 r5, r5, r9
SHADD16 r6, r6, r10
STMIA r1!,{r3-r6}
CMP r1,r2
BLT addCallBackBufferMix_Loop
]
addCallBackBuffer_Final
; Increment current SoundDMA buffer
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment