Commit f3f167bc authored by Jeffrey Lee's avatar Jeffrey Lee
Browse files

When multiple CPU architectures are supported, allow dynamically linked...

When multiple CPU architectures are supported, allow dynamically linked clients to be given optimal versions of routines where possible

Detail:
  This set of changes adds support for representing architecture-specific variants of routines within the export tables which the shared C library uses to fill in the client's stubs.
  This allows builds of the module which target multiple architectures (e.g. IOMD, Raspberry Pi, or softloads) to offer the best-suited versions of routines to clients wherever multiple variants exist.
  If only one architecture is supported, only one version of each routine will be built, and no variant tables will be generated.
  Currently routine selection for statically linked clients isn't supported - statically linked targets will only use the most backwards-compatible version of the routines, as before. Also, not all routines are catered for yet (e.g. _ll_udiv).
  File changes:
  - s/h_common - New file containing macros used during variant table construction
  - Makefile - Set SHARED_C_LIBRARY to {TRUE} for SCL builds of assembler code, as per the SHARED_C_LIBRARY #define
  - kernel/s/k_body, s/longlong - For routines which have multiple variants available for the target architectures, build all variants, and generate variant table entries using the new RoutineVariant macro. This has required some reordering of the different variants to make sure that when both versions are built, the most backwards-compatible one will be first (and thus will be called by statically linked clients)
  - s/h_modmacro - "Entry" macro (when building SCL export tables) changed to use DCD instead of B, to allow byte-aligned symbols to be represented (the low bit of the symbol address is now used as an 'is a variant table' flag)
  - s/initmodule - Client stub initialisation rewritten to take into account the B -> DCD change, and to detect and evaluate variant entries
  - clib/s/cl_entries, kernel/s/k_entries, kernel/s/k_entries2 - Update library entry tables to reference the variant forms of routines where relevant
Admin:
  Tested on Raspberry Pi 1, 2, 3 (ROM), RISC OS 3.1 & StrongARM 3.7 (softload 26bit build)
  Requires Kernel-5_35-4_79_2_325


Version 5.91. Tagged as 'RISC_OSLib-5_91'
parent 332c9e74
......@@ -111,6 +111,7 @@ VSFLAGS =\
-DLIB_SHARED="\"Shared \""
ASFLAGS =\
-PD "SHARED_C_LIBRARY SETL {TRUE}"\
${SCL_APCS}
# Setting UROM on the command-line for rlib
......
......@@ -11,13 +11,13 @@
GBLS Module_HelpVersion
GBLS Module_ComponentName
GBLS Module_ComponentPath
Module_MajorVersion SETS "5.90"
Module_Version SETA 590
Module_MajorVersion SETS "5.91"
Module_Version SETA 591
Module_MinorVersion SETS ""
Module_Date SETS "14 May 2016"
Module_ApplicationDate SETS "14-May-16"
Module_Date SETS "24 May 2016"
Module_ApplicationDate SETS "24-May-16"
Module_ComponentName SETS "RISC_OSLib"
Module_ComponentPath SETS "castle/RiscOS/Sources/Lib/RISC_OSLib"
Module_FullVersion SETS "5.90"
Module_HelpVersion SETS "5.90 (14 May 2016)"
Module_FullVersion SETS "5.91"
Module_HelpVersion SETS "5.91 (24 May 2016)"
END
/* (5.90)
/* (5.91)
*
* This file is automatically maintained by srccommit, do not edit manually.
* Last processed by srccommit version: 1.1.
*
*/
#define Module_MajorVersion_CMHG 5.90
#define Module_MajorVersion_CMHG 5.91
#define Module_MinorVersion_CMHG
#define Module_Date_CMHG 14 May 2016
#define Module_Date_CMHG 24 May 2016
#define Module_MajorVersion "5.90"
#define Module_Version 590
#define Module_MajorVersion "5.91"
#define Module_Version 591
#define Module_MinorVersion ""
#define Module_Date "14 May 2016"
#define Module_Date "24 May 2016"
#define Module_ApplicationDate "14-May-16"
#define Module_ApplicationDate "24-May-16"
#define Module_ComponentName "RISC_OSLib"
#define Module_ComponentPath "castle/RiscOS/Sources/Lib/RISC_OSLib"
#define Module_FullVersion "5.90"
#define Module_HelpVersion "5.90 (14 May 2016)"
#define Module_LibraryVersionInfo "5:90"
#define Module_FullVersion "5.91"
#define Module_HelpVersion "5.91 (24 May 2016)"
#define Module_LibraryVersionInfo "5:91"
......@@ -30,9 +30,9 @@
Entry2 |x$stack_overflow|, imported, _kernel_stkovf_split_0frame, _kernel_stkovfs_0v, , IMPORT, __rt_stkovf_split_small
Entry2 |x$stack_overflow_1|, imported, _kernel_stkovf_split, _kernel_stkovfs_v, , IMPORT, __rt_stkovf_split_big
Entry2 |x$udivide| , imported , _kernel_udiv , , 2, ,__rt_udiv
Entry2 |x$udivide| , imported , |_kernel_udiv$variant| , , 2, ,__rt_udiv
Entry |x$uremainder| , imported , _kernel_urem , , 2
Entry2 |x$divide| , imported , _kernel_sdiv , , 2, ,__rt_sdiv
Entry2 |x$divide| , imported , |_kernel_sdiv$variant| , , 2, ,__rt_sdiv
Entry2 |x$divtest| , , , , 1, ,__rt_divtest
Entry |x$remainder| , imported , _kernel_srem , , 2
Entry |x$multiply| , , , direct, 2
......
......@@ -23,6 +23,7 @@
GET h_stack.s
GET h_workspc.s
GET h_common.s
GET Hdr:OSMisc
EXPORT |_kernel_exit|
......@@ -271,13 +272,17 @@ uwb_size # 0
BICS pc, lr, #PSRIBit ; 32-bit OK - in {CONFIG}=26
|
[ NoARMv6
|_kernel_irqs_on_NoARMv6|
MRS ip, CPSR
BIC ip, ip, #PSR32IBit
MSR CPSR_c, ip
|
CPSIE i
Return ,LinkNotStacked
]
[ SupportARMv6 :LAND: (:LNOT: NoARMv6 :LOR: SHARED_C_LIBRARY)
|_kernel_irqs_on_SupportARMv6|
CPSIE i
Return ,LinkNotStacked
]
]
|_kernel_irqs_off|
......@@ -285,13 +290,17 @@ uwb_size # 0
ORRS pc, lr, #PSRIBit
|
[ NoARMv6
|_kernel_irqs_off_NoARMv6|
MRS ip, CPSR
ORR ip, ip, #PSR32IBit
MSR CPSR_c, ip
|
CPSID i
Return ,LinkNotStacked
]
[ SupportARMv6 :LAND: (:LNOT: NoARMv6 :LOR: SHARED_C_LIBRARY)
|_kernel_irqs_off_SupportARMv6|
CPSID i
Return ,LinkNotStacked
]
]
|_kernel_processor_mode|
......@@ -3141,6 +3150,7 @@ Sleep
; Unsigned divide of a2 by a1: returns quotient in a1, remainder in a2
; Destroys a3 and ip
[ NoARMVE
|_kernel_udiv_NoARMVE|
MOV a3, #0
RSBS ip, a1, a2, LSR #3
BCC u_sh2
......@@ -3188,16 +3198,18 @@ u_sh0 RSBS ip, a1, a2
ADCS a3, a3, a3
BCS u_loop
MOV a1, a3
|
Return ,LinkNotStacked
]
[ SupportARMVE :LAND: (:LNOT: NoARMVE :LOR: SHARED_C_LIBRARY)
|_kernel_udiv_SupportARMVE|
; Long delay on UDIV result makes it faster to divide and then check for error
UDIV a3, a2, a1
TEQ a1, #0
BEQ dividebyzero
MLS a2, a3, a1, a2
MOV a1, a3
]
Return ,LinkNotStacked
]
; Unsigned remainder of a2 by a1: returns remainder in a1
; Could be faster (at expense in size) by duplicating code for udiv,
......@@ -3217,6 +3229,7 @@ u_sh0 RSBS ip, a1, a2
|__rt_udiv10|
|_kernel_udiv10|
[ NoARMM
|_kernel_udiv10_NoARMM|
SUB a2, a1, #10
SUB a1, a1, a1, LSR #2
ADD a1, a1, a1, LSR #4
......@@ -3227,7 +3240,10 @@ u_sh0 RSBS ip, a1, a2
SUBS a2, a2, a3, LSL #1
ADDPL a1, a1, #1
ADDMI a2, a2, #10
|
Return ,LinkNotStacked
]
[ SupportARMM :LAND: (:LNOT: NoARMM :LOR: SHARED_C_LIBRARY)
|_kernel_udiv10_SupportARMM|
; For small numbers, UDIV would be faster than this, but not enough to make it
; worth dynamically switching between algorithms.
LDR a2, =&CCCCCCCD ; (2^35) / 10 rounded up, i.e. (8 * 2^32) / 10
......@@ -3236,8 +3252,8 @@ u_sh0 RSBS ip, a1, a2
SUB a2, a1, a3, LSL #1
MOV a1, a3
SUB a2, a2, a3, LSL #3
]
Return ,LinkNotStacked
]
|__rt_sdiv|
......@@ -3248,6 +3264,7 @@ u_sh0 RSBS ip, a1, a2
; Sign of remainder = sign of dividend.
; Destroys a3, a4 and ip
[ NoARMVE
|_kernel_sdiv_NoARMVE|
; Negates dividend and divisor, then does an unsigned divide; signs
; get sorted out again at the end.
......@@ -3303,14 +3320,17 @@ s_sh0 RSBS ip, a1, a2
EORS a1, a3, a4, ASR #31
ADD a1, a1, a4, LSR #31
RSBCS a2, a2, #0
|
Return ,LinkNotStacked
]
[ SupportARMVE :LAND: (:LNOT: NoARMVE :LOR: SHARED_C_LIBRARY)
|_kernel_sdiv_SupportARMVE|
SDIV a3, a2, a1
TEQ a1, #0
BEQ dividebyzero
MLS a2, a3, a1, a2
MOV a1, a3
]
Return ,LinkNotStacked
]
; Signed remainder of a2 by a1: returns remainder in a1
......@@ -3328,6 +3348,7 @@ s_sh0 RSBS ip, a1, a2
|__rt_sdiv10|
|_kernel_sdiv10|
[ NoARMM
|_kernel_sdiv10_NoARMM|
MOVS a4, a1
RSBMI a1, a1, #0
SUB a2, a1, #10
......@@ -3343,7 +3364,10 @@ s_sh0 RSBS ip, a1, a2
MOVS a4, a4
RSBMI a1, a1, #0
RSBMI a2, a2, #0
|
Return ,LinkNotStacked
]
[ SupportARMM :LAND: (:LNOT: NoARMM :LOR: SHARED_C_LIBRARY)
|_kernel_sdiv10_SupportARMM|
; Using SMULL here would be tricky due to the need to round towards zero
MOVS a4, a1
LDR a2, =&CCCCCCCD ; (2^35) / 10 rounded up, i.e. (8 * 2^32) / 10
......@@ -3355,8 +3379,22 @@ s_sh0 RSBS ip, a1, a2
SUB a2, a2, a3, LSL #3
RSBMI a1, a1, #0
RSBMI a2, a2, #0
]
Return ,LinkNotStacked
]
RoutineVariant _kernel_udiv, ARMVE, UDIV_SDIV, MLS
RoutineVariant _kernel_udiv10, ARMM, UMULL_UMLAL
RoutineVariant _kernel_sdiv, ARMVE, UDIV_SDIV, MLS
RoutineVariant _kernel_sdiv10, ARMM, UMULL_UMLAL
[ {CONFIG}=26
EXPORT |_kernel_irqs_on$variant|
EXPORT |_kernel_irqs_off$variant|
|_kernel_irqs_on$variant| * |_kernel_irqs_on|
|_kernel_irqs_off$variant| * |_kernel_irqs_off|
|
RoutineVariant _kernel_irqs_on, ARMv6, SRS_RFE_CPS
RoutineVariant _kernel_irqs_off, ARMv6, SRS_RFE_CPS
]
EXPORT __rt_div0
__rt_div0
......
......@@ -54,18 +54,18 @@
Entry _kernel_stkovf_copyargs , , , _kernel_NoVeneer, , EXPORT
Entry _kernel_stkovf_copy0args , , , _kernel_NoVeneer
Entry _kernel_udiv , , , , 2
Entry _kernel_udiv , , |_kernel_udiv$variant|, , 2
Entry _kernel_urem , , , , 2
Entry2 _kernel_udiv10 , , , direct, 1, ,__rt_udiv10
Entry _kernel_sdiv , , , , 2
Entry2 _kernel_udiv10 , , |_kernel_udiv10$variant|, direct, 1, ,__rt_udiv10
Entry _kernel_sdiv , , |_kernel_sdiv$variant|, , 2
Entry _kernel_srem , , , , 2
Entry2 _kernel_sdiv10 , , , direct, 1, ,__rt_sdiv10
Entry2 _kernel_sdiv10 , , |_kernel_sdiv10$variant|, direct, 1, ,__rt_sdiv10
Entry _kernel_fpavailable , , , , 0
Entry _kernel_moduleinit , , , unveneered
Entry _kernel_irqs_on , , , unveneered
Entry _kernel_irqs_off , , , unveneered
Entry _kernel_irqs_on , , |_kernel_irqs_on$variant|, unveneered
Entry _kernel_irqs_off , , |_kernel_irqs_off$variant|, unveneered
Entry _kernel_irqs_disabled , , , unveneered
Entry _kernel_entermodule , , , unveneered
......
......@@ -41,14 +41,14 @@
Entry _ll_rsbls, imported, , unveneered
Entry _ll_rsbuu, imported, , unveneered
Entry _ll_rsbss, imported, , unveneered
Entry _ll_mul, imported, , unveneered
Entry _ll_mullu, imported, , unveneered
Entry _ll_mul, imported, |_ll_mul$variant|, unveneered
Entry _ll_mullu, imported, |_ll_mullu$variant|, unveneered
Entry _ll_mulls, imported, , unveneered
Entry _ll_muluu, imported, , unveneered
Entry _ll_mulss, imported, , unveneered
Entry _ll_muluu, imported, |_ll_muluu$variant|, unveneered
Entry _ll_mulss, imported, |_ll_mulss$variant|, unveneered
Entry _ll_udiv, imported, , unveneered
Entry _ll_urdv, imported, , unveneered
Entry _ll_udiv10, imported, , unveneered
Entry _ll_udiv10, imported, |_ll_udiv10$variant|, unveneered
Entry _ll_sdiv, imported, , unveneered
Entry _ll_srdv, imported, , unveneered
Entry _ll_sdiv10, imported, , unveneered
......
; Copyright 2016 Castle Technology Ltd
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
; http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;
; True if building shared library (ROM or softload), false if building static
; lib (e.g. ANSILib)
; The variable is only declared here when the build has not already defined
; it (e.g. by predefining it on the assembler command line); in that case it
; defaults to {FALSE}.
[ :LNOT: :DEF: SHARED_C_LIBRARY
GBLL SHARED_C_LIBRARY
SHARED_C_LIBRARY SETL {FALSE}
]
; Macro for declaring the existence of architecture-specific variants of
; routines, to allow shared library clients to be linked directly to the most
; optimal routine
; N.B. stub 'Entry' macros must link to symbol |xxx$variant| in order for the
; variants to be detected
; N.B. currently only features which have a safe default of 0 can be tested for
; - features where the safe default is 1 (MUL Rd == Rn restriction, etc.) will
; need some modifications making to PickRoutineVariant.
;
; Variant symbols have a byte offset of 1, allowing the stub setup code to
; detect the presence of a variant entry just by looking at the alignment of the
; pointer (no need to update Entry's to reflect whether variants in use)
;
; Note that a suffix of '$variant' was chosen so that the '$' would prevent the
; symbol name being included in the embedded ROM debug symbols - This makes
; sure that (when variants aren't required) the original function name is the
; only one that makes it into the debug symbols.
;
; Parameters:
;   sym       - base routine name. The macro references the labels
;               <sym>_Support<archflag> and <sym>_No<archflag> when a variant
;               table is built, and <sym> itself otherwise.
;   archflag  - architecture name; the assembly-time logicals No<archflag> and
;               Support<archflag> decide whether a variant table is needed.
;   featflag0 - mandatory feature name; CPUFeature_<featflag0> is always
;               emitted as the first flag byte.
;   featflag1, featflag2 - optional further feature names; an omitted flag is
;               emitted as 255.
;
; Output when SHARED_C_LIBRARY is true and both No<archflag> and
; Support<archflag> are true (word-aligned block, 12 bytes):
;   byte 0    CPUFeature_<featflag0>
;   byte 1    CPUFeature_<featflag1> or 255   <- |<sym>$variant| is defined here
;   byte 2    CPUFeature_<featflag2> or 255
;   byte 3    255
;   word 1    offset from this word to <sym>_Support<archflag>
;   word 2    offset from this word to <sym>_No<archflag>
; Otherwise (SHARED_C_LIBRARY true, single variant) |<sym>$variant| is simply
; equated to <sym>. When SHARED_C_LIBRARY is false only the ALIGN is emitted.
MACRO
RoutineVariant $sym, $archflag, $featflag0, $featflag1, $featflag2
; Word-align, so that the label placed after the first flag byte has bit 0 set
ALIGN
[ SHARED_C_LIBRARY
LCLS VariantName
; Build the variant symbol name: VBar, sym, character 36, "variant", VBar
; (VBar is defined elsewhere - presumably the bar used to quote symbol names)
VariantName SETS VBar :CC: "$sym" :CC: :CHR:36 :CC: "variant" :CC: VBar
EXPORT $VariantName
[ No$archflag :LAND: Support$archflag
; Both variants are built: emit the table, with the symbol at byte offset 1
DCB CPUFeature_$featflag0
$VariantName
[ "$featflag1" <> ""
DCB CPUFeature_$featflag1
|
DCB 255
]
[ "$featflag2" <> ""
DCB CPUFeature_$featflag2
|
DCB 255
]
DCB 255 ; PickRoutineVariant will need adjusting if fourth flag needed
; Self-relative offsets: each is measured from the address of its own word
DCD $sym._Support$archflag - .
DCD $sym._No$archflag - .
|
; Only one variant is built: the variant symbol is an alias of the routine
$VariantName * $sym
]
]
MEND
END
......@@ -155,7 +155,7 @@ Temps SETS "$sym"
[ "$import"<>""
IMPORT $Temps $fpabi
]
B $Temps
DCD $Temps
MEND
......@@ -171,7 +171,7 @@ Temps SETS "$sym"
[ "$import"<>""
IMPORT $Temps $fpabi
]
B $Temps
DCD $Temps
MEND
......
......@@ -34,6 +34,7 @@ NoStubEntries SETL {TRUE} ; stop h_modmacro defining Entry
GET h_workspc.s
GET Hdr:Proc
GET Hdr:OSMisc
IMPORT |Lib$$Init$$Base|
IMPORT |Lib$$Init$$Limit|
......@@ -189,6 +190,19 @@ LI_DataStart # 4
LI_DataEnd # 4
LI_ItemSize # 0
; Fetch the next word from the library entry table and resolve it to the
; address of the routine the client stub should use.
; In: r4 -> entry table
; Out: r4 incremented
; r6 = absolute address of entry
; A word with bit 0 set is treated as a pointer to a variant block and is
; resolved by calling PickRoutineVariant; r14 is saved and restored around
; that call. The TST leaves the flags altered.
MACRO
GetRoutineAddress
LDR r6, [r4], #4 ; r6 = table word, r4 advanced to the next entry
TST r6, #1 ; bit 0 set => variant entry rather than a plain address
BEQ %FT10 ; plain address: r6 is already the result
Push "r14"
BL PickRoutineVariant ; r6 := address of the selected variant
Pull "r14"
10
MEND
|_Shared_Lib_Module_SWI_Code|
; Relative SWI number in r11.
......@@ -369,31 +383,28 @@ FixLDRs
FixAddresses
CMP r2, r3
BEQ ChunkEntriesDone
LDR r1, [r4], #+4
MOV r1, r1, ASL #8 ; sign-extend branch offset
ADD r1, r4, r1, ASR #6 ; and convert to bytes
ADD r1, r1, #8-4 ; +8 for pc, -4 for ,#+4 above
LDR r6, [r2], #4 ; patch the stub entry only if it's
CMP r6, r1 ; not already right.
STRNE r1, [r2, #-4]
GetRoutineAddress
LDR r1, [r2], #4 ; patch the stub entry only if it's
CMP r1, r6 ; not already right.
STRNE r6, [r2, #-4]
B FixAddresses
|
; Create a table of BL &xxx
; Create a table of B &xxx
FixBranches
LDR r4, [r14, #LI_EntryBase-LI_ItemSize] ; Our entry table
FixEntries
CMP r2, r3
BEQ ChunkEntriesDone
LDR r1, [r4]
ADD r1, r1, r4, LSR #2
SUB r1, r1, r2, LSR #2
BIC r1, r1, #&FF000000
ORR r1, r1, #&EA000000 ; le branch!
ADD r4, r4, #4
LDR r6, [r2], #4 ; patch the stub entry only if it's
CMP r6, r1 ; not already right.
STRNE r1, [r2, #-4]
GetRoutineAddress
SUB r6, r6, r2
SUB r6, r6, #8
MOV r6, r6, ASR #2
BIC r6, r6, #&FF000000
ORR r6, r6, #&EA000000 ; le branch!
LDR r1, [r2], #4 ; patch the stub entry only if it's
CMP r1, r6 ; not already right.
STRNE r6, [r2, #-4]
B FixEntries
]
......@@ -525,6 +536,39 @@ Failed
StubInitValue
MOV pc, #0
PickRoutineVariant ROUT
; Choose between the two routines described by a variant block, by testing
; each feature flag listed in the block with OS_PlatformFeatures.
; In:
; r6 -> variant block + 1
; word 0: 4x OS_PlatformFeatures 34 flags
; word 1: Offset to routine to use if all features supported
; word 2: Offset to routine to use if features not supported
; Out:
; r6 -> routine to use
; A flag byte of 255 ends the list of flags. The offsets in words 1 and 2 are
; added to the address of the word they were loaded from.
; r0-r3 are named in the Entry below (presumably stacked there and restored
; by EXIT, as is usual for these macros).
Entry "r0-r3"
SUB r2, r6, #1 ; r2 -> first flag byte (start of word 0)
ADD r6, r6, #3 ; r6 -> word 1
10
LDRB r3, [r2], #1 ; r3 = next flag byte
CMP r3, #255
BEQ %FT20 ; end of list: every listed feature was reported as supported
MOV r0, #OSPlatformFeatures_ReadCodeFeatures
SWI XOS_PlatformFeatures
BVS %FT15 ; error: r0 is an error pointer, handled by the CMP at 15
TST r0, #CPUFlag_ExtraReasonCodesFixed ; Is it safe to try calling non-zero reason codes?
MOV r0, #OSPlatformFeatures_ReadCPUFeatures ; does not alter the flags set by TST
; If the SWI below is skipped, r0 still holds the reason code; the ASSERT
; guarantees that value cannot be mistaken for a result of 1
ASSERT OSPlatformFeatures_ReadCPUFeatures != 1
MOVNE r1, r3 ; r1 = flag byte to query
SWINE XOS_PlatformFeatures
15
CMP r0, #1 ; If SWI returned an error, assume error pointer wasn't '1'
BEQ %BT10 ; Feature supported, check next flag
ADD r6, r6, #4 ; Feature(s) not supported, use fallback routine
20
LDR r0, [r6] ; get address of matching routine
ADD r6, r6, r0 ; apply offset
EXIT
ErrorBlock UnknownSWI, "SWI value out of range for module %0", BadSWI
ErrorBlock BadMemory, "Not enough memory for C library", C01
ErrorBlock UnknownLib, "Unknown library chunk", C02
......
......@@ -13,6 +13,7 @@
; limitations under the License.
;
GET objmacs.s
GET h_common.s
CodeArea
......@@ -65,6 +66,7 @@
GET Hdr:ListOpts
GET Hdr:CPU.Arch
GET Hdr:OSMisc
GBLL HaveCLZ
HaveCLZ SETL :LNOT: NoARMv5
......@@ -299,22 +301,9 @@ _ll_rsbss ROUT
; In: (a1,a2),(a3,a4)
; Out: (a1,a2)
_ll_mul ROUT
[ NoARMM
_ll_mul_NoARMM ROUT
FunctionEntry
[ RuntimeArch
CPUArch ip, lr
CMP ip, #CPUArch_v4
BCC mul_hardway
]
[ RuntimeArch :LOR: HaveMULL
; Have UMULL instruction
MOV ip, a1
UMULL a1, lr, a3, a1
MLA lr, ip, a4, lr
MLA a2, a3, a2, lr
Return
]
[ RuntimeArch :LOR: :LNOT: HaveMULL
mul_hardway ROUT
; No UMULL instruction
; Break the operation down thus:
; aaaaaaaa bbbb cccc
......@@ -345,26 +334,32 @@ mul_hardway ROUT
MLA a2, ip, lr, a2 ; msw completed by adding bbbb * eeee
Return
]
[ SupportARMM :LAND: (:LNOT: NoARMM :LOR: SHARED_C_LIBRARY)
_ll_mul_SupportARMM
FunctionEntry
; Have UMULL instruction
MOV ip, a1
UMULL a1, lr, a3, a1
MLA lr, ip, a4, lr
MLA a2, a3, a2, lr
Return
]
; Multiply a 64-bit number by a uint32_t
; In: (a1,a2),a3
; Out: (a1,a2)
_ll_mullu ROUT
FunctionEntry
[ RuntimeArch
CPUArch ip, lr
CMP ip, #CPUArch_v4
MOVCC a4, #0
BCC mul_hardway
[ NoARMM
_ll_mullu_NoARMM
MOV a4, #0
B _ll_mul_NoARMM
]
[ RuntimeArch :LOR: HaveMULL
[ SupportARMM :LAND: (:LNOT: NoARMM :LOR: SHARED_C_LIBRARY)
_ll_mullu_SupportARMM
; Have UMULL instruction
UMULL a1, lr, a3, a1
MLA a2, a3, a2, lr
Return
|
MOV a4, #0
B mul_hardway
UMULL a1, ip, a3, a1
MLA a2, a3, a2, ip
Return ,, LinkNotStacked
]
; Multiply a 64-bit number by an int32_t
......@@ -374,52 +369,44 @@ _ll_mulls ROUT
MOV a4, a3, ASR #31
B _ll_mul
; Create a 64-bit number by multiplying two uint32_t numbers
; In: a1,a2
; Out: (a1,a2)
; Create a 64-bit number by multiplying two uint32_t numbers
; In: a1,a2
; Out: (a1,a2)
_ll_muluu ROUT
FunctionEntry
[ RuntimeArch
CPUArch ip, lr
CMP ip, #CPUArch_v4
BCC %FT50
]
[ RuntimeArch :LOR: HaveMULL
; Have UMULL instruction
MOV lr, a1
UMULL a1, a2, lr, a2
Return
]
[ RuntimeArch :LOR: :LNOT: HaveMULL
50 ; No UMULL instruction
[ NoARMM
_ll_muluu_NoARMM
; No UMULL instruction
MOV a3, a2
MOV a2, #0
MOV a4, #0
B mul_hardway
B _ll_mul_NoARMM
]
[ SupportARMM :LAND: (:LNOT: NoARMM :LOR: SHARED_C_LIBRARY)
_ll_muluu_SupportARMM
; Have UMULL instruction
MOV ip, a1
UMULL a1, a2, ip, a2
Return ,, LinkNotStacked
]
; Create a 64-bit number by multiplying two int32_t numbers
; In: a1,a2
; Out: (a1,a2)
; Create a 64-bit number by multiplying two int32_t numbers
; In: a1,a2
; Out: (a1,a2)