Commit afa04ff1 authored by Jeffrey Lee's avatar Jeffrey Lee Committed by ROOL
Browse files

More flexible handling of atomics on pre-ARMv6K

For pre-ARMv6K, we generally have two sets of atomic routines available:

* Fast routines which use a SWP spinlock, but are unsafe for use from
interrupt handlers and the like
* Safe routines which disable IRQs, but (in usermode) are much slower
due to requiring SWI calls to enable/disable IRQs.

Currently CLib decides which set of routines to use: modules get the
slow & safe ones, while applications get the fast & unsafe ones.

This change gives control of that choice to the program, by introducing
the _kernel_init_flags variable. The stub code will (weakly) import this
symbol and pass the value to CLib when calling the init SWI. CLib will
then use the value to decide whether to fill the stubs with the safe or
unsafe versions of the routines.

With this change, the default is now to use the slow & safe routines,
for both modules and applications. This is to ensure that code which is
written/tested for ARMv6K+ machines won't run into unexpected issues
when r...
parent 934d7520
......@@ -9,12 +9,12 @@
GBLS Module_ApplicationDate
GBLS Module_HelpVersion
GBLS Module_ComponentName
Module_MajorVersion SETS "6.13"
Module_Version SETA 613
Module_MajorVersion SETS "6.14"
Module_Version SETA 614
Module_MinorVersion SETS ""
Module_Date SETS "16 Jun 2022"
Module_ApplicationDate SETS "16-Jun-22"
Module_Date SETS "20 Aug 2022"
Module_ApplicationDate SETS "20-Aug-22"
Module_ComponentName SETS "RISC_OSLib"
Module_FullVersion SETS "6.13"
Module_HelpVersion SETS "6.13 (16 Jun 2022)"
Module_FullVersion SETS "6.14"
Module_HelpVersion SETS "6.14 (20 Aug 2022)"
END
/* (6.13)
/* (6.14)
*
* This file is automatically maintained by srccommit, do not edit manually.
*
*/
#define Module_MajorVersion_CMHG 6.13
#define Module_MajorVersion_CMHG 6.14
#define Module_MinorVersion_CMHG
#define Module_Date_CMHG 16 Jun 2022
#define Module_Date_CMHG 20 Aug 2022
#define Module_MajorVersion "6.13"
#define Module_Version 613
#define Module_MajorVersion "6.14"
#define Module_Version 614
#define Module_MinorVersion ""
#define Module_Date "16 Jun 2022"
#define Module_Date "20 Aug 2022"
#define Module_ApplicationDate "16-Jun-22"
#define Module_ApplicationDate "20-Aug-22"
#define Module_ComponentName "RISC_OSLib"
#define Module_FullVersion "6.13"
#define Module_HelpVersion "6.13 (16 Jun 2022)"
#define Module_LibraryVersionInfo "6:13"
#define Module_FullVersion "6.14"
#define Module_HelpVersion "6.14 (20 Aug 2022)"
#define Module_LibraryVersionInfo "6:14"
......@@ -61,6 +61,31 @@ extern "C"
{
#endif
extern int __root_stack_size;
/*
* Size of the root stack chunk to allocate on initialisation. This must
* be a multiple of 1KB.
*
* Note: This is a weak symbol, defined here for documentation purposes.
* To use it you must declare and initialise it at global scope, e.g.:
*
* int __root_stack_size = 64<<10;
* int main(int argc,char **argv) { ... }
*
* If left undefined, an implementation-defined default will be used.
*/
extern int _kernel_init_flags;
/*
* Extra initialisation flags to control Shared C Library behaviour.
* Like __root_stack_size, this is a weak symbol, which must be declared and
* initialised at global scope.
*
* If left undefined, a default value of 0 will be used.
*/
#define _kernel_INITFLAG_UNSAFE_ATOMICS 2 /* See comments in stdatomic.h */
extern _kernel_stack_chunk *_kernel_current_stack_chunk(void);
extern void _kernel_setreturncode(unsigned code);
......
......@@ -31,7 +31,7 @@
* qualifier (section 6.7.3).
* - Due to limitations in the current version of the compiler, the
* ATOMIC_*_LOCK_FREE #defines (section 7.17.1) always report a value of 1
* (type is sometimes lock-free, section 7.17.5)
* ("type is sometimes lock-free", section 7.17.5)
* - Due to limitations in the current version of the compiler, all atomic
* operations are performed via library function calls
* - For correct operation, all atomic types and pointer arguments must be
......@@ -43,30 +43,41 @@
* - For correct operation, atomic types must only be placed in normal,
* cacheable memory, or as otherwise required by the ARMv6+ load/store
* exclusive instructions.
* - For application clients of the Shared C Library, beware that the
* (non lock-free) atomic types are only safe for use within foreground
* usermode threads. Attempting to access them from within C signal handlers,
* or RISC OS interrupt handlers, callback handlers, or event handlers is
* unsafe and may deadlock the program. The C18 specification allows for this
* (see sections 5.1.2.3 and 7.14.1.1).
* - The reason for this restriction is that prior to ARMv6K, many of the
* operations can only be made fully atomic (from the perspective of the
* CPU) by disabling interrupts, which would have a significant performance
* impact on usermode code. As a compromise, application clients will
* instead prefer to use usermode-friendly implementations of the atomic
* functions which don't disable interrupts (e.g. by instead using a
* SWP-based spinlock).
* - This warning does not apply to types which are lock-free, as advertised
* by atomic_is_lock_free() or the ATOMIC_*_LOCK_FREE #defines.
* - This warning does not apply to module clients of the Shared C Library.
* Non lock-free types used by module clients will use routines which
* enforce atomicity by disabling interrupts, making them safe for
* everything except FIQ handlers (see below).
* - Prior to ARMv6K, none of the implementations are atomic from the
* perspective of FIQ handlers.
* - On everything except ARM2, atomic_flag is fully lock-free and atomic for
* both IRQ and FIQ handlers. The ARM2 implementation must disable interrupts,
* and isn't atomic for FIQ handlers.
*
* For atomic_flag:
*
* - On ARM2a and newer CPUs, the atomic_flag implementation uses either the SWP
* or LDREX/STREX instructions, making it fully lock-free and atomic across
* all execution environments: threads, C signal handlers, IRQ or FIQ
* handlers, RISC OS callback handlers, RISC OS event handlers, SWI and abort
* handlers, etc.
* - On ARM2 CPUs, the atomic_flag implementation enforces atomicity by
* temporarily disabling interrupts, and isn't atomic for FIQ handlers.
*
* For the other atomic types:
*
* - On ARMv6K and newer CPUs, the library functions are implemented using the
* LDREX/STREX family of instructions. This makes them fully lock-free and
* atomic across all execution environments: threads, C signal handlers, IRQ
* or FIQ handlers, RISC OS callback handlers, RISC OS event handlers, SWI and
* abort handlers, etc.
* - On older CPUs, none of the types are atomic from the perspective of FIQ
* handlers, and the routines typically enforce atomicity by temporarily
* disabling interrupts.
* - If full atomicity isn't required, an alternative implementation can be
* selected by setting the _kernel_INITFLAG_UNSAFE_ATOMICS initialisation
* flag (see kernel.h). This will cause the Shared C Library to implement
* the atomic operations using a SWP-based spinlock instead of disabling
* interrupts. This should significantly improve performance for usermode
* code (by avoiding the need for SWI calls to disable/enable interrupts),
* but it has the major drawback that the routines will only be safe to use
* from foreground threads. Attempting to use them from other environments
* (C signal handlers, RISC OS IRQ handlers, etc.) is unsafe and may cause
* incorrect behaviour or deadlocks. The C18 specification does allow for
* this kind of unsafe behaviour (see sections 5.1.2.3 and 7.14.1.1), but
* for safety the library defaults to the slower but safer implementation.
* - On ARM2, there is no SWP instruction, so the UNSAFE_ATOMICS flag is
* ignored and the IRQ-based routines will be used regardless.
*/
......
......@@ -68,6 +68,7 @@ SharedLibrary SETL {TRUE}
IMPORT |Image$$RW$$Limit|
IMPORT |Image$$ZI$$Base|
IMPORT |__root_stack_size|, WEAK
IMPORT |_kernel_init_flags|, WEAK
[ :LNOT::DEF:AnsiLib
IMPORT |Stub$$Init$$Base|
]
......@@ -105,6 +106,7 @@ SharedLibrary SETL {TRUE}
; r5 pointer to end of statics to copy
; r6 = requested stack size (in K) << 16
; r6 bit 0 indicates 32-bit mode
; r6 bits 1-15 are _kernel_init_flags
LDR r0, =|Stub$$Init$$Base|
; ADR r0, |_lib_init_table|
......@@ -119,6 +121,10 @@ SharedLibrary SETL {TRUE}
TEQ pc, pc ; EQ if in a 32-bit mode, NE if 26-bit
ORREQ r6, r6, #1
]
LDR lr, =|_kernel_init_flags|
CMP lr, #0
LDRNE lr, [lr]
ORRNE r6, r6, lr
[ Code_Destination = "RAM"
[ APCS_Type = "APCS-R"
SWI X:OR:Lib_Init + 1
......@@ -259,6 +265,10 @@ LookupError
TEQ pc, pc ; EQ if in a 32-bit mode, NE if 26-bit
ORREQ r6, r6, #1
]
LDR lr, =|_kernel_init_flags|
CMP lr, #0
LDRNE lr, [lr]
ORRNE r6, r6, lr
[ Code_Destination = "RAM"
[ APCS_Type = "APCS-R"
STMFD sp!, {r8}
......
......@@ -94,9 +94,10 @@ CPUFeature_Halfword * CPUFeature_LDRH_LDRSH_STRH
CPUFeature_ARMK * CPUFeature_CLREX_LDREXB_LDREXH_STREXB_STREXH
CPUFeature_ARMv7 * CPUFeature_DMB_DSB_ISB
; We also define these flags for usermode-only ARMa & Halfword
; This allows usermode-only clients to use more efficient routines which don't
; need to disable IRQs to provide safety with IRQ handlers
; We also define these 'U' flags which are used to identify faster (but unsafe)
; routine variants for ARMa & Halfword CPUs. These variants are faster because
; they don't disable IRQs, but this also means they're only really safe from
; foreground threads - not from signal handlers or IRQ handlers.
GBLL AtomicNoUARMa
GBLL AtomicSupUARMa
GBLL AtomicNoUHalfword
......@@ -106,8 +107,9 @@ AtomicSupUARMa SETL AtomicSupARMa
AtomicNoUHalfword SETL AtomicNoHalfword
AtomicSupUHalfword SETL AtomicSupHalfword
; Fake CPUFeature flag that will only be set for usermode clients
CPUFeature_UsermodeOnly * 254
; Fake CPUFeature flag that will only be set for clients that allow use of the
; 'U' unsafe variants
CPUFeature_UnsafeAtomics * 254
; Define our own versions of No26bitCode & No32bitCode, so we can support
; 32bit-neutral builds for standalone testing
......@@ -163,10 +165,10 @@ Variants SETS "$Variants $variant"
MACRO
CPUFeatureFlags $variant
[ ("$variant" :LEFT: 1) == "U"
; Usermode-only variant
; Unsafe variant
LCLS V
V SETS "$variant" :RIGHT: ((:LEN: "$variant")-1)
DCB CPUFeature_$V,CPUFeature_UsermodeOnly,255,255
DCB CPUFeature_$V,CPUFeature_UnsafeAtomics,255,255
|
DCB CPUFeature_$variant,255,255,255
]
......
......@@ -54,6 +54,8 @@ NoStubEntries SETL {TRUE} ; stop h_modmacro defining Entry
|_Lib$Reloc$Off| * -SL_Lib_Offset
|_Mod$Reloc$Off| * -SL_Client_Offset
|_kernel_INITFLAG_UNSAFE_ATOMICS| * 2
OverflowBit * &10000000
n_module_claim EQU 6
......@@ -170,8 +172,13 @@ XMessageTrans_CloseFile EQU &61504 ; Put in hdr file someday
; r6 [0] = 0 if client is running in a 26-bit mode (may still be APCS-32)
; = 1 if client is running in a 32-bit mode (=> must be APCS-32,
; and address constant table must follow vectors)
; r6 [1:15] = 0
; r6 [16:31] = requested root stack size (Kb)
; r6 [1:15] = _kernel_init_flags:
; r6 [1] = 1 if client consents to signal-unsafe atomics
; r6 [16:31] = requested root stack size (kB units)
; NOTE: Historic bug with handling of the root stack size means
; that bits 6 & 7 of r6 must be zero, because all of r6 [6:31]
; gets treated as the root stack size, resulting in an unaligned
; SP if bits 6 or 7 are set.
; Returns with stub vector patched
; if input r5>r4, user statics copied and [sl, #SL_Client_Offset]
; initialised
......@@ -230,6 +237,8 @@ LI_ItemSize # 0
CLRPSR F_bit:OR:I_bit, lr ; ensure IRQs enabled
MOV r10, r6 ; Remember init flags for use by PickRoutineVariant
; Check memory constraints work:
; lr = end of required workspace (data + stack), ie stack top for non-modules
; r12 = stack base
......@@ -245,7 +254,8 @@ LI_ItemSize # 0
ADD r12, r1, lr ; heap base plus size of copied statics
; For things linked with old stubs r6 may be zero - if so,
; we use the old default root stack size
MOVS r6, r6, LSR #6
MOVS r6, r6, LSR #16
MOVNE r6, r6, LSL #10
MOVEQ r6, #OldRootStackSize
ADD lr, r12, r6 ; plus size of root stack
CheckEnoughStore
......@@ -560,6 +570,7 @@ PickRoutineVariant ROUT
; word 0: 4x OS_PlatformFeatures 34 flags
; word 1: Offset to routine to use if all features supported
; word 2: Offset to routine to use if features not supported
; r10 = _kernel_init_flags
; r11 = relative SWI number, as per _Shared_Lib_Module_SWI_Code
; Out:
; r6 -> routine to use
......@@ -572,10 +583,11 @@ PickRoutineVariant ROUT
CMP r3, #254
BHI %FT20
BLO %FT14
; 254 is used as a special flag which means "client must be usermode only"
TEQ r11, #2 ; APCS-26 module
TEQNE r11, #4 ; APCS-32 module
BNE %BT10 ; Neither of those, the test passes
; 254 is used as a special flag which is used for signal-unsafe versions
; of the atomic routines. Only pass this test if the client has agreed
; to their use, via _kernel_init_flags.
TST r10, #|_kernel_INITFLAG_UNSAFE_ATOMICS|
BNE %BT10
B %FT19
14
MOV r0, #OSPlatformFeatures_ReadCodeFeatures
......
......@@ -8,12 +8,11 @@ The *AtomicTest command runs some basic unit tests to verify functionality.
The *AtomicBench command runs benchmarks, producing CSV output comparing the performance of the stdatomic code against the equivalent non-atomic operations. Figures are in ops per second, with the non-atomic ("noatomic") figure in the first column and the stdatomic figure in the second column.
The tests/benchmarks can run in usermode, privileged mode, or both:
The tests/benchmarks can run in usermode, privileged mode, or both, by specifying the appropriate flags on the command line:
The 'u' flag runs the code in usermode
The 'p' flag runs the code in SVC mode
The 'm' flag controls the behaviour of the usermode code, when the stdatomic implementation has been directly included:
- With the 'm' flag specified, the usermode test will run as if it were a module client of CLib
- With the 'm' flag absent, the usermode test will run as if it were an application client of CLib
Note: when testing the running CLib module, behaviour will always be as if the 'm' flag was specified. However, atomic1.c and bench.c can each be individually compiled as applications, to allow testing of CLib application clients.
If the stdatomic implementation has been directly included, you can also use the 'n' flag to request the unsafe versions of the routines (equivalent to setting the _kernel_INITFLAG_UNSAFE_ATOMICS flag).
atomic1.c and bench.c can also be individually compiled as applications. Both accept the 'n' flag described above; bench.c additionally accepts a 'c' flag, which selects CSV output instead of plain output.
\ No newline at end of file
......@@ -26,6 +26,7 @@
/*
 * Local assert(): if X is false, print the failed expression together with
 * the file name and line number, then return from the enclosing function.
 * Because it expands to a plain "return;", it can only be used inside
 * functions returning void. It also expands to a bare if statement, so do
 * not use it as the unbraced body of an if that is followed by an else.
 */
#define assert(X) if (!(X)) { printf("Test failure: %s @ %s %d\n",#X,__FILE__,__LINE__); return; }
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define CONCAT(X,Y) CONCAT2(Y,X)
#define CONCAT2(Y,X) X##Y
......@@ -246,7 +247,7 @@ void runtests(void)
/*
 * Entry point used when this file is built as a standalone application.
 * When USE_ROATOMIC is defined, the roatomic implementation is initialised
 * first; if the first command line argument contains the letter 'n', the
 * unsafe versions of the atomic routines are requested. The unit tests are
 * then run.
 */
int main(int argc,char **argv)
{
#ifdef USE_ROATOMIC
  roatomic_init();
  /* strchr() takes the character to search for as an int, so the flag must
   * be written as the character constant 'n'. A string literal would pass a
   * pointer instead and the flag would never be matched reliably. */
  roatomic_init((argc > 1) && strchr(argv[1],'n'));
#endif
  runtests();
}
......
......@@ -162,8 +162,8 @@ void runbenchmarks(bool csv)
/*
 * Entry point used when this file is built as a standalone application.
 * Flags are single letters looked up in the first command line argument.
 */
int main(int argc,char **argv)
{
#ifdef USE_ROATOMIC
roatomic_init();
/* 'n' requests the unsafe versions of the atomic routines */
roatomic_init((argc > 1) && strchr(argv[1],'n'));
#endif
runbenchmarks((argc > 1) && !strcmp(argv[1],"--csv"));
/* 'c' requests CSV output from the benchmarks */
runbenchmarks((argc > 1) && strchr(argv[1],'c'));
}
#endif
......@@ -37,7 +37,6 @@ extern void runtests(void);
extern void runbenchmarks(bool csv);
static int mode = 0;
static bool reinit = false;
static void *module_pw;
......@@ -55,15 +54,14 @@ _kernel_oserror *module_finalise(int fatal, int podule, void *pw)
_kernel_oserror *module_command(const char *arg_string, int argc, int cmd_no, void *pw)
{
#ifdef USE_ROATOMIC
roatomic_init();
#endif
/* Run in usermode? */
bool usermode = strchr(arg_string,'u');
/* Run in privileged mode? */
bool privileged = strchr(arg_string,'p');
/* Is usermode code testing a module client? (only does something for roatomic) */
reinit = !strchr(arg_string,'m');
/* Test the unsafe version of the routines? (only does something for roatomic) */
#ifdef USE_ROATOMIC
roatomic_init(strchr(arg_string,'n') != NULL);
#endif
switch (cmd_no)
{
case CMD_AtomicTest:
......@@ -110,22 +108,10 @@ int main(int argc,char **argv)
{
case 1:
printf("user mode:\n");
#ifdef USE_ROATOMIC
if (reinit)
{
roatomic_init();
}
#endif
runtests();
break;
case 2:
printf("user mode:\n");
#ifdef USE_ROATOMIC
if (reinit)
{
roatomic_init();
}
#endif
runbenchmarks(true);
break;
}
......
......@@ -25,8 +25,8 @@ help-string: AtomicTest 0.01
date-string: 05 Apr 2022
command-keyword-table: module_command
AtomicTest(min-args:1, max-args:1, invalid-syntax:"Syntax: *AtomicTest [u][p][m]", help-text:"*AtomicTest tests stuff"),
AtomicBench(min-args:1, max-args:1, invalid-syntax:"Syntax: *AtomicBench [u][p][m]", help-text:"*AtomicBench benchmarks stuff")
AtomicTest(min-args:1, max-args:1, invalid-syntax:"Syntax: *AtomicTest [u][p][n]", help-text:"*AtomicTest tests stuff"),
AtomicBench(min-args:1, max-args:1, invalid-syntax:"Syntax: *AtomicBench [u][p][n]", help-text:"*AtomicBench benchmarks stuff")
module-is-runnable:
......
......@@ -15,7 +15,7 @@
#ifndef ROATOMIC_H
#define ROATOMIC_H
extern void roatomic_init(void);
extern void roatomic_init(int unsafe);
/* Include the stdatomic header from these clib sources */
#include "../../clib/stdatomic.h"
......
......@@ -45,6 +45,7 @@ roatomic_init
MOV pc, lr
|
FunctionEntry
MOV r12, a1
MOV r2, #0 ; Variant flags
ADR r3, %FT90
10
......@@ -58,12 +59,10 @@ roatomic_init
ORREQ r2, r2, r1
B %BT10
50
; If we're in a privileged mode, disable UARMa+UHalfword
; This should allow testing of the usermode-only and privileged variants of the routines
EOR a1, pc, pc
MRS a1, CPSR
TST a1, #3
BICNE r2, r2, #VariantFlag_UARMa+VariantFlag_UHalfword
; Clear the 'U' flags if r12 is zero (i.e. we're not running in unsafe
; mode)
CMP r12, #0
BICEQ r2, r2, #VariantFlag_UARMa+VariantFlag_UHalfword
LDR a1, =variant_flags
STRB r2, [a1]
Return
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment