Commit 9024d1f6, authored by Ben Avison, committed by ROOL

Support permanent mapping of IO above 4GB using supersections

This is facilitated by two extended calls. From the HAL:
* RISCOS_MapInIO64 allows the physical address to be specified as 64-bit

From the OS:
* OS_Memory 21 acts like OS_Memory 13, but takes a 64-bit physical address

There is no need to extend RISCOS_LogToPhys, instead we change its return
type to uint64_t. Any existing HALs will only read the a1 register, thereby
narrowing the result to 32 bits, which is fine because all existing HALs
only expected a 32-bit physical address space anyway.

Internally, RISCOS_MapInIO has been rewritten to detect and use supersections
for IO regions that end above 4GB. Areas that straddle the 4GB boundary should
also work, although if you then search for a sub-area that doesn't, it won't
find a match and will instead map it in again using vanilla sections - this is
enough of an edge case that I don't think we need to worry about it too much.

The rewrite also conveniently fixes a bug in the old code: if the area being
mapped in went all the way up to physical address 0xFFFFFFFF (inclusive) then
only the first megabyte of the area was actually mapped in due to a loop
termination issue.

Requires RiscOS/Sources/Programmer/HdrSrc!2
parent b3b95b4b
......@@ -1029,6 +1029,21 @@ It returns the resultant virtual address corresponding to phys, or 0 for
failure. Failure can only occur if no RAM is available for page tables, or
if the virtual address space is exhausted.
-- void RISCOS_AddDevice(unsigned int flags, struct device *d)
-- uint64_t RISCOS_LogToPhys(const void *log)
-- int RISCOS_IICOpV(IICDesc *descs, uint32_t ndesc_and_bus)
-- void *RISCOS_MapInIO64(unsigned int flags, uint64_t phys, unsigned int size)
As for RISCOS_MapInIO, but accepting a 64-bit physical address argument.
-- void *RISCOS_AccessPhysicalAddress(unsigned int flags, void *phys, void **oldp)
flags: bit 2 => make memory bufferable
......
......@@ -1216,6 +1216,8 @@ Cache_HALDevice # 4 ; Pointer to any HAL cache dev
IOAllocPtr # 4 ; current lowpoint of mapped I/O space (also upper limit on DAs)
IOAllocLimit # 4 ; current lowest allowed I/O space (DA creation may move this up)
PhysIllegalMask # 4 ; mask of invalid bits in upper word of physical addresses
[ :DEF: ShowWS
! 0, "Free space before DebuggerSpace = ":CC::STR:(&300-@)
......
......@@ -22,6 +22,7 @@ OS_MapInIO # 1
OS_AddDevice # 1
OS_LogToPhys # 1
OS_IICOpV # 1
OS_MapInIO64 # 1
HighestOSEntry * :INDEX: @ - 1
......
......@@ -111,6 +111,7 @@ OSMemReason_MemoryAccessPrivileges * 17 ; Decode AP numbers into permission flag
OSMemReason_FindAccessPrivilege * 18 ; Find best AP number from given permission flags
OSMemReason_DMAPrep * 19 ; Convert PA <-> LA, perform cache maintenance required for DMA
OSMemReason_Compatibility * 20 ; Get/set compatibility settings
OSMemReason_MapIO64Permanent * 21 ; Map in IO area from 64-bit space
OSMemReason_CheckMemoryAccess * 24 ; Return attributes/permissions for a logical address range
; OS_Memory 17/18 permission flags
......
......@@ -1847,6 +1847,51 @@ AllocateL2PT ROUT
EXIT
; DeterminePhysIllegalMask
; Works out which bits of the upper word of a 64-bit physical address can
; never be mapped on this CPU, and caches the answer in the workspace.
; in: ip = ZeroPage
; out: v3 = PhysIllegalMask
; v3 is also stored at [ip, #PhysIllegalMask]
; lr is used as scratch; the routine returns through the stacked copy
; Both possible results are non-zero, so a stored value of zero can be
; used by callers to mean "not determined yet".
DeterminePhysIllegalMask
Push "lr"
MOV v3, #&FFFFFFFF ; By default any upper bits are illegal
ARM_read_ID lr
AND lr, lr, #&F :SHL: 16 ; isolate bits 16..19 of the ID value
CMP lr, #ARMvF :SHL: 16 ; Check that feature registers are implemented
BNE %FT01 ; Some ARMv5 chips supported supersections, but let's not worry about them
MRC p15, 0, lr, c0, c1, 7 ; ID_MMFR3
TST lr, #&F :SHL: 28 ; top nibble = supersection support field (zero when supported, per the ARM ARM)
MOVEQ v3, #&FFFFFF00 ; supersections available: low 8 bits of the upper word (address bits 32..39) are legal
01 STR v3, [ip, #PhysIllegalMask] ; cache the result
Pull "pc"
MACRO
$lab ConstructIOPTE $pte, $phys_mb, $flags, $tmp
; Build the L1 page table entry for one section or supersection of IO.
; Whether a supersection entry is built is decided by the L1_SS bit in $flags.
; $pte (output) L1 page table entry word
; $phys_mb (input, preserved) physical address, in megabytes
; for vanilla sections:
; bits 0..11 go in bits 20..31
; for supersections:
; bits 0..3 assumed zero
; bits 4..11 go in bits 24..31
; bits 12..15 go in bits 20..23
; bits 16..19 go in bits 5..8
; (field widths/positions come from L1_SSb32Width/L1_SSb32Shift and
; L1_SSb36Width/L1_SSb36Shift)
; $flags (input, preserved) page table attribute bits
; $tmp (corrupted) scratch register
; The condition flags are corrupted (set by the ANDS below).
;
; UBFXNE should be safe pre v6T2, since we won't attempt to use
; supersections on such CPUs and they won't trap untaken undefined instructions
ASSERT $pte <> $phys_mb
ASSERT $pte <> $flags
ASSERT $pte <> $tmp
ASSERT $tmp <> $phys_mb
ASSERT $tmp <> $flags
$lab ANDS $tmp, $flags, #L1_SS ; NE = supersection; for sections $tmp becomes 0. None of the following instructions change the flags
UBFXNE $tmp, $phys_mb, #32-20, #L1_SSb32Width ; supersection: $tmp = physical address bits 32 upwards
ORR $pte, $phys_mb, $tmp ; drop them into the (assumed zero) low bits of the MB number; no-op for sections
UBFXNE $tmp, $phys_mb, #36-20, #L1_SSb36Width ; supersection: $tmp = physical address bits 36 upwards
ASSERT L1_SSb32Shift = 20
ORR $pte, $flags, $pte, LSL #L1_SSb32Shift ; move MB number up to bits 20..31 (higher bits shift out) and merge the attribute bits
ORRNE $pte, $pte, $tmp, LSL #L1_SSb36Shift ; supersection: merge in the bits 36+ field
MEND
; void *RISCOS_AccessPhysicalAddress(unsigned int flags, void *addr, void **oldp)
RISCOS_AccessPhysicalAddress ROUT
; Only flag user can ask for is bufferable
......@@ -2193,6 +2238,20 @@ InitProcVecsEnd
; a3 = size
; Out: a1 = assigned logical address, or 0 if failed (no room)
;
; 32-bit entry point: rearranges its arguments into the form taken by the
; 64-bit entry point that follows, then falls into it.
RISCOS_MapInIO ROUT
MOV a4, a3 ; size moves from a3 to a4
MOV a3, #0 ; upper word of the physical address is zero; a2 is left as the low word
; drop through...
;
; In: a1 = flags (L1_B,L1_C,L1_TEX)
; bit 20 set if doubly mapped
; bit 21 set if L1_AP specified (else default to AP_None)
; a2,a3 = physical address
; a4 = size
; Out: a1 = assigned logical address, or 0 if failed (no room)
;
RISCOS_MapInIO64 ROUT
; Will detect and return I/O space already mapped appropriately, or map and return new space
; For simplicity and speed of search, works on a section (1Mb) granularity
;
......@@ -2209,7 +2268,6 @@ InitProcVecsEnd
MapInFlag_DoublyMapped * 1:SHL:20
MapInFlag_APSpecified * 1:SHL:21
RISCOS_MapInIO ROUT
TST a1, #MapInFlag_APSpecified
BICEQ a1, a1, #L1_AP
; For VMSAv6, assume HAL knows what it's doing and requests correct settings for AP_ROM
......@@ -2221,114 +2279,111 @@ RISCOS_MapInIO ROUT
ORR a1, a1, #L1_Section
]
RISCOS_MapInIO_PTE ; a1 bits 0-19 = L1 section entry flags, bits 20+ = our extra flags
Entry "v1-v5,v7"
LDR v7, =(1:SHL:20)-1
AND v4, a2, v7 ; v4 = offset of original within section-aligned area
ADD a3, a2, a3 ; a3 -> end (exclusive)
BIC a2, a2, v7 ; round a2 down to a section boundary
ADD a3, a3, v7
BIC a3, a3, v7 ; round a3 up to a section boundary
ANDS v5, a1, #MapInFlag_DoublyMapped
SUBNE v5, a3, a2 ; v5 = offset of second mapping or 0
Entry "a2,v1-v8"
LDR ip, =ZeroPage
LDR a4, =L1PT
AND a1, a1, v7 ; mask out our extra flags
LDR v2, =IO ; logical end (exclusive) of currently mapped IO
LDR v1, [ip, #IOAllocPtr] ; logical start (inclusive)
SUB v1, v1, #&100000
SUB a4, a4, #1 ; reduce by 1 so end physical address is inclusive
ADDS v1, a2, a4
ADC v2, a3, #0 ; v1,v2 = end physical address
LDR v3, [ip, #PhysIllegalMask]
TEQ v3, #0
BLEQ DeterminePhysIllegalMask
TST v2, v3
MOVNE a1, #0
BNE %FT90 ; can't map in physical addresses in this range
MOVS v3, v2
MOVNE v3, #&F ; v3 = number of MB to use in rounding (0 for sections, 15 for supersections)
MOV v4, a2, LSR #20
ORR v4, v4, a3, LSL #12
BIC v4, v4, v3 ; v4 = physical start MB to map
MOV v5, v1, LSR #20
ORR v5, v5, v2, LSL #12
ADD v5, v5, #1 ; make exclusive
ADD v5, v5, v3
BIC v5, v5, v3 ; v5 = physical end MB to map
ANDS a2, a1, #MapInFlag_DoublyMapped
SUBNE a2, v5, v4 ; a2 = offset of second mapping (in MB) or 0
LDR v6, =&FFFFF
AND a1, a1, v6 ; mask out our extra flags
CMP v5, #&1000
ORRHI a1, a1, #L1_SS ; set supersection flag if necessary
LDR a3, [ip, #IOAllocPtr]
MOV a3, a3, LSR #20
ADD a3, a3, v3
BIC a3, a3, v3 ; a3 = logical MB that we're checking for a match
ConstructIOPTE a4, v4, a1, lr ; a4 = first PT entry to match
ADD v3, v3, #1 ; v3 = number of MB to step between sections or supersections
LDR v1, =L1PT
LDR v2, =IO :SHR: 20 ; v2 = last logical MB to check (exclusive)
SUB a3, a3, v3 ; no increment on first iteration
10
ADD v1, v1, #&100000 ; next mapped IO section
CMP v1, v2
BHS %FT32 ; no more currently mapped IO
LDR v3, [a4, v1, LSR #(20-2)] ; L1PT entry (must be for mapped IO)
MOV lr, v3, LSR #20 ; physical address bits
CMP lr, a2, LSR #20
BNE %BT10 ; no address match
AND lr, v3, v7
TEQ lr, a1
BNE %BT10 ; no flags match
TEQ v5, #0 ; doubly mapped?
BEQ %FT19
ADD lr, v1, v5 ; address of second copy
CMP lr, v2
BHS %FT32
LDR v3, [a4, lr, LSR #(20-2)]
MOV lr, v3, LSR #20 ; physical address bits
CMP lr, a2, LSR #20
BNE %BT10 ; no address match
AND lr, v3, v7
TEQ lr, a1
BNE %BT10 ; no flags match
19
;
; alright, found start of requested IO already mapped, and with required flags
;
Push "a2, v1"
ADD a3, a3, v3 ; next logical MB to check
ADD ip, a3, a2 ; logical MB of base mapping or second mapping if there is one
CMP ip, v2
BHS %FT50 ; run out of logical addresses to check
LDR lr, [v1, a3, LSL #2] ; check only or first entry
TEQ lr, a4
LDREQ lr, [v1, ip, LSL #2] ; check only or second entry
TEQEQ lr, a4
BNE %BT10
; Found start of requested IO already mapped, and with required flags
; Now check that the remaining sections are all there too
MOV v6, v4 ; v6 = expected physical MB
MOV v7, a3 ; v7 = logical MB we expect to find it at
20
ADD a2, a2, #&100000
CMP a2, a3
Pull "a2, v1", HS
BHS %FT40 ; its all there already!
ADD v1, v1, #&100000 ; next mapped IO section
CMP v1, v2
BHS %FT30 ; no more currently mapped IO
LDR v3, [a4, v1, LSR #(20-2)] ; L1PT entry
MOV lr, v3, LSR #20 ; physical address bits
CMP lr, a2, LSR #20
BNE %FT29 ; address match failed
AND lr, v3, v7
TEQ lr, a1
TEQEQ v5, #0 ; doubly mapped?
BEQ %BT20 ; address and flags match so far
ADD lr, v1, v5 ; where duplicate should be
CMP lr, v2
BHS %FT30 ; no more currently mapped IO
LDR v3, [a4, lr, LSR #(20-2)]
MOV lr, v3, LSR #20 ; physical address bits
CMP lr, a2, LSR #20
BNE %FT29 ; address match failed
AND lr, v3, v7
TEQ lr, a1
BEQ %BT20
29
Pull "a2, v1"
B %BT10
30
Pull "a2, v1"
;
; request not currently mapped, only partially mapped, or mapped with wrong flags
;
32
ADD v6, v6, v3 ; next physical MB
ADD v7, v7, v3 ; next logical MB
ADD ip, v7, a2 ; logical MB of base mapping or second mapping if there is one
CMP v6, v5
MOVHS a4, a3, LSL #20
BHS %FT80 ; reached end and everything matched
CMP ip, v2
BHS %FT50 ; run out of logical addresses to check
ConstructIOPTE v8, v6, a1, lr
LDR lr, [v1, v7, LSL #2] ; check only or first entry
TEQ lr, v8
LDREQ lr, [v1, ip, LSL #2] ; check only or second entry
TEQEQ lr, v8
BEQ %BT20 ; good so far, try next entry
B %BT10 ; mismatch, continue outer loop
50 ; Request not currently mapped, only partially mapped, or mapped with wrong flags
LDR ip, =ZeroPage
LDR v2, [ip, #IOAllocPtr]
ADD v1, v2, a2
SUB v1, v1, a3 ; attempt to allocate size of a3-a2
SUB v1, v1, v5 ; double if necessary
LDR v3, [ip, #IOAllocLimit] ; can't extend down below limit
CMP v1, v3
MOVLS a1, #0
; Allocate logical space for a new mapping, working downwards from IOAllocPtr.
; Here: v3 = step in MB between entries (1 for sections, 16 for supersections)
; v4 = physical start MB, v5 = physical end MB (exclusive)
; a2 = offset of second mapping in MB, or 0 if not doubly mapped
; ip = ZeroPage
; The rounding mask must be step-1 (0 or 15); step+1 would make the BIC below
; clear unrelated bits of the MB number and misalign supersections.
SUB v8, v3, #1 ; v8 = number of MB to use in rounding (0 for sections, 15 for supersections)
LDR a3, [ip, #IOAllocPtr]
MOV a3, a3, LSR #20 ; current low point of mapped IO, in MB
BIC a3, a3, v8 ; round down to 1MB or 16MB boundary (some memory may remain unmapped above when we map in a supersection)
SUB a4, v5, v4 ; MB covered by one copy of the mapping
ADD a4, a4, a2 ; a4 = number of MB required
SUB a3, a3, a4 ; a3 = logical MB at which the new mapping will start
MOV a4, a3, LSL #20 ; a4 = the same, as a logical address
LDR v6, [ip, #IOAllocLimit]
CMP a4, v6 ; run out of room to allocate IO?
MOVLS a1, #0 ; LS is to match previous version of the code - perhaps should be LO?
BLS %FT90 ; fail: return 0
STR v1, [ip, #IOAllocPtr]
ORR a2, a2, a1 ; first L1PT value
34
STR a2, [a4, v1, LSR #(20-2)]
TEQ v5, #0
ADDNE v2, v1, v5
STRNE a2, [a4, v2, LSR #(20-2)]
ADD a2, a2, #&100000
ADD v1, v1, #&100000 ; next section
CMP a2, a3
BLO %BT34
STR a4, [ip, #IOAllocPtr]
60
ConstructIOPTE v8, v4, a1, lr ; v8 = page table entry for this (super)section
MOV v7, v3 ; number of consecutive entries to program the same
70
ADD v6, a3, a2
STR v8, [v1, a3, LSL #2] ; write only or first entry
ADD a3, a3, #1
STR v8, [v1, v6, LSL #2] ; write only or second entry
SUBS v7, v7, #1
BNE %BT70
ADD v4, v4, v3
CMP v4, v5
BLO %BT60
PageTableSync
LDR v1, [ip, #IOAllocPtr]
40
ADD a1, v1, v4 ; logical address for request
80
LDR a2, [sp] ; retrieve original physical address from stack
BIC a2, a2, #&FF000000 ; distance from 16MB boundary for supersections
TST a1, #L1_SS
BICEQ a2, a2, #&00F00000 ; distance from 1MB boundary for sections
ADD a1, a4, a2
90
EXIT
......@@ -2338,12 +2393,13 @@ RISCOS_AddDevice
ADDS a1, a2, #0 ; also clears V
B HardwareDeviceAdd_Common
; uint32_t RISCOS_LogToPhys(const void *log)
; uint64_t RISCOS_LogToPhys(const void *log)
RISCOS_LogToPhys ROUT
Push "r4,r5,r8,r9,lr"
MOV r4, a1
LDR r8, =L2PT
BL logical_to_physical
MOVCC a2, #0 ; assume L2 page tables only used for bottom 4GB for now
MOVCC a1, r5
BCC %FT10
; Try checking L1PT for any section mappings (logical_to_physical only
......@@ -2355,8 +2411,13 @@ RISCOS_LogToPhys ROUT
ASSERT L1_Section = 2
EOR a1, a1, #2
TST a1, #3
MOVNE a2, #-1
MOVNE a1, #-1
BNE %FT10
; Check if it's a supersection
TST a1, #L1_SS
BNE %FT20
MOV a2, #0 ; vanilla sections only map bottom 4GB of physical space
; Apply offset from bits 0-19 of logical addr
[ NoARMT2
MOV a1, a1, LSR #20
......@@ -2368,6 +2429,13 @@ RISCOS_LogToPhys ROUT
10
Pull "r4,r5,r8,r9,pc"
; Supersection case: rebuild the 64-bit physical address in a1 (low word)
; and a2 (high word) from the L1 entry in a1 and the logical address in r4.
20 MOV a3, a1, LSR #L1_SSb36Shift ; bring the physical-address-bits-36+ field down to bit 0 of a3
UBFX a2, a1, #L1_SSb32Shift, #L1_SSb32Width ; a2 = physical address bits 32 upwards (zero-extended)
BFI a2, a3, #36-32, #L1_SSb36Width ; insert the bits 36+ field above them in the high word
; Apply offset from bits 0-23 of logical addr
BFI a1, r4, #0, #24 ; low word: entry bits 24..31 kept, bits 0..23 replaced by the offset
Pull "r4,r5,r8,r9,pc"
; int RISCOS_IICOpV(IICDesc *descs, uint32_t ndesc_and_bus)
RISCOS_IICOpV ROUT
Push "lr"
......
......@@ -331,6 +331,7 @@ RISCOS_Entries
DCD RISCOS_AddDevice - RISCOS_Entries
DCD RISCOS_LogToPhys - RISCOS_Entries
DCD RISCOS_IICOpV - RISCOS_Entries
DCD RISCOS_MapInIO64 - RISCOS_Entries
RISCOS_Entries_End
; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
......
......@@ -71,9 +71,9 @@ MemReturn
B FindAccessPrivilege ; 18
B DMAPrep ; 19
B ChangeCompatibility ; 20
B %BT20 ; 21 |
B %BT20 ; 22 | Reserved for us
B %BT20 ; 23 |
B MapIO64permanent ; 21
B %BT20 ; 22 reserved for us
B %BT20 ; 23 reserved for us
B CheckMemoryAccess ; 24
; 25+ reserved for ROL
40
......@@ -930,7 +930,33 @@ RP_failed
;
MapIOpermanent ROUT
Entry "r0-r2,r12"
; Convert the input flags to some DA flags
MOV r3, r2
MOV r2, #0
B %FT10
;----------------------------------------------------------------------------------------
;MapIO64permanent - map IO space (if not already mapped) from 64-bit physical space
;and return logical address
;
; In: r0 bits 0..7 = 21 (reason code 21)
; r0 bit 8 = 1 to map bufferable space (0 is normal, non-bufferable)
; r0 bit 9 = 1 to map cacheable space (0 is normal, non-cacheable)
; r0 bits 10..12 = cache policy
; r0 bits 13..15 = 0 (reserved flags)
; r0 bit 16 = 1 to doubly map
; r0 bit 17 = 1 if access privileges specified
; r0 bits 18..23 = 0 (reserved flags)
; r0 bits 24..27 = access privileges (if bit 17 set)
; r0 bits 28..31 = 0 (reserved flags)
; r1,r2 = physical address of base of IO space required
; r3 = size of IO space required (bytes)
;
; Out: r3 = logical address of base of IO space
; - or error if not possible (no room)
;
MapIO64permanent
ALTENTRY
10 ; Convert the input flags to some DA flags
TST r0, #1:SHL:17
MOVEQ r12, #2 ; Default AP: SVC RW, USR none
MOVNE r12, r0, LSR #24 ; Else use given AP
......
......@@ -628,6 +628,13 @@ DecodeL1Entry
MOVEQ r0, r0, LSL #10
EXIT
50
; If it's a supersection that maps beyond 4GB, pretend it faults for now
TST a1, #L1_SSb32Mask
TSTEQ a1, #L1_SSb36Mask
TSTNE a1, #L1_SS
MOVNE r1, #-1
EXIT NE
; Find entry in PPL table
LDR r3, =ZeroPage
LDR lr, =L2_AP
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment