Commit 65fa6a28 authored by Jeffrey Lee
Browse files

Implement support for cacheable pagetables

Detail:
  Modern ARMs (ARMv6+) introduce the possibility for the page table walk hardware to make use of the data cache(s) when performing memory accesses. This can significantly reduce the cost of a TLB miss on the system, and since the accesses are cache-coherent with the CPU it allows us to make the page tables cacheable for CPU (program) accesses also, improving the performance of page table manipulation by the OS.
  Even on ARMs where the page table walk can't use the data cache, it's been measured that page table manipulation operations can still benefit from placing the page tables in write-through or bufferable memory.
  So with that in mind, this set of changes updates the OS to allow cacheable/bufferable page tables to be used by the OS + MMU, using a system-appropriate cache policy.
  File changes:
  - hdr/KernelWS - Allocate workspace for storing the page flags that are to be used by the page tables
  - hdr/OSMem - Re-specify CP_CB_AlternativeDCache as having a different behaviour on ARMv6+ (inner write-through, outer write-back)
  - hdr/Options - Add CacheablePageTables option to allow switching back to non-cacheable page tables if necessary. Add SyncPageTables var which will be set {TRUE} if either the OS or the architecture requires a DSB after writing to a faulting page table entry.
  - s/ARM600, s/VMSAv6 - Add new SetTTBR & GetPageFlagsForCacheablePageTables functions. Update VMSAv6 for wider XCBTable (now 2 bytes per element)
  - s/ARMops - Update pre-ARMv7 MMU_Changing ARMops to drain the write buffer on entry if cacheable pagetables are in use (ARMv7+ already has this behaviour due to architectural requirements). For VMSAv6 Normal memory, change the way that the OS encodes the cache policy in the page table entries so that it's more compatible with the encoding used in the TTBR.
  - s/ChangeDyn - Update page table page flag handling to use PageTable_PageFlags. Make use of new PageTableSync macro.
  - s/Exceptions, s/AMBControl/memmap - Make use of new PageTableSync macro.
  - s/HAL - Update MMU initialisation sequence to make use of PageTable_PageFlags + SetTTBR
  - s/Kernel - Add PageTableSync macro, to be used after any write to a faulting page table entry
  - s/MemInfo - Update OS_Memory 0 page flag conversion. Update OS_Memory 24 to use new symbol for page table access permissions.
  - s/MemMap2 - Use PageTableSync. Add routines to enable/disable cacheable pagetables
  - s/NewReset - Enable cacheable pagetables once we're fully clear of the MMU initialisation sequence (doing it earlier would be trickier due to potential double-mapping)
Admin:
  Tested on pretty much everything currently supported
  Delivers moderate performance benefits to page table ops on old systems (e.g. 10% faster), astronomical benefits on some new systems (up to 8x faster)
  Stats: https://www.riscosopen.org/forum/forums/3/topics/2728?page=2#posts-58015


Version 5.71. Tagged as 'Kernel-5_71'
parent 9a96263a
......@@ -11,13 +11,13 @@
GBLS Module_HelpVersion
GBLS Module_ComponentName
GBLS Module_ComponentPath
Module_MajorVersion SETS "5.70"
Module_Version SETA 570
Module_MajorVersion SETS "5.71"
Module_Version SETA 571
Module_MinorVersion SETS ""
Module_Date SETS "13 Dec 2016"
Module_ApplicationDate SETS "13-Dec-16"
Module_ComponentName SETS "Kernel"
Module_ComponentPath SETS "castle/RiscOS/Sources/Kernel"
Module_FullVersion SETS "5.70"
Module_HelpVersion SETS "5.70 (13 Dec 2016)"
Module_FullVersion SETS "5.71"
Module_HelpVersion SETS "5.71 (13 Dec 2016)"
END
/* (5.70)
/* (5.71)
*
* This file is automatically maintained by srccommit, do not edit manually.
* Last processed by srccommit version: 1.1.
*
*/
#define Module_MajorVersion_CMHG 5.70
#define Module_MajorVersion_CMHG 5.71
#define Module_MinorVersion_CMHG
#define Module_Date_CMHG 13 Dec 2016
#define Module_MajorVersion "5.70"
#define Module_Version 570
#define Module_MajorVersion "5.71"
#define Module_Version 571
#define Module_MinorVersion ""
#define Module_Date "13 Dec 2016"
......@@ -18,6 +18,6 @@
#define Module_ComponentName "Kernel"
#define Module_ComponentPath "castle/RiscOS/Sources/Kernel"
#define Module_FullVersion "5.70"
#define Module_HelpVersion "5.70 (13 Dec 2016)"
#define Module_LibraryVersionInfo "5:70"
#define Module_FullVersion "5.71"
#define Module_HelpVersion "5.71 (13 Dec 2016)"
#define Module_LibraryVersionInfo "5:71"
......@@ -1230,6 +1230,8 @@ DebuggerSpace_Size * &1000
IICBus_Count * 5 ; 5 buses is enough for all current machines
IICBus_Base # IICBus_Size*IICBus_Count
PageTable_PageFlags # 4 ; Page flags used for page tables. L2PT uses this directly, L1PT adds in PageFlags_Unavailable.
AlignSpace 16 ; skipped bit must end on 16-byte boundary (ClearPhysRAM does 4 words at a time for skipped areas)
SkippedTablesEnd # 0
......
......@@ -49,6 +49,7 @@ DynAreaFlags_Shrinkable * 1 :SHL: 9 ; whether area may be shrunk whe
DynAreaFlags_SparseMap * 1 :SHL: 10 ; whether area may have non-contiguous mapping of pages (Holey dynamic areas Batman!)
DynAreaFlags_PiersBinding * 1 :SHL: 11 ; whether area is bound to client application, and so may be swapped out with it (not implemented yet)
DynAreaFlags_CPBits * 7 :SHL: 12 ; cache policy variant for NotBufferable and NotCacheable bits
DynAreaFlags_CPShift * 12 ; shift amount for the above
DynAreaFlags_NeedsDMA * 1 :SHL: 15 ; only allocate from DMAable memory
; Bits 16-19 are used by RISCOS Ltd. We can reuse them for internal flags, but
; should probably avoid allocating any public flags.
......@@ -71,7 +72,7 @@ CP_CB_Default * 0 ; OS decides cache policy (WB if
CP_CB_Writethrough * 1 ; Writethrough cacheable, read allocate. If not available, NCB_Merging
CP_CB_WritebackReadAlloc * 2 ; Writeback cacheable, read allocate. If not available, writethrough.
CP_CB_WritebackWriteAlloc * 3 ; Writeback cacheable, write allocate. If not available, WB/R.
CP_CB_AlternativeDCache * 4 ; Use XScale/SA11x0 mini-data cache. If not available, CB_Default.
CP_CB_AlternativeDCache * 4 ; Use XScale/SA11x0 mini-data cache. If not available, CB_Default. For ARMv6+, this selects inner write-through, outer write-back (write alloc).
;
; Public page flags (note - may overlap DA flags)
......
......@@ -188,6 +188,12 @@ PMPRAMFS_Size * 256 ; Number of logical pages (physi
MaxRAMFS_Size SETA 128 ; Max size available for RAM Disc
]
GBLL CacheablePageTables
CacheablePageTables SETL {TRUE} ; Use cacheable page tables where possible
GBLL SyncPageTables
SyncPageTables SETL (MEMM_Type = "VMSAv6") :LOR: CacheablePageTables ; Any page table modification (specifically, overwriting faulting entries) requires synchronisation
GBLL UseNewFX0Error
UseNewFX0Error SETL ((Version :AND: 1) = 1) ; Whether *FX 0 should show the ROM link date instead of the UtilityModule date
......
......@@ -663,12 +663,7 @@ AMB_movepagesin_L2PT ROUT
SUBS r8,r8,#1
BNE %BT30
35
[ MEMM_Type = "VMSAv6"
; DSB + ISB required to ensure effect of page table write is fully
; visible (after overwriting a faulting entry)
myDSB ,r0
myISB ,r0,,y
]
PageTableSync
EXIT
; ----------------------------------------------------------------------------------
......
......@@ -531,4 +531,24 @@ DecodeL1Entry
; Jump to common code to do AP decode + PCBTrans search
B %BT20
; In:
; r0 = phys addr (aligned)
; r1 -> ZeroPage
; Out:
; TTBR and any other related registers updated
; If MMU is currently on, it's assumed the mapping of ROM+stack will not be
; affected by this change
SetTTBR ROUT
ARM_MMU_transbase r0 ; program the CP15 translation table base from r0 (macro defined elsewhere; pre-ARMv6 variant takes only the phys addr, no cacheability attributes)
MOV pc, lr
[ CacheablePageTables
; Out: R0 = desired page flags for the page tables
; Only assembled when CacheablePageTables is {TRUE} (see hdr/Options).
GetPageFlagsForCacheablePageTables ROUT
; For ARMv5 and below the MMU can't read from the L1 cache, so the
; best we can do is a write-through cache policy
; (page table access permissions OR'd with the cache policy field)
LDR r0, =AreaFlags_PageTablesAccess :OR: (CP_CB_Writethrough :SHL: DynAreaFlags_CPShift)
MOV pc, lr
]
END
......@@ -59,13 +59,16 @@ Init_ARMarch
MOV a1, a1, LSR #16
MOV pc, lr
; Called pre-MMU to set up some (temporary) PCBTrans and PPLTrans pointers
; Called pre-MMU to set up some (temporary) PCBTrans and PPLTrans pointers,
; and the initial PageTable_PageFlags value
; Also used post-MMU for VMSAv6 case
; In:
; a1 -> ZeroPage
; Out:
; a1-a4, ip corrupt
Init_PCBTrans ROUT
LDR a2, =AreaFlags_PageTablesAccess :OR: DynAreaFlags_NotCacheable :OR: DynAreaFlags_NotBufferable
STR a2, [a1, #PageTable_PageFlags]
[ MEMM_Type = "VMSAv6"
ADRL a2, XCBTableVMSAv6
STR a2, [a1, #MMU_PCBTrans]
......@@ -1232,17 +1235,32 @@ TLB_InvalidateEntry_ARMv3
MOV pc, lr
; Full MMU change on ARMv3: invalidate the entire TLB and cache.
MMU_Changing_ARMv3
[ CacheablePageTables
; NOTE(review): the SWP below forces an unbuffered memory access just
; below the stack, presumably to drain the write buffer so outstanding
; page table writes reach memory before the TLB invalidate - confirm
; against ARM610 documentation. a1 is overwritten with the old stack
; contents, but the invalidate-all MCRs below appear not to use its value.
SUB sp, sp, #4
SWP a1, a1, [sp]
ADD sp, sp, #4
]
MCR p15, 0, a1, c5, c0 ; invalidate TLB
MCR p15, 0, a1, c7, c0 ; invalidate cache
MOV pc, lr
; Uncached MMU change on ARMv3: invalidate the entire TLB only.
MMU_ChangingUncached_ARMv3
[ CacheablePageTables
; NOTE(review): SWP forces an unbuffered access, presumably to drain the
; write buffer first (see MMU_Changing_ARMv3) - confirm vs ARM610 docs.
; a1 is clobbered, but the invalidate-all MCR appears not to use its value.
SUB sp, sp, #4
SWP a1, a1, [sp]
ADD sp, sp, #4
]
MCR p15, 0, a1, c5, c0 ; invalidate TLB
MOV pc, lr
; a1 = page affected (page aligned address)
;
MMU_ChangingEntry_ARMv3
[ CacheablePageTables
; Drain the write buffer via a forced unbuffered access. Unlike the
; invalidate-all variants, a1 must survive here (it holds the page
; address for the MCRs below): Push stores a1 at [sp], and the SWP
; immediately loads that same value back into a1 while re-storing it.
Push "a1"
SWP a1, a1, [sp]
ADD sp, sp, #4
]
MCR p15, 0, a1, c6, c0 ; invalidate TLB entry
MCR p15, 0, a1, c7, c0 ; invalidate cache
MOV pc, lr
......@@ -1254,6 +1272,9 @@ MMU_ChangingEntries_ARMv3 ROUT
CMP a2, #16 ; arbitrary-ish threshold
BHS MMU_Changing_ARMv3
Push "a2"
[ CacheablePageTables
SWP a2, a2, [sp]
]
10
MCR p15, 0, a1, c6, c0 ; invalidate TLB entry
SUBS a2, a2, #1 ; next page
......@@ -1266,6 +1287,11 @@ MMU_ChangingEntries_ARMv3 ROUT
; a1 = page affected (page aligned address)
;
MMU_ChangingUncachedEntry_ARMv3
[ CacheablePageTables
; Drain the write buffer via a forced unbuffered access; a1 (the page
; address for the MCR below) is preserved because the SWP reads back the
; value Push just stored at [sp].
Push "a1"
SWP a1, a1, [sp]
ADD sp, sp, #4
]
MCR p15, 0, a1, c6, c0 ; invalidate TLB entry
MOV pc, lr
......@@ -1276,6 +1302,9 @@ MMU_ChangingUncachedEntries_ARMv3 ROUT
CMP a2, #16 ; arbitrary-ish threshold
BHS MMU_ChangingUncached_ARMv3
Push "a2"
[ CacheablePageTables
SWP a2, a2, [sp]
]
10
MCR p15, 0, a1, c6, c0 ; invalidate TLB entry
SUBS a2, a2, #1 ; next page
......@@ -1331,12 +1360,26 @@ TLB_InvalidateEntry_Unified
MOV pc, lr
MMU_Changing_Writethrough
[ CacheablePageTables
; Yuck - this is probably going to be quite slow. Something to fix
; properly if/when we port to a system that uses this type of CPU.
Push "lr"
LDR a1, =ZeroPage
ARMop DSB_ReadWrite,,,a1
Pull "lr"
]
MOV a1, #0
MCR p15, 0, a1, c8, c7 ; invalidate TLB
MCR p15, 0, a1, c7, c7 ; invalidate cache
MOV pc, lr
MMU_ChangingUncached
[ CacheablePageTables
Push "lr"
LDR a1, =ZeroPage
ARMop DSB_ReadWrite,,,a1
Pull "lr"
]
MOV a1, #0
MCR p15, 0, a1, c8, c7 ; invalidate TLB
MOV pc, lr
......@@ -1344,6 +1387,12 @@ MMU_ChangingUncached
; a1 = page affected (page aligned address)
;
MMU_ChangingEntry_Writethrough
[ CacheablePageTables
Push "a1,lr"
LDR a1, =ZeroPage
ARMop DSB_ReadWrite,,,a1
Pull "a1,lr"
]
MCR p15, 0, a1, c8, c7, 1 ; invalidate TLB entry
MOV a1, #0
MCR p15, 0, a1, c7, c7 ; invalidate cache
......@@ -1356,6 +1405,12 @@ MMU_ChangingEntries_Writethrough ROUT
CMP a2, #16 ; arbitrary-ish threshold
BHS MMU_Changing_Writethrough
Push "a2"
[ CacheablePageTables
Push "a1,lr"
LDR a1, =ZeroPage
ARMop DSB_ReadWrite,,,a1
Pull "a1,lr"
]
10
MCR p15, 0, a1, c8, c7, 1 ; invalidate TLB entry
SUBS a2, a2, #1 ; next page
......@@ -1368,6 +1423,12 @@ MMU_ChangingEntries_Writethrough ROUT
; a1 = page affected (page aligned address)
;
MMU_ChangingUncachedEntry
[ CacheablePageTables
Push "a1,lr"
LDR a1, =ZeroPage
ARMop DSB_ReadWrite,,,a1
Pull "a1,lr"
]
MCR p15, 0, a1, c8, c7, 1 ; invalidate TLB entry
MOV pc, lr
......@@ -1378,6 +1439,12 @@ MMU_ChangingUncachedEntries ROUT
CMP a2, #16 ; arbitrary-ish threshold
BHS MMU_ChangingUncached
Push "a2"
[ CacheablePageTables
Push "a1,lr"
LDR a1, =ZeroPage
ARMop DSB_ReadWrite,,,a1
Pull "a1,lr"
]
10
MCR p15, 0, a1, c8, c7, 1 ; invalidate TLB entry
SUBS a2, a2, #1 ; next page
......@@ -1583,8 +1650,15 @@ ICache_InvalidateRange_WB_CR7_LDa ROUT
Pull "pc"
TLB_InvalidateAll_WB_CR7_LDa ROUT
MMU_ChangingUncached_WB_CR7_LDa
MMU_ChangingUncached_WB_CR7_LDa ROUT
[ CacheablePageTables
MOV a1, #0
MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer
[ MEMM_Type = "VMSAv6"
MCR p15, 0, a1, c7, c5, 4 ; ISB
]
]
TLB_InvalidateAll_WB_CR7_LDa
MOV a1, #0
MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB
MOV pc, lr
......@@ -1592,8 +1666,17 @@ MMU_ChangingUncached_WB_CR7_LDa
; a1 = page affected (page aligned address)
;
TLB_InvalidateEntry_WB_CR7_LDa ROUT
MMU_ChangingUncachedEntry_WB_CR7_LDa
MMU_ChangingUncachedEntry_WB_CR7_LDa ROUT
[ CacheablePageTables
Push "a1"
MOV a1, #0
MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer
[ MEMM_Type = "VMSAv6"
MCR p15, 0, a1, c7, c5, 4 ; ISB
]
Pull "a1"
]
TLB_InvalidateEntry_WB_CR7_LDa
MCR p15, 0, a1, c8, c5, 1 ; invalidate ITLB entry
MCR p15, 0, a1, c8, c6, 1 ; invalidate DTLB entry
MOV pc, lr
......@@ -1663,6 +1746,15 @@ IMB_List_WB_CR7_LDa ROUT
Pull "v1-v2,pc"
MMU_Changing_WB_CR7_LDa ROUT
[ CacheablePageTables
Push "a1"
MOV a1, #0
MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer
[ MEMM_Type = "VMSAv6"
MCR p15, 0, a1, c7, c5, 4 ; ISB
]
Pull "a1"
]
MOV a1, #0
MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB
B Cache_CleanInvalidateAll_WB_CR7_LDa
......@@ -1670,6 +1762,15 @@ MMU_Changing_WB_CR7_LDa ROUT
; a1 = page affected (page aligned address)
;
MMU_ChangingEntry_WB_CR7_LDa ROUT
[ CacheablePageTables
Push "a1"
MOV a1, #0
MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer
[ MEMM_Type = "VMSAv6"
MCR p15, 0, a1, c7, c5, 4 ; ISB
]
Pull "a1"
]
[ MEMM_Type = "ARM600"
Push "a2, lr"
MCR p15, 0, a1, c8, c6, 1 ; invalidate DTLB entry
......@@ -1699,6 +1800,13 @@ MMU_ChangingEntry_WB_CR7_LDa ROUT
;
MMU_ChangingEntries_WB_CR7_LDa ROUT
Push "a2, a3, lr"
[ CacheablePageTables
MOV a3, #0
MCR p15, 0, a3, c7, c10, 4 ; drain WBuffer
[ MEMM_Type = "VMSAv6"
MCR p15, 0, a3, c7, c5, 4 ; ISB
]
]
MOV a2, a2, LSL #Log2PageSize
LDR lr, =ZeroPage
LDR a3, [lr, #DCache_RangeThreshold] ;check whether cheaper to do global clean
......@@ -1741,6 +1849,15 @@ MMU_ChangingEntries_WB_CR7_LDa ROUT
; a2 = number of pages
;
MMU_ChangingUncachedEntries_WB_CR7_LDa ROUT
[ CacheablePageTables
Push "a1"
MOV a1, #0
MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer
[ MEMM_Type = "VMSAv6"
MCR p15, 0, a1, c7, c5, 4 ; ISB
]
Pull "a1"
]
CMP a2, #32 ; arbitrary-ish threshold
BHS %FT20
Push "a2"
......@@ -1850,13 +1967,19 @@ Cache_RangeThreshold_WB_Crd
LDR a1, [a1, #DCache_RangeThreshold]
MOV pc, lr
TLB_InvalidateAll_WB_Crd
MMU_ChangingUncached_WB_Crd
[ CacheablePageTables
MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer
]
TLB_InvalidateAll_WB_Crd
MCR p15, 0, a1, c8, c7, 0 ;flush ITLB and DTLB
MOV pc, lr
TLB_InvalidateEntry_WB_Crd
MMU_ChangingUncachedEntry_WB_Crd
[ CacheablePageTables
MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer
]
TLB_InvalidateEntry_WB_Crd
MCR p15, 0, a1, c8, c6, 1 ;flush DTLB entry
MCR p15, 0, a1, c8, c5, 0 ;flush ITLB
MOV pc, lr
......@@ -1905,6 +2028,9 @@ IMB_List_WB_Crd ROUT
MMU_Changing_WB_Crd
Push "lr"
[ CacheablePageTables
MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer
]
MCR p15, 0, a1, c8, c7, 0 ;flush ITLB and DTLB
BL Cache_CleanAll_WB_Crd ;clean DCache (wrt to non-interrupt stuff)
MCR p15, 0, a1, c7, c5, 0 ;flush ICache
......@@ -1917,6 +2043,9 @@ MMU_ChangingEntry_WB_Crd ROUT
;are for the same virtual address (and that virtual address will not be
;involved in interrupts, since it is involved in remapping)
;
[ CacheablePageTables
MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer
]
Push "a2, lr"
ADD a2, a1, #PageSize
LDR lr, =ZeroPage
......@@ -1938,6 +2067,9 @@ MMU_ChangingEntries_WB_Crd ROUT
;
;same comments as MMU_ChangingEntry_WB_Crd
;
[ CacheablePageTables
MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer
]
Push "a2, a3, lr"
MOV a2, a2, LSL #Log2PageSize
LDR lr, =ZeroPage
......@@ -2042,6 +2174,9 @@ Cache_CleanInvalidateRange_WB_Crd ROUT
Pull "a2, a3, pc"
MMU_ChangingUncachedEntries_WB_Crd ROUT
[ CacheablePageTables
MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer
]
CMP a2, #32 ;arbitrary-ish threshold
BHS %FT20
Push "lr"
......@@ -2222,15 +2357,21 @@ Cache_RangeThreshold_WB_Cal_LD ROUT
MOV pc, lr
TLB_InvalidateAll_WB_Cal_LD ROUT
MMU_ChangingUncached_WB_Cal_LD
MMU_ChangingUncached_WB_Cal_LD ROUT
[ CacheablePageTables
MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer (waits, so no need for CPWAIT)
]
TLB_InvalidateAll_WB_Cal_LD
MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB
CPWAIT
MOV pc, lr
TLB_InvalidateEntry_WB_Cal_LD ROUT
MMU_ChangingUncachedEntry_WB_Cal_LD
MMU_ChangingUncachedEntry_WB_Cal_LD ROUT
[ CacheablePageTables
MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer (waits, so no need for CPWAIT)
]
TLB_InvalidateEntry_WB_Cal_LD
MCR p15, 0, a1, c8, c5, 1 ; invalidate ITLB entry
MCR p15, 0, a1, c8, c6, 1 ; invalidate DTLB entry
CPWAIT
......@@ -2303,6 +2444,9 @@ IMB_List_WB_Cal_LD ROUT
MMU_Changing_WB_Cal_LD ROUT
[ CacheablePageTables
MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer (waits, so no need for CPWAIT)
]
Push "lr"
MCR p15, 0, a1, c8, c7, 0 ; invalidate ITLB and DTLB
BL Cache_CleanAll_WB_Cal_LD
......@@ -2317,6 +2461,9 @@ MMU_ChangingEntry_WB_Cal_LD ROUT
;are for the same virtual address (and that virtual address will not be
;involved in interrupts, since it is involved in remapping)
;
[ CacheablePageTables
MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer (waits, so no need for CPWAIT)
]
Push "a2, lr"
ADD a2, a1, #PageSize
LDR lr, =ZeroPage
......@@ -2346,6 +2493,9 @@ MMU_ChangingEntries_WB_Cal_LD ROUT
;
;same comments as MMU_ChangingEntry_WB_Cal_LD
;
[ CacheablePageTables
MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer (waits, so no need for CPWAIT)
]
Push "a2, a3, lr"
MOV a2, a2, LSL #Log2PageSize
LDR lr, =ZeroPage
......@@ -2475,6 +2625,9 @@ Cache_CleanInvalidateRange_WB_Cal_LD ROUT
B Cache_CleanInvalidateAll_WB_Cal_LD
MMU_ChangingUncachedEntries_WB_Cal_LD ROUT
[ CacheablePageTables
MCR p15, 0, a1, c7, c10, 4 ; drain WBuffer (waits, so no need for CPWAIT)
]
CMP a2, #32 ; arbitrary-ish threshold
BHS %FT20
Push "lr"
......@@ -3872,16 +4025,34 @@ XCBTableXScaleNoExt ; C+B CNB NCB N
[ MEMM_Type = "VMSAv6"
; VMSAv6/v7 L2 memory attributes (short descriptor format, TEX remap disabled, identical inner/outer attributes)
; VMSAv6/v7 L2 memory attributes (short descriptor format, TEX remap disabled)
L2_SO_S * 0 ; Strongly-ordered, shareable
L2_Dev_S * L2_B ; Device, shareable
L2_Nrm_WT * L2_C ; Normal, WT/RA, S bit determines shareability
L2_Nrm_WBRA * L2_C+L2_B ; Normal, WB/RA, S bit determines shareability
L2_Nrm_NC * 1:SHL:L2_TEXShift ; Normal, non-cacheable (but bufferable), S bit determines shareability
L2_Nrm_WBWA * (1:SHL:L2_TEXShift)+L2_C+L2_B ; Normal, WB/WA, S bit determines shareability
L2_Dev_nS * 2:SHL:L2_TEXShift ; Device, non-shareable
; For Normal memory types, use the form that is explicit about inner and outer
; cacheability. This provides a nice mapping to the way cacheability is
; specified in the TTBR (see SetTTBR)
VMSAv6_Cache_NC * 0
VMSAv6_Cache_WBWA * 1
VMSAv6_Cache_WT * 2
VMSAv6_Cache_WBRA * 3
ASSERT L2_C = L2_B:SHL:1
MACRO
VMSAv6_Nrm_XCB $inner, $outer
L2_Nrm_$inner._$outer * ((4+VMSAv6_Cache_$outer):SHL:L2_TEXShift) + (VMSAv6_Cache_$inner * L2_B)
[ "$outer" == "$inner"
L2_Nrm_$inner * L2_Nrm_$inner._$outer
]
MEND
VMSAv6_Nrm_XCB WT, WT ; Normal, WT/RA, S bit determines shareability
VMSAv6_Nrm_XCB WBRA, WBRA ; Normal, WB/RA, S bit determines shareability
VMSAv6_Nrm_XCB NC, NC ; Normal, non-cacheable (but bufferable), S bit determines shareability
VMSAv6_Nrm_XCB WBWA, WBWA ; Normal, WB/WA, S bit determines shareability
VMSAv6_Nrm_XCB WT, WBWA ; Normal, inner WT, outer WB/WA, S bit determines shareability
; Generic XCB table for VMSAv6/v7
; * NCNB is roughly equivalent to "strongly ordered".
......@@ -3904,26 +4075,26 @@ L2_Dev_nS * 2:SHL:L2_TEXShift ; Device, non-shareable
; (downgrade WB to WT to NC), but we may end up doing more cache maintenance
; than needed if the hardware downgrades some areas to NC.
XCBTableVMSAv6 ; C+B CNB NCB NCNB
= L2_Nrm_WBWA, L2_SO_S, L2_Nrm_NC, L2_SO_S ; Default
= L2_Nrm_WT, L2_SO_S, L2_Dev_S, L2_SO_S ; WT, WT, Non-merging, X
= L2_Nrm_WBRA, L2_SO_S, L2_Dev_S, L2_SO_S ; WB/RA, WB, Merging, X
= L2_Nrm_WBWA, L2_SO_S, L2_Nrm_NC, L2_SO_S ; WB/WA, X, Idempotent, X
= L2_Nrm_WBWA, L2_SO_S, L2_Nrm_NC, L2_SO_S ; Alt DCache, X, X, X
= L2_Nrm_WBWA, L2_SO_S, L2_Nrm_NC, L2_SO_S ; X, X, X, X
= L2_Nrm_WBWA, L2_SO_S, L2_Nrm_NC, L2_SO_S ; X, X, X, X
= L2_Nrm_WBWA, L2_SO_S, L2_Nrm_NC, L2_SO_S ; X, X, X, X
XCBTableVMSAv6 ; C+B CNB NCB NCNB
DCW L2_Nrm_WBWA, L2_SO_S, L2_Nrm_NC, L2_SO_S ; Default
DCW L2_Nrm_WT, L2_SO_S, L2_Dev_S, L2_SO_S ; WT, WT, Non-merging, X
DCW L2_Nrm_WBRA, L2_SO_S, L2_Dev_S, L2_SO_S ; WB/RA, WB, Merging, X
DCW L2_Nrm_WBWA, L2_SO_S, L2_Nrm_NC, L2_SO_S ; WB/WA, X, Idempotent, X
DCW L2_Nrm_WT_WBWA,L2_SO_S,L2_Nrm_NC,L2_SO_S ; Alt DCache, X, X, X
DCW L2_Nrm_WBWA, L2_SO_S, L2_Nrm_NC, L2_SO_S ; X, X, X, X
DCW L2_Nrm_WBWA, L2_SO_S, L2_Nrm_NC, L2_SO_S ; X, X, X, X
DCW L2_Nrm_WBWA, L2_SO_S, L2_Nrm_NC, L2_SO_S ; X, X, X, X
; This second set of entries deals with when pages are made
; temporarily uncacheable - we need to change the cacheability without
; changing the memory type.
= L2_Nrm_NC, L2_SO_S, L2_Nrm_NC, L2_SO_S ; Default
= L2_Nrm_NC, L2_SO_S, L2_Dev_S, L2_SO_S ; WT, WT, Non-merging, X
= L2_Nrm_NC, L2_SO_S, L2_Dev_S, L2_SO_S ; WB/RA, WB, Merging, X
= L2_Nrm_NC, L2_SO_S, L2_Nrm_NC, L2_SO_S ; WB/WA, X, Idempotent, X
= L2_Nrm_NC, L2_SO_S, L2_Nrm_NC, L2_SO_S ; Alt DCache, X, X, X
= L2_Nrm_NC, L2_SO_S, L2_Nrm_NC, L2_SO_S ; X, X, X, X
= L2_Nrm_NC, L2_SO_S, L2_Nrm_NC, L2_SO_S ; X, X, X, X
= L2_Nrm_NC, L2_SO_S, L2_Nrm_NC, L2_SO_S ; X, X, X, X
DCW L2_Nrm_NC, L2_SO_S, L2_Nrm_NC, L2_SO_S ; Default
DCW L2_Nrm_NC, L2_SO_S, L2_Dev_S, L2_SO_S ; WT, WT, Non-merging, X
DCW L2_Nrm_NC, L2_SO_S, L2_Dev_S, L2_SO_S ; WB/RA, WB, Merging, X
DCW L2_Nrm_NC, L2_SO_S, L2_Nrm_NC, L2_SO_S ; WB/WA, X, Idempotent, X
DCW L2_Nrm_NC, L2_SO_S, L2_Nrm_NC, L2_SO_S ; Alt DCache, X, X, X
DCW L2_Nrm_NC, L2_SO_S, L2_Nrm_NC, L2_SO_S ; X, X, X, X
DCW L2_Nrm_NC, L2_SO_S, L2_Nrm_NC, L2_SO_S ; X, X, X, X
DCW L2_Nrm_NC, L2_SO_S, L2_Nrm_NC, L2_SO_S ; X, X, X, X
] ; MEMM_Type = "VMSAv6"
......
......@@ -142,8 +142,7 @@ FreePool DefAreaFlags None, DynAreaFlags_NotCacheable :OR: DynAreaFlags_
Duff DefAreaFlags None, DynAreaFlags_NotCacheable :OR: DynAreaFlags_NotBufferable
CursorChunkCacheable DefAreaFlags Read, PageFlags_Unavailable ; Should be OSAP_None?
CursorChunk DefAreaFlags Read, PageFlags_Unavailable :OR: DynAreaFlags_NotCacheable
L2PT DefAreaFlags None, DynAreaFlags_NotCacheable :OR: DynAreaFlags_NotBufferable
L1PT DefAreaFlags None, DynAreaFlags_NotCacheable :OR: DynAreaFlags_NotBufferable :OR: PageFlags_Unavailable
PageTablesAccess DefAreaFlags None ; n.b. just the AP value, for full page flags use PageTable_PageFlags workspace var
HALWorkspace DefAreaFlags Read, PageFlags_Unavailable
HALWorkspaceNCNB DefAreaFlags None, DynAreaFlags_NotCacheable :OR: DynAreaFlags_NotBufferable :OR: PageFlags_Unavailable
ZeroPage DefAreaFlags Read, PageFlags_Unavailable
......@@ -2962,14 +2961,11 @@ DynArea_PMP_LogOp ROUT
ARMop MMU_ChangingUncached
B %FT75
72
[ MEMM_Type = "VMSAv6"
[ SyncPageTables
LDR r0, PMPLogOp_UnsafeMapIn
TEQ r0, #0
BEQ %FT75
; DSB + ISB required to ensure effect of page table write is fully
; visible (after overwriting a faulting entry)
myDSB ,r0
myISB ,r0,,y
PageTableSync
]
75
; Release CDASemaphore
......@@ -3915,7 +3911,8 @@ AllocateBackingLevel2 Entry "r0-r8,r11"
LDR lr, =L1PT
ADD r8, lr, r8, LSR #18 ; point r8 at start of L1 we may be updating
ADD r1, r7, r3, LSR #10 ; point r1 at L2PT for r3 again
MOV r11, #AreaFlags_L2PT ; access privs (+CB bits)
LDR r11, =ZeroPage
LDR r11, [r11, #PageTable_PageFlags] ; access privs (+CB bits)
20
LDR r6, [r1], #4 ; get L2PT entry again
TST r6, #3 ; if no fault
......@@ -3941,6 +3938,10 @@ AllocateBackingLevel2 Entry "r0-r8,r11"
TEQ r0, r3
BNE %BT15