Commit aca7f939 authored by Jeffrey Lee

Perform extra TLB maintenance on ARMv6+. Other cache/TLB maintenance tweaks.

Detail:
  s/ARMops - Implement Cache_RangeThreshold for PL310 (this helps AMBControl decide which type of TLB maintenance is best; a usage sketch follows the Admin section below). Fix MMU_ChangingEntry_PL310 doing more work than necessary: it was attempting to flush all ways for a given address tag, when it should only have flushed the lines within the page and let the cache worry about which tags/indices they correspond to.
  s/ChangeDyn, s/VMSAv6, s/AMBControl/memmap - Do extra TLB maintenance following writes to the page tables, as mandated by the ARMv6+ memory order model (an ordering sketch follows this list). Fixes frequent crashes on Cortex-A9 when running with lazy task swapping disabled, and presumably fixes other crashes too.
  s/MemInfo - Fix OS_Memory cache/uncache so that it performs cache/TLB maintenance on a per-page basis instead of a global basis. This vastly improves performance when a large cache is present, but may need tweaking again in future to fall back to a global op when large numbers of pages are being modified.
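
  The ordering the second item enforces is, roughly, the following (an illustrative sketch, not code from the patch; the myDSB/myISB barrier macros are the ones visible in the diff below, and the register choices are arbitrary):

      STR     r6, [r1]                ; write the new page table entry
      myDSB   ,r0                     ; ensure the write is observable by the table walker
      MCR     p15, 0, r3, c8, c7, 1   ; invalidate the unified TLB entry for the MVA in r3
      myDSB   ,r0                     ; wait for the TLB maintenance to complete
      myISB   ,r0,,y                  ; then resynchronise the instruction stream

  In the patch the invalidate-plus-barrier step is normally performed via the MMU_ChangingUncachedEntry/Entries ARMops; where the old entry was a translation fault (so no stale TLB entry can exist), it reduces to just the DSB + ISB pair.
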
Admin:
  Tested on Pandaboard
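
  Cache_RangeThreshold (first Detail item above) simply reports the size, in bytes, above which a global cache/TLB operation is expected to be cheaper than per-page maintenance. A hypothetical caller - not part of this patch - might use it as follows (the ARMop macro form and the Log2PageSize symbol are as used in the diff below; the register choice and the label names are invented):

      LDR     r3, =ZeroPage
      ARMop   Cache_RangeThreshold,,,r3       ; a1 = threshold in bytes (1MB for PL310)
      CMP     r8, a1, LSR #Log2PageSize       ; r8 = number of pages being remapped
      BLO     per_page_maintenance            ; small range: per-entry TLB/cache ops
      B       global_maintenance              ; large range: a single global flush is cheaper
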


Version 5.35, 4.79.2.255. Tagged as 'Kernel-5_35-4_79_2_255'
parent 9c55b854
@@ -13,11 +13,11 @@
GBLS Module_ComponentPath
Module_MajorVersion SETS "5.35"
Module_Version SETA 535
Module_MinorVersion SETS "4.79.2.254"
Module_Date SETS "17 Jan 2015"
Module_ApplicationDate SETS "17-Jan-15"
Module_MinorVersion SETS "4.79.2.255"
Module_Date SETS "20 Jan 2015"
Module_ApplicationDate SETS "20-Jan-15"
Module_ComponentName SETS "Kernel"
Module_ComponentPath SETS "castle/RiscOS/Sources/Kernel"
Module_FullVersion SETS "5.35 (4.79.2.254)"
Module_HelpVersion SETS "5.35 (17 Jan 2015) 4.79.2.254"
Module_FullVersion SETS "5.35 (4.79.2.255)"
Module_HelpVersion SETS "5.35 (20 Jan 2015) 4.79.2.255"
END
@@ -5,19 +5,19 @@
*
*/
#define Module_MajorVersion_CMHG 5.35
#define Module_MinorVersion_CMHG 4.79.2.254
#define Module_Date_CMHG 17 Jan 2015
#define Module_MinorVersion_CMHG 4.79.2.255
#define Module_Date_CMHG 20 Jan 2015
#define Module_MajorVersion "5.35"
#define Module_Version 535
#define Module_MinorVersion "4.79.2.254"
#define Module_Date "17 Jan 2015"
#define Module_MinorVersion "4.79.2.255"
#define Module_Date "20 Jan 2015"
#define Module_ApplicationDate "17-Jan-15"
#define Module_ApplicationDate "20-Jan-15"
#define Module_ComponentName "Kernel"
#define Module_ComponentPath "castle/RiscOS/Sources/Kernel"
#define Module_FullVersion "5.35 (4.79.2.254)"
#define Module_HelpVersion "5.35 (17 Jan 2015) 4.79.2.254"
#define Module_FullVersion "5.35 (4.79.2.255)"
#define Module_HelpVersion "5.35 (20 Jan 2015) 4.79.2.255"
#define Module_LibraryVersionInfo "5:35"
@@ -286,6 +286,12 @@ AMB_movepagesin_L2PT ROUT
SUBS r8,r8,#1
BNE %BT30
35
[ MEMM_Type = "VMSAv6"
; DSB + ISB required to ensure effect of page table write is fully
; visible (after overwriting a faulting entry)
myDSB ,r0
myISB ,r0,,y
]
Pull "r0-r10,r12,pc"
; ----------------------------------------------------------------------------------
@@ -440,6 +446,17 @@ AMB_movepagesout_L2PT ROUT
SUBS r8,r8,#1
BNE %BT30
35
[ MEMM_Type = "VMSAv6"
; In order to guarantee that the result of a page table write is
; visible, the ARMv6+ memory order model requires us to perform TLB
; maintenance (equivalent to the MMU_ChangingUncached ARMop) after we've
; performed the write. Performing the maintenance beforehand (as we've
; done traditionally) will work most of the time, but not always.
LDR r0, [sp, #4*4]
LDR r1, [sp, #8*4]
LDR r2, =ZeroPage
ARMop MMU_ChangingUncachedEntries,,,r2
]
Pull "r0-r8,pc"
; ----------------------------------------------------------------------------------
@@ -528,10 +545,6 @@ AMB_smme_mapnotlimpid
;all pages sourced from same old logical page Nowhere, ie. pages currently mapped out, no cache worries
;
AMB_smme_mapin
MOV r0,r4 ;address of 1st page
MOV r1,r8 ;number of pages
LDR r3,=ZeroPage
ARMop MMU_ChangingUncachedEntries,,,r3 ;TLB coherency, possibly not needed (TLBs shouldn't cache 0 entries)
MOV r3,r5
BL AMB_movepagesin_L2PT
BL AMB_movepagesin_CAM
@@ -616,6 +629,24 @@ AMB_SetMemMapEntries_SparseMapOut ROUT
LDR lr,=L2PT ;lr -> L2PT
MOV r2, #0
STR r2,[lr,r4,LSR #(Log2PageSize-2)] ;L2PT entry for page set to 0 (means translation fault)
[ MEMM_Type = "VMSAv6"
; In order to guarantee that the result of a page table write is
; visible, the ARMv6+ memory order model requires us to perform TLB
; maintenance (equivalent to the MMU_ChangingUncached ARMop) after we've
; performed the write. Performing the maintenance beforehand (as we've
; done traditionally) will work most of the time, but not always.
TEQ r6, #0
BNE %FT15
[ ZeroPage != 0
LDR r2,=ZeroPage
]
MOV r0,r4
ARMop MMU_ChangingUncachedEntry,,,r2
[ ZeroPage != 0
MOV r2,#0
]
15
]
SUBS r3,r3,#1
STREQ r2,[r5,#-4] ;make sure we clear last word of bitmap, and...
BEQ %FT20 ;done
@@ -630,6 +661,18 @@ AMB_SetMemMapEntries_SparseMapOut ROUT
B %BT10
20
[ MEMM_Type = "VMSAv6"
; In order to guarantee that the result of a page table write is
; visible, the ARMv6+ memory order model requires us to perform TLB
; maintenance (equivalent to the MMU_ChangingUncached ARMop) after we've
; performed the write. Performing the maintenance beforehand (as we've
; done traditionally) will work most of the time, but not always.
TEQ r6, #0
BEQ %FT25
LDR r2,=ZeroPage
ARMop MMU_ChangingUncached,,,r2
25
]
Pull "r0-r11,pc"
@@ -2326,6 +2326,10 @@ Cache_InvalidateAll_PL310 ROUT
PullEnv
B Cache_InvalidateAll_WB_CR7_Lx
Cache_RangeThreshold_PL310 ROUT
MOV a1, #1024*1024
MOV pc, lr
WriteBuffer_Drain_PL310 ROUT
Entry
LDR lr, =ZeroPage
@@ -2393,31 +2397,22 @@ MMU_ChangingEntry_PL310 ROUT
LDR a2, =ZeroPage
LDR a2, [a2, #Cache_HALDevice]
LDR a2, [a2, #HALDevice_Address]
LDR a1, [a2, #PL310_REG1_AUX_CONTROL]
[ NoARMT2
AND a1, a1, #7<<17
MOV a1, a1, LSR #17
|
UBFX a1, a1, #17, #3
]
LDR lr, =&FF<<5
ORR a1, lr, lr, LSL a1 ; a1 = max index number (inclusive)
; Ensure we haven't re-entered an in-progress op
10
20
LDR lr, [a2, #PL310_REG7_CLEAN_INV_PA]
TST lr, #1
BNE %BT10
BNE %BT20
; Clean & invalidate each line/index of the page
ADD a1, a4, a1
20
STR a4, [a2, #PL310_REG7_CLEAN_INV_PA]
ADD a1, a4, #&FE0 ; last line within the page
30
STR a4, [a2, #PL310_REG7_CLEAN_INV_PA]
40
LDR lr, [a2, #PL310_REG7_CLEAN_INV_PA]
TST lr, #1
BNE %BT30
BNE %BT40
TEQ a4, a1
ADD a4, a4, #1<<5 ; next index
BNE %BT20
BNE %BT30
; Sync
PL310Sync a2, a1
50
@@ -2428,7 +2423,7 @@ MMU_ChangingEntry_PL310 ROUT
; a1 = first page affected (page aligned address)
; a2 = number of pages
;
MMU_ChangingEntries_PL310
MMU_ChangingEntries_PL310 ROUT
Entry "a2-a3"
; Keep this one simple and just split it into a series of per-page operations
; This will result in some unnecessary TLB invalidate & PL310 sync thrashing, so in the future a more advanced implementation might be nice.
@@ -2515,7 +2510,7 @@ KnownHALCaches ROUT
DCD Cache_CleanInvalidateAll_PL310
DCD Cache_CleanAll_PL310
DCD Cache_InvalidateAll_PL310
DCD 0 ; Cache_RangeThreshold - may want to tweak?
DCD Cache_RangeThreshold_PL310
DCD 0 ; TLB_InvalidateAll
DCD 0 ; TLB_InvalidateEntry
DCD WriteBuffer_Drain_PL310
@@ -3403,6 +3403,26 @@ AreaShrink ROUT
ADD r1, r1, r5
SUBS r4, r4, r5
BNE %BT20
[ FastCDA_UpFront :LAND: (MEMM_Type = "VMSAv6")
; In order to guarantee that the result of a page table write is
; visible, the ARMv6+ memory order model requires us to perform TLB
; maintenance (equivalent to the MMU_ChangingUncached ARMop) after we've
; performed the write. Performing the maintenance beforehand (as we've
; done traditionally) will work most of the time, but not always.
; N.B. for the case of mapping in a page to an address which was
; previously faulting only DSB & ISB are required. For this we rely
; on the DSB + ISB performed by the ARMop for the src region
Push "r0-r1,r3"
MOV r1, r2, LSR #12
LDR r3, =ZeroPage
ARMop MMU_ChangingUncachedEntries,,,r3 ; src region
LDR r6, [r11, #DANode_Flags] ; r6 = src flags
TST r6, #DynAreaFlags_DoublyMapped
LDRNE r0, [sp]
SUBNE r0, r0, r9
ARMop MMU_ChangingUncachedEntries,NE,,r3 ; doubly mapped src region
Pull "r0-r1,r3"
]
ADD r3, r3, r2
STR r3, [r12, #DANode_Size] ; store increased destination size
@@ -3439,6 +3459,18 @@ AreaShrink ROUT
BL MovePageAtR0ToR1WithAccessR6
SUBS r4, r4, r5
BNE %BT25
[ FastCDA_UpFront :LAND: (MEMM_Type = "VMSAv6")
; In order to guarantee that the result of a page table write is
; visible, the ARMv6+ memory order model requires us to perform TLB
; maintenance (equivalent to the MMU_ChangingUncached ARMop) after we've
; performed the write. Performing the maintenance beforehand (as we've
; done traditionally) will work most of the time, but not always.
Push "r0-r1,r3"
MOV r1, r3, LSR #12
LDR r3, =ZeroPage
ARMop MMU_ChangingUncachedEntries,,,r3
Pull "r0-r1,r3"
]
30
[ EarlierReentrancyInDAShrink
@@ -3925,6 +3957,20 @@ DoTheGrow ROUT
ADD r1, r1, r5 ; advance dst ptr
SUBS r4, r4, r5 ; one less page to move
BNE %BT15 ; loop if more
[ FastCDA_UpFront :LAND: (MEMM_Type = "VMSAv6")
; In order to guarantee that the result of a page table write is
; visible, the ARMv6+ memory order model requires us to perform TLB
; maintenance (equivalent to the MMU_ChangingUncached ARMop) after we've
; performed the write. Performing the maintenance beforehand (as we've
; done traditionally) will work most of the time, but not always.
Push "r0-r1,r3"
SUB r0, r1, r3
ADD r1, r3, r2
MOV r1, r1, LSR #12
LDR r3, =ZeroPage
ARMop MMU_ChangingUncachedEntries,,,r3
Pull "r0-r1,r3"
]
Pull "r0, r1" ; restore original regs
20
ADD r9, r3, r2 ; set up offset from 1st copy to 2nd copy (= new size)
@@ -3961,6 +4007,22 @@ DoTheGrow ROUT
ADD r4, r4, r5
CMP r4, r10 ; have we done all of it?
BNE %BT30 ; [no, so loop]
[ FastCDA_UpFront :LAND: (MEMM_Type = "VMSAv6")
; In order to guarantee that the result of a page table write is
; visible, the ARMv6+ memory order model requires us to perform TLB
; maintenance (equivalent to the MMU_ChangingUncached ARMop) after we've
; performed the write. Performing the maintenance beforehand (as we've
; done traditionally) will work most of the time, but not always.
; N.B. for the case of mapping in a page to an address which was
; previously faulting (i.e. the dest region) only DSB & ISB are
; required. For this we rely on the DSB + ISB performed by the ARMop
; for the src region
Push "r0-r1,r3"
MOV r1, r10, LSR #12
LDR r3, =ZeroPage
ARMop MMU_ChangingUncachedEntries,,,r3
Pull "r0-r1,r3"
]
35
LDR r3, [r12, #DANode_Size]
ADD r3, r3, r10
@@ -4132,6 +4194,23 @@ DoTheGrowPagesSpecified ROUT
SUBS r4, r4, r5 ; one less page to move
BNE %BT45 ; loop if more
FastCDA_ProfEnd DoublyMovePages, r0, r9, lr
[ FastCDA_UpFront :LAND: (MEMM_Type = "VMSAv6")
; In order to guarantee that the result of a page table write is
; visible, the ARMv6+ memory order model requires us to perform TLB
; maintenance (equivalent to the MMU_ChangingUncached ARMop) after we've
; performed the write. Performing the maintenance beforehand (as we've
; done traditionally) will work most of the time, but not always.
; N.B. for the case of mapping in a page to an address which was
; previously faulting (i.e. dest region) only DSB & ISB are required.
; For this we rely on the DSB + ISB performed by the ARMop for the src
; region
Push "r3"
SUB r0, r0, r3
MOV r1, r3, LSR #12
LDR r3, =ZeroPage
ARMop MMU_ChangingUncachedEntries,,,r3
Pull "r3"
]
Pull "r0, r1" ; restore original regs
50
ADD r9, r3, r2 ; set up offset from 1st copy to 2nd copy (= new size)
@@ -4554,6 +4633,23 @@ DoTheGrowNotSpecified ROUT
ADD r1, r1, r5 ; advance dst ptr
SUBS r4, r4, r5 ; one less page to move
BNE %BT15 ; loop if more
[ FastCDA_UpFront :LAND: (MEMM_Type = "VMSAv6")
; In order to guarantee that the result of a page table write is
; visible, the ARMv6+ memory order model requires us to perform TLB
; maintenance (equivalent to the MMU_ChangingUncached ARMop) after we've
; performed the write. Performing the maintenance beforehand (as we've
; done traditionally) will work most of the time, but not always.
; N.B. for the case of mapping in a page to an address which was
; previously faulting (i.e. dest region) only DSB & ISB are required.
; For this we rely on the DSB + ISB performed by the ARMop for the src
; region
Push "r3"
SUB r0, r0, r3
MOV r1, r3, LSR #12
LDR r3, =ZeroPage
ARMop MMU_ChangingUncachedEntries,,,r3
Pull "r3"
]
Pull "r0, r1" ; restore original regs
20
ADD r9, r3, r2 ; set up offset from 1st copy to 2nd copy (= new size)
@@ -4588,6 +4684,22 @@ DoTheGrowNotSpecified ROUT
ADD r4, r4, r5
CMP r4, r10 ; have we done all of it?
BNE %BT30 ; [no, so loop]
[ FastCDA_UpFront :LAND: (MEMM_Type = "VMSAv6")
; In order to guarantee that the result of a page table write is
; visible, the ARMv6+ memory order model requires us to perform TLB
; maintenance (equivalent to the MMU_ChangingUncached ARMop) after we've
; performed the write. Performing the maintenance beforehand (as we've
; done traditionally) will work most of the time, but not always.
; N.B. for the case of mapping in a page to an address which was
; previously faulting (i.e. dest region) only DSB & ISB are required.
; For this we rely on the DSB + ISB performed by the ARMop for the src
; region
Push "r0-r1,r3"
MOV r1, r2, LSR #12
LDR r3, =ZeroPage
ARMop MMU_ChangingUncachedEntries,,,r3
Pull "r0-r1,r3"
]
35
LDR r3, [r12, #DANode_Size]
ADD r3, r3, r10
@@ -119,9 +119,6 @@ logical_bits * ((logical :SHL: 4) :OR: logical)
physical_bits * ((physical :SHL: 4) :OR: physical)
cacheable_bit * 1:SHL:15
alter_cacheable * 1:SHL:16
flush_tlb * 1:SHL:30 ; Internal flag
flush_cache * 1:SHL:31 ; Internal flag.
flush_all * flush_tlb :OR: flush_cache
MemoryConvert ROUT
Entry "r0-r11" ; Need lots of registers!!
@@ -149,8 +146,6 @@ MemoryConvert ROUT
LDR r7, [r6, #MaxCamEntry]
LDR r6, [r6, #CamEntriesPointer]
LDR r8, =L2PT
BIC r0, r0, #flush_all ; Bit set if any page is really made uncacheable (we must flush the cache).
10
SUBS r2, r2, #1
BCC %FT70
@@ -224,29 +219,24 @@ MemoryConvert ROUT
MOV r4, r4, LSR #12
ADD r4, r8, r4, LSL #2 ; Address of L2 entry for logical address.
LDR r5, [r4] ; Get L2 entry (safe as we know address is valid).
ORR r0, r0, #flush_tlb
TST r0, #cacheable_bit
BICEQ r5, r5, #L2_C ; Disable/enable cacheability.
ORREQ r0, r0, #flush_cache ; If disable then we must flush cache later.
ORRNE r5, r5, #L2_C
STR r5, [r4] ; Write back new L2 entry.
B %BT10 ; Do next entry.
70
MOV r5, r0
ASSERT (L2PT :SHL: 12) = 0 ; Ensure we can convert r4 back to the page log addr
MOV r0, r4, LSL #10
; *** KJB - this assumes that uncacheable pages still allow cache hits (true on all
; ARMs so far).
; *** Also, fix it up to do it page by page.
TST r0, #flush_tlb
BEQ %FT75 ; If not flush_tlb, can't have flush_cache
MOV r1, r0
LDR r0, =ZeroPage
ARMop TLB_InvalidateAll,,,r0
TST r1, #flush_cache ; If any page has been made uncacheable in L2 then flush!
BEQ %FT75
LDR r0, =ZeroPage
ARMop Cache_CleanInvalidateAll,,,r0
75
LDR r3, =ZeroPage
ADR lr, %FT65
ARMop MMU_ChangingEntry,EQ,tailcall,r3 ; Clean cache & TLB
ARMop MMU_ChangingUncachedEntry,NE,tailcall,r3 ; Clean TLB
65
MOV r0, r5
B %BT10
70
CLRV
EXIT
@@ -205,7 +205,7 @@ BangL2PT ; internal entry point used only
MOV r6, r0
TST r11, #PageFlags_Unsafe
BNE %FT30
BNE BangL2PT_unsafe
TST r11, #DynAreaFlags_DoublyMapped
BNE BangL2PT_sledgehammer ;if doubly mapped, don't try to be clever
@@ -223,8 +223,14 @@ BangL2PT
ARMop MMU_ChangingUncachedEntry, NE, tailcall, r4
20 STR r6, [r1, r3, LSR #10] ;update L2PT entry
Pull "pc"
; In order to guarantee that the result of a page table write is
; visible, the ARMv6+ memory order model requires us to perform TLB
; maintenance (equivalent to the MMU_ChangingUncached ARMop) after we've
; performed the write. Performing the maintenance beforehand (as we've
; done traditionally) will work most of the time, but not always.
Pull "lr"
MOV r0, r3
ARMop MMU_ChangingUncachedEntry,,tailcall,r4
BangL2PT_sledgehammer
@@ -242,7 +248,19 @@ BangL2PT_sledgehammer
TST r11, #DynAreaFlags_DoublyMapped ; if area doubly mapped
STRNE r6, [r1, r9, LSR #10] ; then store entry for 2nd copy as well
ADDNE r3, r3, r9 ; and point logical address back at 2nd copy
; In order to guarantee that the result of a page table write is
; visible, the ARMv6+ memory order model requires us to perform TLB
; maintenance (equivalent to the MMU_ChangingUncached ARMop) after we've
; performed the write. Performing the maintenance beforehand (as we've
; done traditionally) will work most of the time, but not always.
Pull "lr"
ARMop MMU_ChangingUncached,,tailcall,r4
BangL2PT_unsafe
STR r6, [r1, r3, LSR #10]! ; update level 2 page table (and update pointer so we can use bank-to-bank offset
TST r11, #DynAreaFlags_DoublyMapped ; if area doubly mapped
STRNE r6, [r1, r9, LSR #10] ; then store entry for 2nd copy as well
ADDNE r3, r3, r9 ; and point logical address back at 2nd copy
Pull "pc"