Commit 943c4964 authored by Jeffrey Lee
Browse files

Misc memory management tweaks & fixes

Detail:
  s/ChangeDyn - Fix OS_DynamicArea 20 to work properly with sparse & PMP DAs. It now checks against the max extent of the area rather than the current size; this matches the logic used for checking fixed system workspace areas. The call only determines the ownership of a logical address, and it's considered the caller's responsibility to check if there's actually a page at the given address.
  s/ChangeDyn - Revise OS_DynamicArea 25 to remove the redundant 'PMP page flags' entry, and to allow pages to be looked up by either PMP page index, phys page number, or DA page index
  s/ChangeDyn - Tidy up InitDynamicAreas by adding the NextFreePage routine to help determine the next page to be added to the free pool.
  s/AMBControl/Workspace, s/AMBControl/main, s/AMBControl/memmap - Fix the lazy mapping-in of pages so that it uses the correct L2PT flags for the default CB cache policy
  s/AMBControl/allocate - Get rid of magic constant when extracting page flags from DA flags, and make note of the fact that assorted bits of code ignore the flags
  s/AMBControl/growp, s/AMBControl/shrinkp - Reverse the page order when growing/shrinking areas, to match OS_ChangeDynamicArea. This helps both DAs and application space to have pages allocated to them in contiguous physical order - which in turn helps produce shorter, more optimal scatter lists for DMA
Admin:
  Tested on Pandaboard


Version 5.35, 4.79.2.287. Tagged as 'Kernel-5_35-4_79_2_287'
parent c84929d7
......@@ -13,11 +13,11 @@
GBLS Module_ComponentPath
Module_MajorVersion SETS "5.35"
Module_Version SETA 535
Module_MinorVersion SETS "4.79.2.286"
Module_Date SETS "01 Sep 2015"
Module_ApplicationDate SETS "01-Sep-15"
Module_MinorVersion SETS "4.79.2.287"
Module_Date SETS "06 Sep 2015"
Module_ApplicationDate SETS "06-Sep-15"
Module_ComponentName SETS "Kernel"
Module_ComponentPath SETS "castle/RiscOS/Sources/Kernel"
Module_FullVersion SETS "5.35 (4.79.2.286)"
Module_HelpVersion SETS "5.35 (01 Sep 2015) 4.79.2.286"
Module_FullVersion SETS "5.35 (4.79.2.287)"
Module_HelpVersion SETS "5.35 (06 Sep 2015) 4.79.2.287"
END
......@@ -5,19 +5,19 @@
*
*/
#define Module_MajorVersion_CMHG 5.35
#define Module_MinorVersion_CMHG 4.79.2.286
#define Module_Date_CMHG 01 Sep 2015
#define Module_MinorVersion_CMHG 4.79.2.287
#define Module_Date_CMHG 06 Sep 2015
#define Module_MajorVersion "5.35"
#define Module_Version 535
#define Module_MinorVersion "4.79.2.286"
#define Module_Date "01 Sep 2015"
#define Module_MinorVersion "4.79.2.287"
#define Module_Date "06 Sep 2015"
#define Module_ApplicationDate "01-Sep-15"
#define Module_ApplicationDate "06-Sep-15"
#define Module_ComponentName "Kernel"
#define Module_ComponentPath "castle/RiscOS/Sources/Kernel"
#define Module_FullVersion "5.35 (4.79.2.286)"
#define Module_HelpVersion "5.35 (01 Sep 2015) 4.79.2.286"
#define Module_FullVersion "5.35 (4.79.2.287)"
#define Module_HelpVersion "5.35 (06 Sep 2015) 4.79.2.287"
#define Module_LibraryVersionInfo "5:35"
......@@ -44,6 +44,9 @@ AMBMappedInNode # 4 ;node ptr of mapped-in task, or 0 for non
AMBNodeHandles # 4 ;ptr to node handle array (1 word per entry)
AMBPhysBin # 4 ;ptr to physical page bin array
AMBPhysBinEntries # 4 ;no. of entries in physical page bin array
[ AMB_LazyMapIn
AMBPageFlags # 4 ;L2PT page flags to use when (lazily) mapping in pages
]
AMBAnchorNode # AMBNode_HdrSize ;dummy node - see note (1) below
;
[ ChocolateAMB
......
......@@ -65,9 +65,10 @@ allocate
MOV R4,#ApplicationStart
STR R4,[R2,#AMBNode_startaddr]
LDR R4,=ZeroPage+AppSpaceDANode
LDR R4,[R4,#DANode_Flags]
AND R4,R4,#&7F
STR R4,[R2,#AMBNode_PPL] ;PPL from bottom 8 bits of DA flags
LDR R4,[R4,#DANode_Flags] ;Get the page flags from the DA.
LDR LR,=DynAreaFlags_AccessMask;Note that this is rather academic
AND R4,R4,LR ;because various bits of code ignore
STR R4,[R2,#AMBNode_PPL] ;or overwrite these flags!
;do the actual MMU page allocation (grow from 0), for R1 pages, using node R2
BL growpages
......
......@@ -55,17 +55,19 @@ growpages ROUT
LDR R5,[R1,#DANode_PMP]
LDR LR,[R1,#DANode_PMPSize]
ADD R5,R5,LR,LSL #2 ;current end of FreePool
SUB R5,R5,R3,LSL #2 ;R5 := first required page in PMP
;R3 = no. of pages, R4 -> buffer for page entries,
;R5 := start page in PMP
;R5 -> end of free pool PMP
; Reverse the order as we copy in order to match OS_ChangeDynamicArea
; (helps ensure pages are physically contiguous - to help with any
; potential DMA)
MOV R2,R3
MOV R6,#-1
10
LDR LR,[R5]
LDR LR,[R5,#-4]!
SUBS R2,R2,#1
STR LR,[R4],#4
STR R6,[R5],#4
STR R6,[R5]
BNE %BT10
SUB R4,R4,R3,LSL #2
LDR R2,=ZeroPage+AppSpaceDANode ;R2 := dest for pages
......
......@@ -98,6 +98,22 @@ AMBControl_Init
TST R0,#CPUFlag_AbortRestartBroken ;but wait! can't use for bugged chips (eg. pre rev T StrongARM)
MOVNE R1,#AMBFlag_LazyMapIn_disable
STR R1,AMBFlags
; Calculate default page flags
LDR r2,=ZeroPage
[ MEMM_Type = "VMSAv6"
ADRL r1,PPLTrans
|
LDR r1, [r2, #ProcessorFlags]
TST r1, #CPUFlag_ExtendedPages
ADREQL r1, PPLTrans
ADRNEL r1, PPLTransX
]
LDR r3,[r1] ; Page type + access flags
LDR r2,[r2,#MMU_PCBTrans]
LDRB r2,[r2] ; Cacheability + other attribs
ORR r3,r3,r2
STR r3,AMBPageFlags
|
MOV R1,#AMBFlag_LazyMapIn_disable
STR R1,AMBFlags
......
......@@ -108,15 +108,8 @@ AMB_LazyFixUp ROUT
ADD r1,r1,r0,LSL #2 ;r1 -> page involved, in node page list
LDR r2,AMBPhysBin
; Calculate the L2PT protection bits in a nice way that won't produce broken code if we change MMU model
; This should match the AP_Full entry from the PPLTrans table that gets used by BangCam (plus C+B bits)
[ MEMM_Type = "VMSAv6"
MOV r3,#(AP_Full*L2X_APMult)+L2_ExtPage+L2_C+L2_B
|
ASSERT (AP_Full*L2_APMult)+L2_SmallPage+L2_C+L2_B = &FFE
MOV r3,#&FF0
ORR r3,r3,#&E
]
; Get the correct default page flags
LDR r3,AMBPageFlags
LDR r4,[r1]
MOV r6,r4
PageNumToL2PT r4,r2,r3,r5
......
......@@ -85,9 +85,13 @@ shrinkpages
LDR R7,=ZeroPage
LDR R7,[R7,#CamEntriesPointer]
ADD R7,R7,#CAM_PMP
ADD R4,R4,R3,LSL #2
MOV R0,R3
; Reverse the page order as we copy, to match the reverse we performed
; on grow (try and keep free pool pages in optimal order for future
; grows)
10
LDR LR,[R4],#4
LDR LR,[R4,#-4]!
SUBS R0,R0,#1
STR LR,[R5,R6,LSL #2] ;add to free pool PMP
ADD LR,R7,LR,LSL #CAM_EntrySizeLog2
......
......@@ -2518,7 +2518,7 @@ DynArea_Locate Entry "r2-r5"
B %BT50
60
LDR r1, [r10, #DANode_Flags]
LDR r0, [r10, #DANode_Size]
LDR r0, [r10, #DANode_MaxSize]
TST r1, #DynAreaFlags_DoublyMapped
LDR r1, [r10, #DANode_Base]
ADD r2, r1, r0 ; r1:=base r2:=top
......@@ -2539,11 +2539,11 @@ DynArea_Locate Entry "r2-r5"
;
; in: r0 = reason code (21)
; r1 = area number
; r2 = pointer to array of (PMP page index, phys page index, PMP page flag) tuples
; r2 = pointer to array of (PMP page index, phys page index, page flag) tuples
; phys page index -1 to release
; phys page index -2 to let kernel pick page
; otherwise page number to use
; PMP page flags are defined by DynAreaFlags_PMPPhysOpAccessMask
; page flags are defined by DynAreaFlags_PMPPhysOpAccessMask
; r3 = number of entries
;
; out: r0-r1 preserved (error if not all of region successfully updated)
......@@ -3528,17 +3528,13 @@ DynArea_PMP_GetInfo ROUT
;
; Internal routine called by DynamicAreaSWI
;
; Although designed for use with PMPs, this call works with regular DAs
; too (just returns zero for r6 & r7)
;
; in: r0 = reason code (25)
; r1 = area number
; r2 = pointer to input/output array:
; +0: PMP page index (filled in on entry)
; +4: phys page index (filled in on exit, -1 if none)
; +8: PMP page flags (filled in on exit, 0 if none)
; +12: DA page index (filled in on exit, -1 if not mapped)
; +16: page flags (filled in on exit, 0 if not mapped)
; +0: PMP page index
; +4: phys page number
; +8: DA page index
; +12: page flags
; r3 = number of entries
;
; out: r0-r3 preserved
......@@ -3546,6 +3542,16 @@ DynArea_PMP_GetInfo ROUT
; All other registers preserved
; Array updated with page details
;
; On entry, for each array entry either the PMP page index, phys page number, or
; DA page index must be provided, with the other indices set to -1 (page flags
; are ignored).
;
; On exit, if the page is a member of the PMP, the entries will be filled in as
; appropriate. If the page isn't mapped in (and it was a lookup by PMP page
; index/phys page number) the DA page index will be set to -1. If no physical
; page is allocated (or the page isn't a member of the PMP) the page flags will
; be set to 0.
;
DynArea_PMP_GetPages ROUT
Entry "r0-r9"
......@@ -3556,52 +3562,125 @@ DynArea_PMP_GetPages ROUT
]
BCC %FT90 ; [it doesn't]
; r10 -> DANode
LDR r8, [r10, #DANode_PMP]
CMP r8, #0
LDR r6, [r10, #DANode_PMP]
CMP r6, #0
BEQ %FT90
LDR r9, [r10, #DANode_PMPMaxSize]
BEQ %FT90
LDR r7, =ZeroPage
LDR r11, =ZeroPage
LDR r5, [r10, #DANode_Base]
LDR r7, [r7, #CamEntriesPointer]
LDR r11, =DynAreaFlags_PMPLogOpAccessMask
LDR r12, =Nowhere
LDR r7, [r11, #MaxCamEntry]
LDR r8, =L2PT
LDR r11, [r11, #CamEntriesPointer]
LDR r12, =DynAreaFlags_PMPLogOpAccessMask
; Usage in main loop:
; r2 -> input page list
; r3 = length
; r4 = current entry PMP index
; r5 -> DA base
; r7 -> CAM
; r8 -> PMP
; r6 -> PMP
; r7 = MaxCamEntry
; r8 -> L2PT
; r9 = PMP size
; r10 -> DANode
; r11 = PMPLogOpAccessMask
; r12 = Nowhere
; r0, r1, r5, r6 temp
; r11 -> CAM
; r12 = DynAreaFlags_PMPLogOpAccessMask
; r0, r1, r4 temp
10
SUBS r3, r3, #1
BLT %FT80
LDR r4, [r2], #4
; Get the entry
LDMIA r2, {r0, r1, r4}
; PMP page provided?
CMP r0, #-1
BNE %FT50
; Phys page provided?
CMP r1, #-1
BNE %FT20
; DA page provided
; n.b. skipping any range check here since it won't hurt if the page
; doesn't belong to us
Push "r3, r5, r9-r11"
ADD r4, r5, r4, LSL #12
BL logical_to_physical
BLCC physical_to_ppn
MOV r0, r3
Pull "r3, r5, r9-r11"
BCS %FT15
; r0 = PPN, check to see if it belongs to us
ADD r1, r11, r0, LSL #CAM_EntrySizeLog2
ASSERT CAM_PageFlags=4
ASSERT CAM_PMP=8
ASSERT CAM_PMPIndex=12
LDMIB r1, {r1, r4, lr}
TST r1, #DynAreaFlags_PMP
BEQ %FT15
CMP r4, r10
BNE %FT15
STR lr, [r2], #4 ; Store PMP page index
AND r1, r1, r12
STR r0, [r2], #8 ; Store phys page number, skip DA page index
STR r1, [r2], #4 ; Store page flags
B %BT10
15
; Bad DA page index
; PMP page index & phys page number are already known to be -1, so just
; store flags
MOV r0, #0
STR r0, [r2, #12]
ADD r2, r2, #16
B %BT10
20
; Check for silly phys page number
CMP r1, r7
BHI %FT91
ADD r1, r11, r1, LSL #CAM_EntrySizeLog2
ASSERT CAM_LogAddr=0
ASSERT CAM_PageFlags=4
ASSERT CAM_PMP=8
ASSERT CAM_PMPIndex=12
LDMIA r1, {r0, r1, r4, lr}
TST r1, #DynAreaFlags_PMP
BEQ %FT25
CMP r4, r10
BNE %FT25
LDR r4, =Nowhere
STR lr, [r2], #8 ; Store PMP page index, skip phys page number
TEQ r0, r4
B %FT55
25
; Bad phys page number
; PMP page index known to be -1, so store DA page index + flags
ADD r2, r2, #8
MOV r0, #-1
MOV r1, #0
STMIA r2!, {r0-r1}
B %BT10
50
; Check for silly PMP page index
CMP r4, r9
CMP r0, r9
BHS %FT91
; Look up the page that's currently in the PMP
LDR r0, [r8, r4, LSL #2]
MOV r1, #0 ; Currently, no flags
STMIA r2!, {r0-r1}
LDR r0, [r6, r0, LSL #2]
ADD r2, r2, #4
STR r0, [r2], #4 ; Store phys page number
; Does the page exist?
CMP r0, #-1
ADDNE r0, r7, r0, LSL #CAM_EntrySizeLog2
LDR r4, =Nowhere
ADDNE r0, r11, r0, LSL #CAM_EntrySizeLog2
MOVEQ r1, #0 ; No physical page, so no flags
ASSERT CAM_LogAddr=0
ASSERT CAM_PageFlags=4
LDMNEIA r0, {r0-r1} ; Get log addr, flags from CAM
TEQNE r0, r12
TEQNE r0, r4
55
MOVEQ r0, #-1 ; No physical page, or not mapped
SUBNE r0, r0, r5
MOVEQ r1, #0
MOVNE r0, r0, LSR #12
AND r1, r1, r11 ; Mask returned flags
STMIA r2!, {r0-r1}
AND r1, r1, r12 ; Mask returned flags
STMIA r2!, {r0-r1} ; Store DA page index, flags
B %BT10
80
......@@ -3619,6 +3698,7 @@ DynArea_PMP_GetPages ROUT
|
SETV
]
FRAMSTR r0
EXIT
......@@ -4447,11 +4527,9 @@ DynArea_AddrLookup_loop
]
; Now that the system heap is initialised we can create a page list for the
; free pool and start pushing the free pages into it. We want the pages at the
; start of the list to be the VRAM block, followed by all the other pages in
; order of increasing speed. However it's highly unlikely that we'll be able to
; build the full page list without having to grow the system heap - for which
; we'd want the fast pages to be available.
; free pool and start pushing the free pages into it. However it's highly
; unlikely that we'll be able to build the full page list without having to
; grow the system heap - for which we'd want the fast pages to be available.
; So to cope with this we start by putting the fast pages into the page list,
; growing the system heap for every page we insert (a bit slow but reliable).
......@@ -4459,8 +4537,6 @@ DynArea_AddrLookup_loop
; on success we then switch to a different algorithm which fills the main page
; list.
; TODO - Make sure pages are inserted in an order which keeps them in consecutive physical order when being transferred to a DA (and make sure shrink/grow preserves order!)
SUB sp, sp, #4 ; Store the initial list on the stack
LDR r5, =ZeroPage
LDR r6, =ZeroPage+FreePoolDANode
......@@ -4482,14 +4558,12 @@ DynArea_AddrLookup_loop
ADD r9, r5, #PhysRamTable
LDMIA r9!, {r0, r10} ; get VRAM info
MOV r10, r10, LSR #12 ; r10 = current page number
LDMIA r9!, {r0, r11} ; get first regular RAM chunk
SUB r10, r10, #1 ; set things up so the first call to NextFreePage will return the first page of the block
MOV r11, r11, LSR #12
ADD r11, r11, #1
LDR r4, [r5, #CamEntriesPointer]
10
LDMIA r9!, {r0, r11} ; get next block
MOVS r11, r11, LSR #12 ; if no more blocks left...
MOVEQ r10, #0
ADDEQ r9, r5, #PhysRamTable ; ...panic and use the VRAM
BEQ %BT10
15
; See if we have enough space
LDR r3, [r5, #MaxCamEntry]
ADD r3, r3, #1
......@@ -4498,22 +4572,14 @@ DynArea_AddrLookup_loop
MOV r0, #HeapReason_Desc
BL DoSysHeapOpWithExtension ; HACK - check space before calling, to avoid crashing when the grow fails and tries to generate an error (vector table not initialised yet, so crashes when UKSWIV is invoked in order to call MessageTrans)
Pull "r3"
SUB r2, r2, #4096 ; Paranoia
SUB r2, r2, #4096 ; Paranoia
CMP r2, r3
BLT %FT20
BL ClaimSysHeapNode
BVC %FT40
; Find a page we can use to grow the system heap
20
CMP r10, r7
CMPHS r8, r10
BHS %FT30 ; page is in statics
; Check the CAM map to see if the page is already taken - this will detect the DMA regions, which aren't included in InitUsedStart/InitUsedEnd
ADD r0, r4, r10, LSL #CAM_EntrySizeLog2
LDR lr, [r0, #CAM_PageFlags]
TST lr, #PageFlags_Unavailable
BNE %FT30
; Found a page, add it to the free pool
; Find a page we can use to grow the system heap
BL NextFreePage ; n.b. no out-of-pages check
STR r10, [sp]
LDR lr, =AP_FreePool :AND: DynAreaFlags_AccessMask
STR lr, [r0, #CAM_PageFlags]
......@@ -4526,12 +4592,8 @@ DynArea_AddrLookup_loop
MOV r0, #ChangeDyn_SysHeap
MOV r1, #4096
SWI XOS_ChangeDynamicArea
30
; Move on to next page
SUBS r11, r11, #1
ADD r10, r10, #1
BEQ %BT10
B %BT15
B %BT10
40
; We've successfully allocated the memory for the PMP - start filling
; it in. To ensure the pages are in the correct order we need to fill
......@@ -4548,34 +4610,10 @@ DynArea_AddrLookup_loop
MOV r1, r3
STR r2, [r6, #DANode_PMP]
45
CMP r10, r7
CMPHS r8, r10
BHS %FT50 ; page is in statics
; Check the CAM map to see if the page is already taken - this will detect the DMA regions, which aren't included in InitUsedStart/InitUsedEnd
ADD r0, r4, r10, LSL #CAM_EntrySizeLog2
LDR lr, [r0, #CAM_PageFlags]
TST lr, #PageFlags_Unavailable
BNE %FT50
; Found a page, add it to the free pool
STR r10, [r3, #-4]!
50
; Move on to next page
SUBS r11, r11, #1
ADD r10, r10, #1
BNE %BT45
ADD lr, r5, #PhysRamTable+8
CMP lr, r9 ; if we've just processed the VRAM chunk, we're done
BEQ %FT55
LDMIA r9!, {r0, r11} ; else get next block
MOVS r11, r11, LSR #12 ; if no more blocks left...
BNE %BT45
MOV r10, #0
MOV r9, lr
LDMDB r9, {r0, r11} ; ...then process VRAM
MOVS r11, r11, LSR #12 ; And if no VRAM...
BNE %BT45
; ...then we're done
55
BL NextFreePage
CMP r10, #-1
STRNE r10, [r3, #-4]!
BNE %BT45 ; Keep going until we run out of pages
; Left with:
; r1 -> end of memory block
; r2 -> start of memory block
......@@ -4621,6 +4659,83 @@ DynArea_AddrLookup_loop
]
EXIT
;
; NextFreePage - Find next page to insert into the free pool on startup
;
; In:
; r4 -> CAM
; r7 = page number of start of static chunk
; r8 = page number of end of static chunk
; r9 -> next PhysRamTable entry
; r10 = Current page number
; r11 = Number of pages left in current chunk
; Out:
; r0 -> CAM entry for page
; r10 = Next free page in optimal order, -1 if no more pages
; r9, r11 updated
;
; We have to move all free pages (ie ones not occupied by the static pages)
; into the free pool.
; By default, pages will get taken from the end of the free pool when other
; dynamic areas are initialised or grown. So make sure that the slowest RAM
; is at the start of the free pool and the fastest is at the end; this is the
; reverse of the order in PhysRamTable. Also, within each group of pages (i.e.
; PhysRamTable entry), we want the pages to be in decreasing physical address
; order - so that when they are moved to a DA they end up in increasing address
; order, leading to more optimal DMA transfer lists.
;
; Also note that the VRAM block is kept at the start of the free pool, mainly
; to match old behaviour (it's not clear whether moving it elsewhere will have
; any significant impact on the system - especially when you consider that
; shrinking screen memory will end up adding the pages to the end of the pool
; rather than the start).
;
; Over time this optimal ordering will be lost, so at a later date it might be
; nice to re-sort pages as they are added back into the free pool (and move the
; VRAM block to the end of PhysRamTable, so that it's in order fast RAM -> slow
; RAM -> fast DMA -> slow DMA -> VRAM, so that sorting by page number is
; all that's required to deal with both contiguity and desirability)
;
; In terms of this routine, we fill the free pool from the highest entry down,
; so we want the first page returned to be the lowest-numbered page from the
; first (non-VRAM) PhysRamTable entry.
;
NextFreePage ROUT
; Step r10/r11 through the PhysRamTable chunks until a page is found that is
; neither inside the static range r7..r8 nor flagged PageFlags_Unavailable in
; the CAM. Returns with r10 = that page and r0 -> its CAM entry, or r10 = -1
; once every chunk (regular RAM first, VRAM last) has been exhausted.
; lr is used as scratch throughout.
; NOTE(review): Entry/EXIT are macros defined elsewhere - presumably they
; stack and restore the return address, which is what makes the use of lr as
; scratch safe. Confirm against the macro definitions.
Entry
10
; Move on to the next page of the current chunk. Only SUBS sets the flags
; (ADD does not), so the BEQ below fires when the pages-left count in r11
; reaches zero, i.e. the chunk is used up.
SUBS r11, r11, #1
ADD r10, r10, #1
BEQ %FT30
20
; Candidate page is r10. Reject it if r7 <= r10 <= r8 (unsigned, both ends
; inclusive): the second compare only executes when the first yields HS.
CMP r10, r7
CMPHS r8, r10
BHS %BT10 ; page is in statics
; Check the CAM map to see if the page is already taken - this will detect the DMA regions, which aren't included in InitUsedStart/InitUsedEnd
ADD r0, r4, r10, LSL #CAM_EntrySizeLog2
LDR lr, [r0, #CAM_PageFlags]
TST lr, #PageFlags_Unavailable
BNE %BT10
; Page is good
; (r0 still points at the CAM entry for page r10 at this point)
EXIT
30
; Advance to next block
; lr := address of the second PhysRamTable entry (each entry is two words).
; r9 only equals this once it has been deliberately rewound to process the
; first (VRAM) entry further down, so equality means VRAM has now been done.
LDR lr, =ZeroPage+PhysRamTable+8
CMP lr, r9 ; if we've just processed the VRAM block, we're done
BEQ %FT90
; Load the next two-word entry; the first word lands in r0 and is not used
; here. NOTE(review): the second word is presumably the chunk size in bytes,
; with LSR #12 converting it to a count of 4K pages - confirm against the
; PhysRamTable definition. r10 is deliberately left alone: it already holds
; one past the last page of the previous chunk, which is used as the first
; page of this one (assumes page numbers run consecutively across
; PhysRamTable entries - TODO confirm).
LDMIA r9!, {r0, r11} ; else get next block
MOVS r11, r11, LSR #12 ; if no more blocks left...
BNE %BT20
; A zero page count ends the regular RAM chunks. Restart numbering at page 0
; and point r9 just past the first table entry, so that the CMP above
; terminates the search the next time a chunk runs out.
MOV r10, #0
MOV r9, lr
LDMDB lr, {r0, r11} ; ...then process VRAM
MOVS r11, r11, LSR #12 ; And if no VRAM...
BNE %BT20
90 ; ...then we're done
; Out of pages: r10 = -1 tells the caller to stop. r0 does not point at a
; valid CAM entry on this path.
MOV r10, #-1
EXIT
; Literal pool for the LDR lr, = above
LTORG
InitFreePoolTable
......@@ -6009,7 +6124,7 @@ DoTheGrowPagesSpecified ROUT
LDR r6, [r8, #4] ;logical address of src page
; If the required page is in the free pool, we don't need to preserve its contents
; TODO - have 'volatile' PMP page flag which can be used to indicate that pages can just be taken? (so will work with any PMP)
; TODO - have 'volatile' page flag which can be used to indicate that pages can just be taken? (so will work with any PMP)
TST r2, #DynAreaFlags_PMP
BEQ %FT73
CMP r3, r11
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment