From df4efb686a1dcc7ec50b1bd6c8e294e60fbf8477 Mon Sep 17 00:00:00 2001 From: Jeffrey Lee <me@phlamethrower.co.uk> Date: Mon, 18 Nov 2019 22:31:54 +0000 Subject: [PATCH] Support RAM banks with high physical addresses This changes PhysRamTable to store the address of each RAM bank in terms of (4KB) pages instead of bytes, effectively allowing it to support a 44 bit physical address space. This means that (when the long descriptor page table format is used) the OS can now make use of memory located outside the lower 4GB of the physical address space. However some public APIs still need extending to allow for all operations to be supported on high RAM (e.g. OS_Memory logical to physical address lookups) OS_Memory 12 (RecommendPage) has been extended to allow R4-R7 to be used to specify a (64bit) physical address range which the recommended pages must lie within. For backwards compatibility this defaults to 0-4GB. --- hdr/KernelWS | 15 +++-- hdr/OSRSI6 | 1 + s/AMBControl/memmap | 3 +- s/ARM600 | 3 +- s/ChangeDyn | 27 +++++---- s/HAL | 120 ++++++++++++++++++++++----------------- s/LongDesc | 7 ++- s/MemInfo | 135 ++++++++++++++++++++++++++++++++------------ s/Middle | 4 ++ s/NewReset | 6 +- s/ShortDesc | 2 + s/VMSAv6Long | 3 +- s/VMSAv6Short | 3 +- s/vdu/vdudriver | 6 ++ 14 files changed, 220 insertions(+), 115 deletions(-) diff --git a/hdr/KernelWS b/hdr/KernelWS index 4b0b971..fef13fe 100644 --- a/hdr/KernelWS +++ b/hdr/KernelWS @@ -1035,13 +1035,13 @@ IICBus_Size # 0 InitWsStart # 0 InitIRQHandler # 4 ; pointer to IRQ handler (LDR PC'ed from IRQ HW vector) InitIRQWs # 16 ; workspace for IRQ handler -InitUsedStart # 4 ; start of used pages (L2PT etc) not to be cleared -InitUsedEnd # 4 ; end of used pages +InitUsedStart # 4 ; start of used pages (L2PT etc) not to be cleared (32bit address) +InitUsedEnd # 4 ; end of used pages (page units) InitUsedBlock # 4 ; current block in PhysRamTable InitClearRamWs # 10*4 ; preserve registers during ClearPhysRAM InitDMABlock # 8 ; 
block of DMAable memory extracted from PhysRamTable InitDMAOffset # 4 ; offset+8 into PhysRamTable where memory was taken -InitDMAEnd # 4 ; current DMA alloc pos +InitDMAEnd # 4 ; current DMA alloc pos (32bit address) AlignSpace 32 ; because we clear 32 at a time InitWsEnd # 0 @@ -1147,8 +1147,11 @@ Oscli_CmdHashLists # 4 ;anchor for hashed command lists structure SkippedTables # 0 PhysRamTable # 0 ; Pairs of words (physaddr, size+flags) - ; indicating RAM present in machine - ; Unused entries have size of zero + ; indicating RAM present in machine. physaddr + ; is in units of pages. size is in bytes, with + ; the flags in the low 12 bits. Individual + ; entries don't cross 4GB barriers. Unused + ; entries have size+flags of zero. VideoPhysAddr # 4 ; Address of video RAM (in the case of DRAM-only machines, VideoSizeFlags # 4 ; this is actually a chunk out of DRAM) DRAMPhysAddrA # 4 ; Next the DRAM @@ -1345,7 +1348,7 @@ MaxCamEntry # 4 ; maximum index into the cam map, ie ; 511 for 16MByte machines, 383 for 12MBytes ; 255 for 8MBytes, otherwise 127 -RAMLIMIT # 4 +RAMLIMIT # 4 ; Number of pages of RAM ROMPhysAddr # 4 diff --git a/hdr/OSRSI6 b/hdr/OSRSI6 index dbf4335..0ecf265 100644 --- a/hdr/OSRSI6 +++ b/hdr/OSRSI6 @@ -88,5 +88,6 @@ OSRSI6_VecPtrTab * 85 OSRSI6_NVECTORS * 86 OSRSI6_CAMFormat * 87 ; 0 = 8 bytes per entry, 1 = 16 bytes per entry OSRSI6_ABTSTK * 88 +OSRSI6_PhysRamtableFormat * 89 ; 0 = addresses are in byte units, 1 = addresses are in 4KB units END diff --git a/s/AMBControl/memmap b/s/AMBControl/memmap index 718d423..25e6e72 100644 --- a/s/AMBControl/memmap +++ b/s/AMBControl/memmap @@ -468,7 +468,8 @@ AMB_LazyFixUp ROUT CMP r6,r5 SUBHS r6,r6,r5 BHS %BT10 - ADD r4,r4,r6,LSL #12 + ADD r4,r4,r6 + MOV r4,r4,ROR #20 ;High address packed into low bits for LongDesc MOV r1,#DynAreaFlags_PMP GetPTE r4,4K,r4,r1 ; diff --git a/s/ARM600 b/s/ARM600 index ea5548b..852a371 100644 --- a/s/ARM600 +++ b/s/ARM600 @@ -86,8 +86,9 @@ BangCamUpdate ROUT BCS %BT10 ; if more 
than that, go onto next bank ADD r6, r6, r4, LSR #12 ; put back the ones which were too many - ADD r0, r0, r6, LSL #12 ; move on address by the number of pages left + ADD r0, r0, r6 ; move on address by the number of pages left LDR r6, [sp] ; reload old logical address + MOV r0, r0, LSL #12 ; convert from page units to bytes ; now we have r6 = old logical address, r2 = physical page number, r0 = physical address diff --git a/s/ChangeDyn b/s/ChangeDyn index 20538cd..19a0f2e 100644 --- a/s/ChangeDyn +++ b/s/ChangeDyn @@ -407,10 +407,7 @@ ReadDynamicArea ROUT ReadMemMapInfo_Code LDR R10, =ZeroPage LDR R0, [R10, #Page_Size] - LDR R1, [R10, #RAMLIMIT] ; = total memory size - ADRL R11, PageShifts-1 - LDRB R11, [R11, R0, LSR #12] - MOV R1, R1, LSR R11 + LDR R1, [R10, #RAMLIMIT] ; = total number of pages ExitSWIHandler ; ************************************************************************ @@ -867,8 +864,8 @@ DAC_notsparse LDR r10, =ZeroPage LDR r11, [r10, #Page_Size] LDR r10, [r10, #RAMLIMIT] ; get total RAM size - CMP r5, r10 ; if requested maximum size is > total - MOVHI r5, r10 ; then set max to total (NB. -1 passed in always yields HI) + CMP r10, r5, LSR #Log2PageSize ; if requested maximum size is > total + MOVLS r5, r10, LSL #Log2PageSize ; then set max to total. Note no special handling of R5=-1 is needed (R5=-1 will get treated as 4GB-1. 
If RAMLIMIT < 4GB then R5 will be clamped correctly, if RAMLIMIT >= 4GB then the request will fail regardless because we only have limited logical address space to work with) DAC_roundup SUB r10, r11, #1 ; also round up to a page multiple @@ -4591,11 +4588,12 @@ DynArea_AddrLookup_loop LDR r0, [r5, #InitUsedStart] ADD r0, r0, #DRAMOffset_FirstFixed - DRAMOffset_PageTables - MOV r1, #0 ; only know 32-bit addresses for now + MOV r1, #0 ; start of init block is always 32bit address BL PhysAddrToPageNo MOV r7, r0 ; r7 = page number of start of static chunk LDR r0, [r5, #InitUsedEnd] - MOV r1, #0 ; only know 32-bit addresses for now + MOV r1, r0, LSR #20 + MOV r0, r0, LSL #12 BL PhysAddrToPageNo SUB r8, r0, #1 ; r8 = page number of last page in statics ADD r9, r5, #PhysRamTable @@ -5678,7 +5676,11 @@ DoTheGrowPagesSpecified ROUT BCS %BT06 ADD r3, r3, lr, LSR #12 ; put back what could not be subtracted - ADD r8, r8, r3, LSL #12 ; and add onto base address + ADD r8, r8, r3 ; and add onto base address + ! 0, "LongDescTODO 4GB" + CMP r8, #1:SHL:20 ; 4GB limit + BHS DoTheGrowPageUnavailable + MOV r8, r8, LSL #12 STR r8, [r1, #8-12] ; store physical address in page block SUBS r2, r2, #1 @@ -5807,7 +5809,10 @@ DoTheGrowPagesSpecified ROUT MOV r3, r6 BL ppn_to_physical MOV r10, r8 + ! 
0, "LongDescTODO 4GB" + CMP r9, #0 Pull "r3,r5,r8,r9" + BNE %BT64 ; DREG r6, "Using page number " 68 @@ -6406,11 +6411,13 @@ CallPreGrow ROUT 20 ADD r2,r2,r12,LSR #12 ; advance page number 21 - LDR r12,[r0],#8 ; get next chunk details + LDMIA r0!,{r3,r12} ; get next chunk details CMP r12,#0 BEQ %FT90 TST r12,#OSAddRAM_NoDMA BNE %BT20 + CMP r3,#1:SHL:20 ; stick to lower 4GB for compatibility with old code + BHS %BT20 ; Check the CAM map to see if any pages here are free MOV r12,r12,LSR #12 30 diff --git a/s/HAL b/s/HAL index ca64001..6ed0b92 100644 --- a/s/HAL +++ b/s/HAL @@ -476,8 +476,18 @@ RISCOS_Start B %BT31 32 - ; Fill in the Kernel's permanent memory table, sorting by speed and DMA ability - ; Non-DMAable RAM is preferred over DMAable, as the kernel requires very little DMAable RAM, and we don't want to permanently claim DMAable RAM if we're not actually using it for DMA (in case machine only has a tiny amount available) + ; Fill in the Kernel's permanent memory table, sorting by address, speed and DMA ability. + ; * Address: All memory that falls in the low 4GB of the physical map + ; comes first. 
This makes it easier for our initial memory allocation + ; (no danger of allocating pages which can't be accessed with the MMU + ; off), but may also help with wider software compatibility (all low- + ; RAM pages occupy the lowest physical page numbers) + ; * Non-DMAable RAM is preferred over DMAable, as the kernel requires + ; very little DMAable RAM, and we don't want to permanently claim + ; DMAable RAM if we're not actually using it for DMA (in case machine + ; only has a tiny amount available) + ; * Speed: Fastest RAM is listed first, so that we'll prefer to allocate + ; it for these important kernel/system areas ADD ip, v1, #DRAMOffset_PageZero ASSERT DRAMOffset_PageZero > 0 ; If the workspace block is the block containing the OS_AddRAM list, make sure the two don't overlap otherwise we might corrupt it while we copy it @@ -491,12 +501,11 @@ RISCOS_Start ; First put the VRAM information in to free up some regs ADD v7, ip, #VideoPhysAddr - MOV v4, v4, LSL #12 ; 32bit only for now - ! 
0, "LongDescTODO VRAM selection doesn't guarantee 32bit address" STMIA v7!, {v4, v6} ; Now fill in the rest ASSERT DRAMPhysAddrA = VideoPhysAddr+8 + MOV v1, v1, LSR #12 ADDS v2, v2, #4096 ; Store true length ADDCS v2, v2, #1:SHL:31 ; If it overflowed, must have been 4GB block, so clamp at 2GB (loop below will add the second 2GB) STMIA v7!, {v1, v2} ; workspace block must be first @@ -504,29 +513,31 @@ RISCOS_Start TEQ v8, a4 BEQ %FT39 LDMIA v8!, {v1, v2} - CMP v1, #1:SHL:20 - BHS %BT33 ; skip >4GB addresses for now - MOV v1, v1, LSL #12 ADDS v2, v2, #4096 ; Get true length ADDCS v2, v2, #1:SHL:31 ; If it overflowed, must have been 4GB block, so split into two 2GB blocks SUBCS v2, v2, #4096 - ADDCS v1, v1, #1:SHL:31 + ADDCS v1, v1, #1:SHL:(31-12) STMCSDB v8!, {v1, v2} ADDCS v2, v2, #4096 - SUBCS v1, v1, #1:SHL:31 + SUBCS v1, v1, #1:SHL:(31-12) ADD a1, ip, #DRAMPhysAddrA LDMIA a1!, {a2, a3} TEQ v1, a2 BEQ %BT33 ; don't duplicate the initial block ; Perform insertion sort ; a1-a3, v3-v6, ip, lr free - AND v3, v2, #&F*OSAddRAM_Speed+OSAddRAM_NoDMA - ASSERT OSAddRAM_Speed = 1:SHL:8 - ASSERT OSAddRAM_NoDMA < OSAddRAM_Speed - MOV v3, v3, ROR #8 ; Give NoDMA flag priority over speed when sorting + AND v3, v2, #&F*OSAddRAM_Speed + CMP v1, #1:SHL:20 + ORRLO v3, v3, #1:SHL:31 ; Low RAM takes priority + TST v2, #OSAddRAM_NoDMA + ORRNE v3, v3, #1:SHL:30 ; Followed by non-DMA 34 - AND v4, a3, #&F*OSAddRAM_Speed+OSAddRAM_NoDMA - CMP v3, v4, ROR #8 + AND v4, a3, #&F*OSAddRAM_Speed + CMP a2, #1:SHL:20 + ORRLO v4, v4, #1:SHL:31 ; Low RAM takes priority + TST a3, #OSAddRAM_NoDMA + ORRNE v4, v4, #1:SHL:30 ; Followed by non-DMA + CMP v3, v4 ; Compare priority value BHI %FT35 TEQ a1, v7 LDMNEIA a1!, {a2, a3} @@ -554,7 +565,6 @@ RISCOS_Start ADD a2, a2, v2, LSR #12 ; add on size TEQ v6, v7 BNE %BT40 - MOV a2, a2, LSL #12 ; Work out how much DMAable RAM the HAL/kernel needs LDR a1, [sp, #8] @@ -576,7 +586,8 @@ RISCOS_Start ; Claim it as normal, but set InitDMAEnd to 
v1+DRAMOffset_LastFixed so ; that the already used bit won't get used for DMA ; We also need to be careful later on when picking the initial v2 value - ADD lr, v1, #DRAMOffset_LastFixed + MOV lr, v1, LSL #12 + ADD lr, lr, #DRAMOffset_LastFixed STR lr, [ip, #InitDMAEnd] B %FT43 41 @@ -588,8 +599,11 @@ RISCOS_Start BNE %BT42 CMP v2, a1 BLO %BT42 + CMP v1, #1:SHL:20 ; <4GB only for now + BHS %BT42 ; Make a note of this block - STR v1, [ip, #InitDMAEnd] + MOV lr, v1, LSL #12 + STR lr, [ip, #InitDMAEnd] 43 STR v1, [ip, #InitDMABlock] STR v2, [ip, #InitDMABlock+4] @@ -597,7 +611,7 @@ RISCOS_Start STR lr, [ip, #InitDMAOffset] ; Now shrink/remove this memory from PhysRamTable SUB v2, v2, a1 - ADD v1, v1, a1 + ADD v1, v1, a1, LSR #12 CMP v2, #4096 ; Block all gone? STMHSDB a4, {v1, v2} ; no, just shrink it BHS %FT55 @@ -609,7 +623,7 @@ RISCOS_Start BNE %BT45 SUB v7, v7, #8 -; a2 = Total memory size (bytes) +; a2 = Total memory size (pages) ; a3 = PhysRamTable ; v7 = After last used entry in PhysRamTable ; ip -> ZeroPage @@ -672,29 +686,29 @@ RISCOS_Start ADD v1, a3, #DRAMOffset_PageZero - DRAMOffset_PageTables ADD v2, a3, #DRAMOffset_LastFixed - DRAMOffset_PageTables STR a2, [v1, #RAMLIMIT] ; remember the RAM size - MOV lr, a2, LSR #12 - SUB lr, lr, #1 + SUB lr, a2, #1 STR lr, [v1, #MaxCamEntry] - MOV lr, a2, LSR #12-CAM_EntrySizeLog2+12 - CMP a2, lr, LSL #12-CAM_EntrySizeLog2+12 - ADDNE lr, lr, #1 + MOV lr, a2, LSR #12-CAM_EntrySizeLog2 ; no. 
of pages needed for CAM + CMP a2, lr, LSL #12-CAM_EntrySizeLog2 + ADDNE lr, lr, #1 ; round up MOV lr, lr, LSL #12 STR lr, [v1, #SoftCamMapSize] STR a3, [v1, #InitUsedStart] ; store start of L1PT ADD v1, v1, #DRAMPhysAddrA + MOV v2, v2, LSR #12 MOV v3, a3 ; Detect if the DMA claiming adjusted the first block ; If so, we'll need to reset v2 to the start of the block at v1 LDR a1, [v1] - ADD lr, a1, #DRAMOffset_LastFixed + ADD lr, a1, #DRAMOffset_LastFixed:SHR:12 TEQ lr, v2 MOVNE v2, a1 ; For the next batch of allocation routines, v1-v3 are treated as globals. ; v1 -> current entry in PhysRamTable -; v2 -> next address to allocate in v1 (may point at end of v1) +; v2 -> next address to allocate in v1 (may point at end of v1), in units of pages ; v3 -> L1PT (or 0 if MMU on - not yet) ; Set up some temporary PCBTrans and PPLTrans pointers, and the initial page flags used by the page tables @@ -1527,8 +1541,8 @@ ROMDecompAlign * 20 ADD v3, v3, v8 ; Work out whether the block was removed or merely shrunk LDMDB v3, {v4-v5} - ADD v6, v1, v2 - ADD v7, v4, v5 + ADD v6, v1, v2, LSR #12 + ADD v7, v4, v5, LSR #12 STMDB v3, {v1-v2} TEQ v6, v7 BEQ %FT40 ; End addresses match, it was shrunk @@ -1672,19 +1686,19 @@ CountPageTablePages ROUT ; Returns -1 if address is not in RAM. PhysAddrToPageNo - TEQ a2, #0 - BNE %FT90 ; only handle addresses under 4GB for now + ; Convert address to 4K addressing + MOV a1, a1, LSR #12 + ORR a1, a1, a2, LSL #20 MOV a4, #0 LDR ip, =ZeroPage + PhysRamTable 10 LDMIA ip!, {a2, a3} ; get phys addr, size MOVS a3, a3, LSR #12 ; end of list? 
(size=0) BEQ %FT90 ; then it ain't RAM SUB a2, a1, a2 ; a2 = amount into this bank - CMP a2, a3, LSL #12 ; if more than size - ADDHS a4, a4, a3, LSL #12 ; increase counter by size of bank + CMP a2, a3 ; if more than size + ADDHS a4, a4, a3 ; increase counter by size of bank BHS %BT10 ; and move to next - ADD a4, a4, a2 ; add offset to counter - MOV a1, a4, LSR #12 ; convert counter to a page number + ADD a1, a4, a2 ; add offset to counter MOV pc, lr 90 MOV a1, #-1 @@ -1754,9 +1768,9 @@ ConstructCAMfromPageTables ; ; On entry: ; v1 -> current entry in PhysRamTable -; v2 -> end of last used physical page +; v2 -> end of last used physical page (page units) ; On exit: -; a1 -> next free page +; a1 -> next free page (assumed 32bit address) ; v1, v2 updated ; ; No out of memory check... @@ -1764,11 +1778,11 @@ ConstructCAMfromPageTables Init_ClaimPhysicalPage MOV a1, v2 LDMIA v1, {a2, a3} - MOV a3, a3, LSR #12 - ADD a2, a2, a3, LSL #12 ; ip = end of this bank + ADD a2, a2, a3, LSR #12 ; a2 = end of this bank CMP v2, a2 ; advance v2 to next bank if LDRHS a1, [v1, #8]! ; this bank is fully used - ADD v2, a1, #4096 + ADD v2, a1, #1 + MOV a1, a1, LSL #12 ; Convert to byte address MOV pc, lr ; Allocate and map in some RAM. @@ -1778,7 +1792,7 @@ Init_ClaimPhysicalPage ; a2 = access permissions (see Init_MapIn) ; a3 = length (4K multiple) ; v1 -> current entry in PhysRamTable -; v2 = next physical address +; v2 = next physical address (page units) ; v3 -> L1PT ; ; On exit: @@ -1794,23 +1808,23 @@ Init_MapInRAM ROUT 10 LDMIA v1, {v4, ip} ; v4 = addr of bank, ip = len+flags MOV ip, ip, LSR #12 SUB v4, v2, v4 ; v4 = amount of bank used - RSBS v4, v4, ip, LSL #12 ; v4 = amount of bank left + RSBS v4, v4, ip ; v4 = amount of bank left (pages) LDREQ v2, [v1, #8]! ; move to next bank if 0 left BEQ %BT10 CMP v8, #-1 ; is this the first bank? MOVEQ v8, v2 ; remember it - CMP v4, v5 ; sufficient in this bank? + CMP v4, v5, LSR #12 ; sufficient in this bank? 
MOVHS a4, v5 - MOVLO a4, v4 ; a4 = amount to take + MOVLO a4, v4, LSL #12 ; a4 = amount to take - MOV a1, v2 ; set up parameters for MapIn call + MOV a1, v2, LSL #12 ; set up parameters for MapIn call MOV a2, v6 ; then move globals (in case MapIn MOV a3, v7 ; needs to allocate for L2PT) - ADD v2, v2, a4 ; advance physaddr + ADD v2, v2, a4, LSR #12 ; advance physaddr SUB v5, v5, a4 ; decrease wanted - ADD v6, v6, a4 ; advance address pointer + ADD v6, v6, a4 ; advance log address pointer BL Init_MapIn ; map in the RAM TEQ v5, #0 ; more memory still required? BNE %BT10 @@ -1835,7 +1849,7 @@ Init_MapInRAM_Clear ROUT ; same as Init_MapInRAM but also ; a2 = access permissions (see Init_MapIn) ; a3 = length (4K multiple) ; v1 -> current entry in PhysRamTable -; v2 = next physical address +; v2 = next physical address (page units) ; v3 -> L1PT ; ; On exit: @@ -1872,12 +1886,12 @@ Init_MapInRAM_DMA ROUT ; Map a range of physical addresses to a range of logical addresses. ; ; On entry: -; a1 = physical address +; a1 = physical address (32bit) ; a2 = logical address ; a3 = DA flags ; a4 = area size (4K multiple) ; v1 -> current entry in PhysRamTable -; v2 = last used physical address +; v2 = last used physical address (page units) ; v3 -> L1PT (or 0 if MMU on) Init_MapIn ROUT @@ -1955,7 +1969,7 @@ Init_MapIn ROUT ; Map a logical page to a physical page, allocating L2PT as necessary. 
; ; On entry: -; a1 = physical address +; a1 = physical address (32bit) ; a2 = logical address [ LongDesc ; a3 = high & low page attributes merged into one word @@ -1963,7 +1977,7 @@ Init_MapIn ROUT ; a3 = access permissions + C + B bits + size (all non-address bits, of appropriate type) ] ; v1 -> current entry in PhysRamTable -; v2 = last used physical address +; v2 = last used physical address (page units) ; v3 -> L1PT (or 0 if MMU on) ; On exit: ; a1 = logical address @@ -2022,7 +2036,7 @@ Init_MapInPage ROUT ; a1 = virtual address L2PT required for ; a2 = number of bytes of virtual space ; v1 -> current entry in PhysRamTable -; v2 = last used physical address +; v2 = last used physical address (page units) ; v3 -> L1PT (or 0 if MMU on) ; On exit ; a1-a4,ip corrupt diff --git a/s/LongDesc b/s/LongDesc index 15edc3a..cb1bcd7 100644 --- a/s/LongDesc +++ b/s/LongDesc @@ -321,13 +321,14 @@ UpdateL1PTForPageReplacement ROUT MACRO PageNumToL3PT $pnum,$pnum2,$ptable,$cache0,$cache1,$cache2,$pbits,$pbits2 - MOV $pnum2,$pbits2 + MOV $pnum2,$pbits2 ; Save $pbits2 so it can be used as cache func in/out SUB $pbits2,$pnum,$cache0 ; no. pages into block CMP $pbits2,$cache2 BLHS PageNumToL3PTCache_$ptable._$cache0._$cache1._$cache2._$pbits2 - ADD $pnum,$cache1,$pbits2,LSL #Log2PageSize ; physical address of page - ORR $pnum,$pbits,$pnum ; munge in protection bits + ADD $pnum,$cache1,$pbits2 ; physical address of page (in page units) MOV $pbits2,$pnum2 + ORR $pnum2,$pnum2,$pnum,LSR #20 ; High attr + high addr + ORR $pnum,$pbits,$pnum,LSL #12 ; Low attr + low addr MEND MACRO diff --git a/s/MemInfo b/s/MemInfo index 7b47cde..89a3326 100644 --- a/s/MemInfo +++ b/s/MemInfo @@ -169,6 +169,7 @@ MemoryConvertNoFIQCheck ROUT BCC %FT70 LDMIA r1!, {r3-r4,r8} ; Get next three word entry (PN,LA,PA) and move on pointer. + ! 
0, "LongDescTODO 4GB" MOV r9, #0 ; Top half of PA is zero [ AMB_LazyMapIn @@ -183,6 +184,8 @@ MemoryConvertNoFIQCheck ROUT BL ppn_to_logical ; Else get LA from PN (PA wanted (not given) & LA not given => PN given). BLCC ppn_to_physical ; And get PA from PN (more accurate than getting PA from LA - page may be mapped out) 15 + ! 0, "LongDescTODO 4GB" + CMPCC r9, #1 BCS %FT80 TST r0, #logical,wanted STRNE r4, [r1, #-8] ; Store back LA if wanted. @@ -485,12 +488,13 @@ physical_to_ppn ROUT LDR r5, =ZeroPage+PhysRamTable MOV r3, #0 ; Start at page 0. MOV r8, r8, LSR #12 + ORR r8, r8, r9, LSL #20 10 CMP r7, r3 ; Stop if we run out of pages BCC meminfo_returncs_pullr8 LDMIA r5!, {r10,r11} ; Get start address and size of next block. - SUB r10, r8, r10, LSR #12 ; Determine if given address is in this block. + SUB r10, r8, r10 ; Determine if given address is in this block. CMP r10, r11, LSR #12 ADDCS r3, r3, r11, LSR #12 ; Move on to next block. BCS %BT10 @@ -523,8 +527,9 @@ ppn_to_physical ROUT SUBHS r3, r3, lr BHS %BT10 - ADD r8, r8, r3, LSL #12 - MOV r9, #0 + ADD r8, r8, r3 + MOV r9, r8, LSR #20 + MOV r8, r8, LSL #12 Pull "r3,pc" 20 SEC @@ -536,9 +541,8 @@ ppn_to_physical ROUT ; ; Shifts to determine number of bytes/words to allocate in table. -BitShift * 10 -ByteShift * BitShift + 3 -WordShift * ByteShift + 2 +ByteShift * 1 ; 2^1 pages per byte +WordShift * ByteShift + 2 ; 2^3 pages per word ; Bit patterns for different types of memory. NotPresent * &00000000 @@ -606,8 +610,8 @@ MemoryReadPhys ROUT LDR r1, [sp, #4] ; Get table address back MOV r3, r9, LSR #WordShift LDR r3, [r1, r3, LSL #2]! 
; Get first word of block - MOV r4, r9, LSR #BitShift - AND r4, r4, #(1<<(WordShift-BitShift))-1 ; Bit offset of first page in the word + MOV r4, r9, LSL #3 + AND r4, r4, #31 ; Bit offset of first page in the word RSB r4, r4, #32 ; number of bits left to process MOV r3, r3, LSL r4 @@ -652,9 +656,9 @@ MemoryReadPhys ROUT LDR r0, =ZeroPage LDR r0, [r0, #ROMPhysAddr] LDR r1, [sp, #4] - ADD r0, r1, r0, LSR #ByteShift + ADD r0, r1, r0, LSR #ByteShift+12 LDR r1, =DRAM_Pattern :OR: NotAvailable - MOV r2, #(OSROM_ImageSize*1024) :SHR: ByteShift + MOV r2, #(OSROM_ImageSize :SHR: 2) :SHR: ByteShift BL memset 40 CLRV @@ -698,11 +702,10 @@ MemoryAmounts ROUT LDR r3, [r1, #VideoSizeFlags] TST r3, #OSAddRAM_IsVRAM MOVNE r3, r3, LSR #12 ; Extract size from flags when genuine VRAM - MOVNE r3, r3, LSL #12 MOVEQ r3, #0 LDR r1, [r1, #RAMLIMIT] SUB r1, r1, r3 ; DRAM = RAMLIMIT - VRAMSize - B %FT97 + B %FT98 20 LDR r1, =ZeroPage LDR r1, [r1, #VideoSizeFlags] @@ -740,6 +743,7 @@ MemoryAmounts ROUT B %FT97 97 MOV r1, r1, LSR #12 ; Return as number of pages. +98 MOV r2, #4*1024 ; Return page size. CLRV EXIT @@ -802,31 +806,59 @@ MemoryIOSpace ROUT ; ; In: r0 bits 0..7 = 12 (reason code 12) ; r0 bit 8 = 1 if region must be DMAable -; r0 bits 9..31 = 0 (reserved flags) +; r0 bit 9 = 1 if r4-r7 provided +; r0 bits 10..31 = 0 (reserved flags) ; r1 = size of physically contiguous RAM region required (bytes) ; r2 = log2 of required alignment of base of region (eg. 
12 = 4k, 20 = 1M) +; r4,r5 = lowest acceptable physical address (inclusive) (if bit 9 of r0 set) +; r6,r7 = highest acceptable physical address (inclusive) (if bit 9 of r0 set) ; ; Out: r3 = page number of first page of recommended region that could be ; grown as specific pages by dynamic area handler (only guaranteed ; if grow is next page claiming operation) ; - or error if not possible (eg too big, pages unavailable) ; +; Notes: +; * Default address range in r4-r7 is for the lower 4GB of physical space +; * The high address in r6,r7 is for the end of the memory block, not the start +; RecommendPage ROUT - Push "r0-r2,r4-r11,lr" + Entry "r0-r2,r4-r12" CMP r2,#30 BHI RP_failed ;refuse to look for alignments above 1G ANDS r11,r0,#1:SHL:8 ;convert flag into something usable in the loop MOVNE r11,#OSAddRAM_NoDMA +; + TST r0,#1:SHL:9 ;If no range specified, limit to lower 4GB + MOVEQ r10,#0 + MOVEQ r12,#1:SHL:20 + BEQ %FT10 + CMP r5,#1:SHL:8 + BHS RP_failed ; LPAE/long descriptor format limits us to 40 bit physical addresses (although technically PhysRamTable can store 44 bit addresses) + CMP r7,#1:SHL:8 ; Clamp high address + MOVCS r7,#&FF + MOVCS r6,#-1 + LDR lr,=4095 + ADD r10,r4,lr ; Round up low address + MOV r10,r10,LSR #12 + ORR r10,r10,r5,LSL #20 + MOV r12,r6,LSR #12 ; Round down high address + ORR r12,r12,r7,LSL #20 + ADD r12,r12,#1 ; Make exclusive +10 ; ADD r1,r1,#&1000 SUB r1,r1,#1 - MOV r1,r1,LSR #12 - MOVS r1,r1,LSL #12 ;size rounded up to whole no. of pages + MOV r1,r1,LSR #12 ;size rounded up to whole no. 
of pages ; - CMP r2,#12 - MOVLO r2,#12 ;log2 alignment must be at least 12 (4k pages) + SUBS r2,r2,#12 ;log2 alignment, in terms of pages + MOVLT r2,#0 ;must be at least zero MOV r0,#1 - MOV r4,r0,LSL r2 ;required alignment-1 + MOV r4,r0,LSL r2 ;required alignment, page units +; + SUB r12,r12,r1 + MOV r12,r12,LSR r2 + MOV r12,r12,LSL r2 ; Last acceptable block start address ; LDR r0,=ZeroPage+PhysRamTable MOV r3,#0 ;page number, starts at 0 @@ -838,22 +870,40 @@ RecommendPage ROUT RP_nextchunk ADD r3,r3,r8,LSR #12 ;page no. of first page of next chunk LDMIA r0!,{r7,r8} ;address,size of next physical chunk +; R0 -> PhysRamTable +; R1 = Required length in pages +; R2 = Required log2 alignment-12 +; R3 = current phys page no. +; R4 = Required alignment, page units +; R5 -> CAM +; R7,R8 = Current PhysRamTable entry +; R10 = Low address limit +; R11 = Flags +; R12 = High address limit +; R6,R9 = spare CMP r8,#0 BEQ RP_failed TST r8,r11 ;ignore non-DMA regions if bit 8 of R0 was set BNE RP_nextchunk ; MOV r8,r8,LSR #12 - ADD r6,r7,r4 + CMP r7,r10 + ADDLO r6,r10,r4 + ADDHS r6,r7,r4 MOV r8,r8,LSL #12 SUB r6,r6,#1 ;round up MOV r6,r6,LSR r2 - MOV r6,r6,LSL r2 + MOV r6,r6,LSL r2 ;address of first page of acceptable alignment + SUBS lr,r12,r6 + BLS RP_nextchunk ;exceeded upper address limit SUB r6,r6,r7 ;adjustment to first address of acceptable alignment - CMP r6,r8 + CMP r6,r8,LSR #12 BHS RP_nextchunk ;negligible chunk - ADD r7,r3,r6,LSR #12 ;first page number of acceptable alignment - SUB r9,r8,r6 ;remaining size of chunk + ADD r7,r3,r6 ;first page number of acceptable alignment + RSB r9,r6,r8,LSR #12 ;remaining size of chunk + CMP r9,lr + ADDHI r9,lr,r1 ;clamp effective chunk length if we're going to hit the upper address limit + ; ;find first available page RP_nextpage @@ -864,36 +914,48 @@ RP_nextpage TST r6,#PageFlags_Unavailable :OR: PageFlags_Required TSTEQ r6,#PageFlags_Reserved BEQ RP_checkotherpages -RP_nextpagecontinue CMP r9,r4 BLS RP_nextchunk - ADD 
r7,r7,r4,LSR #12 ;next page of suitable alignment + ADD r7,r7,r4 ;next page of suitable alignment SUB r9,r9,r4 B RP_nextpage ; +RP_nextpagecontinue + ; r7 = start page, r6 = page that failed + ; No point checking any of r7...r6 again, so skip ahead past r6 + SUB r6,r6,r7 ;number of pages to skip (minus 1) + ADD r6,r6,r4 + MOV r6,r6,LSR r2 + MOV r6,r6,LSL r2 ;number to skip, rounded up by alignment + CMP r9,r6 + BLS RP_nextchunk + ADD r7,r7,r6 ;next page of suitable alignment + SUB r9,r9,r6 + B RP_nextpage +; RP_checkotherpages - ADD r10,r7,r1,LSR #12 - SUB r10,r10,#1 ;last page required + ADD r6,r7,r1 + SUB r6,r6,#1 ;last page required RP_checkotherpagesloop - LDR r6,[r5,r10,LSL #CAM_EntrySizeLog2] ;page flags from CAM - TST r6,#PageFlags_Unavailable :OR: PageFlags_Required - TSTEQ r6,#PageFlags_Reserved + LDR lr,[r5,r6,LSL #CAM_EntrySizeLog2] ;page flags from CAM + TST lr,#PageFlags_Unavailable :OR: PageFlags_Required + TSTEQ lr,#PageFlags_Reserved BNE RP_nextpagecontinue - SUB r10,r10,#1 - CMP r10,r7 + SUB r6,r6,#1 + CMP r6,r7 BHI RP_checkotherpagesloop ; ;success! ; MOV r3,r7 - Pull "r0-r2,r4-r11,pc" + Exit RP_failed MOV r3,#0 ADR r0,ErrorBlock_NoMemChunkAvailable SETV - STR r0,[sp] - Pull "r0-r2,r4-r11,pc" + FRAMSTR r0 + Exit MakeErrorBlock NoMemChunkAvailable @@ -1719,6 +1781,7 @@ DMAPrep_Translate B %FT30 20 MOV r8, r4 + ! 0, "LongDescTODO 4GB" MOV r9, #0 BL physical_to_ppn ; r7, r8, r9 -> r3 BCS %BT95 diff --git a/s/Middle b/s/Middle index b638f91..595f7bd 100644 --- a/s/Middle +++ b/s/Middle @@ -224,6 +224,9 @@ SSTENV Push "R0, R1, lr" LDR R12, =ZeroPage LDR R2, [R12, #RAMLIMIT] ; this is read-only + CMP R2, #DynArea_PMP_BigPageCount + MOVLO R2, R2, LSL #12 + LDRHS R2, =DynArea_PMP_BigByteCount ; more RAM than any Brazil could hope for MOV R3, #0 ; never any Brazil-type buffering ; m2 tools will complain if there is! 
Pull "R0, R1, lr" @@ -1590,6 +1593,7 @@ osri6_table DCD NVECTORS ;86 DCD 1 ;87 CAM format: 0 = 8 bytes/entry, 1 = 16 bytes/entry DCD ABTSTK ;88 + DCD 1 ;89 PhysRamTable format: 0 = addresses are in byte units, 1 = addresses are in 4KB units osri6_maxvalue * (.-4-osri6_table) :SHR: 2 diff --git a/s/NewReset b/s/NewReset index 46b41b6..25dd484 100644 --- a/s/NewReset +++ b/s/NewReset @@ -72,7 +72,7 @@ MassageScreenSize ROUT LDR r0, =ZeroPage ] LDR r0, [r0, #RAMLIMIT] - CMP r0, #512*1024 + CMP r0, #(512*1024):SHR:12 MOVEQ r0, #80*1024 MOVNE r0, #160*1024 CmosScreenWillDo @@ -731,9 +731,9 @@ init_other_modules LDR R0, =ZeroPage LDR R0, [R0, #RAMLIMIT] - MLA R0, R1, R2, R0 ; convert pages to bytes and add in + ADD R0, R0, R1 - MOV R0, R0, LSR #20 ; /(1024*1024) down to megabytes + MOV R0, R0, LSR #20-Log2PageSize ; down to megabytes LDR R1, =GeneralMOSBuffer MOV R2, #?GeneralMOSBuffer SWI XOS_ConvertInteger4 diff --git a/s/ShortDesc b/s/ShortDesc index bcfaa04..31e1f3f 100644 --- a/s/ShortDesc +++ b/s/ShortDesc @@ -359,6 +359,7 @@ UpdateL1PTForPageReplacement ROUT LDR $ptable,=ZeroPage+PhysRamTable MOV $cache0,#0 LDMIA $ptable,{$cache1,$cache2} + MOV $cache1,$cache1,LSL #12 MOV $cache2,$cache2,LSR #12 MEND @@ -373,6 +374,7 @@ PageNumToL2PTCache_r4_r5_r6_r7_r12 ROUT SUBHS r12,r12,r7 ADDHS r5,r5,r7 BHS %BT10 + MOV r6,r6,LSL #12 EXIT ; r5-r7 = cache entry, r12 = offset into entry ; ---------------------------------------------------------------------------------- diff --git a/s/VMSAv6Long b/s/VMSAv6Long index 4df8cee..673abad 100644 --- a/s/VMSAv6Long +++ b/s/VMSAv6Long @@ -86,8 +86,9 @@ BangCamUpdate ROUT BCS %BT10 ; if more than that, go onto next bank ADD r6, r6, r4, LSR #12 ; put back the ones which were too many - ADD r0, r0, r6, LSL #12 ; move on address by the number of pages left + ADD r0, r0, r6 ; move on address by the number of pages left LDR r6, [sp] ; reload old logical address + MOV r0, r0, ROR #20 ; High address bits packed into low, ready for Get4PTE ; now 
we have r6 = old logical address, r2 = physical page number, r0 = physical address diff --git a/s/VMSAv6Short b/s/VMSAv6Short index ed5b766..3d0d24b 100644 --- a/s/VMSAv6Short +++ b/s/VMSAv6Short @@ -90,8 +90,9 @@ BangCamUpdate ROUT BCS %BT10 ; if more than that, go onto next bank ADD r6, r6, r4, LSR #12 ; put back the ones which were too many - ADD r0, r0, r6, LSL #12 ; move on address by the number of pages left + ADD r0, r0, r6 ; move on address by the number of pages left LDR r6, [sp] ; reload old logical address + MOV r0, r0, LSL #12 ; convert from page units to bytes ; now we have r6 = old logical address, r2 = physical page number, r0 = physical address diff --git a/s/vdu/vdudriver b/s/vdu/vdudriver index 4f4196c..42cbb71 100644 --- a/s/vdu/vdudriver +++ b/s/vdu/vdudriver @@ -103,6 +103,8 @@ VduInit ROUT Push R14 LDR R0, =ZeroPage LDR R14, [R0, #VideoPhysAddr] + ! 0, "LongDescTODO 4GB" + MOV R14, R14, LSL #12 ASSERT (ZeroPage :AND: 255) = 0 STRB R0, [R0, #OsbyteVars + :INDEX: VDUqueueItems] ;purge queue STRB R0, [WsPtr, #ScreenBlankFlag] ; not blanked @@ -299,6 +301,8 @@ InitialiseMode ROUT ; Screen DA is in use LDR r0, =ZeroPage LDR r0, [r0, #VideoPhysAddr] + ! 0, "LongDescTODO 4GB" + MOV r0, r0, LSL #12 STR r0, [WsPtr, #TrueVideoPhysAddr] ; Point TrueVideoPhysAddr at the base of screen DA MOV r0, #2 SWI XOS_ReadDynamicArea @@ -839,6 +843,8 @@ ModeChangeSub ROUT 581 LDR r0, =ZeroPage LDR r0, [r0, #VideoPhysAddr] + ! 0, "LongDescTODO 4GB" + MOV r0, r0, LSL #12 STR r0, [WsPtr, #TrueVideoPhysAddr] ; Point TrueVideoPhysAddr at the base of screen DA MOV r0, #2 SWI XOS_ReadDynamicArea -- GitLab