diff --git a/hdr/KernelWS b/hdr/KernelWS index 4b0b9718d4b202267a636a9dcfc2598fc163af45..fef13feec87ce63db76ee787570e882f8c87020d 100644 --- a/hdr/KernelWS +++ b/hdr/KernelWS @@ -1035,13 +1035,13 @@ IICBus_Size # 0 InitWsStart # 0 InitIRQHandler # 4 ; pointer to IRQ handler (LDR PC'ed from IRQ HW vector) InitIRQWs # 16 ; workspace for IRQ handler -InitUsedStart # 4 ; start of used pages (L2PT etc) not to be cleared -InitUsedEnd # 4 ; end of used pages +InitUsedStart # 4 ; start of used pages (L2PT etc) not to be cleared (32bit address) +InitUsedEnd # 4 ; end of used pages (page units) InitUsedBlock # 4 ; current block in PhysRamTable InitClearRamWs # 10*4 ; preserve registers during ClearPhysRAM InitDMABlock # 8 ; block of DMAable memory extracted from PhysRamTable InitDMAOffset # 4 ; offset+8 into PhysRamTable where memory was taken -InitDMAEnd # 4 ; current DMA alloc pos +InitDMAEnd # 4 ; current DMA alloc pos (32bit address) AlignSpace 32 ; because we clear 32 at a time InitWsEnd # 0 @@ -1147,8 +1147,11 @@ Oscli_CmdHashLists # 4 ;anchor for hashed command lists structure SkippedTables # 0 PhysRamTable # 0 ; Pairs of words (physaddr, size+flags) - ; indicating RAM present in machine - ; Unused entries have size of zero + ; indicating RAM present in machine. physaddr + ; is in units of pages. size is in bytes, with + ; the flags in the low 12 bits. Individual + ; entries don't cross 4GB barriers. Unused + ; entries have size+flags of zero. 
VideoPhysAddr # 4 ; Address of video RAM (in the case of DRAM-only machines, VideoSizeFlags # 4 ; this is actually a chunk out of DRAM) DRAMPhysAddrA # 4 ; Next the DRAM @@ -1345,7 +1348,7 @@ MaxCamEntry # 4 ; maximum index into the cam map, ie ; 511 for 16MByte machines, 383 for 12MBytes ; 255 for 8MBytes, otherwise 127 -RAMLIMIT # 4 +RAMLIMIT # 4 ; Number of pages of RAM ROMPhysAddr # 4 diff --git a/hdr/OSRSI6 b/hdr/OSRSI6 index dbf43355fb370c1c87185554c5abd0c15c8e0160..0ecf2653d1916e02175c37847e465f1630edd7a4 100644 --- a/hdr/OSRSI6 +++ b/hdr/OSRSI6 @@ -88,5 +88,6 @@ OSRSI6_VecPtrTab * 85 OSRSI6_NVECTORS * 86 OSRSI6_CAMFormat * 87 ; 0 = 8 bytes per entry, 1 = 16 bytes per entry OSRSI6_ABTSTK * 88 +OSRSI6_PhysRamtableFormat * 89 ; 0 = addresses are in byte units, 1 = addresses are in 4KB units END diff --git a/s/AMBControl/memmap b/s/AMBControl/memmap index 718d423b1424a01b5e821d0dd5dd430004521099..25e6e72b9ec13bbb5678f2dfeb7f770e9f952f1c 100644 --- a/s/AMBControl/memmap +++ b/s/AMBControl/memmap @@ -468,7 +468,8 @@ AMB_LazyFixUp ROUT CMP r6,r5 SUBHS r6,r6,r5 BHS %BT10 - ADD r4,r4,r6,LSL #12 + ADD r4,r4,r6 + MOV r4,r4,ROR #20 ;High address packed into low bits for LongDesc MOV r1,#DynAreaFlags_PMP GetPTE r4,4K,r4,r1 ; diff --git a/s/ARM600 b/s/ARM600 index ea5548bdb2f9bcd87c55f4e277021c57fd2bc500..852a3714fa709a022fb18a93fc69956ca518f5df 100644 --- a/s/ARM600 +++ b/s/ARM600 @@ -86,8 +86,9 @@ BangCamUpdate ROUT BCS %BT10 ; if more than that, go onto next bank ADD r6, r6, r4, LSR #12 ; put back the ones which were too many - ADD r0, r0, r6, LSL #12 ; move on address by the number of pages left + ADD r0, r0, r6 ; move on address by the number of pages left LDR r6, [sp] ; reload old logical address + MOV r0, r0, LSL #12 ; convert from page units to bytes ; now we have r6 = old logical address, r2 = physical page number, r0 = physical address diff --git a/s/ChangeDyn b/s/ChangeDyn index 20538cd7695670e579978b17745b5fe6eb75cbae..19a0f2e82658f2de0db6223dffb13354cb2dfec1 
100644 --- a/s/ChangeDyn +++ b/s/ChangeDyn @@ -407,10 +407,7 @@ ReadDynamicArea ROUT ReadMemMapInfo_Code LDR R10, =ZeroPage LDR R0, [R10, #Page_Size] - LDR R1, [R10, #RAMLIMIT] ; = total memory size - ADRL R11, PageShifts-1 - LDRB R11, [R11, R0, LSR #12] - MOV R1, R1, LSR R11 + LDR R1, [R10, #RAMLIMIT] ; = total number of pages ExitSWIHandler ; ************************************************************************ @@ -867,8 +864,8 @@ DAC_notsparse LDR r10, =ZeroPage LDR r11, [r10, #Page_Size] LDR r10, [r10, #RAMLIMIT] ; get total RAM size - CMP r5, r10 ; if requested maximum size is > total - MOVHI r5, r10 ; then set max to total (NB. -1 passed in always yields HI) + CMP r10, r5, LSR #Log2PageSize ; if requested maximum size is > total + MOVLS r5, r10, LSL #Log2PageSize ; then set max to total. Note no special handling of R5=-1 is needed (R5=-1 will get treated as 4GB-1. If RAMLIMIT < 4GB then R5 will be clamped correctly, if RAMLIMIT >= 4GB then the request will fail regardless because we only have limited logical address space to work with) DAC_roundup SUB r10, r11, #1 ; also round up to a page multiple @@ -4591,11 +4588,12 @@ DynArea_AddrLookup_loop LDR r0, [r5, #InitUsedStart] ADD r0, r0, #DRAMOffset_FirstFixed - DRAMOffset_PageTables - MOV r1, #0 ; only know 32-bit addresses for now + MOV r1, #0 ; start of init block is always 32bit address BL PhysAddrToPageNo MOV r7, r0 ; r7 = page number of start of static chunk LDR r0, [r5, #InitUsedEnd] - MOV r1, #0 ; only know 32-bit addresses for now + MOV r1, r0, LSR #20 + MOV r0, r0, LSL #12 BL PhysAddrToPageNo SUB r8, r0, #1 ; r8 = page number of last page in statics ADD r9, r5, #PhysRamTable @@ -5678,7 +5676,11 @@ DoTheGrowPagesSpecified ROUT BCS %BT06 ADD r3, r3, lr, LSR #12 ; put back what could not be subtracted - ADD r8, r8, r3, LSL #12 ; and add onto base address + ADD r8, r8, r3 ; and add onto base address + ! 
0, "LongDescTODO 4GB" + CMP r8, #1:SHL:20 ; 4GB limit + BHS DoTheGrowPageUnavailable + MOV r8, r8, LSL #12 STR r8, [r1, #8-12] ; store physical address in page block SUBS r2, r2, #1 @@ -5807,7 +5809,10 @@ DoTheGrowPagesSpecified ROUT MOV r3, r6 BL ppn_to_physical MOV r10, r8 + ! 0, "LongDescTODO 4GB" + CMP r9, #0 Pull "r3,r5,r8,r9" + BNE %BT64 ; DREG r6, "Using page number " 68 @@ -6406,11 +6411,13 @@ CallPreGrow ROUT 20 ADD r2,r2,r12,LSR #12 ; advance page number 21 - LDR r12,[r0],#8 ; get next chunk details + LDMIA r0!,{r3,r12} ; get next chunk details CMP r12,#0 BEQ %FT90 TST r12,#OSAddRAM_NoDMA BNE %BT20 + CMP r3,#1:SHL:20 ; stick to lower 4GB for compatibility with old code + BHS %BT20 ; Check the CAM map to see if any pages here are free MOV r12,r12,LSR #12 30 diff --git a/s/HAL b/s/HAL index ca6400172ee98146b0d78858d555f742e3c3235b..6ed0b928b1cdffe8b7216a62b78c56947fad8d00 100644 --- a/s/HAL +++ b/s/HAL @@ -476,8 +476,18 @@ RISCOS_Start B %BT31 32 - ; Fill in the Kernel's permanent memory table, sorting by speed and DMA ability - ; Non-DMAable RAM is preferred over DMAable, as the kernel requires very little DMAable RAM, and we don't want to permanently claim DMAable RAM if we're not actually using it for DMA (in case machine only has a tiny amount available) + ; Fill in the Kernel's permanent memory table, sorting by address, speed and DMA ability. + ; * Address: All memory that falls in the low 4GB of the physical map + ; comes first. 
This makes it easier for our initial memory allocation + ; (no danger of allocating pages which can't be accessed with the MMU + ; off), but may also help with wider software compatibility (all low- + ; RAM pages occupy the lowest physical page numbers) + ; * Non-DMAable RAM is preferred over DMAable, as the kernel requires + ; very little DMAable RAM, and we don't want to permanently claim + ; DMAable RAM if we're not actually using it for DMA (in case machine + ; only has a tiny amount available) + ; * Speed: Fastest RAM is listed first, so that we'll prefer to allocate + ; it for these important kernel/system areas ADD ip, v1, #DRAMOffset_PageZero ASSERT DRAMOffset_PageZero > 0 ; If the workspace block is the block containing the OS_AddRAM list, make sure the two don't overlap otherwise we might corrupt it while we copy it @@ -491,12 +501,11 @@ RISCOS_Start ; First put the VRAM information in to free up some regs ADD v7, ip, #VideoPhysAddr - MOV v4, v4, LSL #12 ; 32bit only for now - ! 
0, "LongDescTODO VRAM selection doesn't guarantee 32bit address" STMIA v7!, {v4, v6} ; Now fill in the rest ASSERT DRAMPhysAddrA = VideoPhysAddr+8 + MOV v1, v1, LSR #12 ADDS v2, v2, #4096 ; Store true length ADDCS v2, v2, #1:SHL:31 ; If it overflowed, must have been 4GB block, so clamp at 2GB (loop below will add the second 2GB) STMIA v7!, {v1, v2} ; workspace block must be first @@ -504,29 +513,31 @@ RISCOS_Start TEQ v8, a4 BEQ %FT39 LDMIA v8!, {v1, v2} - CMP v1, #1:SHL:20 - BHS %BT33 ; skip >4GB addresses for now - MOV v1, v1, LSL #12 ADDS v2, v2, #4096 ; Get true length ADDCS v2, v2, #1:SHL:31 ; If it overflowed, must have been 4GB block, so split into two 2GB blocks SUBCS v2, v2, #4096 - ADDCS v1, v1, #1:SHL:31 + ADDCS v1, v1, #1:SHL:(31-12) STMCSDB v8!, {v1, v2} ADDCS v2, v2, #4096 - SUBCS v1, v1, #1:SHL:31 + SUBCS v1, v1, #1:SHL:(31-12) ADD a1, ip, #DRAMPhysAddrA LDMIA a1!, {a2, a3} TEQ v1, a2 BEQ %BT33 ; don't duplicate the initial block ; Perform insertion sort ; a1-a3, v3-v6, ip, lr free - AND v3, v2, #&F*OSAddRAM_Speed+OSAddRAM_NoDMA - ASSERT OSAddRAM_Speed = 1:SHL:8 - ASSERT OSAddRAM_NoDMA < OSAddRAM_Speed - MOV v3, v3, ROR #8 ; Give NoDMA flag priority over speed when sorting + AND v3, v2, #&F*OSAddRAM_Speed + CMP v1, #1:SHL:20 + ORRLO v3, v3, #1:SHL:31 ; Low RAM takes priority + TST v2, #OSAddRAM_NoDMA + ORRNE v3, v3, #1:SHL:30 ; Followed by non-DMA 34 - AND v4, a3, #&F*OSAddRAM_Speed+OSAddRAM_NoDMA - CMP v3, v4, ROR #8 + AND v4, a3, #&F*OSAddRAM_Speed + CMP a2, #1:SHL:20 + ORRLO v4, v4, #1:SHL:31 ; Low RAM takes priority + TST a3, #OSAddRAM_NoDMA + ORRNE v4, v4, #1:SHL:30 ; Followed by non-DMA + CMP v3, v4 ; Compare priority value BHI %FT35 TEQ a1, v7 LDMNEIA a1!, {a2, a3} @@ -554,7 +565,6 @@ RISCOS_Start ADD a2, a2, v2, LSR #12 ; add on size TEQ v6, v7 BNE %BT40 - MOV a2, a2, LSL #12 ; Work out how much DMAable RAM the HAL/kernel needs LDR a1, [sp, #8] @@ -576,7 +586,8 @@ RISCOS_Start ; Claim it as normal, but set InitDMAEnd to 
v1+DRAMOffset_LastFixed so ; that the already used bit won't get used for DMA ; We also need to be careful later on when picking the initial v2 value - ADD lr, v1, #DRAMOffset_LastFixed + MOV lr, v1, LSL #12 + ADD lr, lr, #DRAMOffset_LastFixed STR lr, [ip, #InitDMAEnd] B %FT43 41 @@ -588,8 +599,11 @@ RISCOS_Start BNE %BT42 CMP v2, a1 BLO %BT42 + CMP v1, #1:SHL:20 ; <4GB only for now + BHS %BT42 ; Make a note of this block - STR v1, [ip, #InitDMAEnd] + MOV lr, v1, LSL #12 + STR lr, [ip, #InitDMAEnd] 43 STR v1, [ip, #InitDMABlock] STR v2, [ip, #InitDMABlock+4] @@ -597,7 +611,7 @@ RISCOS_Start STR lr, [ip, #InitDMAOffset] ; Now shrink/remove this memory from PhysRamTable SUB v2, v2, a1 - ADD v1, v1, a1 + ADD v1, v1, a1, LSR #12 CMP v2, #4096 ; Block all gone? STMHSDB a4, {v1, v2} ; no, just shrink it BHS %FT55 @@ -609,7 +623,7 @@ RISCOS_Start BNE %BT45 SUB v7, v7, #8 -; a2 = Total memory size (bytes) +; a2 = Total memory size (pages) ; a3 = PhysRamTable ; v7 = After last used entry in PhysRamTable ; ip -> ZeroPage @@ -672,29 +686,29 @@ RISCOS_Start ADD v1, a3, #DRAMOffset_PageZero - DRAMOffset_PageTables ADD v2, a3, #DRAMOffset_LastFixed - DRAMOffset_PageTables STR a2, [v1, #RAMLIMIT] ; remember the RAM size - MOV lr, a2, LSR #12 - SUB lr, lr, #1 + SUB lr, a2, #1 STR lr, [v1, #MaxCamEntry] - MOV lr, a2, LSR #12-CAM_EntrySizeLog2+12 - CMP a2, lr, LSL #12-CAM_EntrySizeLog2+12 - ADDNE lr, lr, #1 + MOV lr, a2, LSR #12-CAM_EntrySizeLog2 ; no. 
of pages needed for CAM + CMP a2, lr, LSL #12-CAM_EntrySizeLog2 + ADDNE lr, lr, #1 ; round up MOV lr, lr, LSL #12 STR lr, [v1, #SoftCamMapSize] STR a3, [v1, #InitUsedStart] ; store start of L1PT ADD v1, v1, #DRAMPhysAddrA + MOV v2, v2, LSR #12 MOV v3, a3 ; Detect if the DMA claiming adjusted the first block ; If so, we'll need to reset v2 to the start of the block at v1 LDR a1, [v1] - ADD lr, a1, #DRAMOffset_LastFixed + ADD lr, a1, #DRAMOffset_LastFixed:SHR:12 TEQ lr, v2 MOVNE v2, a1 ; For the next batch of allocation routines, v1-v3 are treated as globals. ; v1 -> current entry in PhysRamTable -; v2 -> next address to allocate in v1 (may point at end of v1) +; v2 -> next address to allocate in v1 (may point at end of v1), in units of pages ; v3 -> L1PT (or 0 if MMU on - not yet) ; Set up some temporary PCBTrans and PPLTrans pointers, and the initial page flags used by the page tables @@ -1527,8 +1541,8 @@ ROMDecompAlign * 20 ADD v3, v3, v8 ; Work out whether the block was removed or merely shrunk LDMDB v3, {v4-v5} - ADD v6, v1, v2 - ADD v7, v4, v5 + ADD v6, v1, v2, LSR #12 + ADD v7, v4, v5, LSR #12 STMDB v3, {v1-v2} TEQ v6, v7 BEQ %FT40 ; End addresses match, it was shrunk @@ -1672,19 +1686,19 @@ CountPageTablePages ROUT ; Returns -1 if address is not in RAM. PhysAddrToPageNo - TEQ a2, #0 - BNE %FT90 ; only handle addresses under 4GB for now + ; Convert address to 4K addressing + MOV a1, a1, LSR #12 + ORR a1, a1, a2, LSL #20 MOV a4, #0 LDR ip, =ZeroPage + PhysRamTable 10 LDMIA ip!, {a2, a3} ; get phys addr, size MOVS a3, a3, LSR #12 ; end of list? 
(size=0) BEQ %FT90 ; then it ain't RAM SUB a2, a1, a2 ; a2 = amount into this bank - CMP a2, a3, LSL #12 ; if more than size - ADDHS a4, a4, a3, LSL #12 ; increase counter by size of bank + CMP a2, a3 ; if more than size + ADDHS a4, a4, a3 ; increase counter by size of bank BHS %BT10 ; and move to next - ADD a4, a4, a2 ; add offset to counter - MOV a1, a4, LSR #12 ; convert counter to a page number + ADD a1, a4, a2 ; add offset to counter MOV pc, lr 90 MOV a1, #-1 @@ -1754,9 +1768,9 @@ ConstructCAMfromPageTables ; ; On entry: ; v1 -> current entry in PhysRamTable -; v2 -> end of last used physical page +; v2 -> end of last used physical page (page units) ; On exit: -; a1 -> next free page +; a1 -> next free page (assumed 32bit address) ; v1, v2 updated ; ; No out of memory check... @@ -1764,11 +1778,11 @@ ConstructCAMfromPageTables Init_ClaimPhysicalPage MOV a1, v2 LDMIA v1, {a2, a3} - MOV a3, a3, LSR #12 - ADD a2, a2, a3, LSL #12 ; ip = end of this bank + ADD a2, a2, a3, LSR #12 ; a2 = end of this bank CMP v2, a2 ; advance v2 to next bank if LDRHS a1, [v1, #8]! ; this bank is fully used - ADD v2, a1, #4096 + ADD v2, a1, #1 + MOV a1, a1, LSL #12 ; Convert to byte address MOV pc, lr ; Allocate and map in some RAM. @@ -1778,7 +1792,7 @@ Init_ClaimPhysicalPage ; a2 = access permissions (see Init_MapIn) ; a3 = length (4K multiple) ; v1 -> current entry in PhysRamTable -; v2 = next physical address +; v2 = next physical address (page units) ; v3 -> L1PT ; ; On exit: @@ -1794,23 +1808,23 @@ Init_MapInRAM ROUT 10 LDMIA v1, {v4, ip} ; v4 = addr of bank, ip = len+flags MOV ip, ip, LSR #12 SUB v4, v2, v4 ; v4 = amount of bank used - RSBS v4, v4, ip, LSL #12 ; v4 = amount of bank left + RSBS v4, v4, ip ; v4 = amount of bank left (pages) LDREQ v2, [v1, #8]! ; move to next bank if 0 left BEQ %BT10 CMP v8, #-1 ; is this the first bank? MOVEQ v8, v2 ; remember it - CMP v4, v5 ; sufficient in this bank? + CMP v4, v5, LSR #12 ; sufficient in this bank? 
MOVHS a4, v5 - MOVLO a4, v4 ; a4 = amount to take + MOVLO a4, v4, LSL #12 ; a4 = amount to take - MOV a1, v2 ; set up parameters for MapIn call + MOV a1, v2, LSL #12 ; set up parameters for MapIn call MOV a2, v6 ; then move globals (in case MapIn MOV a3, v7 ; needs to allocate for L2PT) - ADD v2, v2, a4 ; advance physaddr + ADD v2, v2, a4, LSR #12 ; advance physaddr SUB v5, v5, a4 ; decrease wanted - ADD v6, v6, a4 ; advance address pointer + ADD v6, v6, a4 ; advance log address pointer BL Init_MapIn ; map in the RAM TEQ v5, #0 ; more memory still required? BNE %BT10 @@ -1835,7 +1849,7 @@ Init_MapInRAM_Clear ROUT ; same as Init_MapInRAM but also ; a2 = access permissions (see Init_MapIn) ; a3 = length (4K multiple) ; v1 -> current entry in PhysRamTable -; v2 = next physical address +; v2 = next physical address (page units) ; v3 -> L1PT ; ; On exit: @@ -1872,12 +1886,12 @@ Init_MapInRAM_DMA ROUT ; Map a range of physical addresses to a range of logical addresses. ; ; On entry: -; a1 = physical address +; a1 = physical address (32bit) ; a2 = logical address ; a3 = DA flags ; a4 = area size (4K multiple) ; v1 -> current entry in PhysRamTable -; v2 = last used physical address +; v2 = last used physical address (page units) ; v3 -> L1PT (or 0 if MMU on) Init_MapIn ROUT @@ -1955,7 +1969,7 @@ Init_MapIn ROUT ; Map a logical page to a physical page, allocating L2PT as necessary. 
; ; On entry: -; a1 = physical address +; a1 = physical address (32bit) ; a2 = logical address [ LongDesc ; a3 = high & low page attributes merged into one word @@ -1963,7 +1977,7 @@ Init_MapIn ROUT ; a3 = access permissions + C + B bits + size (all non-address bits, of appropriate type) ] ; v1 -> current entry in PhysRamTable -; v2 = last used physical address +; v2 = last used physical address (page units) ; v3 -> L1PT (or 0 if MMU on) ; On exit: ; a1 = logical address @@ -2022,7 +2036,7 @@ Init_MapInPage ROUT ; a1 = virtual address L2PT required for ; a2 = number of bytes of virtual space ; v1 -> current entry in PhysRamTable -; v2 = last used physical address +; v2 = last used physical address (page units) ; v3 -> L1PT (or 0 if MMU on) ; On exit ; a1-a4,ip corrupt diff --git a/s/LongDesc b/s/LongDesc index 15edc3a07a382148d22d834a596c87991f313bec..cb1bcd775986226d669fe722b9aa64d9a88c5175 100644 --- a/s/LongDesc +++ b/s/LongDesc @@ -321,13 +321,14 @@ UpdateL1PTForPageReplacement ROUT MACRO PageNumToL3PT $pnum,$pnum2,$ptable,$cache0,$cache1,$cache2,$pbits,$pbits2 - MOV $pnum2,$pbits2 + MOV $pnum2,$pbits2 ; Save $pbits2 so it can be used as cache func in/out SUB $pbits2,$pnum,$cache0 ; no. pages into block CMP $pbits2,$cache2 BLHS PageNumToL3PTCache_$ptable._$cache0._$cache1._$cache2._$pbits2 - ADD $pnum,$cache1,$pbits2,LSL #Log2PageSize ; physical address of page - ORR $pnum,$pbits,$pnum ; munge in protection bits + ADD $pnum,$cache1,$pbits2 ; physical address of page (in page units) MOV $pbits2,$pnum2 + ORR $pnum2,$pnum2,$pnum,LSR #20 ; High attr + high addr + ORR $pnum,$pbits,$pnum,LSL #12 ; Low attr + low addr MEND MACRO diff --git a/s/MemInfo b/s/MemInfo index 7b47cdefbe7e5b4c92c01db386f0cbd50aa04c3a..89a332694771012088e9d7e0d594a696edbbb9ca 100644 --- a/s/MemInfo +++ b/s/MemInfo @@ -169,6 +169,7 @@ MemoryConvertNoFIQCheck ROUT BCC %FT70 LDMIA r1!, {r3-r4,r8} ; Get next three word entry (PN,LA,PA) and move on pointer. + ! 
0, "LongDescTODO 4GB" MOV r9, #0 ; Top half of PA is zero [ AMB_LazyMapIn @@ -183,6 +184,8 @@ MemoryConvertNoFIQCheck ROUT BL ppn_to_logical ; Else get LA from PN (PA wanted (not given) & LA not given => PN given). BLCC ppn_to_physical ; And get PA from PN (more accurate than getting PA from LA - page may be mapped out) 15 + ! 0, "LongDescTODO 4GB" + CMPCC r9, #1 BCS %FT80 TST r0, #logical,wanted STRNE r4, [r1, #-8] ; Store back LA if wanted. @@ -485,12 +488,13 @@ physical_to_ppn ROUT LDR r5, =ZeroPage+PhysRamTable MOV r3, #0 ; Start at page 0. MOV r8, r8, LSR #12 + ORR r8, r8, r9, LSL #20 10 CMP r7, r3 ; Stop if we run out of pages BCC meminfo_returncs_pullr8 LDMIA r5!, {r10,r11} ; Get start address and size of next block. - SUB r10, r8, r10, LSR #12 ; Determine if given address is in this block. + SUB r10, r8, r10 ; Determine if given address is in this block. CMP r10, r11, LSR #12 ADDCS r3, r3, r11, LSR #12 ; Move on to next block. BCS %BT10 @@ -523,8 +527,9 @@ ppn_to_physical ROUT SUBHS r3, r3, lr BHS %BT10 - ADD r8, r8, r3, LSL #12 - MOV r9, #0 + ADD r8, r8, r3 + MOV r9, r8, LSR #20 + MOV r8, r8, LSL #12 Pull "r3,pc" 20 SEC @@ -536,9 +541,8 @@ ppn_to_physical ROUT ; ; Shifts to determine number of bytes/words to allocate in table. -BitShift * 10 -ByteShift * BitShift + 3 -WordShift * ByteShift + 2 +ByteShift * 1 ; 2^1 pages per byte +WordShift * ByteShift + 2 ; 2^3 pages per word ; Bit patterns for different types of memory. NotPresent * &00000000 @@ -606,8 +610,8 @@ MemoryReadPhys ROUT LDR r1, [sp, #4] ; Get table address back MOV r3, r9, LSR #WordShift LDR r3, [r1, r3, LSL #2]! 
; Get first word of block - MOV r4, r9, LSR #BitShift - AND r4, r4, #(1<<(WordShift-BitShift))-1 ; Bit offset of first page in the word + MOV r4, r9, LSL #2 ; r9 is a page number; table holds 4 bits per page (2^WordShift = 8 pages per word) + AND r4, r4, #31 ; Bit offset of first page in the word RSB r4, r4, #32 ; number of bits left to process MOV r3, r3, LSL r4 @@ -652,9 +656,9 @@ MemoryReadPhys ROUT LDR r0, =ZeroPage LDR r0, [r0, #ROMPhysAddr] LDR r1, [sp, #4] - ADD r0, r1, r0, LSR #ByteShift + ADD r0, r1, r0, LSR #ByteShift+12 LDR r1, =DRAM_Pattern :OR: NotAvailable - MOV r2, #(OSROM_ImageSize*1024) :SHR: ByteShift + MOV r2, #(OSROM_ImageSize :SHR: 2) :SHR: ByteShift BL memset 40 CLRV @@ -698,11 +702,10 @@ MemoryAmounts ROUT LDR r3, [r1, #VideoSizeFlags] TST r3, #OSAddRAM_IsVRAM MOVNE r3, r3, LSR #12 ; Extract size from flags when genuine VRAM - MOVNE r3, r3, LSL #12 MOVEQ r3, #0 LDR r1, [r1, #RAMLIMIT] SUB r1, r1, r3 ; DRAM = RAMLIMIT - VRAMSize - B %FT97 + B %FT98 20 LDR r1, =ZeroPage LDR r1, [r1, #VideoSizeFlags] @@ -740,6 +743,7 @@ MemoryAmounts ROUT B %FT97 97 MOV r1, r1, LSR #12 ; Return as number of pages. +98 MOV r2, #4*1024 ; Return page size. CLRV EXIT @@ -802,31 +806,59 @@ MemoryIOSpace ROUT ; ; In: r0 bits 0..7 = 12 (reason code 12) ; r0 bit 8 = 1 if region must be DMAable -; r0 bits 9..31 = 0 (reserved flags) +; r0 bit 9 = 1 if r4-r7 provided +; r0 bits 10..31 = 0 (reserved flags) ; r1 = size of physically contiguous RAM region required (bytes) ; r2 = log2 of required alignment of base of region (eg. 
12 = 4k, 20 = 1M) +; r4,r5 = lowest acceptable physical address (inclusive) (if bit 9 of r0 set) +; r6,r7 = highest acceptable physical address (inclusive) (if bit 9 of r0 set) ; ; Out: r3 = page number of first page of recommended region that could be ; grown as specific pages by dynamic area handler (only guaranteed ; if grow is next page claiming operation) ; - or error if not possible (eg too big, pages unavailable) ; +; Notes: +; * Default address range in r4-r7 is for the lower 4GB of physical space +; * The high address in r6,r7 is for the end of the memory block, not the start +; RecommendPage ROUT - Push "r0-r2,r4-r11,lr" + Entry "r0-r2,r4-r12" CMP r2,#30 BHI RP_failed ;refuse to look for alignments above 1G ANDS r11,r0,#1:SHL:8 ;convert flag into something usable in the loop MOVNE r11,#OSAddRAM_NoDMA +; + TST r0,#1:SHL:9 ;If no range specified, limit to lower 4GB + MOVEQ r10,#0 + MOVEQ r12,#1:SHL:20 + BEQ %FT10 + CMP r5,#1:SHL:8 + BHS RP_failed ; LPAE/long descriptor format limits us to 40 bit physical addresses (although technically PhysRamTable can store 44 bit addresses) + CMP r7,#1:SHL:8 ; Clamp high address + MOVCS r7,#&FF + MOVCS r6,#-1 + LDR lr,=4095 + ADD r10,r4,lr ; Round up low address + MOV r10,r10,LSR #12 + ORR r10,r10,r5,LSL #20 + MOV r12,r6,LSR #12 ; Round down high address + ORR r12,r12,r7,LSL #20 + ADD r12,r12,#1 ; Make exclusive +10 ; ADD r1,r1,#&1000 SUB r1,r1,#1 - MOV r1,r1,LSR #12 - MOVS r1,r1,LSL #12 ;size rounded up to whole no. of pages + MOV r1,r1,LSR #12 ;size rounded up to whole no. 
of pages ; - CMP r2,#12 - MOVLO r2,#12 ;log2 alignment must be at least 12 (4k pages) + SUBS r2,r2,#12 ;log2 alignment, in terms of pages + MOVLT r2,#0 ;must be at least zero MOV r0,#1 - MOV r4,r0,LSL r2 ;required alignment-1 + MOV r4,r0,LSL r2 ;required alignment, page units +; + SUB r12,r12,r1 + MOV r12,r12,LSR r2 + MOV r12,r12,LSL r2 ; Last acceptable block start address ; LDR r0,=ZeroPage+PhysRamTable MOV r3,#0 ;page number, starts at 0 @@ -838,22 +870,40 @@ RecommendPage ROUT RP_nextchunk ADD r3,r3,r8,LSR #12 ;page no. of first page of next chunk LDMIA r0!,{r7,r8} ;address,size of next physical chunk +; R0 -> PhysRamTable +; R1 = Required length in pages +; R2 = Required log2 alignment-12 +; R3 = current phys page no. +; R4 = Required alignment, page units +; R5 -> CAM +; R7,R8 = Current PhysRamTable entry +; R10 = Low address limit +; R11 = Flags +; R12 = High address limit +; R6,R9 = spare CMP r8,#0 BEQ RP_failed TST r8,r11 ;ignore non-DMA regions if bit 8 of R0 was set BNE RP_nextchunk ; MOV r8,r8,LSR #12 - ADD r6,r7,r4 + CMP r7,r10 + ADDLO r6,r10,r4 + ADDHS r6,r7,r4 MOV r8,r8,LSL #12 SUB r6,r6,#1 ;round up MOV r6,r6,LSR r2 - MOV r6,r6,LSL r2 + MOV r6,r6,LSL r2 ;address of first page of acceptable alignment + SUBS lr,r12,r6 + BLS RP_nextchunk ;exceeded upper address limit SUB r6,r6,r7 ;adjustment to first address of acceptable alignment - CMP r6,r8 + CMP r6,r8,LSR #12 BHS RP_nextchunk ;negligible chunk - ADD r7,r3,r6,LSR #12 ;first page number of acceptable alignment - SUB r9,r8,r6 ;remaining size of chunk + ADD r7,r3,r6 ;first page number of acceptable alignment + RSB r9,r6,r8,LSR #12 ;remaining size of chunk + CMP r9,lr + ADDHI r9,lr,r1 ;clamp effective chunk length if we're going to hit the upper address limit + ; ;find first available page RP_nextpage @@ -864,36 +914,48 @@ RP_nextpage TST r6,#PageFlags_Unavailable :OR: PageFlags_Required TSTEQ r6,#PageFlags_Reserved BEQ RP_checkotherpages -RP_nextpagecontinue CMP r9,r4 BLS RP_nextchunk - ADD 
r7,r7,r4,LSR #12 ;next page of suitable alignment + ADD r7,r7,r4 ;next page of suitable alignment SUB r9,r9,r4 B RP_nextpage ; +RP_nextpagecontinue + ; r7 = start page, r6 = page that failed + ; No point checking any of r7...r6 again, so skip ahead past r6 + SUB r6,r6,r7 ;number of pages to skip (minus 1) + ADD r6,r6,r4 + MOV r6,r6,LSR r2 + MOV r6,r6,LSL r2 ;number to skip, rounded up by alignment + CMP r9,r6 + BLS RP_nextchunk + ADD r7,r7,r6 ;next page of suitable alignment + SUB r9,r9,r6 + B RP_nextpage +; RP_checkotherpages - ADD r10,r7,r1,LSR #12 - SUB r10,r10,#1 ;last page required + ADD r6,r7,r1 + SUB r6,r6,#1 ;last page required RP_checkotherpagesloop - LDR r6,[r5,r10,LSL #CAM_EntrySizeLog2] ;page flags from CAM - TST r6,#PageFlags_Unavailable :OR: PageFlags_Required - TSTEQ r6,#PageFlags_Reserved + LDR lr,[r5,r6,LSL #CAM_EntrySizeLog2] ;page flags from CAM + TST lr,#PageFlags_Unavailable :OR: PageFlags_Required + TSTEQ lr,#PageFlags_Reserved BNE RP_nextpagecontinue - SUB r10,r10,#1 - CMP r10,r7 + SUB r6,r6,#1 + CMP r6,r7 BHI RP_checkotherpagesloop ; ;success! ; MOV r3,r7 - Pull "r0-r2,r4-r11,pc" + Exit RP_failed MOV r3,#0 ADR r0,ErrorBlock_NoMemChunkAvailable SETV - STR r0,[sp] - Pull "r0-r2,r4-r11,pc" + FRAMSTR r0 + Exit MakeErrorBlock NoMemChunkAvailable @@ -1719,6 +1781,7 @@ DMAPrep_Translate B %FT30 20 MOV r8, r4 + ! 0, "LongDescTODO 4GB" MOV r9, #0 BL physical_to_ppn ; r7, r8, r9 -> r3 BCS %BT95 diff --git a/s/Middle b/s/Middle index b638f9104ff53a5fd72481a2c764e68f94210d9e..595f7bdefab233a5bbfc0148d5554cab3ef399ee 100644 --- a/s/Middle +++ b/s/Middle @@ -224,6 +224,9 @@ SSTENV Push "R0, R1, lr" LDR R12, =ZeroPage LDR R2, [R12, #RAMLIMIT] ; this is read-only + CMP R2, #DynArea_PMP_BigPageCount + MOVLO R2, R2, LSL #12 + LDRHS R2, =DynArea_PMP_BigByteCount ; more RAM than any Brazil could hope for MOV R3, #0 ; never any Brazil-type buffering ; m2 tools will complain if there is! 
Pull "R0, R1, lr" @@ -1590,6 +1593,7 @@ osri6_table DCD NVECTORS ;86 DCD 1 ;87 CAM format: 0 = 8 bytes/entry, 1 = 16 bytes/entry DCD ABTSTK ;88 + DCD 1 ;89 PhysRamTable format: 0 = addresses are in byte units, 1 = addresses are in 4KB units osri6_maxvalue * (.-4-osri6_table) :SHR: 2 diff --git a/s/NewReset b/s/NewReset index 46b41b65771d5fe276fa34ac7066cd2cfd39ee69..25dd484cb2fe49aa601e1c9e960870b877c3a748 100644 --- a/s/NewReset +++ b/s/NewReset @@ -72,7 +72,7 @@ MassageScreenSize ROUT LDR r0, =ZeroPage ] LDR r0, [r0, #RAMLIMIT] - CMP r0, #512*1024 + CMP r0, #(512*1024):SHR:12 MOVEQ r0, #80*1024 MOVNE r0, #160*1024 CmosScreenWillDo @@ -731,9 +731,9 @@ init_other_modules LDR R0, =ZeroPage LDR R0, [R0, #RAMLIMIT] - MLA R0, R1, R2, R0 ; convert pages to bytes and add in + ADD R0, R0, R1 - MOV R0, R0, LSR #20 ; /(1024*1024) down to megabytes + MOV R0, R0, LSR #20-Log2PageSize ; down to megabytes LDR R1, =GeneralMOSBuffer MOV R2, #?GeneralMOSBuffer SWI XOS_ConvertInteger4 diff --git a/s/ShortDesc b/s/ShortDesc index bcfaa0459fc672fc5dfb874ae865c58e4609b055..31e1f3fe155b5f3afafd2251a5d60a9c4bad3644 100644 --- a/s/ShortDesc +++ b/s/ShortDesc @@ -359,6 +359,7 @@ UpdateL1PTForPageReplacement ROUT LDR $ptable,=ZeroPage+PhysRamTable MOV $cache0,#0 LDMIA $ptable,{$cache1,$cache2} + MOV $cache1,$cache1,LSL #12 MOV $cache2,$cache2,LSR #12 MEND @@ -373,6 +374,7 @@ PageNumToL2PTCache_r4_r5_r6_r7_r12 ROUT SUBHS r12,r12,r7 ADDHS r5,r5,r7 BHS %BT10 + MOV r6,r6,LSL #12 EXIT ; r5-r7 = cache entry, r12 = offset into entry ; ---------------------------------------------------------------------------------- diff --git a/s/VMSAv6Long b/s/VMSAv6Long index 4df8ceed3c09a4490cd2d818af059223b74319fb..673abad9a947739a1aa0dc27dc3601b5051f13dd 100644 --- a/s/VMSAv6Long +++ b/s/VMSAv6Long @@ -86,8 +86,9 @@ BangCamUpdate ROUT BCS %BT10 ; if more than that, go onto next bank ADD r6, r6, r4, LSR #12 ; put back the ones which were too many - ADD r0, r0, r6, LSL #12 ; move on address by the number of 
pages left + ADD r0, r0, r6 ; move on address by the number of pages left LDR r6, [sp] ; reload old logical address + MOV r0, r0, ROR #20 ; High address bits packed into low, ready for Get4PTE ; now we have r6 = old logical address, r2 = physical page number, r0 = physical address diff --git a/s/VMSAv6Short b/s/VMSAv6Short index ed5b766308352f6be2b42b360a4a838490b425c6..3d0d24b9466b3179b1d1aa2bf06bab8253e3df23 100644 --- a/s/VMSAv6Short +++ b/s/VMSAv6Short @@ -90,8 +90,9 @@ BangCamUpdate ROUT BCS %BT10 ; if more than that, go onto next bank ADD r6, r6, r4, LSR #12 ; put back the ones which were too many - ADD r0, r0, r6, LSL #12 ; move on address by the number of pages left + ADD r0, r0, r6 ; move on address by the number of pages left LDR r6, [sp] ; reload old logical address + MOV r0, r0, LSL #12 ; convert from page units to bytes ; now we have r6 = old logical address, r2 = physical page number, r0 = physical address diff --git a/s/vdu/vdudriver b/s/vdu/vdudriver index 4f4196c6d2f3265223213d1c3e7bf8ac9c1e7983..42cbb71b0406735aa5473daffa13386473b04d64 100644 --- a/s/vdu/vdudriver +++ b/s/vdu/vdudriver @@ -103,6 +103,8 @@ VduInit ROUT Push R14 LDR R0, =ZeroPage LDR R14, [R0, #VideoPhysAddr] + ! 0, "LongDescTODO 4GB" + MOV R14, R14, LSL #12 ASSERT (ZeroPage :AND: 255) = 0 STRB R0, [R0, #OsbyteVars + :INDEX: VDUqueueItems] ;purge queue STRB R0, [WsPtr, #ScreenBlankFlag] ; not blanked @@ -299,6 +301,8 @@ InitialiseMode ROUT ; Screen DA is in use LDR r0, =ZeroPage LDR r0, [r0, #VideoPhysAddr] + ! 0, "LongDescTODO 4GB" + MOV r0, r0, LSL #12 STR r0, [WsPtr, #TrueVideoPhysAddr] ; Point TrueVideoPhysAddr at the base of screen DA MOV r0, #2 SWI XOS_ReadDynamicArea @@ -839,6 +843,8 @@ ModeChangeSub ROUT 581 LDR r0, =ZeroPage LDR r0, [r0, #VideoPhysAddr] + ! 0, "LongDescTODO 4GB" + MOV r0, r0, LSL #12 STR r0, [WsPtr, #TrueVideoPhysAddr] ; Point TrueVideoPhysAddr at the base of screen DA MOV r0, #2 SWI XOS_ReadDynamicArea