From df4efb686a1dcc7ec50b1bd6c8e294e60fbf8477 Mon Sep 17 00:00:00 2001
From: Jeffrey Lee <me@phlamethrower.co.uk>
Date: Mon, 18 Nov 2019 22:31:54 +0000
Subject: [PATCH] Support RAM banks with high physical addresses

This changes PhysRamTable to store the address of each RAM bank in terms
of (4KB) pages instead of bytes, effectively allowing it to support a
44-bit physical address space (a 32-bit page number plus a 12-bit page
offset). This means that (when the long descriptor page table format is
used) the OS can now make use of memory located outside the lower 4GB of
the physical address space. However, some public APIs still need
extending to allow for all operations to be supported on high RAM (e.g.
OS_Memory logical to physical address lookups).

OS_Memory 12 (RecommendPage) has been extended to allow R4-R7 to be used
to specify a (64bit) physical address range which the recommended pages
must lie within. For backwards compatibility this defaults to 0-4GB.
---
 hdr/KernelWS        |  15 +++--
 hdr/OSRSI6          |   1 +
 s/AMBControl/memmap |   3 +-
 s/ARM600            |   3 +-
 s/ChangeDyn         |  27 +++++----
 s/HAL               | 120 ++++++++++++++++++++++-----------------
 s/LongDesc          |   7 ++-
 s/MemInfo           | 135 ++++++++++++++++++++++++++++++++------------
 s/Middle            |   4 ++
 s/NewReset          |   6 +-
 s/ShortDesc         |   2 +
 s/VMSAv6Long        |   3 +-
 s/VMSAv6Short       |   3 +-
 s/vdu/vdudriver     |   6 ++
 14 files changed, 220 insertions(+), 115 deletions(-)

diff --git a/hdr/KernelWS b/hdr/KernelWS
index 4b0b971..fef13fe 100644
--- a/hdr/KernelWS
+++ b/hdr/KernelWS
@@ -1035,13 +1035,13 @@ IICBus_Size    # 0
 InitWsStart     #       0
 InitIRQHandler  #       4               ; pointer to IRQ handler (LDR PC'ed from IRQ HW vector)
 InitIRQWs       #       16              ; workspace for IRQ handler
-InitUsedStart   #       4               ; start of used pages (L2PT etc) not to be cleared
-InitUsedEnd     #       4               ; end of used pages
+InitUsedStart   #       4               ; start of used pages (L2PT etc) not to be cleared (32bit address)
+InitUsedEnd     #       4               ; end of used pages (page units)
 InitUsedBlock   #       4               ; current block in PhysRamTable
 InitClearRamWs  #       10*4            ; preserve registers during ClearPhysRAM
 InitDMABlock    #       8               ; block of DMAable memory extracted from PhysRamTable
 InitDMAOffset   #       4               ; offset+8 into PhysRamTable where memory was taken
-InitDMAEnd      #       4               ; current DMA alloc pos
+InitDMAEnd      #       4               ; current DMA alloc pos (32bit address)
                 AlignSpace 32           ; because we clear 32 at a time
 InitWsEnd       #       0
 
@@ -1147,8 +1147,11 @@ Oscli_CmdHashLists      # 4          ;anchor for hashed command lists structure
 SkippedTables   #       0
 
 PhysRamTable    #       0       ; Pairs of words (physaddr, size+flags)
-                                ; indicating RAM present in machine
-                                ; Unused entries have size of zero
+                                ; indicating RAM present in machine. physaddr
+                                ; is in units of pages. size is in bytes, with
+                                ; the flags in the low 12 bits. Individual
+                                ; entries don't cross 4GB barriers. Unused
+                                ; entries have size+flags of zero.
 VideoPhysAddr   #       4       ; Address of video RAM (in the case of DRAM-only machines,
 VideoSizeFlags  #       4       ; this is actually a chunk out of DRAM)
 DRAMPhysAddrA   #       4       ; Next the DRAM
@@ -1345,7 +1348,7 @@ MaxCamEntry     #       4       ; maximum index into the cam map, ie
                                 ; 511 for 16MByte machines, 383 for 12MBytes
                                 ; 255 for 8MBytes, otherwise 127
 
-RAMLIMIT        #       4
+RAMLIMIT        #       4       ; Number of pages of RAM
 
 ROMPhysAddr     #       4
 
diff --git a/hdr/OSRSI6 b/hdr/OSRSI6
index dbf4335..0ecf265 100644
--- a/hdr/OSRSI6
+++ b/hdr/OSRSI6
@@ -88,5 +88,6 @@ OSRSI6_VecPtrTab                               * 85
 OSRSI6_NVECTORS                                * 86
 OSRSI6_CAMFormat                               * 87 ; 0 = 8 bytes per entry, 1 = 16 bytes per entry
 OSRSI6_ABTSTK                                  * 88
+OSRSI6_PhysRamtableFormat                      * 89 ; 0 = addresses are in byte units, 1 = addresses are in 4KB units
 
         END
diff --git a/s/AMBControl/memmap b/s/AMBControl/memmap
index 718d423..25e6e72 100644
--- a/s/AMBControl/memmap
+++ b/s/AMBControl/memmap
@@ -468,7 +468,8 @@ AMB_LazyFixUp ROUT
         CMP     r6,r5
         SUBHS   r6,r6,r5
         BHS     %BT10
-        ADD     r4,r4,r6,LSL #12
+        ADD     r4,r4,r6
+        MOV     r4,r4,ROR #20                            ;High address packed into low bits for LongDesc
         MOV     r1,#DynAreaFlags_PMP
         GetPTE  r4,4K,r4,r1
 ;
diff --git a/s/ARM600 b/s/ARM600
index ea5548b..852a371 100644
--- a/s/ARM600
+++ b/s/ARM600
@@ -86,8 +86,9 @@ BangCamUpdate ROUT
         BCS     %BT10                           ; if more than that, go onto next bank
 
         ADD     r6, r6, r4, LSR #12             ; put back the ones which were too many
-        ADD     r0, r0, r6, LSL #12             ; move on address by the number of pages left
+        ADD     r0, r0, r6                      ; move on address by the number of pages left
         LDR     r6, [sp]                        ; reload old logical address
+        MOV     r0, r0, LSL #12                 ; convert from page units to bytes
 
 ; now we have r6 = old logical address, r2 = physical page number, r0 = physical address
 
diff --git a/s/ChangeDyn b/s/ChangeDyn
index 20538cd..19a0f2e 100644
--- a/s/ChangeDyn
+++ b/s/ChangeDyn
@@ -407,10 +407,7 @@ ReadDynamicArea ROUT
 ReadMemMapInfo_Code
       LDR      R10, =ZeroPage
       LDR      R0, [R10, #Page_Size]
-      LDR      R1, [R10, #RAMLIMIT]    ; = total memory size
-      ADRL     R11, PageShifts-1
-      LDRB     R11, [R11, R0, LSR #12]
-      MOV      R1, R1, LSR R11
+      LDR      R1, [R10, #RAMLIMIT]    ; = total number of pages
       ExitSWIHandler
 
 ; ************************************************************************
@@ -867,8 +864,8 @@ DAC_notsparse
         LDR     r10, =ZeroPage
         LDR     r11, [r10, #Page_Size]
         LDR     r10, [r10, #RAMLIMIT]   ; get total RAM size
-        CMP     r5, r10                 ; if requested maximum size is > total
-        MOVHI   r5, r10                 ; then set max to total (NB. -1 passed in always yields HI)
+        CMP     r10, r5, LSR #Log2PageSize ; if requested maximum size is > total
+        MOVLS   r5, r10, LSL #Log2PageSize ; then set max to total. Note no special handling of R5=-1 is needed (R5=-1 will get treated as 4GB-1. If RAMLIMIT < 4GB then R5 will be clamped correctly, if RAMLIMIT >= 4GB then the request will fail regardless because we only have limited logical address space to work with)
 
 DAC_roundup
         SUB     r10, r11, #1            ; also round up to a page multiple
@@ -4591,11 +4588,12 @@ DynArea_AddrLookup_loop
 
         LDR     r0, [r5, #InitUsedStart]
         ADD     r0, r0, #DRAMOffset_FirstFixed - DRAMOffset_PageTables
-        MOV     r1, #0                          ; only know 32-bit addresses for now
+        MOV     r1, #0                          ; start of init block is always 32bit address
         BL      PhysAddrToPageNo
         MOV     r7, r0                          ; r7 = page number of start of static chunk
         LDR     r0, [r5, #InitUsedEnd]
-        MOV     r1, #0                          ; only know 32-bit addresses for now
+        MOV     r1, r0, LSR #20
+        MOV     r0, r0, LSL #12
         BL      PhysAddrToPageNo
         SUB     r8, r0, #1                      ; r8 = page number of last page in statics
         ADD     r9, r5, #PhysRamTable
@@ -5678,7 +5676,11 @@ DoTheGrowPagesSpecified ROUT
         BCS     %BT06
 
         ADD     r3, r3, lr, LSR #12             ; put back what could not be subtracted
-        ADD     r8, r8, r3, LSL #12             ; and add onto base address
+        ADD     r8, r8, r3                      ; and add onto base address
+        ! 0, "LongDescTODO 4GB"
+        CMP     r8, #1:SHL:20                   ; 4GB limit
+        BHS     DoTheGrowPageUnavailable
+        MOV     r8, r8, LSL #12
         STR     r8, [r1, #8-12]                 ; store physical address in page block
 
         SUBS    r2, r2, #1
@@ -5807,7 +5809,10 @@ DoTheGrowPagesSpecified ROUT
         MOV     r3, r6
         BL      ppn_to_physical
         MOV     r10, r8
+        ! 0, "LongDescTODO 4GB"
+        CMP     r9, #0
         Pull    "r3,r5,r8,r9"
+        BNE     %BT64
 
 ;        DREG    r6, "Using page number "
 68
@@ -6406,11 +6411,13 @@ CallPreGrow ROUT
 20
         ADD     r2,r2,r12,LSR #12        ; advance page number
 21
-        LDR     r12,[r0],#8              ; get next chunk details
+        LDMIA   r0!,{r3,r12}             ; get next chunk details
         CMP     r12,#0
         BEQ     %FT90
         TST     r12,#OSAddRAM_NoDMA
         BNE     %BT20
+        CMP     r3,#1:SHL:20             ; stick to lower 4GB for compatibility with old code
+        BHS     %BT20
         ; Check the CAM map to see if any pages here are free
         MOV     r12,r12,LSR #12
 30
diff --git a/s/HAL b/s/HAL
index ca64001..6ed0b92 100644
--- a/s/HAL
+++ b/s/HAL
@@ -476,8 +476,18 @@ RISCOS_Start
         B       %BT31
 32
 
-        ; Fill in the Kernel's permanent memory table, sorting by speed and DMA ability
-        ; Non-DMAable RAM is preferred over DMAable, as the kernel requires very little DMAable RAM, and we don't want to permanently claim DMAable RAM if we're not actually using it for DMA (in case machine only has a tiny amount available)
+        ; Fill in the Kernel's permanent memory table, sorting by address, then DMA ability, then speed.
+        ; * Address: All memory that falls in the low 4GB of the physical map
+        ;   comes first. This makes it easier for our initial memory allocation
+        ;   (no danger of allocating pages which can't be accessed with the MMU
+        ;   off), but may also help with wider software compatibility (all low-
+        ;   RAM pages occupy the lowest physical page numbers)
+        ; * DMA ability: Non-DMAable RAM is preferred over DMAable, as the kernel
+        ;   requires very little DMAable RAM, and we don't want to permanently
+        ;   claim DMAable RAM if we're not actually using it for DMA (in case the
+        ;   machine only has a tiny amount available)
+        ; * Speed: Fastest RAM is listed first, so that we'll prefer to allocate
+        ;   it for these important kernel/system areas
         ADD     ip, v1, #DRAMOffset_PageZero
         ASSERT  DRAMOffset_PageZero > 0         ; If the workspace block is the block containing the OS_AddRAM list, make sure the two don't overlap otherwise we might corrupt it while we copy it
 
@@ -491,12 +501,11 @@ RISCOS_Start
 
         ; First put the VRAM information in to free up some regs
         ADD     v7, ip, #VideoPhysAddr
-        MOV     v4, v4, LSL #12                 ; 32bit only for now
-        ! 0, "LongDescTODO VRAM selection doesn't guarantee 32bit address"
         STMIA   v7!, {v4, v6}
 
         ; Now fill in the rest
         ASSERT  DRAMPhysAddrA = VideoPhysAddr+8
+        MOV     v1, v1, LSR #12
         ADDS    v2, v2, #4096                   ; Store true length
         ADDCS   v2, v2, #1:SHL:31               ; If it overflowed, must have been 4GB block, so clamp at 2GB (loop below will add the second 2GB)
         STMIA   v7!, {v1, v2}                   ; workspace block must be first
@@ -504,29 +513,31 @@ RISCOS_Start
         TEQ     v8, a4
         BEQ     %FT39
         LDMIA   v8!, {v1, v2}
-        CMP     v1, #1:SHL:20
-        BHS     %BT33                           ; skip >4GB addresses for now
-        MOV     v1, v1, LSL #12
         ADDS    v2, v2, #4096                   ; Get true length
         ADDCS   v2, v2, #1:SHL:31               ; If it overflowed, must have been 4GB block, so split into two 2GB blocks
         SUBCS   v2, v2, #4096
-        ADDCS   v1, v1, #1:SHL:31
+        ADDCS   v1, v1, #1:SHL:(31-12)
         STMCSDB v8!, {v1, v2}
         ADDCS   v2, v2, #4096
-        SUBCS   v1, v1, #1:SHL:31
+        SUBCS   v1, v1, #1:SHL:(31-12)
         ADD     a1, ip, #DRAMPhysAddrA
         LDMIA   a1!, {a2, a3}
         TEQ     v1, a2
         BEQ     %BT33                           ; don't duplicate the initial block
         ; Perform insertion sort
         ; a1-a3, v3-v6, ip, lr free
-        AND     v3, v2, #&F*OSAddRAM_Speed+OSAddRAM_NoDMA
-        ASSERT  OSAddRAM_Speed = 1:SHL:8
-        ASSERT  OSAddRAM_NoDMA < OSAddRAM_Speed
-        MOV     v3, v3, ROR #8                  ; Give NoDMA flag priority over speed when sorting
+        AND     v3, v2, #&F*OSAddRAM_Speed
+        CMP     v1, #1:SHL:20
+        ORRLO   v3, v3, #1:SHL:31               ; Low RAM takes priority
+        TST     v2, #OSAddRAM_NoDMA
+        ORRNE   v3, v3, #1:SHL:30               ; Followed by non-DMA
 34
-        AND     v4, a3, #&F*OSAddRAM_Speed+OSAddRAM_NoDMA
-        CMP     v3, v4, ROR #8
+        AND     v4, a3, #&F*OSAddRAM_Speed
+        CMP     a2, #1:SHL:20
+        ORRLO   v4, v4, #1:SHL:31               ; Low RAM takes priority
+        TST     a3, #OSAddRAM_NoDMA
+        ORRNE   v4, v4, #1:SHL:30               ; Followed by non-DMA
+        CMP     v3, v4                          ; Compare priority value
         BHI     %FT35
         TEQ     a1, v7
         LDMNEIA a1!, {a2, a3}
@@ -554,7 +565,6 @@ RISCOS_Start
         ADD     a2, a2, v2, LSR #12             ; add on size
         TEQ     v6, v7
         BNE     %BT40
-        MOV     a2, a2, LSL #12
 
         ; Work out how much DMAable RAM the HAL/kernel needs
         LDR     a1, [sp, #8]
@@ -576,7 +586,8 @@ RISCOS_Start
         ; Claim it as normal, but set InitDMAEnd to v1+DRAMOffset_LastFixed so
         ; that the already used bit won't get used for DMA
         ; We also need to be careful later on when picking the initial v2 value
-        ADD     lr, v1, #DRAMOffset_LastFixed
+        MOV     lr, v1, LSL #12
+        ADD     lr, lr, #DRAMOffset_LastFixed
         STR     lr, [ip, #InitDMAEnd]
         B       %FT43
 41
@@ -588,8 +599,11 @@ RISCOS_Start
         BNE     %BT42
         CMP     v2, a1
         BLO     %BT42
+        CMP     v1, #1:SHL:20 ; <4GB only for now
+        BHS     %BT42
         ; Make a note of this block
-        STR     v1, [ip, #InitDMAEnd]
+        MOV     lr, v1, LSL #12
+        STR     lr, [ip, #InitDMAEnd]
 43
         STR     v1, [ip, #InitDMABlock]
         STR     v2, [ip, #InitDMABlock+4]
@@ -597,7 +611,7 @@ RISCOS_Start
         STR     lr, [ip, #InitDMAOffset]
         ; Now shrink/remove this memory from PhysRamTable
         SUB     v2, v2, a1
-        ADD     v1, v1, a1
+        ADD     v1, v1, a1, LSR #12
         CMP     v2, #4096               ; Block all gone?
         STMHSDB a4, {v1, v2}            ; no, just shrink it
         BHS     %FT55
@@ -609,7 +623,7 @@ RISCOS_Start
         BNE     %BT45
         SUB     v7, v7, #8
 
-; a2 = Total memory size (bytes)
+; a2 = Total memory size (pages)
 ; a3 = PhysRamTable
 ; v7 = After last used entry in PhysRamTable
 ; ip -> ZeroPage
@@ -672,29 +686,29 @@ RISCOS_Start
         ADD     v1, a3, #DRAMOffset_PageZero - DRAMOffset_PageTables
         ADD     v2, a3, #DRAMOffset_LastFixed - DRAMOffset_PageTables
         STR     a2, [v1, #RAMLIMIT]                     ; remember the RAM size
-        MOV     lr, a2, LSR #12
-        SUB     lr, lr, #1
+        SUB     lr, a2, #1
         STR     lr, [v1, #MaxCamEntry]
-        MOV     lr, a2, LSR #12-CAM_EntrySizeLog2+12
-        CMP     a2, lr, LSL #12-CAM_EntrySizeLog2+12
-        ADDNE   lr, lr, #1
+        MOV     lr, a2, LSR #12-CAM_EntrySizeLog2       ; no. of pages needed for CAM
+        CMP     a2, lr, LSL #12-CAM_EntrySizeLog2
+        ADDNE   lr, lr, #1                              ; round up
         MOV     lr, lr, LSL #12
         STR     lr, [v1, #SoftCamMapSize]
         STR     a3, [v1, #InitUsedStart]                ; store start of L1PT
 
         ADD     v1, v1, #DRAMPhysAddrA
+        MOV     v2, v2, LSR #12
         MOV     v3, a3
 
         ; Detect if the DMA claiming adjusted the first block
         ; If so, we'll need to reset v2 to the start of the block at v1
         LDR     a1, [v1]
-        ADD     lr, a1, #DRAMOffset_LastFixed
+        ADD     lr, a1, #DRAMOffset_LastFixed:SHR:12
         TEQ     lr, v2
         MOVNE   v2, a1
 
 ; For the next batch of allocation routines, v1-v3 are treated as globals.
 ; v1 -> current entry in PhysRamTable
-; v2 -> next address to allocate in v1 (may point at end of v1)
+; v2 -> next address to allocate in v1 (may point at end of v1), in units of pages
 ; v3 -> L1PT (or 0 if MMU on - not yet)
 
 ; Set up some temporary PCBTrans and PPLTrans pointers, and the initial page flags used by the page tables
@@ -1527,8 +1541,8 @@ ROMDecompAlign * 20
         ADD     v3, v3, v8
         ; Work out whether the block was removed or merely shrunk
         LDMDB   v3, {v4-v5}
-        ADD     v6, v1, v2
-        ADD     v7, v4, v5
+        ADD     v6, v1, v2, LSR #12
+        ADD     v7, v4, v5, LSR #12
         STMDB   v3, {v1-v2}
         TEQ     v6, v7
         BEQ     %FT40                   ; End addresses match, it was shrunk
@@ -1672,19 +1686,19 @@ CountPageTablePages ROUT
 ; Returns -1 if address is not in RAM.
 
 PhysAddrToPageNo
-        TEQ     a2, #0
-        BNE     %FT90                           ; only handle addresses under 4GB for now
+        ; Convert address to 4K addressing
+        MOV     a1, a1, LSR #12
+        ORR     a1, a1, a2, LSL #20
         MOV     a4, #0
         LDR     ip, =ZeroPage + PhysRamTable
 10      LDMIA   ip!, {a2, a3}                   ; get phys addr, size
         MOVS    a3, a3, LSR #12                 ; end of list? (size=0)
         BEQ     %FT90                           ;   then it ain't RAM
         SUB     a2, a1, a2                      ; a2 = amount into this bank
-        CMP     a2, a3, LSL #12                 ; if more than size
-        ADDHS   a4, a4, a3, LSL #12             ;   increase counter by size of bank
+        CMP     a2, a3                          ; if more than size
+        ADDHS   a4, a4, a3                      ;   increase counter by size of bank
         BHS     %BT10                           ;   and move to next
-        ADD     a4, a4, a2                      ; add offset to counter
-        MOV     a1, a4, LSR #12                 ; convert counter to a page number
+        ADD     a1, a4, a2                      ; add offset to counter
         MOV     pc, lr
 
 90      MOV     a1, #-1
@@ -1754,9 +1768,9 @@ ConstructCAMfromPageTables
 ;
 ; On entry:
 ;    v1 -> current entry in PhysRamTable
-;    v2 -> end of last used physical page
+;    v2 -> end of last used physical page (page units)
 ; On exit:
-;    a1 -> next free page
+;    a1 -> next free page (assumed 32bit address)
 ;    v1, v2 updated
 ;
 ; No out of memory check...
@@ -1764,11 +1778,11 @@ ConstructCAMfromPageTables
 Init_ClaimPhysicalPage
         MOV     a1, v2
         LDMIA   v1, {a2, a3}
-        MOV     a3, a3, LSR #12
-        ADD     a2, a2, a3, LSL #12             ; ip = end of this bank
+        ADD     a2, a2, a3, LSR #12             ; a2 = end of this bank
         CMP     v2, a2                          ; advance v2 to next bank if
         LDRHS   a1, [v1, #8]!                   ; this bank is fully used
-        ADD     v2, a1, #4096
+        ADD     v2, a1, #1
+        MOV     a1, a1, LSL #12                 ; Convert to byte address
         MOV     pc, lr
 
 ; Allocate and map in some RAM.
@@ -1778,7 +1792,7 @@ Init_ClaimPhysicalPage
 ;    a2 = access permissions (see Init_MapIn)
 ;    a3 = length (4K multiple)
 ;    v1 -> current entry in PhysRamTable
-;    v2 = next physical address
+;    v2 = next physical address (page units)
 ;    v3 -> L1PT
 ;
 ; On exit:
@@ -1794,23 +1808,23 @@ Init_MapInRAM ROUT
 10      LDMIA   v1, {v4, ip}                    ; v4 = addr of bank, ip = len+flags
         MOV     ip, ip, LSR #12
         SUB     v4, v2, v4                      ; v4 = amount of bank used
-        RSBS    v4, v4, ip, LSL #12             ; v4 = amount of bank left
+        RSBS    v4, v4, ip                      ; v4 = amount of bank left (pages)
         LDREQ   v2, [v1, #8]!                   ; move to next bank if 0 left
         BEQ     %BT10
 
         CMP     v8, #-1                         ; is this the first bank?
         MOVEQ   v8, v2                          ; remember it
 
-        CMP     v4, v5                          ; sufficient in this bank?
+        CMP     v4, v5, LSR #12                 ; sufficient in this bank?
         MOVHS   a4, v5
-        MOVLO   a4, v4                          ; a4 = amount to take
+        MOVLO   a4, v4, LSL #12                 ; a4 = amount to take
 
-        MOV     a1, v2                          ; set up parameters for MapIn call
+        MOV     a1, v2, LSL #12                 ; set up parameters for MapIn call
         MOV     a2, v6                          ; then move globals (in case MapIn
         MOV     a3, v7                          ; needs to allocate for L2PT)
-        ADD     v2, v2, a4                      ; advance physaddr
+        ADD     v2, v2, a4, LSR #12             ; advance physaddr
         SUB     v5, v5, a4                      ; decrease wanted
-        ADD     v6, v6, a4                      ; advance address pointer
+        ADD     v6, v6, a4                      ; advance log address pointer
         BL      Init_MapIn                      ; map in the RAM
         TEQ     v5, #0                          ; more memory still required?
         BNE     %BT10
@@ -1835,7 +1849,7 @@ Init_MapInRAM_Clear ROUT                        ; same as Init_MapInRAM but also
 ;    a2 = access permissions (see Init_MapIn)
 ;    a3 = length (4K multiple)
 ;    v1 -> current entry in PhysRamTable
-;    v2 = next physical address
+;    v2 = next physical address (page units)
 ;    v3 -> L1PT
 ;
 ; On exit:
@@ -1872,12 +1886,12 @@ Init_MapInRAM_DMA ROUT
 ; Map a range of physical addresses to a range of logical addresses.
 ;
 ; On entry:
-;    a1 = physical address
+;    a1 = physical address (32bit)
 ;    a2 = logical address
 ;    a3 = DA flags
 ;    a4 = area size (4K multiple)
 ;    v1 -> current entry in PhysRamTable
-;    v2 = last used physical address
+;    v2 = last used physical address (page units)
 ;    v3 -> L1PT (or 0 if MMU on)
 
 Init_MapIn ROUT
@@ -1955,7 +1969,7 @@ Init_MapIn ROUT
 ; Map a logical page to a physical page, allocating L2PT as necessary.
 ;
 ; On entry:
-;    a1 = physical address
+;    a1 = physical address (32bit)
 ;    a2 = logical address
  [ LongDesc
 ;    a3 = high & low page attributes merged into one word
@@ -1963,7 +1977,7 @@ Init_MapIn ROUT
 ;    a3 = access permissions + C + B bits + size (all non-address bits, of appropriate type)
  ]
 ;    v1 -> current entry in PhysRamTable
-;    v2 = last used physical address
+;    v2 = last used physical address (page units)
 ;    v3 -> L1PT (or 0 if MMU on)
 ; On exit:
 ;    a1 = logical address
@@ -2022,7 +2036,7 @@ Init_MapInPage  ROUT
 ;    a1 = virtual address L2PT required for
 ;    a2 = number of bytes of virtual space
 ;    v1 -> current entry in PhysRamTable
-;    v2 = last used physical address
+;    v2 = last used physical address (page units)
 ;    v3 -> L1PT (or 0 if MMU on)
 ; On exit
 ;    a1-a4,ip corrupt
diff --git a/s/LongDesc b/s/LongDesc
index 15edc3a..cb1bcd7 100644
--- a/s/LongDesc
+++ b/s/LongDesc
@@ -321,13 +321,14 @@ UpdateL1PTForPageReplacement ROUT
 
         MACRO
         PageNumToL3PT $pnum,$pnum2,$ptable,$cache0,$cache1,$cache2,$pbits,$pbits2
-        MOV     $pnum2,$pbits2
+        MOV     $pnum2,$pbits2        ; Save $pbits2 so it can be used as cache func in/out
         SUB     $pbits2,$pnum,$cache0 ; no. pages into block
         CMP     $pbits2,$cache2
         BLHS    PageNumToL3PTCache_$ptable._$cache0._$cache1._$cache2._$pbits2
-        ADD     $pnum,$cache1,$pbits2,LSL #Log2PageSize ; physical address of page
-        ORR     $pnum,$pbits,$pnum ; munge in protection bits
+        ADD     $pnum,$cache1,$pbits2 ; physical address of page (in page units)
         MOV     $pbits2,$pnum2
+        ORR     $pnum2,$pnum2,$pnum,LSR #20 ; High attr + high addr
+        ORR     $pnum,$pbits,$pnum,LSL #12 ; Low attr + low addr
         MEND
 
         MACRO
diff --git a/s/MemInfo b/s/MemInfo
index 7b47cde..89a3326 100644
--- a/s/MemInfo
+++ b/s/MemInfo
@@ -169,6 +169,7 @@ MemoryConvertNoFIQCheck   ROUT
         BCC     %FT70
 
         LDMIA   r1!, {r3-r4,r8}         ; Get next three word entry (PN,LA,PA) and move on pointer.
+        ! 0, "LongDescTODO 4GB"
         MOV     r9, #0                  ; Top half of PA is zero
 
    [ AMB_LazyMapIn
@@ -183,6 +184,8 @@ MemoryConvertNoFIQCheck   ROUT
         BL      ppn_to_logical          ; Else get LA from PN (PA wanted (not given) & LA not given => PN given).
         BLCC    ppn_to_physical         ; And get PA from PN (more accurate than getting PA from LA - page may be mapped out)
 15
+        ! 0, "LongDescTODO 4GB"
+        CMPCC   r9, #1
         BCS     %FT80
         TST     r0, #logical,wanted
         STRNE   r4, [r1, #-8]           ; Store back LA if wanted.
@@ -485,12 +488,13 @@ physical_to_ppn ROUT
         LDR     r5, =ZeroPage+PhysRamTable
         MOV     r3, #0                  ; Start at page 0.
         MOV     r8, r8, LSR #12
+        ORR     r8, r8, r9, LSL #20
 10
         CMP     r7, r3                  ; Stop if we run out of pages
         BCC     meminfo_returncs_pullr8
 
         LDMIA   r5!, {r10,r11}          ; Get start address and size of next block.
-        SUB     r10, r8, r10, LSR #12   ; Determine if given address is in this block.
+        SUB     r10, r8, r10            ; Determine if given address is in this block.
         CMP     r10, r11, LSR #12
         ADDCS   r3, r3, r11, LSR #12    ; Move on to next block.
         BCS     %BT10
@@ -523,8 +527,9 @@ ppn_to_physical ROUT
         SUBHS   r3, r3, lr
         BHS     %BT10
 
-        ADD     r8, r8, r3, LSL #12
-        MOV     r9, #0
+        ADD     r8, r8, r3
+        MOV     r9, r8, LSR #20
+        MOV     r8, r8, LSL #12
         Pull    "r3,pc"
 20
         SEC
@@ -536,9 +541,8 @@ ppn_to_physical ROUT
 ;
 
 ; Shifts to determine number of bytes/words to allocate in table.
-BitShift        *       10
-ByteShift       *       BitShift + 3
-WordShift       *       ByteShift + 2
+ByteShift       *       1             ; 2^1 pages per byte
+WordShift       *       ByteShift + 2 ; 2^3 pages per word
 
 ; Bit patterns for different types of memory.
 NotPresent      *       &00000000
@@ -606,8 +610,8 @@ MemoryReadPhys  ROUT
         LDR     r1, [sp, #4]            ; Get table address back
         MOV     r3, r9, LSR #WordShift
         LDR     r3, [r1, r3, LSL #2]!   ; Get first word of block
-        MOV     r4, r9, LSR #BitShift
-        AND     r4, r4, #(1<<(WordShift-BitShift))-1 ; Bit offset of first page in the word
+        MOV     r4, r9, LSL #2          ; r9 is in page units; each page has 4 bits in the table (2 pages per byte)
+        AND     r4, r4, #31             ; Bit offset of first page in the word
         RSB     r4, r4, #32             ; number of bits left to process
         MOV     r3, r3, LSL r4
 
@@ -652,9 +656,9 @@ MemoryReadPhys  ROUT
         LDR     r0, =ZeroPage
         LDR     r0, [r0, #ROMPhysAddr]
         LDR     r1, [sp, #4]
-        ADD     r0, r1, r0, LSR #ByteShift
+        ADD     r0, r1, r0, LSR #ByteShift+12
         LDR     r1, =DRAM_Pattern :OR: NotAvailable
-        MOV     r2, #(OSROM_ImageSize*1024) :SHR: ByteShift
+        MOV     r2, #(OSROM_ImageSize :SHR: 2) :SHR: ByteShift
         BL      memset
 40
         CLRV
@@ -698,11 +702,10 @@ MemoryAmounts   ROUT
         LDR     r3, [r1, #VideoSizeFlags]
         TST     r3, #OSAddRAM_IsVRAM
         MOVNE   r3, r3, LSR #12         ; Extract size from flags when genuine VRAM
-        MOVNE   r3, r3, LSL #12
         MOVEQ   r3, #0
         LDR     r1, [r1, #RAMLIMIT]
         SUB     r1, r1, r3              ; DRAM = RAMLIMIT - VRAMSize
-        B       %FT97
+        B       %FT98
 20
         LDR     r1, =ZeroPage
         LDR     r1, [r1, #VideoSizeFlags]
@@ -740,6 +743,7 @@ MemoryAmounts   ROUT
         B       %FT97
 97
         MOV     r1, r1, LSR #12         ; Return as number of pages.
+98
         MOV     r2, #4*1024             ; Return page size.
         CLRV
         EXIT
@@ -802,31 +806,59 @@ MemoryIOSpace   ROUT
 ;
 ;       In:     r0 bits 0..7  = 12 (reason code 12)
 ;               r0 bit 8 = 1 if region must be DMAable
-;               r0 bits 9..31 = 0 (reserved flags)
+;               r0 bit 9 = 1 if r4-r7 provided
+;               r0 bits 10..31 = 0 (reserved flags)
 ;               r1 = size of physically contiguous RAM region required (bytes)
 ;               r2 = log2 of required alignment of base of region (eg. 12 = 4k, 20 = 1M)
+;               r4,r5 = lowest acceptable physical address (inclusive) (if bit 9 of r0 set)
+;               r6,r7 = highest acceptable physical address (inclusive) (if bit 9 of r0 set)
 ;
 ;       Out:    r3 = page number of first page of recommended region that could be
 ;                    grown as specific pages by dynamic area handler (only guaranteed
 ;                    if grow is next page claiming operation)
 ;        - or error if not possible (eg too big, pages unavailable)
 ;
+; Notes:
+; * If bit 9 of r0 is clear, the address range defaults to the lower 4GB of physical space
+; * The high address in r6,r7 bounds the end of the recommended region, not just its start
+;
 RecommendPage ROUT
-        Push    "r0-r2,r4-r11,lr"
+        Entry   "r0-r2,r4-r12"
         CMP     r2,#30
         BHI     RP_failed         ;refuse to look for alignments above 1G
         ANDS    r11,r0,#1:SHL:8   ;convert flag into something usable in the loop
         MOVNE   r11,#OSAddRAM_NoDMA
+;
+        TST     r0,#1:SHL:9       ;If no range specified, limit to lower 4GB
+        MOVEQ   r10,#0
+        MOVEQ   r12,#1:SHL:20
+        BEQ     %FT10
+        CMP     r5,#1:SHL:8
+        BHS     RP_failed         ; LPAE/long descriptor format limits us to 40 bit physical addresses (although technically PhysRamTable can store 44 bit addresses)
+        CMP     r7,#1:SHL:8       ; Clamp high address
+        MOVCS   r7,#&FF
+        MOVCS   r6,#-1
+        MOV     r10,r4,LSR #12    ; Low address in page units (round down first)
+        ORR     r10,r10,r5,LSL #20
+        MOVS    lr,r4,LSL #20     ; Was the low address part-way into a page?
+        ADDNE   r10,r10,#1        ; If so round up; done in page units so a carry out of r4 isn't lost
+        MOV     r12,r6,LSR #12    ; Round down high address
+        ORR     r12,r12,r7,LSL #20
+        ADD     r12,r12,#1        ; Make exclusive
+10
 ;
         ADD     r1,r1,#&1000
         SUB     r1,r1,#1
-        MOV     r1,r1,LSR #12
-        MOVS    r1,r1,LSL #12     ;size rounded up to whole no. of pages
+        MOV     r1,r1,LSR #12     ;size rounded up to whole no. of pages
 ;
-        CMP     r2,#12
-        MOVLO   r2,#12            ;log2 alignment must be at least 12 (4k pages)
+        SUBS    r2,r2,#12         ;log2 alignment, in terms of pages
+        MOVLT   r2,#0             ;must be at least zero
         MOV     r0,#1
-        MOV     r4,r0,LSL r2      ;required alignment-1
+        MOV     r4,r0,LSL r2      ;required alignment, page units
+;
+        SUB     r12,r12,r1
+        MOV     r12,r12,LSR r2
+        MOV     r12,r12,LSL r2    ; Last acceptable block start address
 ;
         LDR     r0,=ZeroPage+PhysRamTable
         MOV     r3,#0            ;page number, starts at 0
@@ -838,22 +870,40 @@ RecommendPage ROUT
 RP_nextchunk
         ADD     r3,r3,r8,LSR #12 ;page no. of first page of next chunk
         LDMIA   r0!,{r7,r8}      ;address,size of next physical chunk
+; R0 -> PhysRamTable
+; R1 = Required length in pages
+; R2 = Required log2 alignment-12
+; R3 = current phys page no.
+; R4 = Required alignment, page units
+; R5 -> CAM
+; R7,R8 = Current PhysRamTable entry (address in page units, size in bytes plus flag bits)
+; R10 = Low address limit (page units, rounded up)
+; R11 = Flags
+; R12 = Last acceptable block start address (page units)
+; R6,R9 = spare
         CMP     r8,#0
         BEQ     RP_failed
         TST     r8,r11           ;ignore non-DMA regions if bit 8 of R0 was set
         BNE     RP_nextchunk
 ;
         MOV     r8,r8,LSR #12
-        ADD     r6,r7,r4
+        CMP     r7,r10
+        ADDLO   r6,r10,r4
+        ADDHS   r6,r7,r4
         MOV     r8,r8,LSL #12
         SUB     r6,r6,#1         ;round up
         MOV     r6,r6,LSR r2
-        MOV     r6,r6,LSL r2
+        MOV     r6,r6,LSL r2     ;address of first page of acceptable alignment
+        SUBS    lr,r12,r6        ;lr = pages between here and the last acceptable start
+        BLO     RP_nextchunk     ;exceeded upper address limit (equal is still a valid start)
         SUB     r6,r6,r7         ;adjustment to first address of acceptable alignment
-        CMP     r6,r8
+        CMP     r6,r8,LSR #12
         BHS     RP_nextchunk     ;negligible chunk
-        ADD     r7,r3,r6,LSR #12 ;first page number of acceptable alignment
-        SUB     r9,r8,r6         ;remaining size of chunk
+        ADD     r7,r3,r6         ;first page number of acceptable alignment
+        RSB     r9,r6,r8,LSR #12 ;remaining size of chunk
+        ADD     lr,lr,r1         ;pages from here to the end of a block at the last acceptable start
+        CMP     r9,lr
+        MOVHI   r9,lr            ;clamp effective chunk length to the upper address limit (never lengthen it)
 ;
 ;find first available page
 RP_nextpage
@@ -864,36 +914,48 @@ RP_nextpage
         TST     r6,#PageFlags_Unavailable :OR: PageFlags_Required
         TSTEQ   r6,#PageFlags_Reserved
         BEQ     RP_checkotherpages
-RP_nextpagecontinue
         CMP     r9,r4
         BLS     RP_nextchunk
-        ADD     r7,r7,r4,LSR #12   ;next page of suitable alignment
+        ADD     r7,r7,r4           ;next page of suitable alignment
         SUB     r9,r9,r4
         B       RP_nextpage
 ;
+RP_nextpagecontinue
+        ; r7 = start page, r6 = page that failed
+        ; No point checking any of r7...r6 again, so skip ahead past r6
+        SUB     r6,r6,r7           ;number of pages to skip (minus 1)
+        ADD     r6,r6,r4           ;add the alignment (r4 = 1 << r2) so truncating below rounds up
+        MOV     r6,r6,LSR r2
+        MOV     r6,r6,LSL r2       ;number to skip, rounded up by alignment
+        CMP     r9,r6              ;would the skip use up all that is left of this chunk?
+        BLS     RP_nextchunk       ;yes - give up on this chunk
+        ADD     r7,r7,r6           ;next page of suitable alignment
+        SUB     r9,r9,r6           ;that much less of the chunk remains
+        B       RP_nextpage
+;
 RP_checkotherpages
-        ADD     r10,r7,r1,LSR #12
-        SUB     r10,r10,#1         ;last page required
+        ADD     r6,r7,r1
+        SUB     r6,r6,#1          ;last page required
 RP_checkotherpagesloop
-        LDR     r6,[r5,r10,LSL #CAM_EntrySizeLog2] ;page flags from CAM
-        TST     r6,#PageFlags_Unavailable :OR: PageFlags_Required
-        TSTEQ   r6,#PageFlags_Reserved
+        LDR     lr,[r5,r6,LSL #CAM_EntrySizeLog2] ;page flags from CAM
+        TST     lr,#PageFlags_Unavailable :OR: PageFlags_Required
+        TSTEQ   lr,#PageFlags_Reserved
         BNE     RP_nextpagecontinue
-        SUB     r10,r10,#1
-        CMP     r10,r7
+        SUB     r6,r6,#1
+        CMP     r6,r7
         BHI     RP_checkotherpagesloop
 ;
 ;success!
 ;
         MOV     r3,r7
-        Pull    "r0-r2,r4-r11,pc"
+        Exit
 
 RP_failed
         MOV     r3,#0
         ADR     r0,ErrorBlock_NoMemChunkAvailable
         SETV
-        STR     r0,[sp]
-        Pull    "r0-r2,r4-r11,pc"
+        FRAMSTR r0
+        Exit
 
         MakeErrorBlock NoMemChunkAvailable
 
@@ -1719,6 +1781,7 @@ DMAPrep_Translate
         B       %FT30
 20
         MOV     r8, r4
+        ! 0, "LongDescTODO 4GB"
         MOV     r9, #0
         BL      physical_to_ppn         ; r7, r8, r9 -> r3
         BCS     %BT95
diff --git a/s/Middle b/s/Middle
index b638f91..595f7bd 100644
--- a/s/Middle
+++ b/s/Middle
@@ -224,6 +224,9 @@ SSTENV  Push    "R0, R1, lr"
 
         LDR     R12, =ZeroPage
         LDR     R2, [R12, #RAMLIMIT]    ; this is read-only
+        CMP     R2, #DynArea_PMP_BigPageCount ; presumably the page count above which a byte count no longer fits - TODO confirm
+        MOVLO   R2, R2, LSL #12         ; the value read from RAMLIMIT is shifted from 4KB pages to bytes
+        LDRHS   R2, =DynArea_PMP_BigByteCount ; more RAM than any Brazil could hope for
         MOV     R3, #0                  ; never any Brazil-type buffering
                                         ; m2 tools will complain if there is!
         Pull    "R0, R1, lr"
@@ -1590,6 +1593,7 @@ osri6_table
     DCD  NVECTORS                                     ;86
     DCD  1                                            ;87 CAM format: 0 = 8 bytes/entry, 1 = 16 bytes/entry
     DCD  ABTSTK                                       ;88
+    DCD  1                                            ;89 PhysRamTable format: 0 = addresses are in byte units, 1 = addresses are in 4KB units
 osri6_maxvalue * (.-4-osri6_table) :SHR: 2
 
 
diff --git a/s/NewReset b/s/NewReset
index 46b41b6..25dd484 100644
--- a/s/NewReset
+++ b/s/NewReset
@@ -72,7 +72,7 @@ MassageScreenSize ROUT
         LDR     r0, =ZeroPage
       ]
         LDR     r0, [r0, #RAMLIMIT]
-        CMP     r0, #512*1024
+        CMP     r0, #(512*1024):SHR:12 ; 512K expressed in 4KB pages, to match the value loaded from RAMLIMIT
         MOVEQ   r0, #80*1024
         MOVNE   r0, #160*1024
 CmosScreenWillDo
@@ -731,9 +731,9 @@ init_other_modules
 
         LDR     R0, =ZeroPage
         LDR     R0, [R0, #RAMLIMIT]
-        MLA     R0, R1, R2, R0          ; convert pages to bytes and add in
+        ADD     R0, R0, R1              ; add in R1 as a page count (no multiply by page size); shifted to megabytes below
 
-        MOV     R0, R0, LSR #20         ; /(1024*1024) down to megabytes
+        MOV     R0, R0, LSR #20-Log2PageSize ; down to megabytes
         LDR     R1, =GeneralMOSBuffer
         MOV     R2, #?GeneralMOSBuffer
         SWI     XOS_ConvertInteger4
diff --git a/s/ShortDesc b/s/ShortDesc
index bcfaa04..31e1f3f 100644
--- a/s/ShortDesc
+++ b/s/ShortDesc
@@ -359,6 +359,7 @@ UpdateL1PTForPageReplacement ROUT
         LDR     $ptable,=ZeroPage+PhysRamTable
         MOV     $cache0,#0
         LDMIA   $ptable,{$cache1,$cache2}
+        MOV     $cache1,$cache1,LSL #12
         MOV     $cache2,$cache2,LSR #12
         MEND
 
@@ -373,6 +374,7 @@ PageNumToL2PTCache_r4_r5_r6_r7_r12 ROUT
         SUBHS   r12,r12,r7
         ADDHS   r5,r5,r7
         BHS     %BT10
+        MOV     r6,r6,LSL #12
         EXIT    ; r5-r7 = cache entry, r12 = offset into entry
 
 ; ----------------------------------------------------------------------------------
diff --git a/s/VMSAv6Long b/s/VMSAv6Long
index 4df8cee..673abad 100644
--- a/s/VMSAv6Long
+++ b/s/VMSAv6Long
@@ -86,8 +86,9 @@ BangCamUpdate ROUT
         BCS     %BT10                           ; if more than that, go onto next bank
 
         ADD     r6, r6, r4, LSR #12             ; put back the ones which were too many
-        ADD     r0, r0, r6, LSL #12             ; move on address by the number of pages left
+        ADD     r0, r0, r6                      ; move on address by the number of pages left
         LDR     r6, [sp]                        ; reload old logical address
+        MOV     r0, r0, ROR #20                 ; High address bits packed into low, ready for Get4PTE
 
 ; now we have r6 = old logical address, r2 = physical page number, r0 = physical address
 
diff --git a/s/VMSAv6Short b/s/VMSAv6Short
index ed5b766..3d0d24b 100644
--- a/s/VMSAv6Short
+++ b/s/VMSAv6Short
@@ -90,8 +90,9 @@ BangCamUpdate ROUT
         BCS     %BT10                           ; if more than that, go onto next bank
 
         ADD     r6, r6, r4, LSR #12             ; put back the ones which were too many
-        ADD     r0, r0, r6, LSL #12             ; move on address by the number of pages left
+        ADD     r0, r0, r6                      ; move on address by the number of pages left
         LDR     r6, [sp]                        ; reload old logical address
+        MOV     r0, r0, LSL #12                 ; convert units from pages to bytes
 
 ; now we have r6 = old logical address, r2 = physical page number, r0 = physical address
 
diff --git a/s/vdu/vdudriver b/s/vdu/vdudriver
index 4f4196c..42cbb71 100644
--- a/s/vdu/vdudriver
+++ b/s/vdu/vdudriver
@@ -103,6 +103,8 @@ VduInit ROUT
         Push    R14
         LDR     R0, =ZeroPage
         LDR     R14, [R0, #VideoPhysAddr]
+        ! 0, "LongDescTODO 4GB"
+        MOV     R14, R14, LSL #12
         ASSERT (ZeroPage :AND: 255) = 0
         STRB    R0, [R0, #OsbyteVars + :INDEX: VDUqueueItems] ;purge queue
         STRB    R0, [WsPtr, #ScreenBlankFlag]   ; not blanked
@@ -299,6 +301,8 @@ InitialiseMode ROUT
         ; Screen DA is in use
         LDR     r0, =ZeroPage
         LDR     r0, [r0, #VideoPhysAddr]
+        ! 0, "LongDescTODO 4GB"
+        MOV     r0, r0, LSL #12
         STR     r0, [WsPtr, #TrueVideoPhysAddr] ; Point TrueVideoPhysAddr at the base of screen DA
         MOV     r0, #2
         SWI     XOS_ReadDynamicArea
@@ -839,6 +843,8 @@ ModeChangeSub ROUT
 581
         LDR     r0, =ZeroPage
         LDR     r0, [r0, #VideoPhysAddr]
+        ! 0, "LongDescTODO 4GB"
+        MOV     r0, r0, LSL #12
         STR     r0, [WsPtr, #TrueVideoPhysAddr] ; Point TrueVideoPhysAddr at the base of screen DA
         MOV     r0, #2
         SWI     XOS_ReadDynamicArea
-- 
GitLab