; Copyright 2000 Pace Micro Technology plc
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;     http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;
; vduhint
;
; VDU hardware interface
;
; part of Kernel/HAL division
;
; Author Mike Stephens (mjs)
; Date   Sep 2000

;;;mjsHAL
;
; vduhint is currently also a repository for VIDC20/IOMD specific HAL
; code, as stage 1 of the Kernel/HAL split for video code.
; Eventually, vduhint should either hold only whatever veneer code/defns
; are still needed, or disappear altogether.

; temp mjs versions of macros to call HAL routines are
; defined in s.Kernel
;
; mjsAddressHAL and mjsCallHAL
;
; the HAL calls will be ARM ATPCS compliant (HAL API defined in C)
; for calls from kernel assembler code this means:
;     r0-r3 (a1-a4) used for up to first 4 args
;     any further args on stack (SP would need adjusting back after call)
;     any scalar result of 'C' function in r0 (a1)
;     HAL w/s pointer must be passed in r9 (SB or static base in ATPCS)
;     So, register usage:
;
;     r0-r3,r12 corrupted (a1-a4,IP)
;     r4-r8,r10,r11 (v1-v5,v7,v8) preserved by all HAL calls
;     r9 is HAL w/s pointer (preserved)
;     r13,r14 are SP,LR as usual!
;
; The real routines make calls via symbols that are indices in a jump
; table - see s.HAL
;
; The mjs routines make calls via symbols that are routines still
; temporarily in kernel, but simulate the same register usage
;
;

  [ HAL

    ;;; nothing at all here so far in HAL case!

    ! 0, "vdu.vduhint compiles as empty in HAL case"

  |

    ;;; non-HAL case, pseudo HAL code

; -----------------------------------------------------------------------------------

;
; TEMP defn for workspace while code still in kernel
; layout of workspace block anchored at mjs_tempHALworkspace
;
; All fields are one word. The routines in this file address them relative
; to r9 (the HAL workspace pointer); mjs_tempHALworkspace_init addresses them
; relative to the pointer it loads from mjs_tempHALworkspace.
;

                            ^    0
mjs_thalwk_start            #    0
VIDC_Address                #    4   ; so code doesn't need a compile-time constant if in HAL
IOMD_Address                #    4   ; so code doesn't need a compile-time constant if in HAL
VIDC_NextPaletteIndex       #    4   ; last index used in setting normal palette entry
VIDC_SoftPalette0           #    4   ; soft copy of BBGGRRSS for normal palette entry 0
                                     ; (used to support ReadPaletteEntry)
VIDC_Interlace              #    4   ; flag from VIDCList3 SyncPol word
                                     ; (HAL_Video_SetMode stores 1 if SyncPol_Interlace is set, else 0)
VIDC_VertiDisplaySize       #    4   ; we keep this for vertical pointer clipping
VIDC_PointerVAdjust         #    4   ; vertical adjust for pointer
                                     ; (VertiDisplayStart value with the register-number byte removed)
VIDC_PointerHAdjust         #    4   ; horizontal adjust for pointer
                                     ; (HorizDisplayStart value, register byte removed, plus 1)
VIDC_ExternalSoftCopy       #    4   ; last word written to the External register by ProgramVIDC20Regs
VIDC_FSynSoftCopy           #    4   ; last frequency synthesizer word (stored with the test bits removed)
VIDC_ControlSoftCopy        #    4   ; last word written to the Control register by ProgramVIDC20Regs
VIDC_HSWRSoftCopy           #    4   ; horizontal sync width
VIDC_VSWRSoftCopy           #    4   ; vertical sync width
IOMD_VInitSoftCopy          #    4   ; not referenced in the part of the file shown here
IOMD_VEndSoftCopy           #    4   ; not referenced in the part of the file shown here
mjs_thalwk_end              #    0

; total size of the workspace block in bytes
mjs_thalwk_size * mjs_thalwk_end - mjs_thalwk_start

mjs_tempHALworkspace_init ROUT
;
; Set up the fields of the temporary HAL workspace that must hold known
; values before any of the pseudo-HAL video routines run.
;
; in:  the word at mjs_tempHALworkspace -> workspace block
; out: all registers preserved
;
; Only the hardware base addresses, the palette index marker, the soft copy
; of palette entry 0 and the interlace flag are written here; the other
; workspace fields are left untouched.
;
        Push    "r0, lr"

        LDR     lr, =mjs_tempHALworkspace
        LDR     r0, [lr]                            ; r0 -> workspace block

        MOV     lr, #0                              ; fields that start out as zero
        STR     lr, [r0, #VIDC_Interlace]
        STR     lr, [r0, #VIDC_SoftPalette0]

        MVN     lr, #0                              ; -1 = no valid palette index yet
        STR     lr, [r0, #VIDC_NextPaletteIndex]

        MOV     lr, #IOMD_Base                      ; hardware base addresses
        STR     lr, [r0, #IOMD_Address]
        MOV     lr, #VIDC
        STR     lr, [r0, #VIDC_Address]

        Pull    "r0, pc"


; -------------------------------------------------------------------------

;VIDC20 parameters size (for table of VIDC20 registers)
;
; The table built by HAL_Video_SetMode has one word per register number,
; indexed by (register number - &80); a word of -1 means "do not program
; this register".
;
VIDC20ParmsSize * (128*4) ; 128 words from 80xxxxxx to FFxxxxxx step 01000000

; --- VIDC20 Registers ---
;
; Each symbol is the register number placed in the top byte (bits 24-31) of
; the word written to VIDC20; the code in this file ORs the data value into
; the remaining bits. "symbol :SHR: 22" therefore gives (register number * 4),
; which is used as the byte offset into the register table.

VIDCPalAddress          *       &10000000       ; used in palette programming

LCDOffsetRegister0      *       &30000000
LCDOffsetRegister1      *       &31000000

VIDC20BorderColour      *       &40000000       ; added by mjs
VIDC20PointerColour     *       &50000000       ; added by mjs

HorizCycle              *       &80000000       ; HCR
HorizSyncWidth          *       &81000000       ; HSWR
HorizBorderStart        *       &82000000       ; HBSR
HorizDisplayStart       *       &83000000       ; HDSR
HorizDisplayEnd         *       &84000000       ; HDER
HorizBorderEnd          *       &85000000       ; HBER
HorizCursorStart        *       &86000000       ; used in pointer programming
HorizInterlace          *       &87000000       ; HIR

VertiCycle              *       &90000000
VertiSyncWidth          *       &91000000       ; Needed to set up FSIZE register in IOMD
VertiBorderStart        *       &92000000       ; First register affected by *TV
VertiDisplayStart       *       &93000000
VertiDisplayEnd         *       &94000000
VertiBorderEnd          *       &95000000
VertiCursorStart        *       &96000000       ; VCSR
VertiCursorEnd          *       &97000000       ; Last register affected by *TV (VCER)

VIDCExternal            *       &C0000000       ; bit definitions: Ext_xxx
VIDCFSyn                *       &D0000000       ; bit definitions: FSyn_xxx
VIDCControl             *       &E0000000       ; bit definitions: CR_xxx
VIDCDataControl         *       &F0000000       ; bit definitions: DCR_xxx

; Pseudo-registers used to return additional information to kernel
;
; These occupy the last PseudoRegisters slots of the register table and are
; never written to the hardware: ProgramVIDC20Regs stops before them.

PseudoRegisters         *       5               ; number of pseudo-register entries at end of table

; Written by ProcessControlListHClockSelect, read back by ComputeModuli
PseudoRegister_HClockSpeed *    &FB000000       ; used to indicate VIDC hclock speed (and use it)
; Not referenced in the part of the file shown here
PseudoRegister_ClockSpeed *     &FC000000       ; used to indicate real VIDC rclock speed
;no longer used:
;PseudoRegister_DPMSState *      &FD000000       ; used to return desired DPMS state
; [ ChrontelSupport
;PseudoRegister_PixelRate *      &FE000000       ; used to indicate the required pixel rate
; ]


; Bits in VCSR, VCER
; (none of these symbols is referenced in the part of the file shown here)
CursorSinglePanel       *       0 :SHL: 13
CursorTopPanel          *       1 :SHL: 13
CursorBottomPanel       *       1 :SHL: 14
CursorStraddle          *       3 :SHL: 13      ; = CursorTopPanel :OR: CursorBottomPanel


; Bits in external register
;
; HAL_Video_SetMode starts from Ext_DACsOn :OR: Ext_ERegExt, adds the sync
; inversion bits requested by the SyncPol word, and the control list handlers
; (ProcessControlListXXX) then modify individual fields.

Ext_HSYNCbits   *       3 :SHL: 16      ; mask for the two HSYNC control bits
Ext_InvertHSYNC *       1 :SHL: 16
Ext_CompHSYNC   *       2 :SHL: 16
Ext_InvertCompHSYNC *   3 :SHL: 16
Ext_VSYNCbits   *       3 :SHL: 18      ; mask for the two VSYNC control bits
Ext_InvertVSYNC *       1 :SHL: 18
Ext_CompVSYNC   *       2 :SHL: 18
Ext_InvertCompVSYNC *   3 :SHL: 18
Ext_HiResMono   *       1 :SHL: 14      ; set/cleared by control list item 5 (hi-res mode)
Ext_LCDGrey     *       1 :SHL: 13
Ext_DACsOn      *       1 :SHL: 12      ; set/cleared by control list item 6 (DAC control)
Ext_PedsOn      *       7 :SHL: 8       ; mask for the three pedestal enable bits
Ext_PedsShift   *       8               ; bit position of the pedestal enable field
Ext_ERegShift   *       4
Ext_ECKOn       *       1 :SHL: 2
Ext_ERegBits    *       3 :SHL: 0       ; mask for the ERegXXX field below
Ext_ERegRed     *       0 :SHL: 0
Ext_ERegGreen   *       1 :SHL: 0
Ext_ERegBlue    *       2 :SHL: 0
Ext_ERegExt     *       3 :SHL: 0       ; use this for lowest power

; Bits in Frequency Synthesizer Register
;
; ComputeModuli returns (V-1) at FSyn_VShift and (R-1) at FSyn_RShift.
; The Clear/Force bits are the "test bits" that ProgramVIDC20Regs sets
; temporarily while it reprograms the synthesizer.

FSyn_VShift     *       8
FSyn_RShift     *       0
FSyn_ClearV     *       1 :SHL: 15
FSyn_ForceLow   *       1 :SHL: 14
FSyn_ClearR     *       1 :SHL: 7
FSyn_ForceHigh  *       1 :SHL: 6

; First value ProgramVIDC20Regs writes when reprogramming the synthesizer:
; test bits on (except ForceHigh), R field = 63, V field = 0.
FSyn_ResetValue *       FSyn_ClearV :OR: FSyn_ClearR :OR: FSyn_ForceLow :OR: (63 :SHL: FSyn_RShift) :OR: (0 :SHL: FSyn_VShift)           ; value to get PLL working properly

; Bits in Control Register

CR_DualPanel    *       1 :SHL: 13
CR_Interlace    *       1 :SHL: 12      ; EORed into the control word by ProgramVIDC20Regs for interlaced modes
CR_FIFOLoadShift *      8               ; bit position of the 3-bit FIFO load value chosen by HAL_Video_SetMode
; Bits-per-pixel field, bit position 5. HAL_Video_SetMode builds it directly
; from the VIDCList3 pixel depth (0..5), substituting 6 for 5.
CR_LBPP0        *       0 :SHL: 5
CR_LBPP1        *       1 :SHL: 5
CR_LBPP2        *       2 :SHL: 5
CR_LBPP3        *       3 :SHL: 5
CR_LBPP4        *       4 :SHL: 5
CR_LBPP5        *       6 :SHL: 5 ; spot the gap!
CR_PixelDivShift *      2               ; bit position of the pixel divider field, stored as (D-1)
; Clock source select (bits 0-1)
CR_VCLK         *       0 :SHL: 0
CR_HCLK         *       1 :SHL: 0
CR_RCLK         *       2 :SHL: 0

; Bits in Data Control Register
;
; HAL_Video_SetMode builds the initial value (DCR_VRAMOff, DCR_Bus31_0,
; DCR_Sync, words-per-line, and DCR_HDis when the line is not a whole number
; of words); ProgramVIDC20Regs then replaces the bus bits according to the
; kernel's VRAMWidth and may halve the words-per-line field.

DCR_VRAMOff     *       0 :SHL: 18
DCR_VRAMDiv1    *       1 :SHL: 18
DCR_VRAMDiv2    *       2 :SHL: 18
DCR_VRAMDiv4    *       3 :SHL: 18
DCR_BusBits     *       3 :SHL: 16      ; mask for the DCR_BusXXX field below
DCR_Bus31_0     *       1 :SHL: 16
DCR_Bus63_32    *       2 :SHL: 16
DCR_Bus63_0     *       3 :SHL: 16
DCR_HDis        *       1 :SHL: 13      ; disables the HDis mechanism (see ProgramVIDC20Regs)
DCR_Sync        *       1 :SHL: 12
DCR_HDWRShift   *       0               ; bit position of the words-per-line (HDWR) field

; -------------------------------------------------------------------------

;
;  void HAL_Video_SetMode(const void *VIDCList3)
;
;  program VIDC20 registers from VIDCList3 specification
;
;  in: VIDClist -> video mode list (in VIDCList type 3 format)
;      (and sb (r9) -> HAL workspace)
;
;  out: r0-r3 may be corrupted; r4, r7, r8, r10, r11 are stacked and restored
;
;  Method:
;    1. build a 128-word table of VIDC20 register values on the stack
;       (indexed by register number - &80, -1 meaning "leave alone")
;    2. fill in the horizontal and vertical timing registers, the data
;       control register and the external register from the list
;    3. apply each entry of the list's control list (ProcessControlListItem)
;    4. work out the frequency synthesizer and control register values
;       (ComputeModuli, bits per pixel, FIFO load position)
;    5. write the table to the hardware (ProgramVIDC20Regs) and enable
;       video DMA in IOMD
;

HAL_Video_SetMode ROUT
        Push    "r4, r7,r8,r10,r11, lr"
        MOV     r3, r0                    ; r3 -> VIDCList3

        SUB     sp, sp, #VIDC20ParmsSize  ; create workspace for VIDC20 table on stack
        SUB     r11, sp, #(&80*4)         ; r11 indexes into table workspace (allowing for
                                          ; VIDC register numbers starting at &80)

        MOV     r2, #-1                   ; first clear all entries to -1 (means dont program reg)
        MOV     r4, #VIDC20ParmsSize
        MOV     r14, sp
10
        STR     r2, [r14], #4
        SUBS    r4, r4, #4
        BNE     %BT10

; Horizontal timings: r2 accumulates the running position along the line, and
; is adjusted at each step by the per-register offset noted in the comments.

        LDR     r2, [r3, #VIDCList3_HorizSyncWidth]
        BIC     r2, r2, #1                ; must be even
        SUB     r2, r2, #8                ; horiz parameters start off at n-8
        ORR     r14, r2, #HorizSyncWidth
        STR     r14, [r11, #HorizSyncWidth :SHR: 22]

        LDR     r4, [r3, #VIDCList3_HorizBackPorch]
        ADD     r2, r2, r4
        BIC     r2, r2, #1
        SUB     r2, r2, #4                ; HBSR is N-12
        ORR     r14, r2, #HorizBorderStart
        STR     r14, [r11, #HorizBorderStart :SHR: 22]

        LDR     r4, [r3, #VIDCList3_HorizLeftBorder]
        ADD     r2, r2, r4
        BIC     r2, r2, #1
        SUB     r2, r2, #6                ; HDSR is N-18
        ORR     r14, r2, #HorizDisplayStart
        STR     r14, [r11, #HorizDisplayStart :SHR: 22]

        LDR     r4, [r3, #VIDCList3_HorizDisplaySize]
        BIC     r4, r4, #1
        LDR     r7, [r3, #VIDCList3_PixelDepth]
        MOV     r10, r4, LSL r7           ; number of bits in one displayed raster (not needed later any more)

        ANDS    r8, r10, #31              ; if line length not multiple of 32
        MOVNE   r8, #DCR_HDis             ; then set HDis bit
        ORR     r8, r8, r10, LSR #5       ; OR in number of words per line

; Note - the DCR_Bus bits get overridden and the HDWR bits modified further down the line by the mode change code
; on the basis of how much VRAM we've got, and on whether we have a dual-panel LCD or not...

        ORR     r8, r8, #DCR_VRAMOff :OR: DCR_Bus31_0 :OR: DCR_Sync
        ORR     r8, r8, #VIDCDataControl
        STR     r8, [r11, #VIDCDataControl :SHR: 22]

        ADD     r2, r2, r4                ; HDER is also N-18
        ORR     r14, r2, #HorizDisplayEnd
        STR     r14, [r11, #HorizDisplayEnd :SHR: 22]

        LDR     r4, [r3, #VIDCList3_HorizRightBorder]
        ADD     r2, r2, r4
        ADD     r2, r2, #6                ; HBER is N-12
        BIC     r2, r2, #1
        ORR     r14, r2, #HorizBorderEnd
        STR     r14, [r11, #HorizBorderEnd :SHR: 22]

        LDR     r4, [r3, #VIDCList3_HorizFrontPorch]
        ADD     r2, r2, r4
        ADD     r2, r2, #4                ; HCR is N-8
        BIC     r2, r2, #3                ; must be mult of 4
        ORR     r14, r2, #HorizCycle
        STR     r14, [r11, #HorizCycle :SHR: 22]

        ADD     r2, r2, #8                ; HIR is N/2
        MOV     r2, r2, LSR #1
        ORR     r14, r2, #HorizInterlace
        STR     r14, [r11, #HorizInterlace :SHR: 22]

; Vertical timings: same running-total scheme, all registers are N-2.

        LDR     r2, [r3, #VIDCList3_VertiSyncWidth]
        SUB     r2, r2, #2                ; vertical registers are N-2
        ORR     r14, r2, #VertiSyncWidth
        STR     r14, [r11, #VertiSyncWidth :SHR: 22]

        LDR     r4, [r3, #VIDCList3_VertiBackPorch]
        ADD     r2, r2, r4
        ORR     r14, r2, #VertiBorderStart
        STR     r14, [r11, #VertiBorderStart :SHR: 22]

        LDR     r4, [r3, #VIDCList3_VertiTopBorder]
        ADD     r2, r2, r4
        ORR     r14, r2, #VertiDisplayStart
        STR     r14, [r11, #VertiDisplayStart :SHR: 22]

        LDR     r4, [r3, #VIDCList3_VertiDisplaySize]
        STR     r4, [r9, #VIDC_VertiDisplaySize]        ; save it for cursor clipping
        ADD     r2, r2, r4
        ORR     r14, r2, #VertiDisplayEnd
        STR     r14, [r11, #VertiDisplayEnd :SHR: 22]

        LDR     r4, [r3, #VIDCList3_VertiBottomBorder]
        ADD     r2, r2, r4
        ORR     r14, r2, #VertiBorderEnd
        STR     r14, [r11, #VertiBorderEnd :SHR: 22]

        LDR     r4, [r3, #VIDCList3_VertiFrontPorch]
        ADD     r2, r2, r4
        ORR     r14, r2, #VertiCycle
        STR     r14, [r11, #VertiCycle :SHR: 22]

; Sync polarity word: record the interlace flag in workspace and build the
; default external register value.

        LDR     r4, [r3, #VIDCList3_SyncPol]
        TST     r4, #SyncPol_Interlace
        MOVEQ   r14, #0
        MOVNE   r14, #1
        STR     r14, [r9, #VIDC_Interlace]
        MOV     r14, #VIDCExternal
        TST     r4, #SyncPol_InvertHSync
        ORRNE   r14, r14, #Ext_InvertHSYNC
        TST     r4, #SyncPol_InvertVSync
        ORRNE   r14, r14, #Ext_InvertVSYNC
        ORR     r14, r14, #Ext_DACsOn
        ORR     r14, r14, #Ext_ERegExt
        STR     r14, [r11, #VIDCExternal :SHR: 22]

        MOV     r14, #VIDCControl         ; control register starts with no data bits set
        STR     r14, [r11, #VIDCControl :SHR: 22]

        Push    "r3"                      ; keep list pointer while r3 walks the control list

; Now go through VIDC control parameters list (not all indices can be handled yet)

        ADD     r3, r3, #VIDCList3_ControlList-8  ; point at 1st entry -8
50
        LDR     r4, [r3, #8]!                   ; load next index
        CMP     r4, #-1                         ; if -1 then end of list
        BEQ     %FT60                           ; so skip

        CMP     r4, #0                          ; if non-zero (CS if zero)
        CMPNE   r4, #ControlList_InvalidReason  ; and if known reason
        LDRCC   r2, [r3, #4]                    ; then load value
        BLCC    ProcessControlListItem          ; and process this item
        B       %BT50                           ; go onto next item in list

; Thresholds (in thousands of bits/sec, hence the :SHL: 3 on kbytes/sec figures)
; used by the binary chop below to choose the FIFO load position.
; put a minimum of 4, cos 800 x 600 x 1bpp don't work otherwise
FIFOLoadTable
        &       0                               ; dummy entry (not used)
        &       0                               ; never use 0
        &       0                               ; use 1 up to (and including) here
        &       0                               ; use 2 up to (and including) here
        &       0                               ; use 3 up to (and including) here
        &       60000 :SHL: 3                   ; use 4 up to (and including) here
        &       75000 :SHL: 3                   ; use 5 up to (and including) here
        &       90000 :SHL: 3                   ; use 6 up to (and including) here
                                                ; else use 7

60
        Pull    "r3"

; ProcessControlListItem is allowed to corrupt r7 (and does so for the
; hi-res, DAC control, pedestal and external register items), so the pixel
; depth must be fetched again before it is used for the bandwidth shift and
; for the bits-per-pixel field of the control register.
        LDR     r7, [r3, #VIDCList3_PixelDepth] ; r7 = pixel depth again

        LDR     r0, [r3, #VIDCList3_PixelRate]  ; get pixel rate
        MOV     r10, r0, LSL r7                 ; peak mem b/w (x 1E3 bits/sec) - save for FIFO calculation

  ! 0, "mjsHAL - using kernel variable IOSystemType"

 [ MorrisSupport
        MOV     R1, #0
        LDRB    R1, [R1, #IOSystemType]
        TST     R1, #IOST_7500
        LDREQ   R1, =24000              ;RISC PC clocks VIDC20 at 24MHz
        MOVNE   R1, #32000              ;Morris clocks VIDC20L at 32Mhz
 |
        LDR     r1, =rclk       ; eventually will need to replace this if specified in control list
 ]

        BL      ComputeModuli   ; out: r0 = FSync bits, r1 = CR bits

        ORR     r0, r0, #VIDCFSyn
        STR     r0, [r11, #VIDCFSyn :SHR: 22]

        LDR     r0, [r11, #VIDCControl :SHR: 22]
        ORR     r0, r0, r1

        TEQ     r7, #5          ; if 32 bpp, then stick in 6 not 5
        MOVEQ   r7, #6
        ORR     r0, r0, r7, LSL #5

; now work out FIFO load position - r10 is b/w in thousands of bits/sec
; (same units as the entries of FIFOLoadTable)

; do it by means of a binary chop on 3 bits

        ADR     r4, FIFOLoadTable
        LDR     r2, [r4, #4*4]                  ; load 0-3/4-7 split
        CMP     r10, r2
        MOVLS   r7, #0                          ; if <=, then bottom half
        MOVHI   r7, #4                          ; else top half
        ADDHI   r4, r4, #4*4                    ; and advance table pointer

        LDR     r2, [r4, #2*4]
        CMP     r10, r2
        ORRHI   r7, r7, #2
        ADDHI   r4, r4, #2*4

        LDR     r2, [r4, #1*4]
        CMP     r10, r2
        ORRHI   r7, r7, #1

        ORR     r0, r0, r7, LSL #CR_FIFOLoadShift
        STR     r0, [r11, #VIDCControl :SHR: 22]

        ADD     R0, r11, #(&80*4)           ; R0 -> VIDC20 table (remove offset for reg indices starting at &80)
        BL      ProgramVIDC20Regs

        ; now make sure video DMA enabled
        ;
        LDR     r7, [r9, #IOMD_Address]
        LDRB    r8, [r7, #IOMD_VIDCR]
        AND     r8, r8, #&7F                 ; knock out IOMD_VIDCR_Dup
        ORR     r8, r8, #IOMD_VIDCR_Enable   ; enable video DMA
        STRB    r8, [r7, #IOMD_VIDCR]

        ADD     sp, sp, #VIDC20ParmsSize   ; drop workspace for table
        Pull    "r4, r7,r8,r10,r11, pc"

; -------------------------------------------------------------------------

; ProgramVIDC20Regs - program registers from table
;
;  entry: r0 -> VIDC table to program into registers
;         (and r9 -> HAL workspace)
;
;  allowed to corrupt any of r0-r4, r7,r8,r10,r11 (only called from HAL_Video_SetMode)
;
;  The table has one word per register number &80 upwards; entries holding -1
;  are skipped and the final PseudoRegisters entries are never examined.
;  Every other entry is written to the VIDC20 address held in workspace, after
;  register-specific fix-ups, and soft copies of selected registers are kept
;  in workspace on the way. Finally the vertical cursor start/end registers
;  are programmed to zero.
;
;  Register use inside the loop:
;    r0 -> next table entry         r1 = bytes of table left to do
;    r2 =  word being programmed    r3 -> VIDC20
;    r4 =  CR_Interlace or 0        r6 = register-number byte of r2
;    r7 -> start of table           r8, r10, r14 = scratch

ProgramVIDC20Regs ROUT
        Push    "r6, lr"

        LDR     R4, [R9, #VIDC_Interlace]
        TST     R4, #1
        MOVNE   R4, #CR_Interlace       ; R4 = CR_Interlace if mode is interlaced, else stays 0

        MOV     R7, R0                  ; keep copy in R7 in case we go wrong
        LDR     R3, [R9, #VIDC_Address] ; R3 -> VIDC20 h/w
18                                      ; (this label is not referenced in the part of the file shown here)
        MOV     R1, #(128-PseudoRegisters)*4 ; number of bytes to do (don't program pseudo-registers!)
20
        LDR     R2, [R0], #4            ; Get data from table
        CMP     R2, #-1                 ; unprogrammed register ?
        BEQ     %FT80                   ; then skip

        AND     R6, R2, #&FF000000      ; R6 = register number, for the comparisons below

        TEQ     R6, #HorizDisplayStart
        STREQ   R2, [R9, #VIDC_PointerHAdjust]   ; save here for later calculation of adjust

        TEQ     R6, #VertiDisplayStart          ; test for display start
        BICEQ   R14, R2, #&FF000000             ; get rid of register bits
        STREQ   R14, [R9, #VIDC_PointerVAdjust] ; save for pointer programming

        TEQ     R6, #HorizSyncWidth             ; if h.sync width register
        STREQ   R2, [R9, #VIDC_HSWRSoftCopy]    ; then save for DPMS stuff
        TEQ     R6, #VertiSyncWidth             ; likewise v.sync width
        STREQ   R2, [R9, #VIDC_VSWRSoftCopy]

        TEQ     R6, #VIDCExternal       ; check for external register (which contains syncs)
        BNE     %FT50

   ! 0, "mjsHAL - currently assume vertical sync rather than find out (by HAL call to OS?)"
;;;
;;;mjsHAL old code that operated on NE if composite sync found from SWI OS_ReadSysInfo 1
;;;
;;;     BICNE   R2, R2, #(Ext_HSYNCbits :OR: Ext_VSYNCbits)     ; if composite sync then don't invert syncs
;;;     ORRNE   R2, R2, #Ext_InvertCompVSYNC :OR: Ext_InvertCompHSYNC ; and force both syncs to be composite (because of lack of
                                                                ; swap in A540 VIDC card)
        B       %FT75                   ; external register is currently written unmodified
50
        TEQ     R6, #VIDCFSyn
        BNE     %FT60

; Frequency synthesizer: three writes in all - the reset value, then the new
; value with the test bits forced on, then (at label 75) the new value with
; the test bits removed.

        LDR     R8, =FSyn_ResetValue    ; set test bits on, and r > v
        ORR     R8, R8, #VIDCFSyn
        STR     R8, [R3]

; we may need some delay in here...

        LDR     R8, =FSyn_ClearR :OR: FSyn_ClearV :OR: FSyn_ForceLow :OR: FSyn_ForceHigh
        ORR     R2, R2, R8
        BIC     R2, R2, #FSyn_ForceHigh ; force test bits on, except this one
        STR     R2, [R3]

; we may also need some delay in here...

        BIC     R2, R2, R8              ; remove test bits
        B       %FT75

60
        TEQ     r6, #VIDCDataControl
        BNE     %FT65

   ! 0, "mjsHAL - using kernel variable VRAMWidth"

        BIC     r2, r2, #DCR_BusBits    ; discard the bus setting chosen by HAL_Video_SetMode
        MOV     r14, #0
        LDRB    r14, [r14, #VRAMWidth]  ; byte read from kernel workspace at address VRAMWidth
        CMP     r14, #2                 ; if using 64-bit wide VRAM
        ORRCS   r2, r2, #DCR_Bus63_0    ; then data on all 64 bits
        ORRCC   r2, r2, #DCR_Bus31_0    ; else for 32-bit wide VRAM or DRAM-only,
                                        ; data is on low 32 bits
        BCC     %FT65

; dual-bank VRAM, so HDWR value needs to be halved

        MOV     r14, r2, LSL #(31-10)   ; get HDWR bits at top - NB allow bit 10 to be used here!
        BIC     r2, r2, r14, LSR #(31-10) ; knock off bits
        TST     r14, #1 :SHL: (31-10)   ; see if bottom bit would get knocked off
        ORRNE   r2, r2, #DCR_HDis       ; if so, then disable HDis mechanism (for eg mode 29)
        ORREQ   r2, r2, r14, LSR #(31-9) ; otherwise, put bits back one bit further down

65
        TEQ     R6, #VIDCControl        ; if control register
        BNE     %FT75

; programming control register, so EOR sync/interlace bits, save in soft copy
; then work out horizontal pointer adjust from HorizDisplayStart
; (saved in VIDC_PointerHAdjust) and bits-per-pixel in control register
; (as the code stands, R4 only ever holds CR_Interlace or 0, and the pointer
; adjust below is a fixed +1 that does not depend on bits-per-pixel)

        EOR     R2, R2, R4                      ; then EOR sync/interlace bits
        STR     R2, [R9, #VIDC_ControlSoftCopy] ; and save in copy

; now compute FSIZE properly
; value written = VertiCycle - (VertiDisplayEnd - VertiDisplayStart) + 1,
; using the register values as they appear in the table at R7
        LDR     R10, [R7, #(&94-&80)*4]  ; get vertidisplayend
        BIC     R10, R10, #&FF000000
        LDR     R8, [R7, #(&93-&80)*4]   ; get vertidisplaystart
        BIC     R8, R8, #&FF000000
        SUB     R10, R10, R8             ; verti displayed
        LDR     R8, [R7, #(&90-&80)*4]   ; verti total
        BIC     R8, R8, #&FF000000
        SUB     R10, R8, R10
        ADD     R10, R10, #1             ; vidc parms are n-2, we want n-1
        LDR     R8,  [R9, #IOMD_Address]
        STRB    R10, [R8, #IOMD_FSIZE]

        LDR     R14, [R9, #VIDC_PointerHAdjust] ; R14 = horiz display start (-18)
        BIC     R14, R14, #&FF000000
        ADD     R14, R14, #(18-17)              ; horiz cursor start is programmed with n-17
        STR     R14, [R9, #VIDC_PointerHAdjust]
75
; common exit for every programmed register: keep soft copies of the final
; values of the external, synthesizer and control registers, then write R2
        TEQ     R6, #VIDCExternal
        STREQ   R2, [R9, #VIDC_ExternalSoftCopy]
        TEQ     R6, #VIDCFSyn
        STREQ   R2, [R9, #VIDC_FSynSoftCopy]
        TEQ     R6, #VIDCControl
        STREQ   R2, [R9, #VIDC_ControlSoftCopy]

        STR     R2, [R3]                ; stuff it into VIDC20
80
        SUBS    R1, R1, #4
        BNE     %BT20

        MOV     R2, #VertiCursorStart + 0       ; program cursor start and end
        STR     R2, [R3]
        MOV     R2, #VertiCursorEnd + 0         ; to zero
        STR     R2, [R3]

        Pull    "r6, pc"

; -------------------------------------------------------------------------

;
;       ProcessControlListItem
;
;       Apply one (index, value) pair from a VIDCList3 control list to the
;       register table being built by HAL_Video_SetMode.
;
; in:   r2  = value for item
;       r4  = index for item (guaranteed in range)
;       r11 -> VIDC register array
;
; out:  r0-r2, r4, r7, r8, r10 may be corrupted
;       r3, r9, r11 must be preserved

ProcessControlListItem Entry
        LDR     pc, [pc, r4, LSL #2]            ; pc reads as this instruction + 8, ie the table below
        NOP                                     ; filler so that the table sits at that address
        &       ProcessControlListNOP                   ; 0 - NOP
        &       ProcessControlListLCDMode               ; 1 - LCD mode
        &       ProcessControlListLCDDualPanelMode      ; 2 - LCD dual-panel mode
        &       ProcessControlListLCDOffsetRegister0    ; 3 - LCD offset register 0
        &       ProcessControlListLCDOffsetRegister1    ; 4 - LCD offset register 1
        &       ProcessControlListHiResMode             ; 5 - Hi-res mode
        &       ProcessControlListDACControl            ; 6 - DAC control
        &       ProcessControlListRGBPedestals          ; 7 - RGB pedestal enables
        &       ProcessControlListExternalRegister      ; 8 - External register
        &       ProcessControlListHClockSelect          ; 9 - HClk select/specify
        &       ProcessControlListNOP                   ; 10 - RClk frequency
        &       ProcessControlListDPMSState             ; 11 - DPMS state
        &       ProcessControlListNOP                   ; 12 - Interlaced mode

  ! 0, "mjsHAL - no LCD support (VIDCList3 control list stuff)"

; ---- items that replace a field of the External register table entry ----
;
; All four handlers finish in the common code at 15, which expects
;       r0 = register number (top byte), r2 = new field value, r7 = field mask

ProcessControlListRGBPedestals
        MOV     r7, #Ext_PedsOn                 ; field is the three pedestal enables
        MOV     r2, r2, LSL #Ext_PedsShift      ; move value up to the field position
        MOV     r0, #VIDCExternal
        B       %FT15

ProcessControlListExternalRegister
        MOV     r7, #&FF                        ; field is the bottom byte of the register
        MOV     r0, #VIDCExternal
        B       %FT15

ProcessControlListDACControl
        MOV     r1, #Ext_DACsOn
        B       %FT05

ProcessControlListHiResMode
        MOV     r1, #Ext_HiResMono              ; name is misleading: not necessarily mono
05
; single-bit item: r1 = the bit, any non-zero value in r2 means "set it"
        MOV     r0, #VIDCExternal
        MOV     r7, r1                          ; mask is just that bit
        TEQ     r2, #0
        MOVNE   r2, r1
15
        AND     r2, r2, r7                      ; confine new value to the field
        LDR     lr, [r11, r0, LSR #22]          ; fetch current table entry
        BIC     lr, lr, r7                      ; clear the field
        ORR     lr, lr, r2                      ; insert the new value
        STR     lr, [r11, r0, LSR #22]          ; put the entry back
        EXIT

; ---- HClk select: record the value in a pseudo-register for ComputeModuli ----

ProcessControlListHClockSelect
        MOV     r0, #PseudoRegister_HClockSpeed ; register number of the pseudo-register
        ORR     r2, r2, r0                      ; tag the value with it
        STR     r2, [r11, r0, LSR #22]          ; and store in its table slot
        EXIT

; ---- items that do nothing here ----
;
; LCD items:   ;;;mjsHAL we have no support
; DPMS state:  no longer used in HAL code (kernel keeps DPMSState)

ProcessControlListLCDMode
ProcessControlListLCDDualPanelMode
ProcessControlListLCDOffsetRegister0
ProcessControlListLCDOffsetRegister1
ProcessControlListDPMSState
ProcessControlListNOP
        EXIT

; -------------------------------------------------------------------------

;
;       ComputeModuli - Work out VCO moduli for a given frequency
;
; in:   r0  = desired frequency (kHz)
;       r1  = rclk frequency (kHz) (normally 24000)
;       r11 -> VIDC table
;
; out:  r0 = bits to put in bits 0..15 of Frequency Synthesizer Register
;       r1 = bits to put in bits 0..4 of Control Register
;       r2-r12 preserved (stacked on entry, restored by EXIT)
;
; If the HClockSpeed pseudo-register in the table has been set, HCLK is
; selected with a pixel divider of 1 or 2 and no search is done.
; Otherwise every combination of D = 1..8 and R = 1..16 (R = 1 only, if
; DontUseVCO is set) is tried; for each, V is the rounded value of
; (desired * R * D) / rclk. The combination with the smallest frequency error
; whose VCO frequency lies within VCO_Min..VCO_Max is used; if there is none,
; the smallest-error combination regardless of VCO range is used instead.
; R = 1 (which requires V = 1) means the reference clock is used directly.

rclk    *       24000           ; Reference clock into VIDC20 (in kHz)
VCO_Min *       55000           ; minimum VCO frequency (in kHz)
VCO_Max *      110000           ; maximum VCO frequency (in kHz)

fpshf   *       11              ; Shift value for fixed point arithmetic

; Local variables, addressed relative to sp (frame reserved by Entry below)

        ^       0, sp

BestDInOrOutOfRange     #       4
BestRInOrOutOfRange     #       4
BestVInOrOutOfRange     #       4
BestDInRange            #       4
BestRInRange            #       4
BestVInRange            #       4
BestRangeError          #       4
ComputeModuliStack      *       :INDEX: @

ComputeModuli Entry "r2-r12", ComputeModuliStack
        LDR     r2, [r11, #PseudoRegister_HClockSpeed:SHR:22]           ; are we using HCLK?
        CMP     r2, #-1
        BEQ     %FT05                                                   ; -1 => no, use VCLK/RCLK

        BIC     r1, r2, #&FF000000                                      ; r1 = HCLK frequency
        SUB     r1, r1, r1, LSR #2                                      ; r1 = HCLK * 3/4
        CMP     r0, r1
        MOVLO   r1, #CR_HCLK :OR: ((2-1) :SHL: CR_PixelDivShift)        ; if < 3/4 HCLK, use divide by 2
        MOVHS   r1, #CR_HCLK :OR: ((1-1) :SHL: CR_PixelDivShift)        ; else use divide by 1
        LDR     r0, =(63 :SHL: FSyn_RShift) :OR: (1 :SHL: FSyn_VShift)  ; minimum V, maximum R
        EXIT

; Use VCLK/RCLK
; (from here on r9 and r11 are used as working registers; the caller's values
; come back when EXIT unstacks r2-r12)
05
        MOV     r12, #-1                ; smallest error for values in or out of VCO range
        MOV     r11, #-1                ; smallest error for values in VCO range
        STR     r11, BestDInRange
        STR     r11, BestVInRange
        STR     r11, BestRInRange
        STR     r11, BestDInOrOutOfRange
        STR     r11, BestVInOrOutOfRange
        STR     r11, BestRInOrOutOfRange
        STR     r11, BestRangeError
        MOV     r5, r1                  ; r5 = rclk frequency, normally 24000 (32000 on Morris)
        LDR     r1, =VCO_Min            ; r1 = minimum VCO frequency (in kHz)
        LDR     r2, =VCO_Max            ; r2 = maximum VCO frequency (in kHz)
        MOV     r3, #1                  ; r3 = D
10
        MOV     r4, #1                  ; r4 = R
15
        MUL     r6, r0, r3              ; r6 = xD
        MUL     r7, r6, r4              ; r7 = xRD
        ADD     r7, r7, r5, LSR #1      ; r7 = xRD + vref/2
        DivRem  r8, r7, r5, r9          ; r8 = (xRD + vref/2) DIV vref = V value

        TEQ     r4, #1                  ; if R=1 then V must be 1, else it's no good
        BNE     %FT20
        TEQ     r8, #1
        BNE     %FT50
        BEQ     %FT25
20
        CMP     r8, #2                  ; if R<>1 then V must be in range 2..64
        RSBCSS  r7, r8, #64
        BCC     %FT50                   ; V out of range, so skip
25
        MUL     r7, r5, r8              ; r7 = V * vref
        MOV     r7, r7, LSL #fpshf      ; r7 = (V * vref) << fixedpointshift
        DivRem  r9, r7, r4, r14         ; r9 = ((V * vref) << fixedpointshift)/R = VCO frequency << fixedpointshift
        MOV     r6, r9                  ; r6 = VCO frequency << fixedpointshift, kept for the range tests
        DivRem  r7, r9, r3, r14         ; r7 = output frequency << fixedpointshift
        SUBS    r7, r7, r0, LSL #fpshf
        RSBCC   r7, r7, #0              ; r7 = absolute error << fixedpointshift

        TEQ     r4, #1                  ; if R=1 then no need to check VCO range
        BEQ     %FT27                   ; because VCO won't be used, so it's a 1st class citizen

        CMP     r6, r1, LSL #fpshf      ; test if VCO freq >= min
        RSBCSS  r14, r6, r2, LSL #fpshf ; and <= max
        BCC     %FT40                   ; not in range, so not a first class citizen
27
        CMP     r7, r11
        BHI     %FT40                   ; worse than the best case for in VCO range, so ignore
        BCC     %FT30                   ; is definitely better than the best case for in VCO range

        LDR     r14, BestRInRange       ; is equal best for in, so check R value
        CMP     r4, r14                 ; is newR < bestR
        BCS     %FT40                   ; is greater or equal R value (ie not higher comp. freq., so not best)
30
; new best in-range candidate: record it, then fall into 45 so that it is
; recorded as the best in-or-out candidate as well (with a range error of 0)
        MOV     r11, r7
        STR     r3, BestDInRange
        STR     r4, BestRInRange
        STR     r8, BestVInRange
        MOV     r14, #0
        B       %FT45

40
        RSBS    r14, r6, r1, LSL #fpshf ; r14 = min-this, if this<min
        SUBCC   r14, r6, r2, LSL #fpshf ; else r14 = this-max, ie r14 = how much this is outside range

        CMP     r7, r12
        BHI     %FT50                   ; worse than the best case for in or out of VCO range, so ignore
        BCC     %FT45                   ; is definitely better than the best case for in or out

        LDR     r9, BestRangeError      ; is equal best for in or out, so check error
        CMP     r14, r9
        BCS     %FT50                   ; not lower error, so skip
45
        MOV     r12, r7
        STR     r3, BestDInOrOutOfRange
        STR     r4, BestRInOrOutOfRange
        STR     r8, BestVInOrOutOfRange
        STR     r14, BestRangeError
50
  [ :LNOT: DontUseVCO                   ; If we don't use the VCO, R has to be 1
        ADD     r4, r4, #1
        CMP     r4, #16                 ; R goes from 2 to 16 (was 2 to 64)
        BLS     %BT15
  ]

        ADD     r3, r3, #1
        CMP     r3, #8                  ; D goes from 1 to 8
        BLS     %BT10

; search finished: take the in-range result if there was one (D <> -1),
; otherwise the in-or-out-of-range result. Both triples are laid out D, R, V,
; so the same offsets from r2 work for either.
        ADR     r2, BestDInRange
        LDR     r3, [r2]
        CMP     r3, #-1
        ADDEQ   r2, r2, #BestDInOrOutOfRange - BestDInRange
        LDREQ   r3, [r2]                ; r3 = Best D
        LDR     r4, [r2, #BestRInRange - BestDInRange]  ; r4 = Best R
        LDR     r5, [r2, #BestVInRange - BestDInRange]  ; r5 = Best V

        SUBS    r4, r4, #1              ; values in FSyn are n-1
                                        ; (EQ here means R was 1; nothing below alters the
                                        ; flags, so the ORREQ at the end still tests this)
  [ VCOstartfix
        ;do *not* do the very slow trick - this will stall the VCO and it may not restart
        ;properly later (we don't give a fig for power consumption)
        MOVEQ   r4, #3
        MOVEQ   r5, #8                  ; after sub below, (7+1)/(3+1) so VCO runs at twice ref clock
  |
        MOVEQ   r4, #63                 ; if R=V=1 then use max R
        MOVEQ   r5, #2                  ; and min V to make VCO go really slow
  ]

        SUB     r5, r5, #1              ; for both v and r
        ASSERT  FSyn_RShift = 0
        ORR     r0, r4, r5, LSL #FSyn_VShift

        SUB     r3, r3, #1              ; D is also stored as n-1
        MOV     r1, r3, LSL #CR_PixelDivShift
        ASSERT  CR_VCLK = 0
        ORREQ   r1, r1, #CR_RCLK        ; if using VCO then set for VCLK, else RCLK

        EXIT

; -------------------------------------------------------------------------

;
; void HAL_Video_WritePaletteEntry(uint type, uint pcolour, uint index)
;
; write palette entry to video controller
;
;  type     = 0 for normal palette entry
;             1 for border colour
;             2 for pointer colour
;          >= 3 reserved
;  pcolour  = palette entry colour in BBGGRRSS format (Blue,Green,Red,Supremacy)
;  index    = index of entry  (0..255 for normal, 0 for border, 0..3 for pointer)
;             note that RISC OS only uses 1..3 for pointer (0 is assumed to be transparent)
;
;   r9 is workspace pointer, may corrupt r0..r3, r12
;
HAL_Video_WritePaletteEntry ROUT
;
; In:   r0 = type (0 normal, 1 border, 2 pointer)
;       r1 = pcolour in BBGGRRSS format
;       r2 = index
;       r9 = HAL workspace pointer
; Out:  r1, r2, r12 and flags may be corrupted; r0 is corrupted on the
;       type 0 path only; r3 is not touched
;
; An index that is out of range for the given type is silently ignored.
; Types above 2 are not rejected: they take the pointer colour path.
;
; The colour is first converted to the 0SBBGGRR layout written to VIDC;
; a register selector is then ORed into the top bits of the word where needed.
;
        AND     r12, r1, #&F0               ; 000000S0 (4 MSbits of supremacy)
        MOV     r1, r1, LSR #8              ; 00BBGGRR
        ORR     r1, r1, r12, LSL #20        ; 0SBBGGRR

        LDR     r12, [r9, #VIDC_Address]

        CMP     r0, #1
        BLO     HV_WritePaletteEntry_type0
        BEQ     HV_WritePalettEntry_type1   ; (label spelling kept as-is, it may be referenced elsewhere)
;       else fall through to WritePaletteEntry_type2
;
HV_WritePaletteEntry_type2
        CMP     r2, #3                      ; index must be in range 0..3
        MOVHI   pc, lr
        SUBS    r2, r2, #1                  ; reduce 1..3 to 0..2
        MOVMI   pc, lr                      ; pointer colour 0 is always transparent on VIDC20
        ORR     r1, r1,#VIDC20PointerColour ; munge in base address of register
        ADD     r1, r1, r2, LSL #28         ; add in index (0..2), in bits 28,29 of register
        STR     r1, [r12]
        MOV     pc, lr
;
HV_WritePaletteEntry_type0
        ;Note: we only need to hit VIDCPalAddress if the index is not a direct increment
        ;of the last programmed index
        ;but, for insurance against permanent misalignment if any rogue accesses avoid this
        ;interface, we force an update for index 0
        ;
        CMP     r2, #255                  ; index must be in range 0..255
        MOVHI   pc, lr

        CMP     r2, #0                    ; EQ => writing entry 0 (flags live until the TEQ below)

        ; Soft copy of entry 0 is kept in BBGGRRSS layout, because that is what
        ; HAL_Video_WritePaletteEntries stores and what HAL_Video_ReadPaletteEntry
        ; masks with &F0 to obtain the supremacy bits. r1 is already in 0SBBGGRR
        ; layout here, so rebuild BBGGRRS0 in r0 (the low 4 bits of S were dropped
        ; by the conversion above and are not used by the read routine).
        MOVEQ   r0, r1, LSR #20           ; 00000SBx (x = top nibble of blue)
        ANDEQ   r0, r0, #&F0              ; 000000S0
        ORREQ   r0, r0, r1, LSL #8        ; BBGGRRS0
        STREQ   r0, [r9, #VIDC_SoftPalette0]

        LDRNE   r0, [r9, #VIDC_NextPaletteIndex]  ;increment from last index programmed
        MOVEQ   r0, #-1                           ;forced invalid for index 0

        TEQ     r0, r2
        ORRNE   r0, r2, #VIDCPalAddress
        STRNE   r0, [r12]                 ; only update PalAddress if necessary
        STR     r1, [r12]                 ; update palette entry
        ADD     r2, r2, #1
        AND     r2, r2, #&FF              ; next index wraps at 256
        STR     r2, [r9, #VIDC_NextPaletteIndex]
        MOV     pc, lr
;
;
HV_WritePalettEntry_type1
        CMP     r2, #0                     ; index must be 0
        MOVNE   pc, lr
        ORR     r1, r1,#VIDC20BorderColour ; munge in base address of register
        STR     r1, [r12]
        MOV     pc, lr

; -------------------------------------------------------------------------

;
; void HAL_Video_WritePaletteEntries(uint type, const uint *pcolours, uint index, uint Nentries)
;
; write block of palette entries to video controller
;
;  type     = 0 for normal palette entry
;             1 for border colour
;             2 for pointer colour
;          >= 3 reserved
;  pcolours = pointer to block of palette entry colours in BBGGRRSS format (Blue,Green,Red,Supremacy)
;  index    = start index in palette (for first entry in block)
;             note that RISC OS only uses 1..3 for pointer (0 is assumed to be transparent)
;  Nentries = number of entries in block (must be >= 1)
;
;   r9 is workspace pointer, may corrupt r0..r3, r12
;
HAL_Video_WritePaletteEntries ROUT
;
; In:   r0 = type (0 normal, 1 border, 2 pointer)
;       r1 -> block of pcolours in BBGGRRSS format
;       r2 = index of first entry
;       r3 = Nentries
;       r9 = HAL workspace pointer
; Out:  r0-r3, r12 and flags may be corrupted; r4 preserved
;
; The call is ignored unless 1 <= Nentries <= 256, index <= 255 and
; index + Nentries <= 256.
;
        Push    "r4, lr"

        SUB     r4, r3, #1                  ; Nentries-1 (wraps to &FFFFFFFF for Nentries = 0)
        CMP     r4, #255                    ; accept 1..256 entries only; this rejects an empty
        BHI     %FT20                       ; block and keeps the ADD below from wrapping

        CMP     r2, #255                    ; all indices in loop must be in range 0..255
        BHI     %FT20
        ADD     r4, r2, r3
        CMP     r4, #256
        BHI     %FT20

        CMP     r0, #0
        BNE     %FT50
;
; type 0, try to be efficient
;
        LDR     r12, [r9, #VIDC_Address]

        CMP     r2, #0                      ; EQ => block starts at entry 0

        LDREQ   r0, [r1]                    ; keep soft copy of entry 0 (BBGGRRSS)
        STREQ   r0, [r9, #VIDC_SoftPalette0]

        LDRNE   r0, [r9, #VIDC_NextPaletteIndex]
        MOVEQ   r0,#-1                      ; insurance! (see comments for WritePaletteEntry_type0)

        TEQ     r0, r2
        ORRNE   r0, r2, #VIDCPalAddress
        STRNE   r0, [r12]                   ; only update PalAddress if necessary

        ADD     r0, r2, r3                  ; index following the last entry written,
        AND     r0, r0, #&FF                ; wrapped at 256
        STR     r0, [r9, #VIDC_NextPaletteIndex]

        MOV     r4, r1                      ; r4 -> next pcolour
10
        LDR     r1, [r4], #4
        AND     r0, r1, #&F0                ; 000000S0 (4 msbits of supremacy)
        MOV     r1, r1, LSR #8              ; 00BBGGRR
        ORR     r1, r1, r0, LSL #20         ; 0SBBGGRR
        STR     r1, [r12]                   ; successive writes go to successive entries
        SUBS    r3, r3, #1
        BNE     %BT10
20
        Pull    "r4, pc"
;
; not type 0
;
50
        MOV     r4, r1                      ; r4 -> next pcolour
60
        LDR     r1, [r4], #4        ; next pcolour
        Push    "r0, r2, r3"        ; callee is allowed to corrupt r0-r3, so keep type as well
        BL      HAL_Video_WritePaletteEntry
        Pull    "r0, r2, r3"
        ADD     r2, r2, #1
        SUBS    r3, r3, #1
        BNE     %BT60
        Pull    "r4, pc"

; -------------------------------------------------------------------------

;
; uint HAL_Video_ReadPaletteEntry(uint type, uint pcolour, uint index)
;
; return the effective palette entry after taking into account any hardware
; restrictions in the video controller, assuming it was programmed with pcolour
;
;  type     = 0 for normal palette entry
;             1 for border colour
;             2 for pointer colour
;          >= 3 reserved
;  pcolour  = palette entry colour in BBGGRRSS format (Blue,Green,Red,Supremacy)
;  index    = index of entry  (0..255 for normal, 0 for border, 0..3 for pointer)
;             note that RISC OS only uses 1..3 for pointer (0 is assumed to be transparent)
;  returns  : effective BBGGRRSS
;
;   r9 is workspace pointer, may corrupt r0..r2, r12
;
;  mjs: depending on h/w capabilities, specific HALs may have to
;       remember current settings (eg. bits per pixel), keep soft copy
;       of entries or whatever, in their workspace. Because the HAL API
;       supplies a pcolour, the need to keep a full palette soft copy
;       in the HAL is minimised

HAL_Video_ReadPaletteEntry ROUT
;
; In:   r0 = type, r1 = pcolour (BBGGRRSS), r2 = index, r9 = HAL workspace
; Out:  r0 = effective BBGGRRSS; r12 and flags may be corrupted
;
        CMP     r0, #0
        BNE     HV_ReadPaletteEntry_not_type0
;
; Normal palette entry.
; The hardware keeps 4 bits of S, and only for entries 0..15; for every
; other entry the S bits are those of palette entry 0.
;
        CMP     r2, #16
        MOVLO   r12, r1                         ; entries 0..15: own S bits
        LDRHS   r12, [r9, #VIDC_SoftPalette0]   ; entries 16..255: S of entry 0

        MOV     r0, r1, LSR #8                  ; 00BBGGRR
        AND     r12, r12, #&000000F0            ; 000000S0, effective S
        ORR     r0, r12, r0, LSL #8             ; BBGGRRS0
        MOV     pc, lr
;
HV_ReadPaletteEntry_not_type0
; Border and pointer colours: the only restriction is 4 bits of S
        BIC     r0, r1, #&0000000F
        MOV     pc, lr

; -------------------------------------------------------------------------

; void HAL_Video_SetInterlace(uint interlace)
;
;  interlace = 0/1 for interlace off/on

HAL_Video_SetInterlace ROUT
;
; In:   r0 = interlace (bit 0: 0 = off, 1 = on), r9 = HAL workspace
; Out:  r0, r1 and flags corrupted
;
        LDR     r1, [r9, #VIDC_ControlSoftCopy]
        BIC     r1, r1, #CR_Interlace
        TST     r0, #1
        ORRNE   r1, r1, #CR_Interlace    ; zero => no interlace

        ; Keep the soft copy in step with the hardware: code that reprograms
        ; the control register from VIDC_ControlSoftCopy (such as the power
        ; save off path of HAL_Video_SetPowerSave) would otherwise undo
        ; this change.
        STR     r1, [r9, #VIDC_ControlSoftCopy]

        LDR     r0, [r9, #VIDC_Address]
        STR     r1, [r0]                ; program VIDC
        MOV     pc, lr

; -------------------------------------------------------------------------

; void HAL_Video_SetBlank(uint blank, uint DPMS)
;
; blank = 0/1 for unblank/blank
; DPMS  = 0..3 as specified by monitor DPMSState (from mode file)
;         0 for no DPMS power saving

; HAL is expected to attempt to turn syncs off according to DPMS, and
; to turn video DMA off for blank (and therefore on for unblank) if possible.
; HAL is not expected to do anything else, eg. blank all palette entries.
; Such things are the responsibility of the OS, and also this call is expected
; to be fast. May be called with interrupts off.

HAL_Video_SetBlank ROUT
;
; In:   r0 = blank (0 = unblank, non-zero = blank)
;       r1 = DPMS state (bit 0 = hsync off when blanked, bit 1 = vsync off)
;       r9 = HAL workspace
; Out:  r0-r3 and flags corrupted
;
        LDR     r3, [r9, #VIDC_Address]

        TEQ     r0, #0
        BNE     %FT20
;
; blank = 0: unblank
;
        LDR     r2, [r9, #VIDC_ExternalSoftCopy]
        STR     r2, [r3]                        ; DACs and sync type back to normal

        TST     r1, #1                          ; hsyncs were turned off?
        LDRNE   r2, [r9, #VIDC_HSWRSoftCopy]    ; yes, so put back the real width
        STRNE   r2, [r3]

        TST     r1, #2                          ; vsyncs were turned off?
        LDRNE   r2, [r9, #VIDC_VSWRSoftCopy]    ; yes, so put back the real width
        STRNE   r2, [r3]

        LDR     r0, [r9, #IOMD_Address]
        LDRB    r1, [r0, #IOMD_VIDCR]
        ORR     r1, r1, #IOMD_VIDCR_Enable      ; video DMA on again
        STRB    r1, [r0, #IOMD_VIDCR]

        MOV     pc, lr
;
; blank <> 0: blank
;
20
        TST     r1, #1                          ; hsyncs to be turned off?
        LDRNE   r2, =HorizSyncWidth + ((1:SHL:14) -1)   ; h.sync width register at its maximum
        STRNE   r2, [r3]

        TST     r1, #2                          ; vsyncs to be turned off?
        LDRNE   r2, =VertiSyncWidth + ((1:SHL:13) -1)   ; v.sync width register at its maximum
        STRNE   r2, [r3]

        LDR     r2, [r9, #VIDC_ExternalSoftCopy]
        AND     r1, r1, #3
        TEQ     r1, #3                          ; both syncs off?
        BICEQ   r2, r2, #Ext_HSYNCbits :OR: Ext_VSYNCbits
        ORREQ   r2, r2, #Ext_InvertHSYNC :OR: Ext_InvertVSYNC  ; then hold sync signals low (less power)
        BIC     r2, r2, #Ext_DACsOn             ; DACs off in every blanked state
        STR     r2, [r3]

        LDR     r0, [r9, #IOMD_Address]
        LDRB    r1, [r0, #IOMD_VIDCR]
        BIC     r1, r1, #IOMD_VIDCR_Enable      ; video DMA off
        STRB    r1, [r0, #IOMD_VIDCR]

        MOV     pc, lr

; -------------------------------------------------------------------------

; void HAL_Video_SetPowerSave(uint powersave)
;
; powersave = 0/1 for power save off/on

HAL_Video_SetPowerSave ROUT
;
; In:   r0 = powersave (0 = off, non-zero = on), r9 = HAL workspace
; Out:  r1, r2 corrupted, plus r3 on the power save off path; flags corrupted
;
; Power save on writes three fixed words to VIDC. Power save off rewrites
; the control, external and frequency synthesiser registers from their
; soft copies in the HAL workspace.
;

        LDR     r1, [r9, #VIDC_Address]

        TEQ     r0, #0
        BEQ     %FT50
;
; power save on
;
        LDR     r2, =&C0000003    ;dac off, ereg set to external LUT
        STR     r2, [r1]

        LDR     r2, =&D0004000    ;Vclk off, Pcomp=0
        STR     r2, [r1]

        LDR     r2, =&E0004049    ;PoDown, Hclk
        STR     r2, [r1]

        MOV     pc, lr
;
; power save off
;
50
        LDR     r2, [R9, #VIDC_ControlSoftCopy]       ;restore from soft copy
        STR     r2, [r1]

        LDR     r2, [R9, #VIDC_ExternalSoftCopy]      ;restore from soft copy
        STR     r2, [r1]

        LDR     r2, [R9, #VIDC_FSynSoftCopy]          ;restore from soft copy

; The synthesiser value in r2 is not written directly. It is preceded by two
; other writes: first the reset value, then the wanted value with the test
; bits forced on (apart from FSyn_ForceHigh). The final STR after the
; conditional block writes the wanted value with all the test bits removed.
 [ {TRUE}
        LDR     R3, =FSyn_ResetValue    ; set test bits on, and r > v
        ORR     R3, R3, #VIDCFSyn
        STR     R3, [R1]

; we may need some delay in here...

        LDR     R3, =FSyn_ClearR :OR: FSyn_ClearV :OR: FSyn_ForceLow :OR: FSyn_ForceHigh
        ORR     R2, R2, R3
        BIC     R2, R2, #FSyn_ForceHigh ; force test bits on, except this one
        STR     R2, [R1]

; we may also need some delay in here...

        BIC     R2, R2, R3              ; remove test bits
 ]
        STR     r2, [r1]

        MOV     pc, lr

; -------------------------------------------------------------------------

; void HAL_Video_UpdatePointer(uint flags, int x, int y, const shape_t *shape)
;
; Update the displayed position of the current pointer shape (or turn
; shape off)
;
; HAL code may need to take note of shape updated flag, and make its
; own new copies if true. This is to handle cases like dual scan LCD
; pointer, which typically needs two or more shapes buffers for the
; hardware. This work should _only_ be done when the updated flag
; is true, or possibly because provoked by clipping requirements.
; A simple HAL, using the kernel shape buffer directly, may be able to
; ignore the updated flag.
;
; flags:
;   bit 0  = pointer display enable (0=off, 1=on)
;   bit 1  = pointer shape update (0=no change, 1=updated)
;   bits 2..31 reserved (0)
; x = x position of top left of pointer (x = 0 for left of display)
; y = y position of top left of pointer (y = 0 for top of display)
; shape points to shape_t descriptor block:
;   typedef struct shape_t
;   {
;     uint8   width;      /* unpadded width in bytes (see notes) */
;     uint8   height;     /* in pixels */
;     uint8   padding[2]; /* 2 bytes of padding for field alignment */
;     void   *buffLA;     /* logical address of buffer holding pixel data */
;     void   *buffPA;     /* corresponding physical address of buffer */
;   }
;
; Notes:
; 1) if flags bit 0 is 0 (pointer off), x, y, shape are undefined
; 2) the shape data from RISC OS is always padded with transparent pixels
;    on the rhs, to a width of 32 pixels (8 bytes)
; 3) pointer clipping is the responsibility of the HAL (eg. may be able to
;    allow display of pointer in border region on some h/w)
; 4) buffer for pixel data is aligned to a multiple of 256 bytes or better
;
; This call is made by the OS at a time to allow smoothly displayed changes
; (on a VSync)

HAL_Video_UpdatePointer ROUT
;
; In:   r0 = flags (bit 0 = pointer on; bit 1, shape updated, is not examined here)
;       r1 = x, r2 = y (signed), r3 -> shape_t block
;       r9 = HAL workspace
; Out:  r0-r3, r12 may be corrupted per the HAL calling convention
;       (this code actually alters r1, r2 and flags); r4, r5 preserved
;
; ROUT gives this routine its own local label scope, like the other
; routines in this file.
;

        Push    "r4, r5, lr"

        LDR     r14, [r9, #VIDC_Address]

        TST     r0, #1
        BEQ     %FT90          ; pointer off

;
; process x (and assume shape width is padded 32 pixels)
;
        LDR     r4, [r9, #VIDC_PointerHAdjust]
        ADDS    r1, r1, r4
        MOVLT   r1, #0                    ; x:= x+fudge, clamped to 0

        CMP     r1, #&4000                ; VIDC has 14 bits for cursor start
        MOVGE   r1, #&4000
        SUBGE   r1, r1, #1                ; so clamp to &3FFF

        ORR     r1, r1, #HorizCursorStart
        STR     r1, [r14]
;
; process y
;
        LDRB    r4, [r3, #1]              ; height from shape_t block
        LDR     r5, [r3, #8]              ; buffer physical address from shape_t block

        CMP     r2, #0                    ; if -ve y
        BICLT   r2, r2, #1                ; TEMP FUDGE - really ought to have two copies, one offset by 1 row
                                          ; because VIDC can only cope with 16 byte aligned data pointer
        ADDLT   r4, r4, r2                ; reduce height
        SUBLT   r5, r5, r2, LSL #3        ; and advance data pointer (8 bytes per row) to clip pointer to 0
        MOVLT   r2, #0
        CMP     r4, #0
        BLE     %FT90                     ; pointer off if clipped to oblivion

        LDR     r1, [r9, #VIDC_VertiDisplaySize]
        SUB     r1, r1, r2                       ; if display_height - y < pointer height
        CMP     r1, r4
        MOVLT   r4, r1                           ; clip pointer height
        CMP     r4, #0
        BLE     %FT90                            ; pointer off if clipped to oblivion

        LDR     r1, [r9, #VIDC_PointerVAdjust]
        ADD     r2, r2, r1                       ; y := y+adjust

        ORR     r1, r2, #VertiCursorStart
        STR     r1, [r14]

        ADD     r2, r2, r4                ; y:= y+height
        ORR     r1, r2, #VertiCursorEnd
        STR     r1, [r14]

        LDR     r14, [r9, #IOMD_Address]
        STR     r5, [r14, #IOMD_CURSINIT] ; (possibly advanced) physical address of pointer data

        Pull    "r4, r5, pc"

90
;
; pointer off: program vertical cursor start and end both as zero
;
        MOV     r4, #VertiCursorStart
        STR     r4, [r14]
        MOV     r4, #VertiCursorEnd
        STR     r4, [r14]

        Pull    "r4, r5, pc"

; -------------------------------------------------------------------------

; void HAL_Video_SetDAG(uint DAG, uint paddr)
;
; set Video DMA address generator value to given physical address
;
; DAG   = 0 set start address of current video display
;         1 set start address of total video buffer
;         2 set end address (exclusive) of total video buffer
;         all other values reserved
; paddr = physical address for given DAG
;
; Notes:
; The OS has a video buffer which is >= total display size, and may be using
; bank switching (several display buffers) or hardware scroll within the
; total video buffer.
;
; DAG=1 will be start address of current total video buffer
; DAG=2 will be end address (exclusive) of current total video buffer
; DAG=0 will be start address in buffer for current display
;
; HALs should respond as follows:
; 1) If they have no hardware scroll support, only DAG=0 is significant,
;    and the end address of the current display is implied by the size
;    of the current mode. Calls with DAG=1,2 should be ignored.
; 2) If they support hardware scroll, DAG=0 again defines display start.
;    DAG=2 defines the last address (exclusive) that should be displayed
;    before wrapping back (if reached within display size), and DAG=1
;    defines the address to which accesses should wrap back.

HAL_Video_SetDAG ROUT
;
; In:   r0 = DAG (0 = display start, 1 = buffer start, other = buffer end)
;       r1 = physical address
;       r9 = HAL workspace
; Out:  r1, r2, r12 and flags may be corrupted
;
        LDR     r12, [r9, #IOMD_Address]

        CMP     r0, #1
;
; DAG=1 program VStart
;
        STREQ   r1, [r12, #IOMD_VIDSTART]
        MOVEQ   pc, lr

        BHI     %FT40                           ; anything above 1 is handled as DAG=2
;
; DAG=0 program VInit
;
        LDR     r2, [r9, #IOMD_VEndSoftCopy]
        STR     r1, [r9, #IOMD_VInitSoftCopy]   ; remembered (without L bit) for later VEnd writes
        CMP     r1, r2
        ORRCS   r1, r1, #IOMD_DMA_L_Bit         ; VInit >= VEnd, so L bit needed
        STR     r1, [r12, #IOMD_VIDINIT]
        MOV     pc, lr

   ! 0, "mjsHAL - using kernel variable VRAMWidth"
;
; DAG=2 program VEnd
;
; The address given is exclusive; what is programmed is the address of the
; last DMA fetch, so step back by one fetch, whose size depends on VRAMWidth.
;
40      MOV     r2, #0
        LDRB    r2, [r2, #VRAMWidth]
        CMP     r2, #1
        MOVHI   r2, #SAMLength                  ; 2 banks of VRAM - 1/2 SAM * 2
        MOVEQ   r2, #SAMLength/2                ; 1 bank of VRAM - 1/2 SAM
        MOVLO   r2, #16                         ; DRAM-only - 16 bytes (quadword)
        SUB     r1, r1, r2

        LDR     r2, [r9, #IOMD_VInitSoftCopy]   ; current VInit
        STR     r1, [r9, #IOMD_VEndSoftCopy]    ; new VEnd remembered for later VInit writes
        CMP     r2, r1
        ORRCS   r2, r2, #IOMD_DMA_L_Bit         ; VInit >= VEnd, so L bit needed
        STR     r2, [r12, #IOMD_VIDINIT]        ; VInit first,
        STR     r1, [r12, #IOMD_VIDEND]         ; then VEnd
        MOV     pc, lr

; -------------------------------------------------------------------------

;;;mjsHAL - is the mode workspace really generic enough to pass to HAL?
;;;

;
; int HAL_Video_VetMode(const void *VIDClist, const void *workspace)
;
; VIDClist  -> generic video controller list (VIDC list type 3)
; workspace -> mode workspace (if mode number), or 0
; returns 0 if OK (may be minor adjusts to VIDClist and/or workspace values)
;         non-zero if not OK
;
HAL_Video_VetMode ROUT
;
; In:   r0 -> VIDC list, r1 -> mode workspace or 0 (neither is examined)
; Out:  r0 = 0, ie. every mode is reported as OK
;
        MOV     r0, #0                  ; no vetting implemented yet
        MOV     pc, lr

; -------------------------------------------------------------------------

  ] ; big HAL if/else switch around whole file

        END