; > AsmSrc.ColourTrns

;; CHANGE LIST
;;
;;              0.51    Sam     First version released
;; 26-Apr-89    0.52    NRaine  InvalidateCache made to read Log2BPP as well
;; 24-Aug-89            NRaine  Updated to conform to source release format
;; 26-Sep-89    0.53    JRoach  Add ecf handling 
;; 18-OCT-89    0.54    GStark  Added Calibration handling
;;                              Also added CIE SWI's for the future
;; 23-Oct-89    0.55    GStark  Made it allocate its workspace on the first SWI call, not before
;; 24-Oct-89    0.56    GStark  Made default palettes use the calibration. Fixed SelectTable bug
;; 25-Oct-89    0.57    GStark  Don't allocate the workspace on InvalidateCache calls
;; 27-Oct-89    0.58    GStark  Installed *Commands, and Service_WimpSaveDesktop, and SWI WriteCalibrationToFile
;; 28-Oct-89    0.59    GStark  Made the CIE SWI's work. (i.e. wrote the code).
;;                              Don't allocate workspace on SWI WriteCalibrationToFile
;;
;; WISH LIST

        GET     &.Hdr.ListOpts
        GET     &.Hdr.Macros
        GET     &.Hdr.System
        GET     &.Hdr.File
        GET     &.Hdr.ModHand
        GET     &.Hdr.NewErrors
        GET     &.Hdr.VduExt
        GET     &.Hdr.Services
        GET     &.Hdr.Font
        GET     &.Hdr.NewSpace
        GET     &.Hdr.ExtraLong

        GET     Version

        ; Assembly-time switches.  Both default to {FALSE}; every "[ Debug"
        ; section in this file is only assembled when Debug is {TRUE}.
        GBLL    Debug
        GBLL    Debugcal
Debugcal SETL   {FALSE}
Debug   SETL    {FALSE}
        GET     &.Hdr.HostFS
        GET     &.Hdr.Debug

        ; Host_Debug is declared by one of the headers above; it is switched
        ; on whenever Debug is on.
      [ Debug
Host_Debug SETL {TRUE}
      ]

        LEADR   Module_LoadAddr

       [ Version >= 053
        ;
        ; Allocation of workspace within the scratch space
        ;
        ; The map below starts at ScratchSpace (a symbol supplied by the
        ; included headers).  OutBuff has zero size, so it shares its address
        ; with Distances.
        ;

        ^ ScratchSpace
OutBuff         #       0               ; overlays Distances after Distances finished with
Distances       #       256*4           ; 256 words
Colours         #       32              ; 32 bytes
Ranks           #       256             ; 256 bytes
       ]

        ;
        ; A cache entry structure
        ;
        ; Offsets are relative to the start of one entry.  With Version >= 053
        ; an entry is 3+1+1+1+1+1+4+4 = 16 bytes; before that it is 8 bytes.
        ;
                ^  0
CacheEntry      # 3     ; BBGGRR
CacheValid      # 1     ; top byte of first word in cache entry: non-zero implies invalid
CachedColour    # 1     ; colour number
CachedGCOL      # 1     ; gcol
CacheEmpty      # 1     ; flag only in first entry; no reinvalidation if no
                        ; cache entries
CachedL2BPP     # 1     ; make word sized: also a number cached on mode change
 [ Version >= 053
CachedCols      # 4     ; ECF colours
CachedRatios    # 4     ; Cached ratios of the colours - if = -1 then ecf info invalid
 ]
CacheEntrySize  # 0     ; total size of one entry

CacheEntries   * 128    ; must be a power of 2

CacheTotalSize * CacheEntries*CacheEntrySize

        ; CacheEmpty and CachedL2BPP are only meaningful in the first entry, so
        ; the same byte positions in the SECOND entry are reused for two more
        ; global byte variables.
PseudoPaletteEntry * CacheEmpty + CacheEntrySize        ; byte var in second cache entry
 [ Version >= 053
PaletteIsCached * CachedL2BPP + CacheEntrySize          ; byte var in second cache entry
 ]

 [ Version >= 053
        ;
        ; A palette/stipple structure
        ;
        ; One 4-byte record; the table below reserves 256 of them.
        ;
                        ^ 0
Stipple                 # 1
Palette_Red             # 1
Palette_Green           # 1
Palette_Blue            # 1
PaletteStipple_Size     # 0

PaletteStipple_TotalSize * 256*PaletteStipple_Size

        ;
        ; RMA main block allocation
        ;          
        ; Layout of the block of RMAMain_Size bytes claimed with
        ; ModHandReason_Claim.  The colour cache sits at offset 0, so the
        ; CacheEntry offsets above can be applied directly to the block pointer.
        ; RMAMain_Size is loaded with a single MOV immediate elsewhere in this
        ; file; the "Alignment" fields presumably pad it to an encodable value
        ; - confirm before changing the layout.
        ;
                ^ 0
ColourCache     # CacheTotalSize
PaletteStipple  # PaletteStipple_TotalSize
   [ Version >= 054
Calibration_ptr # 4     ; Pointer to calibration table for screen
     [ Version >= 058
Calibration_pending   # 4; For * commands
Calibration_remaining # 4; For * commands
dummy1          # 4     ; Alignment
text_buffer12         # 12; For writing the text
dummy           # 4     ; Alignment
     |
dummy           # 12    ; Alignment
     ]
   ]
RMAMain_Size    # 0
 ]

; Module header.  Each word is either 0 (entry not provided) or the offset of
; the entry from ModuleStart.
ModuleStart
        DCD     0                               ; no start (application) entry
        DCD     Initialise      - ModuleStart   ; initialisation code
        DCD     Die             - ModuleStart   ; finalisation code
        DCD     ServiceCalls    - ModuleStart   ; service call handler
        DCD     Title           - ModuleStart   ; title string
        DCD     HelpStr         - ModuleStart   ; help string
      [ Version >= 058
        DCD     StarComs        - ModuleStart   ; * command table
      |
        DCD     0                               ; no * commands before 0.58
      ]
        DCD     ColourTransSWI * Module_SWIChunkSize + Module_SWISystemBase ; SWI chunk base
        DCD     SWIs            - ModuleStart   ; SWI handler
        DCD     SWInames        - ModuleStart   ; SWI name table

; Zero-terminated title and help strings referenced from the module header.
; The help string gains a " Development version" suffix in Debug builds;
; the single "= 0" terminates whichever form was assembled.
Title = "ColourTrans",0
HelpStr = "Colour Selector",9,"$VString ($Date)"
      [ Debug
        = " Development version"
      ]
        = 0

      [ Version >= 058
; * command table.  Each entry is: zero-terminated name, padding to a word
; boundary, code offset, information word, a zero word, then the offset of
; the help text.  Both commands share HelpStarComs.
; NOTE(review): the information words presumably encode the parameter limits
; (3..3 for MapSize, 1..255 for Map) - confirm against the module command
; table format before changing them.
StarComs
        DCB     "ColourTransMapSize",0
        ALIGN
        DCD     ColourTransMapSize_Code - ModuleStart
        DCD     &00030703
        DCD     0
        DCD     HelpStarComs - ModuleStart
        DCB     "ColourTransMap",0
        ALIGN
        DCD     ColourTransMap_Code - ModuleStart
        DCD     &00FF0001
        DCD     0
        DCD     HelpStarComs - ModuleStart
        DCD     0                       ; end of table
HelpStarComs
        DCB     "ColourTransMap and ColourTransMapSize commands are for internal use only",0
      ]

; SWI name table: the group prefix first, then one zero-terminated name per
; SWI, ended by an extra zero byte.  The order must match the branch table at
; SWITable entry for entry, including the version conditionals.
SWInames
        DCB     "ColourTrans", 0
        DCB     "SelectTable", 0
        DCB     "SelectGCOLTable", 0
        DCB     "ReturnGCOL",0
        DCB     "SetGCOL",0
        DCB     "ReturnColourNumber",0
        DCB     "ReturnGCOLForMode",0
        DCB     "ReturnColourNumberForMode",0
        DCB     "ReturnOppGCOL",0
        DCB     "SetOppGCOL",0
        DCB     "ReturnOppColourNumber",0
        DCB     "ReturnOppGCOLForMode",0
        DCB     "ReturnOppColourNumberForMode",0
        DCB     "GCOLToColourNumber",0
        DCB     "ColourNumberToGCOL",0
        DCB     "ReturnFontColours",0
        DCB     "SetFontColours",0
        DCB     "InvalidateCache",0
 [ Version >= 054
        DCB     "SetCalibration",0
        DCB     "ReadCalibration",0
        DCB     "ConvertDeviceColour",0
        DCB     "ConvertDevicePalette",0
        DCB     "ConvertRGBToCIE",0
        DCB     "ConvertCIEToRGB",0
 ]
 [ Version >= 058
        DCB     "WriteCalibrationToFile",0
 ]
        DCB     0                       ; end of name table

        ALIGN

; Error block (error number word followed by zero-terminated text) returned by
; Initialise when claiming ColourV fails.
MOSnaff
        DCD     ErrorNumber_BadClaimNum
        DCB     "The colour module needs RISC OS",0
        ALIGN

  [ Version >= 054
        MakeErrorBlock  CantKill
; Error exit used by Die when it is entered with r10 = 0 while a calibration
; table is still allocated.  Expects Die's stacked lr to be on top of the
; stack; returns r0 -> CantKill error block with V set.
error_cant_rmtidy_colourtrans
        ADR     r0, ErrorBlock_CantKill
        SETV
        Pull    "PC"
  ]

; Die - module finalisation entry.
;
; In:   r12 -> module private word
;       r10 =  0 makes the routine refuse to die while a calibration table is
;              allocated (presumably the non-fatal/RMTidy case - confirm)
; Out:  calibration tables (current and, from 0.58, pending) freed and the
;       ColourV claim released; or V set and r0 -> error block
;
; Also called by Initialise when XOS_Claim fails.
Die
        Push    "lr"
      [ Version >= 054        
        MOV     r0, #ModHandReason_Free
        LDR     r1, [r12]               ; r1 -> RMA main block (0 if never claimed)
       [ Version >= 055
        CMP     r1, #0  ;       Nothing to free
        BEQ     nothing_to_free
        MOV     r2, r12                 ; NOTE(review): dead - r2 is reloaded on the next line
       ]
        LDR     r2, [r1, #Calibration_ptr]
        CMP     r2, #0
        BEQ     freed_calibration
        CMP     r10, #0
        BEQ     error_cant_rmtidy_colourtrans
        SWI     XOS_Module              ; free the current calibration table
        Pull    "pc",VS
        MOV     r2, #0
        STR     r2, [r1, #Calibration_ptr]
freed_calibration
       [ Version >= 058
        LDR     r2, [r1, #Calibration_pending]
        CMP     r2, #0
        BEQ     freed_pending
        CMP     r10, #0
        BEQ     error_cant_rmtidy_colourtrans
        SWI     XOS_Module              ; free the half-built table from the * commands
        Pull    "pc",VS
        MOV     r2, #0
        STR     r2, [r1, #Calibration_pending]
freed_pending
       ]
nothing_to_free
      ]
        ; Release the vector with the same r2 value that was used to claim it:
        ; the private word address from 0.55, the workspace pointer before.
        MOV     r0, #ColourV
        ADR     r1, ColourVCode
      [ Version >= 055
        MOV     r2, r12
      |
        LDR     r2, [r12]
      ]
        SWI     XOS_Release
        Pull    "pc",,^                 ; return restoring the caller's flags

; Initialise - module initialisation entry.
;
; In:   r12 -> module private word
; Out:  ColourV claimed, or V set and r0 -> MOSnaff error block
;
; From 0.55 the RMA workspace is NOT claimed here (see validate_workspace_ptr):
; the vector is claimed with r2 = private word address and the routine returns.
; Before 0.55 the workspace is claimed here, stored in the private word, and
; execution falls through into InitCache with r12 -> workspace.
Initialise
        Push    "lr"

      [ Debug
        DLINE """:CC::CHR:22:CC::CHR:128:CC:""colourtrans init"
      ]
        
    [ Version >= 055
        MOV     r2, r12
    |
        MOV     r0, #ModHandReason_Claim
       [ Version >= 053
        MOV     r3, #RMAMain_Size
       |
        MOV     r3, #CacheTotalSize
       ]
        SWI     XOS_Module
        Pull    "pc", VS
        STR     r2, [r12]
    ]

        ; These two instructions used to be conditional on VC.  In the 0.55+
        ; path nothing above sets V, so the claim depended on whatever flag
        ; state the module was entered with.  In the pre-0.55 path V is always
        ; clear here (the VS case has already returned), so executing them
        ; unconditionally is equivalent.
        MOV     r0, #ColourV
        ADR     r1, ColourVCode
        SWI     XOS_Claim
        ADRVS   r0, MOSnaff

        ; Claim failed: undo via Die, then return the MOSnaff error.
        Push    "r0", VS
        BLVS    Die
        Pull    "r0, pc", VS

    [ Version >= 055
        Pull    "pc"
    ]
        MOV     r12, r2
        STRB    pc, [r12, #CacheEmpty]   ; force cache flush (SVC mode here)

      [ Version >= 054
        MOV     r0, #0
        STR     r0, [r12, #Calibration_ptr]
      ]            

        Pull    "lr"                    ; fall through into InitCache

; InitCache - re-read the screen depth and invalidate the colour cache.
;
; In:   r12 -> RMA main block
; Out:  CachedL2BPP updated from the current mode; PaletteIsCached cleared
;       (0.53+); every cache entry marked invalid and CacheEmpty cleared,
;       unless CacheEmpty was already 0
;       r0-r2 corrupted
InitCache
        Push    "lr"

        MOV     r0, #-1                    ;; this stuff moved in here by NRaine
        MOV     r1, #VduExt_Log2BPP
        SWI     XOS_ReadModeVariable
        STRB    r2, [r12, #CachedL2BPP]
      [ Debug
        DREG    r2, "----> ColourTrans initcache - log2bpp = "
      ]

      [ Version >= 053
        MOV     r0, #0
        STRB    r0, [r12, #PaletteIsCached]
      ]

        ; CacheEmpty = 0 means nothing has been cached since the last flush,
        ; so there is nothing to invalidate.
        LDRB    r0, [r12, #CacheEmpty]
        CMP     r0, #0
        Pull    "pc", EQ

      [ Debug
        DLINE   "Invalidate cache"
      ]

        ; Walk the entries from last to first.  r0 is the offset of the current
        ; entry's CacheValid byte; the loop ends when it goes negative.
        LDR     r0, =(CacheEntries-1)*CacheEntrySize + CacheValid
        MOV     r1, #-1
InitCacheLoop
        STRB    r1, [r12, r0]           ; non-zero CacheValid = invalid
      [ Version >= 053
        SUB     r0, r0, #CacheValid-CachedRatios  ; move on to this entry's CachedRatios
        STR     r1, [r12, r0]           ; CachedRatios = -1: ECF info invalid
        SUBS    r0, r0, #CacheEntrySize+CachedRatios-CacheValid ; CacheValid of previous entry
      |
        SUBS    r0, r0, #CacheEntrySize
      ]
        BPL     InitCacheLoop
        MOV     r0, #0
        STRB    r0, [r12, #CacheEmpty]

        Pull      "pc"

        LTORG

; getbackonvector - re-claim ColourV (reached from ServiceCalls on
; Service_Reset).
;
; In:   r12 -> module private word
; Out:  r0-r2 preserved.  The r2 passed to XOS_Claim matches Initialise: the
;       private word address from 0.55, the workspace pointer before.
getbackonvector
        Push    "r0-r2, lr"
        MOV     r0, #ColourV
        ADR     r1, ColourVCode
      [ Version >= 055
        MOV     r2, r12
      |
        LDR     r2, [r12]
      ]
        SWI     XOS_Claim
        Pull    "r0-r2, pc"

; ServiceCalls - module service call entry.
;
; In:   r1  =  service number
;       r12 -> module private word
;
;   Service_WimpSaveDesktop (0.58+) -> save_desktop_to_file
;   Service_Reset                   -> getbackonvector
;   Service_ModeChange              -> falls through into InvalidateCache_Code
;                                      with r12 -> workspace (skipped from 0.55
;                                      if no workspace has been claimed yet)
;   anything else                   -> return immediately
ServiceCalls        
      [ Debug
        ; Debug builds report Service_CalibrationChanged, preserving all registers.
        TEQ     r1, #Service_CalibrationChanged
        BNE     %ft99
        Push    "r0-r12,lr"
        SWI 256+4
        SWI 256+26
        SWI     256+30
        DREG    r1, "Here"
        Pull    "r0-r12,lr"
99
      ]
      [ Version >= 058
        TEQ     r1, #Service_WimpSaveDesktop
        BEQ     save_desktop_to_file
      ]
        CMP     r1, #Service_Reset
        BEQ     getbackonvector
        CMP     r1, #Service_ModeChange
        MOVNE   pc, lr 
        LDR     r12, [r12]         ; drop through to InvalidateCache
      [ Version >= 055
        CMP     r12, #0            ; workspace not claimed yet: nothing to invalidate
        MOVEQ   pc, lr
      ]

; InvalidateCache_Code - SWI and service entry: flush the colour cache.
;
; In:   r12 -> RMA main block
; Out:  r0-r2 and flags preserved (InitCache corrupts r0-r2, hence the frame)
InvalidateCache_Code
        Push    "r0, r1, r2, lr"
        BL      InitCache
        Pull    "r0, r1, r2, PC",,^

      [ Version >= 055
; validate_workspace_ptr - make sure the RMA main block exists, claiming and
; initialising it on first use.
;
; In:   r12 -> module private word
; Out:  r12 -> RMA main block, r0-r3 preserved
;       or V set, r0 -> error from XOS_Module, r1-r3 preserved
validate_workspace_ptr
        Push    "r0-r3,lr"
        LDR     lr, [r12]
        CMP     lr, #0
        MOVNE   r12, lr                 ; already claimed: just hand it back
        Pull    "r0-r3,pc",NE
        MOV     r0, #ModHandReason_Claim
        MOV     r3, #RMAMain_Size
        SWI     XOS_Module
        ADDVS   sp, sp, #4              ; drop stacked r0 so the error pointer survives
        Pull    "r1-r3,pc", VS
        STR     r2, [r12]               ; remember the block in the private word
        MOV     r12, r2
        STRB    pc, [r12, #CacheEmpty]   ; force cache flush (SVC mode here)
        MOV     r0, #0
        STR     r0, [r12, #Calibration_ptr]
       [ Version >= 058
        STR     r0, [r12, #Calibration_pending]
        STR     r0, [r12, #Calibration_remaining]
       ]
        BL      InitCache
        Pull    "r0-r3,pc"
      ]


; SWIs - module SWI entry.  Every SWI is routed through ColourV; the real
; dispatch happens in ColourVCode.
;
; In:   r11 = SWI offset within the chunk (copied to r8 for the vector call)
; Out:  r8, r9 preserved; V set in the returned flags if the vector call
;       returned an error
SWIs
        TEQP     pc, #SVC_mode       ; enable IRQs
        Push     "r8, r9, lr"
        MOV      r8, r11
        MOV      r9, #ColourV
        SWI      XOS_CallAVector
        Pull     "r8, r9, lr"
        ORRVS    lr, lr, #V_bit
        MOVS     pc, lr

; ColourVCode - ColourV handler: dispatch SWI number r8 through SWITable.
;
; In:   r8  =  SWI offset within the chunk
;       r12 =  value given as r2 when the vector was claimed (private word
;              address from 0.55, workspace pointer before)
;       top of stack = address to return to (popped into pc or lr below)
; Out:  r12 -> RMA main block when the SWI routine is entered
;
; From 0.57, InvalidateCache (and from 0.59 WriteCalibrationToFile) return
; straight away if no workspace has been claimed yet, instead of forcing the
; allocation.
;
; NOTE(review): the range check is a signed compare (LT), so a negative r8
; supplied by a direct vector call would pass it - confirm whether that can
; happen.
ColourVCode                       
      [ Version >= 057
        CMP     r8, #(SWITable_InvalidateCache-SWITable)/4
       [ Version >= 059
        CMPNE   r8, #(SWITable_WriteCalibrationToFile-SWITable)/4
       ]
        LDREQ   lr, [r12]               ; lr is free here: the return address is on the stack
        CMPEQ   lr, #0
        Pull    "pc",EQ                 ; no workspace: nothing to do
      ]
      [ Version >= 055
        BL      validate_workspace_ptr
        Pull    "pc",VS
      ]
        Pull     "lr"
        CMP      r8, #(SWITable_end-SWITable)/4
        ; pc reads as two instructions ahead, i.e. SWITable itself, so exactly
        ; one instruction (the B below) must sit between this ADD and the table.
        ADDLT    pc, pc, r8, LSL #2
        B        naffswi

; One branch per SWI, in the same order as SWInames.
SWITable
        B       SelectTable_Code
        B       SelectGCOLTable_Code
        B       ReturnGCOL_Code
        B       SetGCOL_Code
        B       ReturnColourNumber_Code
        B       ReturnGCOLForMode_Code
        B       ReturnColourNumberForMode_Code
        B       ReturnOppGCOL_Code
        B       SetOppGCOL_Code
        B       ReturnOppColourNumber_Code
        B       ReturnOppGCOLForMode_Code
        B       ReturnOppColourNumberForMode_Code
        B       GCOLToColourNumber_Code
        B       ColourNumberToGCOL_Code
        B       ReturnFontColours_Code
        B       SetFontColours_Code           
SWITable_InvalidateCache
        B       InvalidateCache_Code
 [ Version >= 054
        B       SetCalibration_Code
        B       ReadCalibration_Code
        B       ConvertDeviceColour_Code
        B       ConvertDevicePalette_Code
        B       ConvertRGBToCIE_Code
        B       ConvertCIEToRGB_Code
 ]                     
 [ Version >= 058
SWITable_WriteCalibrationToFile
        B       WriteCalibrationToFile_Code
 ]
SWITable_end

; naffswi - SWI offset outside SWITable: return the NoSuchSWI error with V set.
naffswi
        ADR     r0, ErrorBlock_NoSuchSWI
        ORRS    pc, lr, #V_bit

        MakeErrorBlock NoSuchSWI
        MakeErrorBlock BadMODE

  [ Version >= 058
;*********************************************************
; save_desktop_to_file - Service_WimpSaveDesktop handler.
;
; In:   r2  =  file handle (passed on as r1 to WriteCalibrationToFile_Code)
;       r12 -> module private word
; Out:  all registers preserved on success, or if no workspace exists
;       on failure: r0 -> error, r1 = 0, r2-r12 preserved
save_desktop_to_file
        LDR     r12, [r12]
        CMP     r12, #0
        MOVEQ   pc, lr                  ; no workspace, so no calibration to save
        Push    "r0-r12,lr"
        MOV     r1, r2
        MOV     r0, #0                  ; bit 0 clear: write nothing for the default calibration
        BL      WriteCalibrationToFile_Code
        Pull    "r0-r12,pc",VC
        MOV     r1, #0     
        ADD     sp, sp, #8              ; skip stacked r0, r1 so the error and r1 = 0 are returned
        Pull    "r2-r12,pc"
;*********************************************************
; ColourTransMapSize_Code - *ColourTransMapSize <n1> <n2> <n3>
;
; Claims a block of (n1+n2+n3)*4 + 12 bytes for a calibration table that the
; following *ColourTransMap commands fill in, and stores the three lengths in
; its first three words.  Any previously pending block is freed.
;
; In:   r0  -> command tail
;       r12 -> module private word
; Out:  r1-r7 preserved; V set and r0 -> error on failure
;
; NOTE(review): the separator-skipping loops step past any character <= 32,
; including a string terminator, so this relies on three parameters actually
; being present - confirm the command table guarantees that.
; NOTE(review): if freeing the old block fails, the new block is already
; recorded as pending but its lengths and Calibration_remaining are not set.
ColourTransMapSize_Code
        Push    "r1-r7,lr"
        BL      validate_workspace_ptr
; Read first number (unsigned) = r4
        MOVVC   r1, r0
        MOVVC   r0, #10 :OR: (1 :SHL: 31)
        SWIVC   XOS_ReadUnsigned
        Pull    "r1-r7,pc",VS
        MOV     r4, r2
; Move past separators
01
        LDRB    r2, [r1,#1]!
        CMP     r2, #32
        BLE     %bt01
; Read second number = r5
        SWI     XOS_ReadUnsigned
        Pull    "r1-r7,pc",VS
        MOV     r5, r2
; Move past separators
01
        LDRB    r2, [r1,#1]!
        CMP     r2, #32
        BLE     %bt01
; Read third number = r6
        SWI     XOS_ReadUnsigned
        Pull    "r1-r7,pc",VS
        MOV     r6, r2    
; Claim workspace
        ADD     r3, r4, r5
        ADD     r3, r3, r6
        MOV     r3, r3, LSL #2          ; one word per table entry
        ADD     r3, r3, #12             ; plus the three length words
        MOV     r0, #ModHandReason_Claim
        SWI     XOS_Module
        Pull    "r1-r7,pc",VS
; If okay then release old workspace and point at new workspace
        MOV     r1, r2
        LDR     r2, [r12, #Calibration_pending]
        STR     r1, [r12, #Calibration_pending]
        CMP     r2, #0
        BEQ     %ft99
        MOV     r0, #ModHandReason_Free
        SWI     XOS_Module
        Pull    "r1-r7,pc",VS
99                           
; Old workspace released, new workspace allocated and pointed to, so store the three lengths
        STMIA   r1, {r4,r5,r6}
        STR     r3, [r12, #Calibration_remaining]       ; offset just past the last entry
        Pull    "r1-r7,pc"               

; ColourTransMap_Code - *ColourTransMap <number> [<number> ...]
;
; Stores each number into the pending calibration block set up by
; *ColourTransMapSize, filling it from the top downwards
; (Calibration_remaining is the offset of the lowest entry written so far).
; When the offset reaches 12, i.e. only the three length words remain, the
; completed table is handed to SetCalibration_Code and the pending block is
; freed.
;
; In:   r0  -> command tail
;       r12 -> module private word
; Out:  r1-r7 preserved; V set and r0 -> error on failure
ColourTransMap_Code
        Push    "r1-r7,lr"
        BL      validate_workspace_ptr
        Pull    "r1-r7,pc",VS
        LDR     r4, [r12,#Calibration_pending]
        MOV     r1, r0
; For all the numbers specified in the command
01
; Check there is a number to read
        LDRB    r2, [r1]
        CMP     r2, #32   
        BLT     finished_star_command   ; control character: end of command tail
        ADDEQ   r1, r1, #1              ; skip spaces
        BEQ     %BT01
; Check we have room to store the number given
        LDR     r5, [r12, #Calibration_remaining]
        CMP     r5, #12
        BLE     bad_calibration_in_star
; Read the next number
        MOV     r0, #10
        SWI     XOS_ReadUnsigned
; A malformed number used to be ignored here: whatever was left in r2 got
; stored and the problem only surfaced later as a bad calibration table.
; Return the error from XOS_ReadUnsigned directly instead.
        Pull    "r1-r7,pc",VS
; Store the number
        SUB     r5, r5, #4
        STR     r5, [r12, #Calibration_remaining]
        STR     r2, [r4, r5]
; Loop until no more numbers left to read
        B       %BT01
; Bad calibration
bad_calibration_in_star
        ADR     r0, ErrorBlock_BadCalibrationTable
        SETV
        Pull    "r1-r7,PC"
; Finished the calibration star command
finished_star_command                  
        LDR     r5, [r12, #Calibration_remaining]
        CMP     r5, #12
        BNE     more_to_come               
        LDR     r0, [r12, #Calibration_pending]
        BL      SetCalibration_Code     ; takes its own copy of the table
        Pull    "r1-r7,pc",VS
        LDR     r2, [r12, #Calibration_pending]
        MOV     r0, #ModHandReason_Free
        SWI     XOS_Module
        MOV     r1, #0
        STR     r1, [r12, #Calibration_pending]
        Pull    "r1-r7,pc"
more_to_come
        CLRV
        Pull    "r1-r7,pc"
;*********************************************************
; WriteCalibrationToFile_Code - write the calibration to a file as a
; *ColourTransMapSize line followed by *ColourTransMap lines.
;
; The entries are written from the top of the table downwards, which is the
; order ColourTransMap_Code stores them back in.
;
; Out:  r1-r7 preserved; V set and r0 -> error on failure
WriteCalibrationToFile_Code
; r1 = file handle
; r0 bit 0 is clear if no saving to be done if default calibration
; r0 bit 0 set if saving is always to be done
        Push    "r1-r7,lr"
; Check for null calibration
        LDR     r4, [r12, #Calibration_ptr]
        CMP     r4, #0
        TSTEQ   r0, #1
        Pull    "r1-r7,pc",EQ           ; default calibration and bit 0 clear: write nothing
        CMP     r4, #0
        ADDR    r4, default_calibration, EQ
; Write out first string - *ColourTransMapSize
        MOV     r2, #19 ;       Length of *ColourTransMapSize
        ADR     r3, textual_store
        BL      output_string_r3
; Write out numbers
        LDRVC   r3, [r4]
        MOVVC   r5, r3
        BLVC    output_number_r3
        LDRVC   r3, [r4, #4]
        ADDVC   r5, r5, r3
        BLVC    output_number_r3
        LDRVC   r3, [r4, #8]
        ADDVC   r5, r5, r3
        BLVC    output_number_r3
        Pull    "r1-r7,pc",VS          
; R5 = length of the calibration table - 4
        MOV     r5, r5, LSL #2
        ADD     r5, r5, #8                                   
; Start a new line: r5 = offset of the next entry to write
01                          
        MOV     r0, #10
        SWI     XOS_BPut 
; Write out the *ColourTransMap command
        MOVVC   r2, #15                 ; first 15 characters only: "*ColourTransMap"
        ADRVC   r3, textual_store
        BLVC    output_string_r3
        Pull    "r1-r7,pc",VS
        MOV     r6, r5     
      [ Version >= 059
        SUB     r5, r5, #4*16 ; 16 items, not 4 as it used to
      |
        SUB     r5, r5, #16
      ]
; Now put up to 16 numbers in the file from r5 downwards, but not below 12
02
        CMP     r6, #12
        BLT     finished_outputting_calibration
        CMP     r6, r5
        BLE     %BT01                   ; this line is full: start the next one
; Write the number at r4+r6
        LDRVC   r3, [r4, r6]
        BLVC    output_number_r3
        Pull    "r1-r7,pc",VS
        SUB     r6, r6, #4
        B       %bt02
; Finished writing the calibration
finished_outputting_calibration
        MOV     r0, #10
        SWI     XOS_BPut
        Pull    "r1-r7,pc"
; Text for both commands: the shorter one is a prefix of the longer.
textual_store
        DCB     "*ColourTransMapSize",0
        ALIGN             
; Output a number
;
; output_number_r3 - write " &" followed by r3 in hex, without leading zeros,
; to the file.
;
; In:   r1  =  file handle
;       r3  =  number
;       r12 -> RMA main block (text_buffer12 is used for the conversion)
; Out:  r0, r2, r3 corrupted; V set and r0 -> error on failure
;
; Falls through into output_string_r3 to write the remaining digits.
output_number_r3
        Push    "r1-r2,lr"   
; Output ' &'
        MOV     r0, #32
        SWI     XOS_BPut
        MOVVC   r0, #"&"
        SWIVC   XOS_BPut
        Pull    "r1-r2,pc",VS
; Convert the number to 8 digit hex
        MOV     r0, r3
        ADD     r1, r12, #text_buffer12
        MOV     r2, #12
        SWI     XOS_ConvertHex8
        Pull    "r1-r2,lr"              ; frame gone: lr is the caller's return address again
; Remove leading zeros
        ADD     r3, r12, #text_buffer12
        MOV     r2, #8 
01      
        LDRB    r0, [r3]
        CMP     r0, #48                 ; ASCII "0"
        BNE     output_string_r3
        SUB     r2, r2, #1
        ADD     r3, r3, #1
; Don't remove last zero
        CMP     r2, #1
        BGT     %BT01
; Output a string
;
; output_string_r3 - write r2 bytes starting at r3 to the file.
;
; In:   r1 = file handle, r2 = byte count, r3 -> bytes
; Out:  r0, r2, r3 corrupted; returns with the caller's flags on success,
;       V set and r0 -> error on failure
output_string_r3          
        Push    "lr" 
00
        LDRB    r0, [r3], #1
        SWI     XOS_BPut
        Pull    "pc",VS
        SUBS    r2, r2, #1
        Pull    "pc",EQ,^
        B       %BT00

  ] ; Version >= 058
  [ Version >= 054
        MakeErrorBlock BadCalibrationTable
;*********************************************************
; Version 054 SWI's - calibration
;
; SetCalibration_Code - install a new screen calibration table.
;
; In:   r0  -> table: three length words, then that many entry words for each
;              of the three components in turn
;       r12 -> RMA main block
; Out:  r0-r7 preserved; or V set and r0 -> error
;
; The table is copied into a freshly claimed RMA block.  For each component
; there must be at least 2 entries, and the low byte of the entries must
; start at 0, never decrease, and end at &ff.  The old table is freed BEFORE
; the new one is validated, so a bad table leaves no calibration installed.
SetCalibration_Code
        Push    "r0-r7,lr"
; Invalidate the cache
        BL      InvalidateCache_Code
        MOV     r1, r0
; Free old calibration pointer
        LDR     r2, [r12, #Calibration_ptr]
        CMP     r2, #0
        MOVNE   r0, #ModHandReason_Free
        SWINE   XOS_Module
        ADDVS   sp, sp, #4              ; drop stacked r0 so the error pointer is returned
        Pull    "r1-r7,pc",VS
        MOV     r2, #0
        STR     r2, [r12, #Calibration_ptr]
;  Create new heap space for new calibration table
        LDMIA   r1!, {r4,r5,r6}
        CMP     r4, #2
        BLT     bad_calibration
        CMP     r5, #2
        BLT     bad_calibration
        CMP     r6, #2
        BLT     bad_calibration
        MOV     r3, #12
        ADD     r3, r3, r4, LSL #2
        ADD     r3, r3, r5, LSL #2
        ADD     r3, r3, r6, LSL #2
        MOV     r0, #ModHandReason_Claim
        SWI     XOS_Module
        ADDVS   sp, sp, #4
        Pull    "r1-r7,pc",VS
; Copy data          
        STR     r2, [r12, #Calibration_ptr]
        STMIA   r2!, {r4,r5,r6}
; Transfer first device colour data
        LDR     r3, [r1]
        TST     r3, #&ff                ; first entry must have low byte 0
        BNE     bad_calibration
        AND     r0, r3, #&ff            ; r0 = low byte of the previous entry
01
        LDR     r3, [r1], #4
        STR     r3, [r2], #4
        AND     r7, r3, #&ff
        CMP     r7, r0
        BLT     bad_calibration         ; low bytes must not decrease
        MOV     r0, r7
        SUBS    r4, r4, #1
        BNE     %BT01
        CMP     r0, #&ff                ; last entry must have low byte &ff
        BNE     bad_calibration
; Transfer second device colour data
        LDR     r3, [r1]
        TST     r3, #&ff
        BNE     bad_calibration
        AND     r0, r3, #&ff
01
        LDR     r3, [r1], #4
        STR     r3, [r2], #4
        AND     r7, r3, #&ff
        CMP     r7, r0
        BLT     bad_calibration
        MOV     r0, r7
        SUBS    r5, r5, #1
        BNE     %BT01
        CMP     r0, #&ff
        BNE     bad_calibration
; Transfer third device colour data
        LDR     r3, [r1]
        TST     r3, #&ff
        BNE     bad_calibration
        AND     r0, r3, #&ff
01
        LDR     r3, [r1], #4
        STR     r3, [r2], #4
        AND     r7, r3, #&ff
        CMP     r7, r0
        BLT     bad_calibration
        MOV     r0, r7
        SUBS    r6, r6, #1
        BNE     %BT01
        CMP     r0, #&ff
        BNE     bad_calibration
; Issue a service call
      [ Version >= 058
        MOV     R1, #Service_CalibrationChanged
        SWI     XOS_ServiceCall
        ADDVS   sp,sp,#4
        Pull    "r1-r7,PC",VS
      ]
; Everything transferred okay, so exit
        CLRV
        Pull    "r0-r7,PC"

; bad_calibration - the table failed validation: free the partly filled copy
; (if one was claimed), leave no calibration installed and return the
; BadCalibrationTable error.
bad_calibration
; Free old calibration pointer
        LDR     r2, [r12, #Calibration_ptr]
        CMP     r2, #0
        MOVNE   r0, #ModHandReason_Free
        SWINE   XOS_Module
        MOV     r2, #0
        STR     r2, [r12, #Calibration_ptr]
; Issue a service call
      [ Version >= 058
        MOV     R1, #Service_CalibrationChanged
        SWI     XOS_ServiceCall
      ]
; Return the error
        ADR     r0, ErrorBlock_BadCalibrationTable
        SETV
        ADD     sp, sp, #4              ; drop stacked r0
        Pull    "r1-r7,PC"
                            
; ReadCalibration_Code - read the current calibration table, or its size.
;
; In:   r0  =  0 to read the size only, else -> buffer to copy the table into
;       r12 -> RMA main block
; Out:  if r0 was 0:  r1 = size of the table in bytes (12 + 4 per entry)
;       otherwise:    table copied to the buffer, r1 preserved
;       r0, r2-r5 preserved, V clear
;
; default_calibration is used when no calibration has been set.
ReadCalibration_Code
        Push    "r0, r2, r3, r4, r5, lr"
        Push    "r1"
        LDR     r2, [r12, #Calibration_ptr]
        CMP     r2, #0
        ADDR    r2, default_calibration, EQ
        CMP     r0, #0                  ; flags stay live down to the BEQ below
        LDMIA   r2!, {r3,r4,r5}
        MOV     r1, #12
        ADD     r1, r1, r3, LSL #2 
        ADD     r1, r1, r4, LSL #2 
        ADD     r1, r1, r5, LSL #2 
        Pull    "r0", EQ                ; size query: discard stacked r1, keeping the size
        BEQ     read_calibration
        STMIA   r0!, {r3,r4,r5}
        SUB     r1, r1, #12             ; bytes of entries left to copy
01
        LDR     r3, [r2], #4
        STR     r3, [r0], #4
        SUBS    r1, r1, #4
        BNE     %BT01
        Pull    "r1"
read_calibration    
        CLRV
        Pull    "r0, r2, r3, r4, r5, PC"

; ConvertDevicePalette_Code - convert a table of device colours.
;
; In:   r0  =  number of colours to convert
;       r1  -> source table of colour words
;       r2  -> destination table
;       r3  -> calibration table to use, or 0 for the current calibration
;              (default_calibration if none has been set)
;       r12 -> RMA main block
; Out:  r0-r4 preserved
;
; A count of 0 now returns at once.  Previously the loop decremented r0
; before testing it, so a zero count wrapped round and converted 2^32 words.
ConvertDevicePalette_Code
        Push    "r0,r1,r2,r3,r4,lr"
        CMP     r0, #0                  ; empty palette: nothing to convert
        Pull    "r0,r1,r2,r3,r4,PC", EQ
        CMP     r3, #0
        LDREQ   r3, [r12, #Calibration_ptr]
        CMP     r3, #0
        ADREQL  r3, default_calibration
01
        LDR     r4, [r1], #4
        BL      convert_device_colour   ; converts r4 in place using the table at r3
        STR     r4, [r2], #4
        SUBS    r0, r0, #1
        BNE     %BT01
        Pull    "r0,r1,r2,r3,r4,PC"

; ConvertDeviceColour_Code - convert a single device colour.
;
; In:   r1  =  device colour
;       r3  -> calibration table to use, or 0 for the current calibration
;              (default_calibration if none has been set)
;       r12 -> RMA main block
; Out:  r2  =  converted colour; r0, r1, r3, r4 preserved
ConvertDeviceColour_Code
        Push    "r0,r1,r3,r4,lr"
        CMP     r3, #0
        LDREQ   r3, [r12, #Calibration_ptr]
        CMP     r3, #0
        ADREQL  r3, default_calibration
        MOV     r4, r1
        BL      convert_device_colour   ; converts r4 in place using the table at r3
        MOV     r2, r4
        Pull    "r0,r1,r3,r4,PC"
      [ Version < 059
; Before 0.59 the CIE SWIs exist in the SWI table but do nothing: both labels
; share a stub that returns immediately.
ConvertRGBToCIE_Code
ConvertCIEToRGB_Code
        Push    "lr"
        Pull    "PC"
      ]
  ] ; Version >= 054
;*********************************************************
  [ Version >= 059
; 3x3 conversion matrices, stored row by row (nine words each), as consumed by
; multiply_by_row.  The negative entries in the CIE->RGB matrix are the
; words beginning &FE/&FF.
; NOTE(review): multiply_by_row shifts the 64-bit sum right by 24 bits, so
; the entries presumably carry 24 fractional bits - confirm.
RGB_to_CIE_matrix
        DCD     &7A6BC1
        DCD     &4C6E67
        DCD     &2CB2AD
        DCD     &4372CE
        DCD     &A7B146
        DCD     &14DBEA
        DCD     &51181
        DCD     &29114A
        DCD     &E86DEE
CIE_to_RGB_matrix
        DCD     &2BD4872
        DCD     &FEDAF45B
        DCD     &FF936FD5
        DCD     &FEE18A27
        DCD     &2074BD3
        DCD     &87C2A
        DCD     &235269
        DCD     &FFAAA2FC
        DCD     &11AD439
        MakeErrorBlock OverflowInConversion
; ConvertRGBToCIE_Code / ConvertCIEToRGB_Code - multiply the vector (r0,r1,r2)
; by the corresponding 3x3 matrix.
;
; In:   r0, r1, r2 = input components
; Out:  r0, r1, r2 = results of rows 1, 2 and 3
;       r3-r6, r9-r11 preserved
;       or V set and r0 -> OverflowInConversion error
ConvertRGBToCIE_Code
        Push    "r3-r6,r9-r11,lr"
        ADDR     r3, RGB_to_CIE_matrix
        B       multiply_by_matrix
ConvertCIEToRGB_Code
        Push    "r3-r6,r9-r11,lr"
        ADDR     r3, CIE_to_RGB_matrix
multiply_by_matrix
; First split R0, R1, R2
; Each input becomes a signed top half (r4, r5, r6) and an unsigned bottom
; 16 bits (left in r0, r1, r2) for the 16x16 partial products.
        MOV     r4, r0, ASR #16
        BIC     r0, r0, r4, LSL#16
        MOV     r5, r1, ASR #16
        BIC     r1, r1, r5, LSL#16
        MOV     r6, r2, ASR #16
        BIC     r2, r2, r6, LSL#16
        CLRV   
; First row
        BL      multiply_by_row
        MOVVC   r10, r9
; Second row
        BLVC    multiply_by_row
        MOVVC   r11, r9
; Third row
        BLVC    multiply_by_row
        Pull    "r3-r6,r9-r11,pc",VS
; Return results
        MOV     r2, r9
        MOV     r0, r10
        MOV     r1, r11
        Pull    "r3-r6,r9-r11,pc"

; Multiply r0.r4 - r2.r6 by matrix row in r3. Returns r9 or error.
;
; In:   r0/r4, r1/r5, r2/r6 = low 16 bits / signed high part of the three inputs
;       r3  -> three matrix words for this row
; Out:  r3  advanced past the row
;       r9  =  bits 24..55 of the 64-bit sum of products
;       r7, r8, r10, r11 preserved
;       or V set and r0 -> OverflowInConversion error
;
; Each matrix word is split the same way as the inputs (r7 = low 16 bits,
; r8 = signed high part) and the product is built from four 16x16 partial
; products accumulated in r10:r9 (high:low).  MUL without S leaves the carry
; from the preceding ADDS intact for the following ADC.
multiply_by_row              
        Push    "r7-r8,r10-r11,lr"
; First entry for the row
        LDR     r7, [r3], #4
        MOV     r8, r7, ASR #16
        BIC     r7, r7, r8, LSL #16
        MUL     r9, r0, r7              ; low * low   -> low word
        MUL     r10, r4, r8             ; high * high -> high word
        MUL     r11, r0, r8             ; cross term, weight 2^16
        ADDS    r9, r9, r11, LSL#16
        ADC     r10, r10, r11, ASR#16
        MUL     r11, r4, r7             ; other cross term
        ADDS    r9, r9, r11, LSL#16
        ADC     r10, r10, r11, ASR#16
; Second entry for the row
        LDR     r7, [r3], #4
        MOV     r8, r7, ASR #16
        BIC     r7, r7, r8, LSL #16
        MUL     r11, r1, r7
        ADDS    r9, r9, r11
        MUL     r11, r5, r8
        ADC     r10, r10, r11
        MUL     r11, r1, r8
        ADDS    r9, r9, r11, LSL#16
        ADC     r10, r10, r11, ASR#16
        MUL     r11, r5, r7
        ADDS    r9, r9, r11, LSL#16
        ADC     r10, r10, r11, ASR#16
; Third entry for the row
        LDR     r7, [r3], #4
        MOV     r8, r7, ASR #16
        BIC     r7, r7, r8, LSL #16
        MUL     r11, r2, r7
        ADDS    r9, r9, r11
        MUL     r11, r6, r8
        ADC     r10, r10, r11
        MUL     r11, r2, r8
        ADDS    r9, r9, r11, LSL#16
        ADC     r10, r10, r11, ASR#16
        MUL     r11, r6, r7
        ADDS    r9, r9, r11, LSL#16
        ADC     r10, r10, r11, ASR#16
; Check for overflow
        MOV     r9, r9, LSR #24         ; result = (r10:r9) >> 24
        ORR     r9, r9, r10, LSL #8
        MOVS    r10, r10, ASR #23 ; Top 9 bits of r10 must be the same for no overflow
        CMPNE   r10, #-1
        Pull    "r7-r8,r10-r11,pc",EQ,^ ; in range: return with the caller's flags
; Overflow, so return an error
        ADDR    r0, ErrorBlock_OverflowInConversion
        SETV
        Pull    "r7-r8,r10-r11,pc"
 ] ; Version >= 059
;*********************************************************
; ReturnOppGCOL_Code - "opposite" GCOL for colour r0 in the current mode and
; palette (mode and palette both passed on as -1).
; Out:  r0 = GCOL; r1, r2 preserved (restored by commoncolourexit)
ReturnOppGCOL_Code
        Push    "r1,r2, lr"
        MOV     r1, #-1
        MOV     r2, #-1
        BL      ReturnOppGCOLForMode_Code
        B       commoncolourexit

; ReturnOppColourNumber_Code - "opposite" colour number for colour r0 in the
; current mode and palette (mode and palette both passed on as -1).
; Out:  r0 = colour number; r1, r2 preserved (restored by commoncolourexit)
ReturnOppColourNumber_Code
        Push    "r1,r2, lr"
        MOV     r1, #-1
        MOV     r2, #-1
        BL      ReturnOppColourNumberForMode_Code
        B       commoncolourexit

; ReturnOppGCOLForMode_Code - as ReturnOppColourNumberForMode_Code, with the
; result then converted to a GCOL by the shared exit at
; commoncolourexit_togcol.
; In:   r0 = colour, r1 = dest mode, r2 = dest palette
ReturnOppGCOLForMode_Code
        Push    "r1,r2, lr"
        BL      ReturnOppColourNumberForMode_Code
        B       commoncolourexit_togcol

; ReturnOppColourNumberForMode_Code - colour number chosen by worst_colour
; for colour r0 in the given mode and palette.
;
; In:   r0 = colour, r1 = dest mode, r2 = dest palette
; Out:  r0 = colour number, r1-r9 preserved
;       or V set and r0 -> error (via colourcommon_naffup)
;
; build_colours is entered with r0 = mode, r1 = palette, r2 = colour.  It
; leaves buffers on the stack, so the entry sp is kept in r8 and pushed on
; top of them, to be reloaded before the final Pull.
ReturnOppColourNumberForMode_Code
        Push    "r1-r9, lr"
        MOV     r8, sp
        MOV     r9, r0
        MOV     r0, r1
        MOV     r1, r2
        MOV     r2, r9
        BL      build_colours
        SUBVS   r8, r8, #4              ; colourcommon_naffup unstacks from r0, one word lower
        BVS     colourcommon_naffup
        Push    "r8"
        BL      worst_colour
        MOV     r0, r2
        LDR     sp, [sp]                ; discard stack buffers
        Pull   "r1-r9, pc",,^

;*********************************************************
; for all of these:
; r0 is colour
; r1 is dest mode
; r2 is dest palette

; for setgcol:
;  r3 is 0 or 128 for fore/background
;  r4 is GCOL action

; SetOppGCOL_Code - find the "opposite" GCOL for colour r0, then select it
; through the shared tail of SetGCOL_Code.  SetGCOL_Code_AltEntry pops the r12
; stacked here and uses the stacked lr as its return address.
SetOppGCOL_Code
        Push    "r12, lr"
        BL      ReturnOppGCOL_Code
        B       SetGCOL_Code_AltEntry

; SetGCOL_Code - select the GCOL closest to colour r0 in the current mode.
;
; In:   r0  =  colour
;       r3  =  0 or 128 for fore/background; from 0.58, bit 8 set selects the
;              ECF path (SetECF_Code) instead of a plain GCOL
;       r4  =  GCOL action
;       r12 -> RMA main block
; Out:  r0  =  GCOL
;       r2  =  cached Log2BPP of the current mode
;       r3  =  r3 AND &80 on the plain GCOL path
;       r10, r11 corrupted on the plain GCOL path
;       or V set and r0 -> error
SetGCOL_Code
        Push    "r12, lr"
     [ Version >= 058
        TST     r3, #&100
        BEQ     %ft99
     ]
     [ Version >= 053
        BL      SetECF_Code
      [ Version >= 058
        ; CachedL2BPP is a single byte at an unaligned offset.  The word load
        ; that used to be here pulled the neighbouring cache bytes into the
        ; upper bits of r2; a byte load matches SetGCOL_Code_AltEntry below.
        LDRB    r2, [r12, #CachedL2BPP]
      ]
        Pull    "r12, pc"
     |
        BL      ReturnGCOL_Code         ; drop through
     ]
      [ Version >= 058
99
        BL      ReturnGCOL_Code         ; drop through
      ]

; Shared tail, also entered from SetOppGCOL_Code with r0 = GCOL and
; "r12, lr" stacked.
SetGCOL_Code_AltEntry
        Pull    "r12"
        AND     r3, r3, #&80                            ;; ### corrupts R3!
        Push    "r0, r1"
        BVS     sgc_exit                ; the GCOL lookup failed: pass its error on
        LDRB    r2, [r12, #CachedL2BPP]         ;; ### corrupts R2!
        CMP     r2, #3                  ; EQ = 256-colour mode; tested again at the BNE below
        MOV     r10, r0, LSL #6         ; tint bits moved to the top of the byte
        MOVEQ   r11, r0, LSR #2         ; 256 colours: colour without its tint bits
        MOVNE   r11, r0
        SWI     XOS_WriteI+18           ; VDU 18, action, colour
        MOV     r0, r4
        SWIVC   XOS_WriteC
        ORRVC   r0, r3, r11
        SWIVC   XOS_WriteC
        BNE     sgc_errcheck            ; relies on Z surviving the SWIs above
        ; 256 colours: also set the tint with VDU 23,17,2|3,tint,0,0,0,0,0,0
        ADRVC   r0, stringybits
        SWIVC   XOS_Write0
        MOVVC   r0, #2
        ADDVC   r0, r0, r3, LSR #7      ; 2 = foreground tint, 3 = background tint
        SWIVC   XOS_WriteC
        MOVVC   r0, r10
        SWIVC   XOS_WriteC
        ADRVC   r0, morestringybits
        MOV     r1, #6
        SWIVC   XOS_WriteN
sgc_errcheck
        STRVS   r0, [sp]                ; return the error pointer instead of the GCOL
sgc_exit
        Pull    "r0, r1, lr"
        ORRVS   lr, lr, #V_bit
        MOVS    pc, lr

; VDU 23,17 prefix for XOS_Write0, which stops at the first zero byte of
; morestringybits; the six zeros are also sent with XOS_WriteN to pad the
; VDU 23 sequence.
stringybits
        DCB     23,17
morestringybits
        DCB     0,0,0,0,0,0

; ReturnGCOL_Code - closest GCOL to colour r0 in the current mode, using the
; colour cache.
;
; In:   r0  =  colour
;       r12 -> RMA main block
; Out:  r0  =  GCOL; r1, r2 preserved; or V set and r0 -> error
;
; NOTE(review): TryCache and WriteCacheEntry are outside this view.  From the
; code here, TryCache returns EQ with r1 -> cache entry on a hit, and
; WriteCacheEntry is called with r0 = colour number, r1 = GCOL, r2 = colour -
; confirm against their definitions.
ReturnGCOL_Code
        Push    "r1, r2, lr"
        BL      TryCache
        LDREQB  r0, [r1, #CachedGCOL]
        Pull    "r1, r2, pc", EQ
        Push    "r0, r12"

        MOV     r1, #-1
        MOV     r2, #-1
        BL      ReturnGCOLForMode_Code
        Pull    "r2, r12"               ; r2 = the colour asked for
        BVS     commoncolourexit
        LDRB    r1, [r12, #CachedL2BPP]
        CMP     r1, #3
        MOV     r1, r0                  ; r1 = GCOL
        BLEQ    GCOLToColourNumber_Code ; 256 colours: r0 becomes the colour number
        BL      WriteCacheEntry
        MOV     r0, r1
      [ Debug
        DREG    r0, "Picked closest GCOL "
      ]
        B       commoncolourexit

; ReturnColourNumber_Code - closest colour number to colour r0 in the current
; mode, using the colour cache.
;
; In:   r0  =  colour
;       r12 -> RMA main block
; Out:  r0  =  colour number; r1, r2 preserved; r10 corrupted on a cache miss
;       or V set and r0 -> error
;
; NOTE(review): see ReturnGCOL_Code for the assumed TryCache/WriteCacheEntry
; interfaces.
ReturnColourNumber_Code
        Push    "r1,r2,lr"
        BL      TryCache
        LDREQB  r0, [r1, #CachedColour]
        Pull    "r1, r2, pc", EQ

        Push    "r0, r12"
        MOV     r1, #-1
        MOV     r2, #-1
        BL      ReturnColourNumberForMode_Code
        Pull    "r2, r12"               ; r2 = the colour asked for
        BVS     commoncolourexit
        LDRB    r1, [r12, #CachedL2BPP]
        CMP     r1, #3
        MOV     r1, r0                  ; r1 = colour number
        BLEQ    ColourNumberToGCOL_Code ; 256 colours: r0 becomes the GCOL
        MOV     r10, r0                 ; swap r0 and r1 through r10 ...
        MOV     r0, r1
        MOV     r1, r10                 ; ... so r0 = colour number, r1 = GCOL
        BL      WriteCacheEntry
        B       commoncolourexit

; ReturnGCOLForMode_Code - closest GCOL to colour r0 for a given mode/palette.
;
; In:   r0 = colour, r1 = dest mode, r2 = dest palette
; Out:  r0 = GCOL; r1, r2 preserved; or V set and r0 -> error
ReturnGCOLForMode_Code
        Push    "r1,r2,lr"
        BL      ReturnColourNumberForMode_Code

; Shared exits for routines that stacked "r1,r2,lr".
; commoncolourexit_togcol first converts the colour number in r0 to a GCOL
; (only if no error is pending); commoncolourexit restores r1, r2 and returns
; with V reflecting the error state.
commoncolourexit_togcol
        BLVC    ColourNumberToGCOL_Code_testing
commoncolourexit
        Pull    "r1,r2,lr"
        ORRVS   lr, lr, #V_bit
        MOVS    pc, lr

; ReturnColourNumberForMode_Code - colour number chosen by best_colour for
; colour r0 in the given mode and palette.
;
; In:   r0 = colour, r1 = dest mode, r2 = dest palette
; Out:  r0 = colour number, r1-r9 preserved
;       or V set and r0 -> error (via colourcommon_naffup)
;
; Same stack arrangement as ReturnOppColourNumberForMode_Code: the entry sp is
; kept in r8 and pushed above the buffers that build_colours leaves on the
; stack.
ReturnColourNumberForMode_Code
        Push    "r1-r9, lr"
        MOV     r8, sp
        MOV     r9, r0
        MOV     r0, r1
        MOV     r1, r2
        MOV     r2, r9
        BL      build_colours
        SUBVS   r8, r8, #4              ; colourcommon_naffup unstacks from r0, one word lower
        BVS     colourcommon_naffup
        Push    "r8"
        BL      best_colour
        MOV     r0, r2
        LDR     sp, [sp]                ; discard stack buffers
        Pull    "r1-r9, pc",,^

;**********************************************************
; SelectTable_Code
;   source mode r0
;   source pal  r1
;   dest mode   r2
;   dest pal    r3
;   buffer      r4
;
; For every colour of the source palette, finds the closest destination
; colour with best_colour and stores its index as one byte at r4 upwards.
; r0-r9 are preserved; r10-r12 are corrupted by the helpers.
; On error returns V set with r0 = error pointer.
;
; colourcommon_naffup is also entered from ReturnColourNumberForMode_Code;
; it expects r8 to point at an "r0-r9, lr" shaped frame.

SelectTable_Code
        Push    "r0-r9, lr"
        MOV     r8, sp                  ; frame pointer for unwinding stack buffers
        TEQP    pc, #SVC_mode           ; rewrite the PSR (NOTE(review): presumably SVC mode
                                        ; with interrupts enabled - confirm SVC_mode value)

      [ Debug
        DREG    r0, "SelectTable_Code ",cc
        DREG    r1, ", ",cc
        DREG    r2, ", ",cc
        DREG    r3, ", "
      ]

        BL      build_colours           ; source colours -> r0,r1 table pointers
        MOV     r5, r1                  ; r5 = end of source table
        MOV     r6, r0                  ; r6 = start of source table

        MOVVC   r0, r2                  ; dest mode (on error r0 keeps the error pointer)
        MOV     r1, r3                  ; dest palette
        MOV     r3, r6                  ; r3 = source table read pointer
        BLVC    build_colours           ; dest colours

colourcommon_naffup
        MOVVS   sp, r8                  ; error: drop any stack buffers
        STRVS   r0, [sp]                ; error pointer goes into the r0 slot
        Pull    "r0-r9, lr", VS
        ORRVSS  pc, lr, #V_bit          ; return with V set

        Push    "r8"                    ; saved frame pointer for the LDR sp below
table_build_loop
        LDR     r2, [r3], #4            ; next source colour

      [ Debug
        DREG r2, "colour ",cc
      ]
        BL      best_colour             ; r2 := index of closest dest colour
      [ Debug
        DREG    r2, " -> ",,Byte
      ]
        STRB    r2, [r4], #1            ; one table byte per source colour
        CMP     r3, r5
        BLT     table_build_loop

        LDR     sp, [sp]                ; discard stack buffers
        Pull    "r0-r9, pc",,^          ; Exit VClear

;**********************************************************
; SelectGCOLTable_Code
;   Register interface as for SelectTable_Code:
;   source mode r0, source pal r1, dest mode r2, dest pal r3, buffer r4
;
; Builds the translation table with SelectTable_Code and then, if the
; destination mode has Log2BPP = 3, converts the table bytes in place from
; colour numbers to GCOLs.
; r0-r2 are preserved; r10, r11 are corrupted (ColourNumberToGCOL_Code).
; On error returns V set with r0 = error pointer.

SelectGCOLTable_Code
        Push    "r0-r2, lr"
        BL      SelectTable_Code

sgtc_boom
        STRVS   r0, [sp]             ; error pointer replaces the stacked r0
        Pull    "r0-r2, lr",VS
        ORRVSS  pc, lr, #V_bit       ; error: return right here. The frame has
                                     ; just been popped, so falling into the
                                     ; code below would pop it a second time

        MOV     r0, r2               ; dest mode
        MOV     r1, #VduExt_Log2BPP
        SWI     XOS_ReadModeVariable ; can't be bad mode: prev succeeded
        CMP     r2, #3               ; 256 colours?
        Pull    "r0-r2, pc", NE, ^   ; no: table already holds the right values
        MOV     r1, #255
; NOTE(review): the loop below always converts 256 bytes at r4, whereas
; SelectTable_Code writes one byte per source colour. Confirm that callers
; with fewer source colours still supply a 256 byte buffer.
translate_table
        LDRB    r0, [r4, r1]
        BL      ColourNumberToGCOL_Code ; returns with the caller's flags restored
        STRB    r0, [r4, r1]
        SUBS    r1, r1, #1
        BPL     translate_table
        Pull    "r0-r2, pc",, ^

; Conversions between colour number and GCOL.
;
; GCOLToColourNumber_Code          r0 = GCOL          -> r0 = colour number
; ColourNumberToGCOL_Code          r0 = colour number -> r0 = GCOL
; ColourNumberToGCOL_Code_testing  r0 = colour number, r1 = mode
;                                  converts only if the mode has Log2BPP = 3,
;                                  otherwise returns r0 unchanged
;
; The two unconditional converters share transskip: the top six bits of r0
; (r0 LSR #2) index a 64 byte table, the bottom two bits are kept as they are,
; and the result is table[r0 LSR #2] << 2 | (r0 AND 3).
; They corrupt r10 and r11 and return with MOVS pc, lr, i.e. with the flags
; held in lr restored. Values of r0 above 255 would index past the tables.

GCOLToColourNumber_Code
        ADR     r10, gtocntable
        B       transskip

ColourNumberToGCOL_Code_testing
        Push    "r0-r2, lr"
        MOV     r0, r1                  ; mode
        MOV     r1, #VduExt_Log2BPP
        SWI     XOS_ReadModeVariable
        CMP     r2, #3                  ; Log2BPP = 3?
        Pull    "r0-r2, lr"
        MOVNES  pc, lr                  ; no: nothing to convert
                                        ; yes: fall through into the converter

ColourNumberToGCOL_Code
        ADR     r10, cntogtable
transskip
        LDRB    r11, [r10, r0, LSR #2]  ; translate the top six bits
        AND     r0, r0, #3              ; keep the bottom two bits
        ORR     r0, r0, r11, LSL #2
        MOVS    pc, lr

; colour number (top six bits) -> GCOL (top six bits)
cntogtable
        DCB     &0,&1,&10,&11,&2,&3,&12,&13,&4,&5,&14,&15
        DCB     &6,&7,&16,&17,&8,&9,&18,&19,&A,&B,&1A
        DCB     &1B,&C,&D,&1C,&1D,&E,&F,&1E,&1F,&20,&21
        DCB     &30,&31,&22,&23,&32,&33,&24,&25,&34,&35
        DCB     &26,&27,&36,&37,&28,&29,&38,&39,&2A,&2B
        DCB     &3A,&3B,&2C,&2D,&3C,&3D,&2E,&2F,&3E,&3F

; GCOL (top six bits) -> colour number (top six bits)
gtocntable
        DCB     &0,&1,&4,&5,&8,&9,&C,&D,&10,&11,&14,&15
        DCB     &18,&19,&1C,&1D,&2,&3,&6,&7,&A,&B,&E,&F
        DCB     &12,&13,&16,&17,&1A,&1B,&1E,&1F,&20,&21
        DCB     &24,&25,&28,&29,&2C,&2D,&30,&31,&34,&35
        DCB     &38,&39,&3C,&3D,&22,&23,&26,&27,&2A,&2B
        DCB     &2E,&2F,&32,&33,&36,&37,&3A,&3B,&3E,&3F

;***********************************************************
; best_colour:
;  r0 -> colour table
;  r1 -> table end
;  r2 source colour
; return r2 colour index
; corrupts r6-r12
;
; Colours are words of the form &BBGGRRxx. The distance used is
;   2*(red diff)^2 + 3*(green diff)^2 + (blue diff)^2
; and the entry with the smallest distance wins; on a tie the earliest
; entry is kept (the update uses a strict unsigned "lower").
; The loop body runs before the end test, so the table must hold at least
; one entry. r0, r1 and r5 are preserved.

best_colour
        Push    "r5, lr"
        MOV     r12, #&FF
        MOV     r11, #&FFFFFFFF        ; best distance so far
        AND     r10, r12, r2, LSR #24   ; source blue
        AND     r9, r12, r2, LSR #16   ; source green
        AND     r8, r12, r2, LSR #8   ; source red
        MOV     r7, r0                 ; r7 = table read pointer

best_colour_loop
        LDR     r6, [r7], #4           ; trial dest
        AND     lr, r12, r6, LSR #16
        SUBS    lr, lr, r9             ; green dist
        RSBMI   lr, lr, #0             ; ensure +ve for faster mul
        ADD     r5, lr, lr, LSL #1     ; *3 (greenweight)
        MUL     r5, lr, r5             ; r5 = 3*green^2

        AND     lr, r12, r6, LSR #24
        SUBS    lr, lr, r10            ; blue dist
        RSBMI   lr, lr, #0
        MLA     r5, lr, lr, r5         ; r5 += blue^2

        AND     r6, r12, r6, LSR #8
        SUBS    r6, r6, r8             ; red dist
        RSBMI   r6, r6, #0
        MOV     lr, r6, LSL #1         ; *2 (redweight)
        MLA     r5, lr, r6, r5         ; r5 += 2*red^2

        CMP     r5, r11
        MOVLO   r11, r5                ; new best distance
        SUBLO   r2, r7, #4             ; r2 -> the entry just tested
        CMP     r7, r1
        BLT     best_colour_loop

        SUB     r2, r2, r0
        MOV     r2, r2, LSR #2         ; convert to colour number
        Pull    "r5, pc"

;***********************************************************
; worst_colour:
;  r0 -> colour table
;  r1 -> table end
;  r2 source colour
; return r2 colour index
; corrupts r6-r12
;
; Mirror image of best_colour: same distance measure
;   2*(red diff)^2 + 3*(green diff)^2 + (blue diff)^2
; but the entry with the largest distance wins, and on a tie the latest
; entry is kept (the update uses "higher or same"). The differences are not
; made positive first; the products are the same either way.
; r0, r1 and r5 are preserved.

worst_colour
        Push    "r5, lr"
        MOV     r12, #&FF
        MOV     r11, #0                ; worst distance so far
        AND     r10, r12, r2, LSR #24   ; source blue
        AND     r9, r12, r2, LSR #16   ; source green
        AND     r8, r12, r2, LSR #8   ; source red
        MOV     r7, r0                 ; r7 = table read pointer

worst_colour_loop
        LDR     r6, [r7], #4           ; trial dest
        AND     lr, r12, r6, LSR #16
        SUB     lr, lr, r9             ; green dist
        ADD     r5, lr, lr, LSL #1     ; *3 (greenweight)
        MUL     r5, lr, r5             ; r5 = 3*green^2

        AND     lr, r12, r6, LSR #24
        SUB     lr, lr, r10            ; blue dist
        MLA     r5, lr, lr, r5         ; r5 += blue^2

        AND     r6, r12, r6, LSR #8
        SUB     r6, r6, r8             ; red dist
        MOV     lr, r6, LSL #1         ; *2 (redweight)
        MLA     r5, lr, r6, r5         ; r5 += 2*red^2

        CMP     r5, r11
        MOVHS   r11, r5                ; ensure a colour gets selected!
        SUBHS   r2, r7, #4             ; r2 -> the entry just tested
        CMP     r7, r1
        BLT     worst_colour_loop

        SUB     r2, r2, r0
        MOV     r2, r2, LSR #2         ; convert to colour number
        Pull    "r5, pc"

;*************************************************
; build_colours: r0 mode number
;                r1 palette
;    return      r0, r1 as limits of a buffer on the stack
;    corrupts    r10-r12
;
; r1 on entry selects the palette:
;     0       default palette for the mode's Log2BPP (from defpals)
;    -1       current palette, read entry by entry with my_read_palette
;    other    pointer to a palette
; A supplied palette is used in place (r0, r1 = its limits, sp untouched)
; unless the table is 256 entries long, in which case a buffer is built.
; In every other case the table is built in a buffer carved off the stack,
; so sp is LOWER on return; callers keep their own copy of sp to unwind.
; r2 and r3 are handed back as they were on entry, except on the BadMODE
; return. Errors (BadMODE, CDATStackOverflow, palette read failure) return
; with V set and r0 = error pointer.
; The Version >= 054 code reads [r12, #Calibration_ptr] (directly or through
; my_read_palette), so r12 has to point at the workspace.

build_colours
        MOV     r10, lr                 ; return address lives in r10 throughout
        MOV     r11, r1                 ; r11 = palette argument
      [ Version >= 054
        Push    "r2"
      |
        MOV     r12, r2
      ]
        MOV     r1, #VduExt_Log2BPP
        SWI     XOS_ReadModeVariable    ; r2 := Log2BPP of the mode
      [ Version >= 054
        Pull    "lr"                    ; lr = caller's r2
      ]
        ADRCSL  r0, ErrorBlock_BadMODE
        SETV    CS
        MOVVS   pc, r10            ; error: sod register restoration

        CMP     r11, #0            ; default palette?
        ADREQ   r11, defpals
        LDREQ   r1, [r11, r2, LSL #2]   ; offset of the table for this Log2BPP
        ADDEQ   r11, r11, r1

        MOV     r1, #1
        MOV     r1, r1, LSL r2          ; r1 = 1 << Log2BPP
        MOV     r0, #4
        MOV     r2, r0, LSL r1    ; table size in bytes (4 per colour)
    [ Version >= 056
        MOVEQ   r1, #0            ; r1=0 => default palette for mode, r1<>0 => non-default palette
        BEQ     bc_skip           ; (flags still from CMP r11, #0 above)
    ]
        CMP     r11, #-1                ; current palette ...
        CMPNE   r2, #256*4              ; ... or a 256 entry table?
        BEQ     bc_skip                 ; either way a buffer has to be built

        MOV     r0, r11                 ; use the supplied palette where it is
        ADD     r1, r11, r2
      [ Version >= 054
        MOV     r2, lr
      |
        MOV     r2, r12
      ]
        MOV     pc, r10           ; all done

bc_skip
        MOV     r0, #32*1024
        SUB     r0, r0, #1
        AND     r0, r0, sp              ; sp modulo 32K ...
        SUB     r0, r0, #256            ; ... less a 256 byte margin
        CMP     r0, r2                  ; must cover the table size
        BLT     bc_stackoverflow

      [ Version >= 056
        TEQ     r1, #0                  ; EQ => default palette (tested by the BEQ below)
      ]
        MOV     r1, sp                  ; r1 = end of buffer
        SUB     sp, sp, r2
        MOV     r0, sp           ; stack buffer set up
      [ Version >= 056
        BEQ     calibrate_default_palette
      ]
        CMP     r11, #-1         ; current palette?
        BEQ     just_read_into_stack

; r11 is palette pointer in 256 colours: fill buffer
; Entry n is palette[n AND 15] masked with &70307000, ORed with
; hardmode_hardbits[n LSR #4].
      [ Version >= 054
        MOV     r2, lr
      |
        MOV     r2, r12
      ]
      [ Version >= 056
        Push    "r2,r3,r8,r10"
        ADR     r3, hardmode_hardbits
      |
        Push    "r2, r8, r10"
        ADR     r12, hardmode_hardbits
      ]
        LDR     r10, =&70307000   ;  mask for palette
        MOV     lr, #255                ; lr = entry number, counting down
fill256table
        AND     r2, lr, #15
        LDR     r2, [r11, r2, LSL #2]
        AND     r2, r2, r10
        MOV     r8, lr, LSR #4
      [ Version >= 056
        LDR     r8, [r3, r8, LSL #2]
      |
        LDR     r8, [r12, r8, LSL #2]
      ]
        ORR     r8, r8, r2
        STR     r8, [r0, lr, LSL #2]
        SUBS    lr, lr, #1
        BPL     fill256table
      [ Version >=056
        Pull    "r2,r3,r8,pc"           ; stacked r10 (return address) goes to pc
      |
        Pull    "r2, r8, pc"
      ]
                         
      [ Version >= 056
; Default palette: build each entry as above (the mask and hard bits are only
; applied when the table has 256 entries) and pass it through
; convert_device_colour with the current or default calibration table.
calibrate_default_palette
        Swap    r2, lr                  ; r2 = caller's r2, lr = table size in bytes
        Push    "r2, r3, r4, r5, r6, r8, r10"
        LDR     r3, [r12, #Calibration_ptr]
        CMP     r3, #0
        ADDR    r3, default_calibration, EQ
        MOV     r5, lr, LSR #2          ; r5 = number of entries
        SUB     r6, r5, #1              ; r6 = entry number, counting down
        ADR     r2, hardmode_hardbits
        LDR     r10, =&70307000   ;  mask for palette
01
        CMP     r5, #256                ; EQ => 256 entry table
        AND     r4, r6, #15
        LDR     r4, [r11, r4, LSL #2]
        ANDEQ   r4, r4, r10
        MOVEQ   r8, r6, LSR #4
        LDREQ   r8, [r2, r8, LSL #2]
        ORREQ   r4, r8, r4
        BL      convert_device_colour   ; r4 := calibrated colour
        STR     r4, [r0, r6, LSL #2]
        SUBS    r6, r6, #1
        BPL     %BT01
        Pull    "r2, r3, r4, r5, r6, r8, pc" ; stacked r10 (return address) goes to pc
      ]

; Current palette: read every entry with my_read_palette.
just_read_into_stack
        Push    "r3, r10"         ; r3, lr
      [ Version >= 054
        Push    "lr"              ; r2 really
      |
        Push    "r12"             ; r2 really
      ]

        Push    "r0, r1"                ; stack now reads r0, r1, r2, r3, return address
        MOV     r10, r0                 ; r10 -> buffer
        MOV     r0, r2, LSR #2          ; r0 = number of entries
        MOV     r1, #16                 ; "normal colour" for my_read_palette
jris_loop
        SUBS    r0, r0, #1
        Pull    "r0-r3, pc", MI         ; all entries read: return buffer limits
        BL      my_read_palette
        STRVC   r2, [r10, r0, LSL #2]
        BVC     jris_loop
        STR     r0, [sp]                ; error: hand back r0 from the failed read
      [ Version >= 056
        Pull    "r0-r3, pc",
      |
        Pull    "r0-r3, pc",,^
      ]

bc_stackoverflow
        ADR     r0, ErrorBlock_CDATStackOverflow
        ORRS    pc, r10, #V_bit         ; return with V set
        MakeErrorBlock CDATStackOverflow

; Default palettes used by build_colours when it is given palette = 0.
; defpals holds one offset per Log2BPP value (0..3), relative to defpals
; itself. Palette words have the form &BBGGRR00.
defpals
        DCD     modetwo         - defpals
        DCD     modefour        - defpals
        DCD     modesixteen     - defpals
        DCD     modetwofivesix  - defpals

; Log2BPP = 0: two entries
modetwo
        DCD     &0              ;  black
        DCD     &FFFFFF00       ;  white

; Log2BPP = 1: four entries
modefour
        DCD     &0              ;  black
        DCD     &FF00           ;  red
        DCD     &FFFF00         ;  yellow
        DCD     &FFFFFF00       ;  white

; Log2BPP = 2: sixteen entries
modesixteen             ;  actual colours
        DCD     &0              ;  black
        DCD     &FF00           ;  red
        DCD     &FF0000         ;  green
        DCD     &FFFF00         ;  yellow
        DCD     &FF000000       ;  blue
        DCD     &FF00FF00       ;  magenta
        DCD     &FFFF0000       ;  cyan
        DCD     &FFFFFF00       ;  white
        DCD     &0              ;  black - flashing
        DCD     &FF00           ;  red
        DCD     &FF0000         ;  green
        DCD     &FFFF00         ;  yellow
        DCD     &FF000000       ;  blue
        DCD     &FF00FF00       ;  magenta
        DCD     &FFFF0000       ;  cyan
        DCD     &FFFFFF00       ;  white

; Log2BPP = 3: sixteen base entries, selected by the bottom nibble of the
; colour number. build_colours masks them with &70307000 and ORs in the
; hardmode_hardbits word chosen by the top nibble.
modetwofivesix
        DCD     &0              ;  0000
        DCD     &10101000       ;  0001
        DCD     &20202000       ;  0010
        DCD     &30303000       ;  0011
        DCD     &00004000       ;  0100
        DCD     &10105000       ;  0101
        DCD     &20206000       ;  0110
        DCD     &30307000       ;  0111
        DCD     &40000000       ;  1000
        DCD     &50101000       ;  1001
        DCD     &60202000       ;  1010
        DCD     &70303000       ;  1011
        DCD     &40004000       ;  1100
        DCD     &50105000       ;  1101
        DCD     &60206000       ;  1110
        DCD     &70307000       ;  1111

; The words below only ever set bit 7 of red (&00008000), bits 6-7 of green
; (&00400000, &00800000) and bit 7 of blue (&80000000).
hardmode_hardbits       ;  translation of top nibble of byte to RGB bits
        DCD     &0              ;  0000
        DCD     &00008000       ;  0001
        DCD     &00400000       ;  0010
        DCD     &00408000       ;  0011
        DCD     &00800000       ;  0100
        DCD     &00808000       ;  0101
        DCD     &00C00000       ;  0110
        DCD     &00C08000       ;  0111
        DCD     &80000000       ;  1000
        DCD     &80008000       ;  1001
        DCD     &80400000       ;  1010
        DCD     &80408000       ;  1011
        DCD     &80800000       ;  1100
        DCD     &80808000       ;  1101
        DCD     &80C00000       ;  1110
        DCD     &80C08000       ;  1111

;**********************************
; Cache Handling
;**********************************

; TryCache: probe the colour cache.
;  In:  r0 = colour (&BBGGRRxx), r12 -> base of the cache entries
;  Out: r1 -> the cache entry this colour hashes to
;       EQ if its first word equals r0 LSR #8 (hit), NE otherwise (miss)
;       r10 corrupted (first word of the entry)
;
; The entry index is the exclusive-OR of the colour shifted right by 29, 19
; and 9 bits, reduced modulo CacheEntries. WriteCacheEntry uses the same hash.

TryCache
        MOV     r1, r0, LSR #29        ; top 3 bits of blue
        EOR     r1, r1, r0, LSR #19    ; top 5 bits of green, blue above them
        EOR     r1, r1, r0, LSR #9     ; top 7 bits of red, green and blue above them
        AND     r1, r1, #CacheEntries-1

      [ Version >= 053
        ASSERT  CacheEntrySize = 16
        ADD     r1, r12, r1, LSL #4    ; r1 -> entry
      |
        ASSERT  CacheEntrySize = 8
        ADD     r1, r12, r1, LSL #3    ; r1 -> entry
      ]

        LDR     r10, [r1]              ; colour held by the entry
        CMP     r10, r0, LSR #8        ; same colour => EQ

        MOV     pc, lr

; WriteCacheEntry: store a result in the colour cache.
;  In:  r2 = RGB colour, r0 = colour number, r1 = GCOL, r12 -> workspace
;  Out: r10 -> the cache entry that was written, r11 corrupted
;       r0-r2 are left untouched
; The entry is selected with the same hash as TryCache. For Version >= 053
; the CachedRatios word is set to -1 (SetECF_Code treats -1 there as
; "no ecf stored").

WriteCacheEntry
      [ Debug
        DREG    r2, "Write cache entry for ",cc
        DREG    r0, " with ",cc
        DREG    r1, " and "
      ]
        STRB    pc, [r12, #CacheEmpty]   ; non-zero

        MOV     r10, r2, LSR #29         ; top 3 bits of blue
        EOR     r10, r10, r2, LSR #19    ; top 5 bits of green, blue above them
        EOR     r10, r10, r2, LSR #9     ; top 7 bits of red, green and blue above them
        AND     r10, r10, #CacheEntries-1

      [ Version >= 053
        ASSERT  CacheEntrySize = 16
        ADD     r10, r12, r10, LSL #4    ; r10 -> entry
      |
        ASSERT  CacheEntrySize = 8
        ADD     r10, r12, r10, LSL #3    ; r10 -> entry
      ]
        MOV     r11, r2, LSR #8
        STR     r11, [r10]               ; first word: the colour TryCache compares with
        STRB    r1, [r10, #CachedGCOL]
        STRB    r0, [r10, #CachedColour]
      [ Version >= 053
        MVN     r11, #0                  ; r11 = -1
        STR     r11, [r10, #CachedRatios]
      ]

        MOV     pc, lr

;************************************************************************
; ColourTrans_SetFontColours
;
;Input:  r0 = font handle
;        r1 = background palette entry
;        r2 = foreground palette entry
;        r3 = max offset
;
;Output: r0-r3 as passed to Font_SetFontColour
;        (r0 is reloaded from the stack on exit, so it comes back as passed
;        in unless an error pointer was stored over it)
;        On error V is set and r0 = error pointer
;************************************************************************

SetFontColours_Code  ROUT
        Push    "r0, lr"
        BL      ReturnFontColours_Code  ; work out the colours to use
        SWIVC   XFont_SetFontColours    ; and set them, unless that failed
        STRVS   r0, [sp]                ; error pointer replaces the stacked r0
        Pull    "r0, lr"
        ORRVS   lr, lr, #V_bit          ; pass an error on in the restored PSR
        MOVS    pc, lr

;************************************************************************
; ColourTrans_ReturnFontColours
;
;Input:  r0 = font handle
;        r1 = background palette entry
;        r2 = foreground palette entry
;        r3 = max offset
;        r12 -> workspace (CachedL2BPP, PseudoPaletteEntry, colour cache)
;
;Output: r0-r3 as passable to Font_SetFontColour
;
; Two separate strategies:
;  * cached log2bpp = 3: one of 16 pseudo-palette entries (used in rotation)
;    is programmed with Font_SetPalette and returned as the foreground.
;  * otherwise (sfc_fullchoice): the closest colour numbers for background
;    and foreground are found (cache or best_colour), then the largest
;    offset not above the one asked for is searched for such that the
;    colours next to the foreground are close enough to an even ramp
;    towards the background.
; The sfc_fullchoice path corrupts r10-r12.
;************************************************************************

ReturnFontColours_Code  ROUT

        LDRB    r11, [r12, #CachedL2BPP]
      [ Debug
        DREG    r11, "====> ColourTrans cached log2bpp = "
      ]
        CMP     r11, #3
        BNE     sfc_fullchoice
        Push    "r4-r5, lr"
        LDR     lr, =&F0F0F00           ; avoid fontmanager overflows (bugs)
        BIC     r4, r1, lr              ; background
        BIC     r5, r2, lr              ; foreground
        LDRB    r2, [r12, #PseudoPaletteEntry]
        AND     r2, r2, #15             ; r2 = pseudo-palette entry to use this time
        ADD     lr, r2, #1              ; cycle through the entries
        STRB    lr, [r12, #PseudoPaletteEntry]

        Push    "r0-r3"                 ; stacked r2 is the pseudo-palette entry

 ; now dick about because SetPalette also sets the current colour

        SWI     XFont_CurrentFont       ; r1-r3 := current font colours
        Push    "r1-r3"                 ; kept so they can be put back below
        ADD     lr, sp, #4*4            ; -> r1 slot of the "r0-r3" frame above
        LDMIA   lr, {r1-r3}
        SWI     XFont_SetPalette        ; r1, r0 unused
        Pull    "r1-r3"                 ; the colours read by Font_CurrentFont
        MOVVC   r0, #0
        SWIVC   XFont_SetFontColours    ; put the previous colours back
        STRVS   r0, [sp]                ; error pointer replaces the stacked r0
        Pull    "r0-r5, lr"
      [ Debug
        DREG    r0,"256:Fonthan ",cc
        DREG    r1, " background ",cc
        DREG    r2, " foreground ",cc
        DREG    r3, " offset "
      ]
        ORRVS   lr, lr, #V_bit
        MOVS    pc, lr

sfc_fullchoice
        Push    "r0-r2, r4-r9, lr"

        MOV     r8, sp                  ; r8 -> frame: [r8] = r0, [r8,#4] = r1, [r8,#8] = r2
                                        ; (also the end of the palette, once it is stacked)

        MOV     r0, r1                  ; background RGB
        BL      TryCache
        LDREQB  r1, [r1, #CachedColour]
        BEQ     sfc_gotR1
        BL      sfc_getpalette          ; miss: stack the palette below r8
        Push    "r2, r8, r12"           ; best_colour corrupts r6-r12
        MOV     r2, r0                  ; colour
        ADD     r0, sp, #12             ; start of buffer
        MOV     r1, r8
        BL      best_colour             ; r2 -> colour number
        MOV     r0, r2                  ; colour number
        MOV     r1, r2                  ; GCOL
        Pull    "r4, r8, r12"           ; r4 = foreground RGB (the r2 saved above)
        LDR     r2, [r8, #4]            ; RGB
        BL      WriteCacheEntry
        MOV     r2, r4

sfc_gotR1
        STR     r1, [r8, #4]            ; ready for SetFontColours
        MOV     r0, r2                  ; foreground RGB
        BL      TryCache
        LDREQB  r1, [r1, #CachedColour]
        BEQ     sfc_gotR2
        BL      sfc_getpalette          ; no-op if the palette is already stacked
        Push    "r8, r12"               ; best_colour corrupts r6-r12
        MOV     r2, r0                  ; colour
        ADD     r0, sp, #8              ; start of buffer
        MOV     r1, r8
        BL      best_colour             ; r2 -> colour number
        MOV     r0, r2                  ; colour number
        MOV     r1, r2                  ; GCOL
        Pull    "r8, r12"
        LDR     r2, [r8, #8]            ; RGB
        BL      WriteCacheEntry
sfc_gotR2
        STR     r1, [r8, #8]            ; ready for SetFontColours

        CMP     r3, #0
        BEQ     sfc_skipcalc            ; no offset asked for
        CMP     r3, #14
        BGT     sfc_skipcalc            ; too bleedin big anyway

  ; calculate distance between the endpoints

        BL      sfc_getpalette          ; make sure the palette is stacked at sp
        LDR     r1, [r8, #8]            ; foreground colour number
        SUB     lr, r8, sp              ; palette size in bytes
        RSB     lr, r1, lr, LSR #2      ; max positive offset
        CMP     lr, r1                  ; r1 is max negative offset
        MOVLT   lr, r1                  ; lr = max abs offset
        CMP     lr, r3
        MOVLT   r3, lr                  ; trim r3

        LDR     r11, [sp, r1, LSL #2]   ; fore RGB in use
        LDR     r10, [r8, #4]
        LDR     r10, [sp, r10, LSL #2]  ; back RGB in use

        MOV     r12, #&FF
        AND     lr, r12, r10, LSR #16
        AND     r9, r12, r11, LSR #16
        SUBS    lr, lr, r9              ; green dist
        MOV     r2, lr, LSL #8          ; scaled (signed) green separation
        RSBMI   lr, lr, #0              ; ensure +ve for faster mul
        ADD     r9, lr, lr, LSL #1      ; *3 (greenweight)
        MUL     r9, lr, r9

        AND     lr, r12, r10, LSR #24
        SUBS    lr, lr, r11, LSR #24    ; blue dist
        MOV     r4, lr, LSL #8          ; scaled (signed) blue separation
        RSBMI   lr, lr, #0
        MLA     r9, lr, lr, r9

        AND     lr, r12, r10, LSR #8
        AND     r0, r12, r11, LSR #8
        SUBS    lr, lr, r0              ; red dist
        MOV     r0, lr, LSL #8          ; scaled (signed) red separation
        RSBMI   lr, lr, #0
        MOV     r10, lr, LSL #1         ; *2 (redweight)
        MLA     r9, lr, r10, r9

        ; r0 is scaled red separation
        ; r1 is foreground colour number
        ; r2 is scaled green separation
        ; r3 is current offset
        ; r4 is scaled blue separation
        ; r8 is palette buffer end
        ; r9 is distance between fore/back colours
        ; r11 is foreground rgb
        ; leaves lr, r12, r10, r7, r6, r5

        Push    "r0, r2, r4"            ; rgb separations

; From here on the palette starts at sp + 12.
; NOTE(review): DivRem is taken to leave the quotient in its first operand,
; as the "2^16*r3/r4 = r6" comment in interpolate_device_colour implies -
; confirm against the macro definition.
sfc_trythisoffset
        ADD     lr, r3, #1
        MUL     r7, lr, lr
        MOV     r7, r7, LSL #2          ; 4(nc+1)^2
        MOV     r5, r9
        DivRem  r10,r5,r7,r6            ; r10 = limit handed to is_R5_similar_enough_to_R7

        MOVS    r5, r2
        RSBMI   r5, r5, #0
        DivRem  r12, r5, lr, r7         ; green step for this offset
        CMP     r2, #0
        RSBLT   r12, r12, #0            ; fudge sign
        MOVS    r5, r0
        RSBMI   r5, r5, #0
        DivRem  r2, r5, lr, r7          ; red step for this offset
        CMP     r0, #0
        RSBLT   r2, r2, #0
        MOVS    r5, r4
        RSBMI   r5, r5, #0
        DivRem  r0, r5, lr, r7          ; blue step for this offset
        CMP     r4, #0
        RSBLT   r0, r0, #0
        MOV     r0, r0, LSL #16
        MOV     r0, r0, LSR #16         ; convert to halfword
        ORR     r0, r0, r12, LSL #16    ; free up r12
        MOV     r2, r2, LSL #16

        MUL     r5, r2, lr              ; red increment for final colour
        AND     r12, r11, #&FF00        ; red fore
        ADD     r5, r12, r5, LSR #16
        MUL     r12, r0, lr             ; current b,g stepped values
        MOV     r4, r3                  ; trial offset

forward_testing                         ; try fcol+offset for fit
        BL      stepR5                  ; r5 := required colour
        ADD     r7, r1, r4              ; colour number
        ADD     r7, sp, r7, LSL #2
        ADD     r7, r7, #12             ; point to stacked palette
        CMP     r7, r8
        BGE     fsc_forwardfails        ; colour isn't in range
        LDR     r7, [r7]                ; candidate RGB
        BL      is_R5_similar_enough_to_R7
        BGE     fsc_forwardfails
        SUBS    r4, r4, #1
        BNE     forward_testing

  ; r3 now set: ready to do actual setting!

sfc_skipcalc
        MOV     sp, r8                  ; discard junk
        Pull    "r0-r2"                 ; r0 as passed in, r1/r2 = colour numbers stored above

        RSB     r3, r3, #0
        SUB     r2, r2, r3              ; r2+r3 is real foreground colour
      [ Debug
        SWI 256+4
        SWI 256+26
        DREG    r0,"Font handle ",cc
        DREG    r1, " background ",cc
        DREG    r2, " foreground ",cc
        DREG    r3, " offset "
      ]
        Pull    "r4-r9, pc",,^

fsc_forwardfails
        ADD     lr, r3, #1              ; start the ramp again for the backward direction
        MUL     r5, r2, lr              ; red increment for final colour
        AND     r12, r11, #&FF00        ; red fore
        ADD     r5, r12, r5, LSR #16
        MUL     r12, r0, lr             ; current b,g stepped values
        MOV     r4, r3                  ; trial offset

backward_testing                        ; try fcol-offset for fit
        BL      stepR5                  ; r5 := required colour
        SUBS    r7, r1, r4              ; colour number
        BLT     fsc_backwardfails       ; colour isn't in range
        ADD     r7, sp, r7, LSL #2
        ADD     r7, r7, #12             ; point to stacked palette
        LDR     r7, [r7]                ; candidate RGB
        BL      is_R5_similar_enough_to_R7
        BGE     fsc_backwardfails
        SUBS    r4, r4, #1
        BNE     backward_testing
        RSB     r3, r3, #0              ; backward fit: negate r3 (sfc_skipcalc negates it again)
        B       sfc_skipcalc

fsc_backwardfails
        LDMFD   sp, {r0, r2, r4}        ; reload rgb separations
        SUBS    r3, r3, #1              ; neither direction fits: try a smaller offset
        BGT     sfc_trythisoffset
        B       sfc_skipcalc

;+++++++++++++++++++++++++++++++++++++++++++++++++++
; stepR5 (helper for ReturnFontColours_Code)
; bg step in r0, red step in r2
; cumulative bg step in r12, accumulated red value in r5
; rgb foreground in r11
; separate rgb of r11, add in step values, combine into r5
; corrupts r6
;
; Each call moves the accumulators one step on (the steps are subtracted) and
; rebuilds r5 from the red accumulator plus the foreground's green and blue
; with the accumulated steps added. r12 is updated as well.
; The flags are not altered.

stepR5
        RSB     r5, r2, r5, LSL #16     ; step red
        MOV     r5, r5, LSR #16
        ORR     r12, r12, #&00010000    ; avoid bit propagation between fields
        SUB     r12, r12, r0            ; accumlate gb
        MOV     r6, r11, LSR #16        ; green
        ADD     r6, r6, r12, LSR #24    ; plus green step
        AND     r6, r6, #&FF
        ORR     r5, r5, r6, LSL #16     ; green in place
        MOV     r6, r11, LSR #24        ; blue
        ADD     r6, r6, r12, LSR #8     ; plus blue step
        AND     r6, r6, #&FF
        ORR     r5, r5, r6, LSL #24
        MOV     pc, lr

;+++++++++++++++++++++++++++++++++++++++++++++++++++
; is the distance between r5 and r7 less than r10
; r6 available as temp, r7 corruptible
;
; Uses the same measure as best_colour:
;   2*(red diff)^2 + 3*(green diff)^2 + (blue diff)^2
; The answer is in the flags, set by CMP distance, r10: the callers treat GE
; as "not similar enough". r5, r10 and r12 are preserved; r6 and r7 are
; corrupted.

is_R5_similar_enough_to_R7
        Push    "r12,lr"
        MOV     lr, #&FF

        AND     r6, lr, r5, LSR #16
        AND     r12, lr, r7, LSR #16
        SUBS    r6, r6, r12             ; green dist
        RSBMI   r6, r6, #0              ; ensure +ve for faster mul
        ADD     r12, r6, r6, LSL #1     ; *3 (greenweight)
        MUL     r12, r6, r12

        MOV     r6, r5, LSR #24
        SUBS    r6, r6, r7, LSR #24     ; blue dist
        RSBMI   r6, r6, #0
        MLA     r12, r6, r6, r12

        AND     r6, lr, r5, LSR #8
        AND     r7, lr, r7, LSR #8
        SUBS    r6, r6, r7              ; red dist
        RSBMI   r6, r6, #0
        MOV     r7, r6, LSL #1          ; *2 (redweight)
        MLA     r12, r7, r6, r12

        CMP     r12, r10                ; result: flags of distance - r10
        Pull    "r12, pc"               ; flags are left as set by the CMP


;+++++++++++++++++++++++++++++++++++++++++++++++++++
; sfc_getpalette (helper for ReturnFontColours_Code)
; corrupts r6, r7, r9,r10, r11; needs r11 = l2bpp
;
; Stacks the current palette, one word per colour, so that on return sp
; points at colour 0 and r8 (the caller's frame pointer) marks the end.
; If sp already differs from r8 the palette is taken to be there already and
; nothing is done (r11 is not needed in that case).
; r0, r2 and r3 are preserved; r1 is left holding 16.
; The entries are read with my_read_palette; its V flag is not examined here.

sfc_getpalette
        CMP     r8, sp
        MOVNE   pc, lr                  ; palette already got

        Push    "r0, r2, r3, lr"        ; move r0, r2, r3, lr into r6, r7, r9, r10 ...
        Pull    "r6, r7, r9, r10"       ; ... leaving the stack free for the palette
        MOV     r0, #1
        MOV     r11, r0, LSL r11
        MOV     r0, r0, LSL r11          ; no of palette entries

        MOV     r1, #16                 ; "normal colour" for my_read_palette
sfc_readpal
        SUBS    r0, r0, #1              ; highest colour first, so colour 0 ends at sp
        BLPL    my_read_palette
        Push    "r2", PL
        BPL     sfc_readpal

        Push    "r6, r7, r9, r10"       ; restore r0, r2, r3 and return
        Pull    "r0, r2, r3, pc"

;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; using the result from ReadPalette directly leads to a bias in the colour
; selection, as the low nibble of each colour is clear.
;  R0 = colour, R1=16 for normal colour, 24 for border
;  Corrupts R3. Returns R2=standard RGB
;  For Version >= 054, R12 must point at the workspace (convert_screen_colour
;  reads Calibration_ptr from it).
;  If XOS_ReadPalette fails the routine returns at once with V still set and
;  the registers as the SWI left them. jris_loop tests V after calling here;
;  without the early return the compares made inside convert_screen_colour
;  would overwrite V and the failure would go unnoticed.
my_read_palette
        Push    "lr"
        SWI     XOS_ReadPalette
        Pull    "pc", VS                ; failed: return the error untouched
        LDR     lr, =&F0F0F000
        AND     r2, r2, lr              ; keep the top nibble of each component
        ORR     r2, r2, r2, LSR #4      ; copy nibbles
      [ Version >= 054
        BL      convert_screen_colour   ; apply the calibration table
      ]
        Pull    "pc"

;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; convert_screen_colour
;  In:  r2 = screen colour, r12 -> workspace
;  Out: r2 = standard RGB colour
; Runs the colour through convert_device_colour, using the calibration table
; at Calibration_ptr or, when that pointer is zero, default_calibration.
; r3 and r4 are preserved; the flags are as convert_device_colour leaves them.
      [ Version >= 054
convert_screen_colour
        Push    "r3, r4, lr"
        MOV     r4, r2                  ; convert_device_colour works on r4
        LDR     r3, [r12, #Calibration_ptr]
        TEQ     r3, #0                  ; no calibration table installed?
        ADREQ   r3, default_calibration ; then use the built-in one
        BL      convert_device_colour
        MOV     r2, r4
        Pull    "r3, r4, pc"

;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; default calibration table
;
; Layout as read by convert_device_colour / interpolate_device_colour:
;   three words: number of entries for device colour 1, 2 and 3
;   then the entries for colour 1, colour 2 and colour 3, one word each:
;     byte 0 = device value, bytes 1-3 = amounts added to the three output
;     components (byte 1 ends up in bits 8-15 of the result, byte 2 in
;     bits 16-23, byte 3 in bits 24-31)
; Here each device colour has two entries, device value 0 giving 0 and
; device value &FF giving &FF in its own component only.
default_calibration
        DCD     2,2,2
        DCD     &00000000, &0000FFFF
        DCD     &00000000, &00FF00FF
        DCD     &00000000, &FF0000FF

;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; convert a device colour (r4) to a standard RGB colour (r4)
; using calibration pointed to by r3
;
; The three device components (bits 8-15, 16-23 and 24-31 of r4) are each
; looked up in their own section of the calibration table by
; interpolate_device_colour, which adds 16.16 fixed point contributions into
; r8, r9 and r10. The three sums are then reduced to integers, limited to
; 0..&FF and packed as r8 -> bits 8-15, r9 -> bits 16-23, r10 -> bits 24-31.
; r3 and r5-r10 are preserved. The flags are left as set by the final CMP.
convert_device_colour
        Push    "r5,r6,r7,r8,r9,r10,lr"
 [ Debugcal
        SWI 256+4
        SWI     256+30
        DREG    r4,"Colour"
 ]
        MOV     r8, #0                  ; the three accumulators
        MOV     r9, #0
        MOV     r10, #0

        MOV     r4, r4, LSR #8
        AND     r5, r4, #&ff    ;       r5 = device colour 1 entry
        LDR     r6, [r3]        ;       r6 = number of colour 1 entries
        ADD     r7, r3, #12     ;       r7 -> colour 1 entries
        BL      interpolate_device_colour

        MOV     r4, r4, LSR #8
        ADD     r7, r7, r6, LSL #2 ;    r7 -> colour 2 entries
        AND     r5, r4, #&ff    ;       r5 = device colour 2 entry
        LDR     r6, [r3, #4]    ;       r6 = number of colour 2 entries
        BL      interpolate_device_colour

        MOV     r4, r4, LSR #8     
        ADD     r7, r7, r6, LSL #2 ;    r7 -> colour 3 entries
        AND     r5, r4, #&ff    ;       r5 = device colour 3 entry
        LDR     r6, [r3, #8]    ;       r6 = number of colour 3 entries
        BL      interpolate_device_colour
                           
 [ Debugcal
        DREG    r8, "red coeff"
        DREG    r9, "blue coeff"
        DREG    r10, "green coeff"
  ]
        MOV     r8, r8, ASR#16          ; 16.16 fixed point -> integer
        MOV     r9, r9, ASR#16
        MOV     r10, r10, ASR#16

        CMP     r8, #&100       ;       limit r8 to 0..&FF
        MOVGE   r8, #&FF
        CMP     r8, #0
        MOVLT   r8, #0
        MOV     r4, r8, LSL#8   ;       r4 = &0000RR00

        CMP     r9, #&100       ;       limit r9 to 0..&FF
        MOVGE   r9, #&FF
        CMP     r9, #0
        MOVLT   r9, #0
        ORR     r4, r4, r9, LSL#16 ;    r4 = &00GGRR00

        CMP     r10, #&100       ;       limit r10 to 0..&FF
        MOVGE   r10, #&FF
        CMP     r10, #0
        MOVLT   r10, #0
        ORR     r4, r4, r10, LSL#24 ;   r4 = &BBGGRR00

        Pull    "r5,r6,r7,r8,r9,r10,PC"
;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
; interpolate a colour from the device table, and add to r8,r9,r10
; r5 = device colour (0..255)
; r6 = number entries
; r7 -> first entry
;
; Each entry is one word: byte 0 = device value, bytes 1, 2, 3 = the amounts
; for r8, r9 and r10. The entries are searched in order for the first whose
; device value is >= r5. An exact match adds that entry's amounts (shifted
; left 16 bits); otherwise the amounts are interpolated linearly between the
; previous entry and the one found.
; r0-r7 are preserved.
; NOTE(review): when no suitable entry exists (none is >= r5, or the first
; entry is already greater than r5) the code does MOV pc, #0, i.e. it jumps
; to address 0 with its registers still stacked; no error is returned.
interpolate_device_colour
        Push    "r0,r1,r2,r3,r4,r5,r6,r7,lr"

; Find first entry with device colour larger than or equal to required
        MOV     lr, #0                  ; lr -> previous entry (0 = none yet)
01
        LDRB    r0, [r7]
        CMP     r0, r5
        BGE     found_next_highest
        MOV     lr, r7
        ADD     r7, r7, #4
        SUBS    r6, r6, #1
        BHI     %BT01
; No entry found - give an error
99
        MOV     pc, #0

; Found entry higher than or equal to required
found_next_highest         
        MOVEQ   lr, r7                  ; exact match: use this entry on its own
        CMP     lr, #0
        BEQ     %BT99                   ; nothing below the requested value
        LDRB    r0, [lr,#1]
        ADD     r8, r8, r0, LSL #16     ; start from the lower entry's amounts
        LDRB    r1, [lr,#2]
        ADD     r9, r9, r1, LSL #16
        LDRB    r2, [lr,#3]
        ADD     r10, r10, r2, LSL #16
        CMP     r7, lr
        BEQ     found_next_is_equal
; r0,r1,r2 = differences in colour
        LDRB    r3, [r7,#1]
        SUB     r0, r3, r0
        LDRB    r3, [r7,#2]
        SUB     r1, r3, r1
        LDRB    r3, [r7,#3]
        SUB     r2, r3, r2        
; r4 = difference in device colour
        LDRB    r3, [lr]
        LDRB    r4, [r7]
        SUB     r4, r4, r3
; r3 = difference between reqd device and known
        SUB     r3, r5, r3
; Now get 2^16*r3/r4 = r6
        MOV     r3, r3, LSL #16
        DivRem  r6, r3, r4, r5
; Now r8 += r0*r3/r4, r9+=r1*r3/r4, etc.
        MLA     r8, r6, r0, r8
        MLA     r9, r6, r1, r9
        MLA     r10, r6, r2, r10
found_next_is_equal
; Exit
        Pull    "r0,r1,r2,r3,r4,r5,r6,r7,PC"
      ] ; Version >= 054
;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

      [ Debug
        InsertDebugRoutines
      ]

      [ Version >= 053
; SetECF_Code
;  In:  r0 = RGB colour
;       r3 = flags, only bit 7 is used
;       r4 = GCOL action, only the bottom three bits are used
;       r12 -> workspace (colour cache, CachedL2BPP)
;  Out: r0 = GCOL held in (or just written to) the cache entry
;       r3 = bit 7 of the r3 passed in
;       r1, r2, r4-r12 restored from the stack
;       if Evaluate_ECF fails: returns at once with V set and r0 as it left it
; The ecf (cols and ratios words) comes from the cache entry when present,
; otherwise from Evaluate_ECF, whose results are then stored in the entry.
; It is set with SetECFFromColoursAndRatios_Code.
; NOTE(review): Evaluate_ECF is assumed to take the colour in r0, return
; cols in r1 and ratios in r2, and preserve r0 and r3 - it is not visible in
; this part of the file; confirm.
SetECF_Code     ROUT
        Push    "r1,r2,r4-r12,lr"
        AND     r3, r3, #&80
        AND     r8, r4, #7
        ORR     r8, r8, r3      ; combine together GCOL action and forground/background
        BL      TryCache
        BEQ     %FT01

        ; Cached missed, so evaluate the ecf and extract the information
        ; from that. Fill in the cache.

        BL      Evaluate_ECF
        Pull    "r1,r2,r4-r12,pc", VS

        ; Get GCOL from colournumber in the ecf
        MOV     r3, r1          ; r3 = cols
        MOV     r4, r2          ; r4 = ratios
        MOV     r2, r0          ; r2 = colour
        AND     r0, r3, #&ff    ; extract cols[0]
        MOV     r1, #-1         ; current mode
        BL      ColourNumberToGCOL_Code_testing

        ; write the cache entry
        MOV     r1, r0          ; r1 = GCOL
        MOV     r7, r0          ; r7 = GCOL
        AND     r0, r3, #&ff    ; r0 = ColourNumber
        ; r2 already has colour in it
        BL      WriteCacheEntry ; returns r10 -> the entry written

        ; Fill in the ecf fields of the cache entry
        STR     r3, [r10, #CachedCols]
        STR     r4, [r10, #CachedRatios]

        ; Move the ecf details into the correct registers
        MOV     r1, r3
        MOV     r2, r4
        B       %FT03

01
        ; Cache hit - check the ecf is in the cache
        LDR     r2, [r1, #CachedRatios]
        LDRB    r7, [r1, #CachedGCOL]
        CMP     r2, #-1         ; -1 is what WriteCacheEntry stores: no ecf yet
        BNE     %FT02

        ; ecf not in cache entry. Evaluate the ecf and put it in the cache
        MOV     r3, r1          ; r3 -> cache entry
        BL      Evaluate_ECF
        Pull    "r1,r2,r4-r12,pc", VS
        STR     r2, [r3, #CachedRatios]
        STR     r1, [r3, #CachedCols]
        B       %FT03

02
        ; Cache hit and ecf present, extract the rest of the ecf information
        LDRB    r7, [r1, #CachedGCOL]   ; (r7 already holds this, loaded at 01)
        LDR     r1, [r1, #CachedCols]

03
        ; At this stage:
        ;   r1 contains cols
        ;   r2 contains ratios
        ;   r7 contans GCOL
        ;   r8 contains the combined action/foreground/background


        Push    "r1,r2"                 ; put cols and ratios in memory
        MOV     r0, r13                 ; r0 -> cols
        ADD     r1, r13, #4             ; r1 -> ratios

        MOV     r2, r8                  ; r2 = combined action/foreground/background
        BL      SetECFFromColoursAndRatios_Code

        ADD     r13, r13, #8            ; drop the pushed registers
        LDR     r2, [r12, #CachedL2BPP] ; NOTE(review): has no effect on the result, the Pull
                                        ; below reloads r2 from the stack. The other readers
                                        ; of CachedL2BPP use LDRB. Confirm whether r2 was
                                        ; meant to be returned.
        AND     r3, r8, #&80
        MOV     r0, r7                  ; return the GCOL

        Pull    "r1,r2,r4-r12,pc",,^

; Rank the palette entries by how close they are to a requested rgb value.
;
; For every colour number i below ncols a weighted squared distance is
; packed with the colour number as (distance << 8) | i into Distances[],
; the words are sorted as cardinals by OS_HeapSort, and the low byte of
; each sorted word (the colour number) is copied out to ranks[].
;
; asm_rank_colours
; (
;       char ranks[ 256 ],      r0
;       int ncols,              r1
;       int palette[ 256 ],     r2
;       int r,                  r3
;       int g,                  [r13,#0*4] on entry, word 10 once stacked
;       int b,                  [r13,#1*4] on entry, word 11 once stacked
; );
;
; Palette words carry red in bits 8-15, green in bits 16-23 and blue in
; bits 24-31.
;
; Out:  VS: OS_HeapSort returned an error (r0 as it left it)
;       otherwise flags are restored from lr
;       r4-r12 preserved, r0-r3 corrupted

asm_rank_colours        ROUT
        STMFD   r13!, {r4-r12,r14}
        LDR     r12, =Distances
        LDR     r5, [r13, #11*4]        ; b
        LDR     r4, [r13, #10*4]        ; g
        MOV     r11, #&ff               ; byte mask

        MOV     r6, #0                  ; i = 0
        CMP     r6, r1
        BGE     %FT20                   ; no colours to measure
10
        ;
        ; Distances[i] = (2*(r0-r)^2 + 3*(g0-g)^2 + (b0-b)^2) << 8  |  i
        ; where r0,g0,b0 come from palette[i]
        ;
        LDR     r7, [r2, r6, ASL #2]    ; palette[i]

        AND     r8, r11, r7, LSR #8     ; r0
        SUB     r8, r8, r3              ; (r0-r)
        MUL     r9, r8, r8
        MOV     r10, r9, ASL #1         ; r10 = 2*(r0-r)^2

        AND     r8, r11, r7, LSR #16    ; g0
        SUB     r8, r8, r4              ; (g0-g)
        MUL     r9, r8, r8
        ADD     r9, r9, r9, ASL #1      ; 3*(g0-g)^2
        ADD     r10, r10, r9            ; r10 += 3*(g0-g)^2

        MOV     r8, r7, LSR #24         ; b0
        SUB     r8, r8, r5              ; (b0-b)
        MLA     r10, r8, r8, r10        ; r10 += (b0-b)^2

        ORR     r10, r6, r10, LSL #8    ; pack the colour number in the low byte
        STR     r10, [r12, r6, ASL #2]  ; Distances[i] = r10

        ADD     r6, r6, #1
        CMP     r6, r1
        BLT     %BT10
20
        MOV     r4, r0                  ; keep ranks
        MOV     r3, r1                  ; keep ncols

        ;
        ; Sorting cardinals in place: OS_HeapSort is not expected to use
        ; the scratch space for this kind of sort.
        ;
        MOV     r0, r3                  ; number of elements
        MOV     r1, r12                 ; array to sort
        MOV     r2, #0                  ; sort cardinals
        SWI     XOS_HeapSort
        LDMVSFD r13!, {r4-r12,pc}

        MOV     r6, #0
        CMP     r6, r3
        BGE     %FT40                   ; nothing to copy out
30
        LDRB    r0, [r12, r6, ASL #2]   ; low byte of Distances[i] is the colour number
        STRB    r0, [r4, r6]            ; ranks[i] = Distances[i] & 0xff
        ADD     r6, r6, #1
        CMP     r6, r3
        BLT     %BT30
40
        LDMFD   r13!, {r4-r12,pc}^

        LTORG

; Take the ranked palette and an rgb value and cols[0] already determined
; then find out the next nearest colour which can be combined with cols[0]
; to end up nearer rgb. Return cols[1] = -1 if none found.
;
; In the argument list the bracketed offset is relative to r13 on entry;
; the trailing number is the word index once r4-r12,r14 (ten words) have
; been stacked by this routine.
;
; asm_evaluate_second_colour
; (
;       char ranks[ 256 ],      r0
;       int ncols,              r1
;       int palette[ 256 ],     r2
;       int r,                  r3
;       int g,                  [r13,#0*4] 10
;       int b,                  [r13,#1*4] 11
;       char cols[0],            [r13,#2*4+0] 12+0
;       char cols[1],            [r13,#2*4+1] 12+1
;       char cols[2],            [r13,#2*4+2] 12+2
;       char cols[3],            [r13,#2*4+3] 12+3
;       char ratios[0],          [r13,#3*4+0] 13+0
;       char ratios[1],          [r13,#3*4+1] 13+1
;       char ratios[2],          [r13,#3*4+2] 13+2
;       char ratios[3]           [r13,#3*4+3] 13+3
; );
;
; Results are written straight into the caller's stacked cols[]/ratios[]:
;   found:     cols[1] = colour number, ratios[1] = n, ratios[0] = 32-n
;   not found: cols[1] = -1 (byte), ratios[0] = 32, ratios[1] = 0
;
; r4-r12 are preserved, r1-r3 are corrupted. The return does not restore
; the caller's flags.

asm_evaluate_second_colour       ROUT
        STMFD   r13!, {r4-r12,r14}

        LDRB    r4, [r13, #12*4+0]
        LDR     r4, [r2, r4, ASL #2]    ; palette[ cols[0] ]
        MOV     r8, #&ff
        AND     r6, r8, r4, LSR #8      ; r0
        AND     r7, r8, r4, LSR #16     ; g0
        MOV     r8, r4, LSR #24         ; b0
        SUB     r3, r3, r6
        ADD     r3, r3, r3              ; 2*(r-r0)
        LDR     r4, [r13, #10*4]
        SUB     r4, r4, r7
        ADD     r4, r4, r4, ASL #1      ; 3*(g-g0)
        LDR     r5, [r13, #11*4]
        SUB     r5, r5, r8              ; (b-b0)

        ; r2 is needed as a scratch register inside the loop, so the palette
        ; pointer lives on the stack. From here until it is dropped, every
        ; argument offset is one word larger (cols at 13, ratios at 14).
        STMFD   r13!, {r2}              ; store palette on the stack

        MOV     r9, #0
        ; r0 - ranks
        ; r1 - ncols
        ; r2 - palette
        ; r3 - 2*(r-r0)
        ; r4 - 3*(g-g0)
        ; r5 - (b-b0)
        ; r6 - r0
        ; r7 - g0
        ; r8 - b0
        ; r9 - i
        B       %FT01
02
        LDR     r2, [r13]               ; palette

        ; skip the candidate if it is cols[0] itself
        LDRB    r10, [r0, r9]           ; ranks[i]
        LDRB    r11, [r13, #13*4+0]     ; cols[0]
        CMP     r10, r11
        BEQ     %FT03

        ; With the same 2/3/1 weighting of red/green/blue throughout:
        ;   r12 accumulates (rgb - colour0) . (colour1 - colour0)
        ;   r11 accumulates (colour1 - colour0) . (colour1 - colour0)
        LDR     r10, [r2, r10, ASL #2]  ; palette[ ranks[ i ]]
        MOV     r11, r10, LSR #16
        AND     r11, r11, #&ff
        SUB     r11, r11, r7            ; r11 = g1-g0
        MUL     r12, r11, r4            ; r12 = 3*(g-g0)*(g1-g0)
        MUL     r14, r11, r11           ; r14 = (g1-g0)*(g1-g0)
        ADD     r11, r14, r14, ASL #1   ; r11 = 3*(g1-g0)*(g1-g0)
        MOV     r14, r10, LSR #8
        AND     r14, r14, #&ff
        SUB     r14, r14, r6            ; r14 = r1-r0
        MLA     r12, r14, r3, r12       ; r12 += 2*(r-r0)*(r1-r0)
        MUL     r2, r14, r14            ; r2 = (r1-r0)*(r1-r0)
        ADD     r11, r11, r2, ASL #1    ; r11 += 2*(r1-r0)*(r1-r0)
        MOV     r10, r10, LSR #24
        SUB     r10, r10, r8            ; r10 = b1-b0
        MLA     r12, r10, r5, r12       ; r12 += (b-b0)*(b1-b0)
        MLA     r11, r10, r10, r11      ; r11 += (b1-b0)*(b1-b0)

        ; Accept the candidate only when r12/r11 is at least 1/64, i.e. when
        ; the rounded 32-level ratio computed below is at least 1.
        CMP     r11, r12, ASL #6        ; if r11 <= r12*64
        BGT     %FT04                   ; then
        CMP     r11, #0                 ; if r11 != 0   (implies two colours same in palette)
        BEQ     %FT04                   ; then
        MOV     r12, r12, ASL #5
        ADD     r12, r12, r11, ASR #1   ; r12 = r12*32+r11/2
        DivRem  r2, r12, r11, r14       ; r2 = r12/r11
        ADD     r13, r13, #1*4          ; drop the r2 pushed earlier
        LDRB    r1, [r0, r9]            ; ranks[i]
        STRB    r1, [r13, #12*4+1]      ; cols[1] = ranks[i]
        RSB     r1, r2, #32
        STRB    r1, [r13, #13*4+0]      ; ratios[0] = 32-r2
        STRB    r2, [r13, #13*4+1]      ; ratios[1] = r2
        LDMFD   r13!,{r4-r12,pc}
04
03
        ADD     r9, r9, #1
01
        CMP     r9, r1
        BLT     %BT02

        ; No usable second colour: report the single-colour result
        ADD     r13, r13, #1*4          ; drop the r2 pushed earlier
        MOV     r3, #-1
        STRB    r3, [r13, #12*4+1]      ; cols[1] = -1
        MOV     r3, #32
        STRB    r3, [r13, #13*4+0]      ; ratios[0] = 32
        MOV     r3, #0
        STRB    r3, [r13, #13*4+1]      ; ratios[1] = 0
        LDMFD   r13!, {r4-r12,pc}

; Find the third colour which will take one closer to rgb.
;
; In the argument list the bracketed offset is relative to r13 on entry;
; the trailing number is the word index once r4-r12,r14 (ten words) have
; been stacked by this routine. cols[] and ratios[] are each four bytes
; packed into one word.
;
; asm_evaluate_third_colour
; (
;       char ranks[ 256 ],      r0
;       int ncols,              r1
;       int palette[ 256 ],     r2
;       int r,                  r3
;       int g,                  [r13,#0*4] 10
;       int b,                  [r13,#1*4] 11
;       char cols[0],           [r13,#2*4+0] 12+0
;       char cols[1],           [r13,#2*4+1] 12+1
;       char cols[2],           [r13,#2*4+2] 12+2
;       char cols[3],           [r13,#2*4+3] 12+3
;       char ratios[0],         [r13,#3*4+0] 13+0
;       char ratios[1],         [r13,#3*4+1] 13+1
;       char ratios[2],         [r13,#3*4+2] 13+2
;       char ratios[3]          [r13,#3*4+3] 13+3
; );
;
; Results are written straight into the caller's stacked cols[]/ratios[]:
;   found:     cols[2] = colour number, ratios[1] = A, ratios[2] = B,
;              ratios[0] = 32-(A+B)
;   not found: cols[2] = -1 (byte), ratios[2] = 0 (ratios[0..1] untouched)
;
; Reading of the arithmetic (same 2/3/1 red/green/blue weighting as the
; other evaluate routines), with d = rgb-colour0, u = colour1-colour0 and
; v = candidate-colour0:
;   k0 = u.d   k1 = u.u   (computed once, before the loop)
;   per candidate: v.v, v.u and v.d, then
;   A = 32*(k0*(v.v) - (v.d)*(v.u)) / (k1*(v.v) - (v.u)^2)   (rounded)
;   B = 32*((v.d)*k1 - k0*(v.u))   / (k1*(v.v) - (v.u)^2)   (rounded)
; The code forms the negated numerators and the negated denominator, so the
; signs cancel in the divisions.
;
; r4-r12 are preserved, r0-r3 are corrupted. The return does not restore
; the caller's flags.

asm_evaluate_third_colour       ROUT
        STMFD   r13!, {r4-r12,r14}

        MOV     r10, r1                 ; ncols
        MOV     r1, r0                  ; ranks
        MOV     r9, r2                  ; palette
        LDRB    r5, [r13, #12*4+0]      ; cols[0]
        LDRB    r8, [r13, #12*4+1]      ; cols[1]

        LDR     r0, [r9, r5, ASL #2]    ; palette[first]
        MOV     r14, #&ff
        AND     r11, r14, r0, LSR #8    ; r0
        AND     r12, r14, r0, LSR #16   ; g0
        MOV     r14, r0, LSR #24        ; b0

        LDR     r4, [r9, r8, ASL #2]    ; palette[second]

        ; Set r2 to 2*(r1-r0)*(r-r0)
        ; Set r6 to 2*(r1-r0)*(r1-r0)
        SUB     r0, r3, r11             ; r0 = r-r0
        MOV     r3, r4, LSR #8
        AND     r3, r3, #&ff            ; r3 = r1
        SUB     r3, r3, r11             ; r3 = r1-r0
        MUL     r7, r3, r0              ; r7 = (r1-r0)*(r-r0)
        MOV     r2, r7, ASL #1          ; r2 = 2*(r1-r0)*(r-r0)
        MUL     r7, r3, r3              ; r7 = (r1-r0)*(r1-r0)
        MOV     r6, r7, ASL #1          ; r6 = 2*(r1-r0)*(r1-r0)

        ; Add 3*(g1-g0)*(g1-g0) to r6
        ; Add 3*(g1-g0)*(g-g0) to r2
        MOV     r7, r4, LSR #16
        AND     r7, r7, #&ff            ; r7 = g1
        SUB     r7, r7, r12             ; r7 = g1-g0
        STMFD   r13!, {r0,r3,r7}        ; Push r-r0, r1-r0 and g1-g0
        MUL     r0, r7, r7              ; r0 = (g1-g0)*(g1-g0)
        ADD     r3, r0, r0, ASL #1      ; r3 = 3*(g1-g0)*(g1-g0)
        ADD     r6, r6, r3              ; r6 += 3*(g1-g0)*(g1-g0)
        LDR     r0, [r13, #(10+3)*4]    ; r0 = g (three extra words now stacked)
        SUB     r0, r0, r12             ; r0 = g-g0
        MUL     r3, r0, r7              ; r3 = (g-g0)*(g1-g0)
        ADD     r3, r3, r3, ASL #1      ; r3 = 3*(g-g0)*(g1-g0)
        ADD     r2, r2, r3              ; r2 += 3*(g1-g0)*(g-g0)

        ; Add (b1-b0)*(b1-b0) to r6
        ; Add (b-b0)*(b1-b0) to r2
        MOV     r3, r4, LSR #24
        AND     r3, r3, #&ff            ; r3 = b1
        SUB     r3, r3, r14             ; r3 = b1-b0
        MLA     r6, r3, r3, r6          ; r6 += (b1-b0)*(b1-b0)
        LDR     r4, [r13, #(11+3)*4]    ; r4 = b
        SUB     r4, r4, r14             ; r4 = b-b0
        MLA     r2, r3, r4, r2          ; r2 += (b-b0)*(b1-b0)
        STMFD   r13!, {r0,r3,r4}        ; Push g-g0, b1-b0 and b-b0

        ; Widen k0 (r2) and k1 (r6) to 64 bits in (r2,r3) and (r6,r7)
        mextralong_sex r2, r3
        mextralong_sex r6, r7

        ; Stack:
        ;       g1-g0
        ;       r1-r0
        ;       r-r0
        ;       b-b0
        ;       b1-b0
        ; r13-> g-g0

        MOV     r0, #0

        ; Register allocation:
        ; r0 = i
        ; r1 = ranks
        ; r2 = k0.lsd
        ; r3 = k0.msd
        ; r4 = <unused>
        ; r5 = cols[0]
        ; r6 = k1.lsd
        ; r7 = k1.msd
        ; r8 = cols[1]
        ; r9 = palette
        ; r10 = ncols
        ; r11 = r0
        ; r12 = g0
        ; r14 = b0
        B       %FT03

02
        ;r0=I
        ;r1=ranks
        LDRB    r4, [r1, r0]            ; r4 = ranks[i]

        ;if ( ranks[i]!=cols[0] && ranks[i]!=cols[1] )
        CMP     r4, r5
        CMPNE   r4, r8
        BEQ     %FT04

        ;push everything
        ; (14 words; the six saved differences are now at words 14-19 and
        ;  the saved r4 = ranks[i] is at word 4)
        STMFD   r13!,{r0-r12,r14}

        ;r9=palette
        ;r9=r9[r4]
        LDR     r9, [r9, r4, ASL #2]

        ;r11=R0
        ;r12=G0
        ;r14=B0


        ; Set r8 to 3*(g2-g0)*(g2-g0)
        ; Set r4 to 3*(g2-g0)*(g1-g0)
        ; Set r0 to 3*(g2-g0)*(g-g0)
        MOV     r1, r9, LSR #16
        AND     r1, r1, #&ff            ; r1 = g2
        SUB     r1, r1, r12             ; r1 = g2 - g0
        MUL     r12, r1, r1             ; r12 = (g2-g0)*(g2-g0)
        ADD     r8, r12, r12, ASL #1    ; r8 = 3*(g2-g0)*(g2-g0)
        LDR     r5, [r13, #19*4]        ; r5 = g1-g0
        MUL     r12, r1, r5             ; r12 = (g2-g0)*(g1-g0)
        ADD     r4, r12, r12, ASL #1    ; r4 = 3*(g2-g0)*(g1-g0)
        LDR     r5, [r13, #14*4]        ; r5 = g-g0
        MUL     r12, r1, r5             ; r12 = (g2-g0)*(g-g0)
        ADD     r0, r12, r12, ASL #1    ; r0 = 3*(g2-g0)*(g-g0)

        ; Add 2*(r2-r0)*(r2-r0) to r8
        ; Add 2*(r2-r0)*(r1-r0) to r4
        ; Add 2*(r2-r0)*(r-r0) to r0
        MOV     r1, r9, LSR #8
        AND     r1, r1, #&ff            ; r1 = r2
        SUB     r1, r1, r11             ; r1 = r2 - r0
        MUL     r12, r1, r1             ; r12 = (r2-r0)*(r2-r0)
        ADD     r8, r8, r12, ASL #1     ; r8 += 2*(r2-r0)*(r2-r0)
        LDR     r5, [r13, #18*4]        ; r5 = r1-r0
        MUL     r12, r1, r5             ; r12 = (r2-r0)*(r1-r0)
        ADD     r4, r4, r12, ASL #1     ; r4 += 2*(r2-r0)*(r1-r0)
        LDR     r5, [r13, #17*4]        ; r5 = r-r0
        MUL     r12, r1, r5             ; r12 = (r2-r0)*(r-r0)
        ADD     r0, r0, r12, ASL #1     ; r0 += 2*(r2-r0)*(r-r0)

        ; Add (b2-b0)*(b2-b0) to r8
        ; Add (b2-b0)*(b1-b0) to r4
        ; Add (b2-b0)*(b-b0) to r0
        MOV     r1, r9, LSR #24         ; r1 = b2
        SUB     r1, r1, r14             ; r1 = b2 - b0
        MLA     r8, r1, r1, r8          ; r8 += (b2-b0)*(b2-b0)
        LDR     r5, [r13, #15*4]        ; r5 = b1-b0
        MLA     r4, r1, r5, r4          ; r4 += (b2-b0)*(b1-b0)
        LDR     r5, [r13, #16*4]        ; r5 = b-b0
        MLA     r0, r1, r5, r0          ; r0 += (b2-b0)*(b-b0)


        ; Sign extend to 64 bit integers
        mextralong_sex r0,r1
        mextralong_sex r4,r5
        mextralong_sex r8,r9

        ; In the shorthand on the right each number names the register pair
        ; that starts at that register, e.g. "10 = 2 * 8" is
        ; (r10,r11) = (r2,r3) * (r8,r9).
        mextralong_multiply r10, r11, r2, r3, r8, r9    ; 10 = 2 * 8
        mextralong_multiply r12, r14, r6, r7, r8, r9    ; 12 = 6 * 8
        mextralong_multiply r8, r9, r0, r1, r6, r7      ; 8 = 0 * 6
        mextralong_multiply r6, r7, r0, r1, r4, r5      ; 6 = 0 * 4
        mextralong_subtract r0, r1, r6, r7, r10, r11    ; 0 = 6 - 10
        mextralong_multiply r6, r7, r4, r5, r2, r3      ; 6 = 4 * 2
        mextralong_subtract r10, r11, r6, r7, r8, r9    ; 10 = 6 - 8
        mextralong_mov r6, r7, r4, r5                   ; 6 = 4
        mextralong_multiply r2, r3, r4, r5, r6, r7      ; 2 = 4 * 6
        mextralong_subtract r6, r7, r2, r3, r12, r14    ; 6 = 2 - 12

        ; if (r6,r7) is non-zero
        ; (this is the divisor used below; zero means no solution)
        ORRS    r14, r6, r7
        BEQ     %FT01

        mextralong_leftshift r0, r1, r0, r1, 5          ; 0 = 0 << 5
        mextralong_leftshift r2, r3, r6, r7, -1         ; 2 = 6 >> 1
        mextralong_add r0, r1, r0, r1, r2, r3           ; 0 = 0 + 2
        mextralong_divide r4, r5, r0, r1, r6, r7, r8, r9, r12   ; 4 = 0 / 6 using 8,9,12
        mextralong_leftshift r0, r1, r10, r11, 5        ; 0 = 10 << 5
        mextralong_add r0, r1, r2, r3, r0, r1           ; 0 = 2 + 0
        mextralong_divide r2, r3, r0, r1, r6, r7, r8, r9, r10   ; 2 = 0 / 6 using 8,9,10

        ; if neither (r4,r5) nor (r2,r3) are negative
        ORRS    r0, r5, r3
        BMI     %FT01

        ; 0 = 2 + 4
        mextralong_add r0, r1, r2, r3, r4, r5

        ; if (r0,r1) < 32
        CMP     r1, #0
        BNE     %FT01
        CMP     r0, #32
        BHS     %FT01

        LDR     r1, [r13, #4*4]                 ; ranks[i]
        ADD     r13, r13, #20*4                 ; drop the saved registers and 6 values from the stack
        STRB    r1, [r13, #12*4+2]              ; cols[2] = r1
        RSB     r0, r0, #32
        STRB    r0, [r13, #13*4+0]              ; ratios[0] = 32 - s.lsd
        STRB    r4, [r13, #13*4+1]              ; ratios[1] = A.lsd
        STRB    r2, [r13, #13*4+2]              ; ratios[2] = B.lsd
        LDMFD   r13!,{r4-r12,pc}

01
        ;pull   everything
        LDMFD   r13!,{r0-r12,r14}
04
        ; i++
        ADD     r0, r0, #1
03
        ; i < ncols
        CMP     r0, r10
        BLT     %BT02

        ; No candidate gave a usable mix
        ADD     r13, r13, #6*4                  ; drop the 6 values pushed earlier
        MOV     r1, #-1
        STRB    r1, [r13, #12*4+2]              ; cols[2] = -1
        MOV     r1, #0
        STRB    r1, [r13, #13*4+2]              ; ratios[2] = 0
        LDMFD   r13!,{r4-r12,pc}



        ;
        ; This macro constructs the next word of the ecf.
        ;
        ; r1 = BPP for this mode
        ; r3 points through stipple[] and r4 points
        ; at colours[]. r0 is used as temporary workspace.
        ;
        ; $rs receives the finished word. It starts as a single marker bit
        ; which moves up by r1 bits for every pixel added; the loop stops
        ; when the marker is shifted out of bit 31 into the carry flag, so
        ; one word's worth of pixels is consumed (assumes r1 divides 32).
        ; On exit r3 has advanced by four bytes per pixel read.
        ;
        ; $row is not referenced in the macro body.
        ;
        MACRO
        make_stipple_bit        $rs, $row
        MOV     $rs, #1
01
        ; LDRB r0,[r3],#4 is the correct sequence as the stipple values are
        ; stored in the low byte of each palette word.
        LDRB    r0, [r3], #4            ; r0 = *stipple++
        LDRB    r0, [r4, r0]            ; r0 = colours[r0]
        ORRS    $rs, r0, $rs, LSL r1    ; rs = r0 | (rs << bpp)
        BCC     %BT01                   ; go again if the marker bit didn't shift out
        MEND

        ;
        ; This macro peels off a byte from rw and puts it in the output buffer
        ; pointed through by r3
        ;
        ; On exit r3 has advanced by one, $rw has been shifted right by eight
        ; bits so the next use yields the next byte up, and r0 is corrupted.
        ;
        MACRO
        write_a_byte    $rw
        AND     r0, $rw, #&ff
        STRB    r0, [r3], #1
        MOV     $rw, $rw, LSR #8
        MEND

; Given colour numbers c[] and quantities of them r[] (which sum
; to 32) then set an ecf and gcol action using them. The stipple
; definition values are to be found in the workspace hanging off r12.
;
; os_error *SetECFFromColoursAndRatios_Code( char *c, char *r, int combined_action )
; r0 = c[]      four colour numbers, one byte each
; r1 = r[]      four counts, one byte each
; r2 = gcol action | forgnd/bgnd  (bits 0-2 = action, bit 7 = background)
; r12 -> workspace
;
; Out:
;       VS: r0 -> error from OS_WriteN
;       VC: r0 = 0
;       r1 and r3 corrupted, r2 and r4-r12 preserved; flags are not restored
;
; The routine builds a 43 byte VDU stream in OutBuff and sends it with
; OS_WriteN: four "23,n,<8 bytes>" sequences for n = 2..5 followed by
; "18,80+action,bit 7 of r2".
;
; The Colours array in scratch space holds 32 entries, so the counts in
; r[] must not total more than 32; nothing here checks that.
SetECFFromColoursAndRatios_Code ROUT
        STMFD   r13!, {r4-r12, r14}

        ADD     r3, r12, #PaletteStipple
        LDR     r4, =Colours

        ;
        ; Fill up the colours array with r[x] entries
        ; of c[x] where x goes from 0 to 3.
        ;


        MOV     r7, #0          ; s = 0
        MOV     r8, #4          ; r8 = i = 4 to 1 step -1
04
        LDRB    r6, [r0], #1    ; r6 = *c++
        LDRB    r5, [r1], #1    ; r5 = *r++

        TST     r5, r5          ; while r5 != 0
        BEQ     %FT05
06
        STRB    r6, [r4, r7]    ; colours[s] = r6
        ADD     r7, r7, #1      ; s++
        SUBS    r5, r5, #1      ; if ( --r5 != 0 ) goto 06
        BNE     %BT06
05
        SUBS    r8, r8, #1      ; if ( --r8 != 0 ) goto 04
        BNE     %BT04

        LDRB    r1, [r12, #CachedL2BPP]
        MOV     r0, #1
        MOV     r1, r0, ASL r1          ; r1 = bpp

        ; Build the eight 32 bit rows of the pattern in r5-r12. r3 walks on
        ; through the stipple table from one row to the next. r12 stops
        ; being the workspace pointer once the last row is built.
        make_stipple_bit r5,0
        make_stipple_bit r6,1
        make_stipple_bit r7,2
        make_stipple_bit r8,3
        make_stipple_bit r9,4
        make_stipple_bit r10,5
        make_stipple_bit r11,6
        make_stipple_bit r12,7

        ; write out the 4 set ecf vdu sequences
        ; Each sequence takes the current low byte of every row, so the four
        ; patterns carry bytes 0, 1, 2 and 3 of the rows respectively.
        MOV     r1, #4
        LDR     r3, =OutBuff
02
        MOV     r0, #23
        STRB    r0, [r3], #1
        RSB     r0, r1, #6              ; pattern number: 2,3,4,5 as r1 counts 4,3,2,1
        STRB    r0, [r3], #1
        write_a_byte r5
        write_a_byte r6
        write_a_byte r7
        write_a_byte r8
        write_a_byte r9
        write_a_byte r10
        write_a_byte r11
        write_a_byte r12

        SUBS    r1, r1, #1
        BNE     %BT02

        ; gcol monster ecf pattern
        MOV     r0, #18
        STRB    r0, [r3], #1
        AND     r0, r2, #7
        ADD     r0, r0, #80
        STRB    r0, [r3], #1
        AND     r0, r2, #&80
        STRB    r0, [r3], #1

        LDR     r0, =OutBuff
        MOV     r1, #43         ; Output sequence length: 4*(2+8) + 3
        SWI     XOS_WriteN

        MOVVC   r0, #0          ; no error to report
        LDMFD   r13!, {r4-r12, pc}


; Evaluate_ECF
;
; Works out which palette colours, and how many thirty-seconds of each,
; should be mixed in an ecf to approximate the requested colour. The
; palette (and the stipple interleaved with it) is cached first if
; PaletteIsCached is zero.
;
; In:
;       r0 colour (red in bits 8-15, green in bits 16-23, blue in bits 24-31)
;       r12 -> workspace
;
; Out:
;       Error (V set, r0 -> error block)
;     or
;       r0 preserved
;       r1 = cols to use (packed, cols[0] in the low byte)
;       r2 = ratios of cols to use (packed, ratios[0] in the low byte)
;       flags restored from lr
;
;       r3-r12 are preserved in both cases
;
; On entry to each of the evaluate functions the registers
; and stack looks like this:
; r0            ranks   - pointer to colour numbers ranked by nearness to requested colour (in scratch space)
; r1            ncols   - number of colours in current screen mode
; r2            palette - current palette (in RMA workspace)
; r3            r       - requested colour
; [r13, #0*4]   g       - requested colour
; [r13, #1*4]   b       - requested colour
; [r13, #2*4+0] cols[0]
; [r13, #2*4+1] cols[1]
; [r13, #2*4+2] cols[2]
; [r13, #2*4+3] cols[3]
; [r13, #3*4+0] ratios[0]
; [r13, #3*4+1] ratios[1]
; [r13, #3*4+2] ratios[2]
; [r13, #3*4+3] ratios[3]

Evaluate_ECF    ROUT
        STMFD   r13!, {r0,r3-r12, r14}

        LDRB    r3, [r12, #PaletteIsCached]
        CMP     r3, #0
        BNE     %FT02
        Push    "r0"
        BL      CacheThePalette
        BLVC    SetStipple
        ; On error unwind the extra r0 (into r1) and the saved r0 (into r2)
        ; so that r0 keeps the error pointer
        Pull    "r1,r2,r3-r12, pc", VS
        Pull    "r0"
        MOV     r3, #-1
        STRB    r3, [r12, #PaletteIsCached]

02
        ; Build the four word argument frame shared by the evaluate routines
        SUB     r13, r13, #(1+1)*4+4+4    ; g,b,cols[],ratios[]

        ; cols[1] = cols[2] = cols[3] = ratios[1] = ratios[2] = ratios[3] = 0
        ; ratios[0] = 32
        MOV     r3, #0
        STR     r3, [r13, #2*4]
        STR     r3, [r13, #3*4]
        MOV     r3, #32
        STRB    r3, [r13, #3*4+0]

        ; Extract r, g and b
        MOV     r3, #&ff
        AND     r4, r3, r0, LSR #16     ; g
        STR     r4, [r13, #0*4]
        AND     r4, r3, r0, LSR #24     ; b
        STR     r4, [r13, #1*4]
        AND     r3, r3, r0, LSR #8      ; r

        LDR     r0, =Ranks

        LDRB    r1, [r12, #CachedL2BPP]
        MOV     r2, #1
        MOV     r1, r2, ASL r1          ; BPP
        MOV     r1, r2, ASL r1          ; number of colours
        ADD     r2, r12, #PaletteStipple ; Palette (also stipple)

        ; Keep copies of the four register arguments; r0-r3 are reloaded
        ; from these before each of the later calls
        MOV     r4, r0
        MOV     r5, r1
        MOV     r6, r2
        MOV     r7, r3
        BL      asm_rank_colours        ; rank the colours by nearness to requested
        ADDVS   r13, r13, #(1+1)*4+4+4
        LDMVSFD r13!, {r1,r3-r12, pc}   ; saved r0 goes to r1 so r0 keeps the error

        LDRB    r8, [r4, #0]            ; r8 = ranks[0]
        STRB    r8, [r13, #2*4+0]       ; cols[0] = r8

        MOV     r0, r4
        MOV     r1, r5
        MOV     r2, r6
        MOV     r3, r7
        BL      asm_evaluate_second_colour ; find the second colour

        ; Only look for a third colour if a second one was found
        LDRB    r8, [r13, #3*4+1]       ; r8 = ratios[1]
        CMP     r8, #0
        BEQ     %FT01

        MOV     r0, r4
        MOV     r1, r5
        MOV     r2, r6
        MOV     r3, r7
        BL      asm_evaluate_third_colour ; find the third colour
01
        LDR     r1, [r13, #2*4]         ; r1 = cols
        LDR     r2, [r13, #3*4]         ; r2 = ratios

        ADD     r13, r13, #(1+1)*4+4+4
        LDMFD   r13!, {r0,r3-r12, pc}^

;
; CacheThePalette
;
; Reads the palette entry for every colour number of the current mode,
; from the highest down to 0, and stores each as a whole word in the
; PaletteStipple table in the RMA workspace.
;
; In:   r12 -> workspace (CachedL2BPP holds the log2 bits per pixel)
; Out:  VS: my_read_palette reported an error, registers as it left them
;       otherwise flags are restored from lr
;       r0-r3 may be corrupted; r4 is overwritten with the table address
;
CacheThePalette ROUT
        Push    "lr"
        MOV     r1, #1
        LDRB    r0, [r12, #CachedL2BPP]
        MOV     r0, r1, ASL r0          ; bits per pixel
        MOV     r0, r1, ASL r0          ; number of colours
        MOV     r1, #16                 ; r1 argument for my_read_palette
        ADD     r4, r12, #PaletteStipple
        B       %FT02                   ; enter at the count-down test
01
        BL      my_read_palette         ; r0 = colour number, entry comes back in r2
        Pull    "pc", VS
        STR     r2, [r4, r0, ASL #2]
02
        SUBS    r0, r0, #1              ; next colour number down
        BPL     %BT01
        Pull    "pc",,^                 ; all colours done

;
; This routine sets the stipple matrix interleaved with the Palette
; in the RMA workspace.
;
; Each stipple value is written to byte 0 of a word of the PaletteStipple
; table (STRB with the index scaled by 4). The table is addressed as rows
; of 1 << (5-l2bpp) entries: entry i of row j is stipple[ i + (j << (5-l2bpp)) ].
;
; The code below reads like compiler output; the original C statements are
; kept as comments in front of the instructions generated for them.
;
; In:   r12 -> workspace
;
; register r0-r3 are corrupted
; VS on exit indicates an error
; (an error from OS_ReadModeVariable; otherwise flags are restored from lr)
;
SetStipple      ROUT

; Disabled alternative implementation, left commented out:
;        ADD     r0, r12, #PaletteStipple
;        MOV     r1, #255
;01
;        AND     r2, r1, #&1f
;        STRB    r2, [r0, r1, ASL #2]
;        SUBS    r1, r1, #1
;        BPL     %BT01
;        MOV     pc, lr








; Register use through most of the routine:
;       r2  = l2pattern_height          r3  = l2pattern_width
;       r4  = constant 1                r5  = l2pixel_width (row shift)
;       r6  = l2bpp                     r11 = l2pixel_width
;       r7, r8 = xeig, yeig until compared, then scratch
; Stack frame (16 bytes):
;       [sp,#0] = dim[0]   [sp,#4] = dim[1]
;       [sp,#8] = 1<<l2pattern_height (row-fill loop limit)
;       [sp,#12] = written in the first pattern loop but never read back

        Push    "r4,r5,r6,r7,r8,r9,r11,lr"
        MOV     r4, #1

; xeig = bbc_modevar( -1, bbc_XEigFactor );
        MOV     r1, #4
        MOV     r0, #-1
        SWI     XOS_ReadModeVariable
        Pull    "r4,r5,r6,r7,r8,r9,r11,pc", VS
        MOV     r7, r2

; yeig = bbc_modevar( -1, bbc_YEigFactor );
        MOV     r1, #5
        SWI     XOS_ReadModeVariable
        Pull    "r4,r5,r6,r7,r8,r9,r11,pc", VS
        MOV     r8, r2

; l2bpp = bbc_modevar( -1, bbc_Log2BPP );
        MOV     r1, #9
        SWI     XOS_ReadModeVariable
        Pull    "r4,r5,r6,r7,r8,r9,r11,pc", VS
        MOV     r6, r2

        SUB     sp, sp, #16

; l2pixel_width = 5 - l2bpp;
        RSB     r11, r6, #5
        MOV     r5, r11

; if ( xeig < yeig )
; {
        CMPS    r7, r8
        BGE     |L00018c.J5.set_stipple|

;   if ( l2pixel_width > 3 )
;      l2pattern_width = 3;
;   else
;      l2pattern_width = l2pixel_width;
        CMPS    r11, #3
        MOVLE   r3, r11
        MOVGT   r3, #3

;   l2pattern_height = 5 - l2pattern_width;
        RSB     r2, r3, #5

;   if ( l2pattern_height > 3 )
;      l2pattern_height = 3;
        CMPS    r2, #3
        MOVGT   r2, #3

;   if ( l2pattern_height > l2pattern_width )
;      l2pattern_height = l2pattern_width;
        CMPS    r2, r3
        MOVGT   r2, r3

;   l2pixels_in_pattern = l2pattern_width + l2pattern_height;
        ADD     r1, r3, r2

; lr holds l2pixels_in_pattern for the rest of this branch
        MOV     lr, r1

;   for ( i = 0;
;           i < 1<<l2pixels_in_pattern;
;           i++ )
;   {
        MOV     r0, #0
        MOV     r1, r4, ASL r1
        CMPS    r1, r0
        BLE     |L00020c.J27.set_stipple|
|L0000ec.J14.set_stipple|

;      dim[0] = dim[1] = 0;
        MOV     r1, #0
        STR     r1, [sp, #4]
        STR     r1, [sp, #0]

;      for ( j = 0; j < l2pixels_in_pattern; j++ )
;      {
        MOV     r1, #0
        CMPS    r1, lr
        BGE     |L000148.J19.set_stipple|
|L000104.J18.set_stipple|

;         dim[ j&1 ] = (dim[ j&1 ] << 1) | ( ((i>>j)&1) ^ ( j&1 ? 0 : ((i>>(j-1))&1) ) );
; r9 = (i>>j)&1, r7 = j&1, r8 = the bit to exclusive-or in (0 when j is odd)
        MOV     r7, r0, ASR r1
        AND     r9, r7, #1
        ANDS    r7, r1, #1
        SUBEQ   r8, r1, #1
        MOVEQ   r8, r0, ASR r8
        ANDEQ   r8, r8, #1
        MOVNE   r8, #0
        EOR     r9, r9, r8
        MOV     r8, sp
        STR     r8, [sp, #12]
        LDR     r8, [r8, r7, ASL #2]
        ORR     r8, r9, r8, ASL #1
        MOV     r9, sp
        STR     r8, [r9, r7, ASL #2]

;      }
        ADD     r1, r1, #1
        CMPS    r1, lr
        BLT     |L000104.J18.set_stipple|
|L000148.J19.set_stipple|

;      stipple[ dim[0] + (dim[1] << (5-l2bpp)) ] = (i << (5-l2pixels_in_pattern));
        MOV     r1, #5
        SUB     r1, r1, lr
        MOV     r1, r0, ASL r1
        AND     r7, r1, #255
        LDR     r1, [sp, #4]
        MOV     r1, r1, ASL r5
        LDR     r8, [sp, #0]
        ADD     r1, r1, r8
        ADD     r8, r12, #PaletteStipple
        STRB    r7, [r8, r1, ASL #2]

;   }
        ADD     r0, r0, #1
        MOV     r1, #1
        MOV     r1, r1, ASL lr
        CMPS    r1, r0
        BGT     |L0000ec.J14.set_stipple|

; }
        B       |L00020c.J27.set_stipple|

; else
; {
|L00018c.J5.set_stipple|

;   l2pattern_width = 2;
        MOV     r3, #2

;   l2pattern_height = 3;
        MOV     r2, #3

;   for ( i = 0;
;   {
        MOV     r0, #0
|L000198.J28.set_stipple|

;      dim[0] = dim[1] = 0;
        MOV     r1, #0
        STR     r1, [sp, #4]
        STR     r1, [sp, #0]

;      for ( j = 0;
;      {
        MOV     r1, #0
|L0001a8.J30.set_stipple|

;         dim[ j&1 ] = (dim[ j&1 ] << 1) | ( ((i>>j)&1) ^ ( j&1 ? 0 : ((i>>(j-1))&1) ) );
; same computation as in the first branch, with lr used as scratch
        MOV     lr, r0, ASR r1
        AND     r8, lr, #1
        ANDS    r7, r1, #1
        SUBEQ   lr, r1, #1
        MOVEQ   lr, r0, ASR lr
        ANDEQ   lr, lr, #1
        MOVNE   lr, #0
        EOR     r9, r8, lr
        MOV     lr, sp
        LDR     r8, [lr, r7, ASL #2]
        ORR     r8, r9, r8, ASL #1
        STR     r8, [lr, r7, ASL #2]

;                  ; j < 5; j++ )
;      }
        ADD     r1, r1, #1
        CMPS    r1, #5
        BLT     |L0001a8.J30.set_stipple|

;      stipple[ dim[1] + (dim[0] << (5-l2bpp)) ] = i;
        AND     lr, r0, #255
        LDR     r1, [sp, #0]
        MOV     r1, r1, ASL r5
        LDR     r7, [sp, #4]
        ADD     r1, r1, r7
        ADD     r7, r12, #PaletteStipple
        STRB    lr, [r7, r1, ASL #2]

;            i < 32;
;            i++ )
;   }
        ADD     r0, r0, #1
        CMPS    r0, #32
        BLT     |L000198.J28.set_stipple|

; }
|L00020c.J27.set_stipple|

; The pattern is narrower than a row: repeat it across each row
; if ( l2pattern_width < l2pixel_width )
; {
        CMPS    r3, r11
        BGE     |L000288.J37.set_stipple|

;   for ( j = 0; j < 1<<l2pattern_height; j++ )
        MOV     r1, #0
        MOV     r0, r4, ASL r2
        STR     r0, [sp, #8]
        CMPS    r0, r1
        BLE     |L000288.J37.set_stipple|

;   {
; lr = 1<<l2pixel_width, r3 = 1<<l2pattern_width from here on
        MOV     r0, r4
        MOV     lr, r0, ASL r11
        MOV     r3, r0, ASL r3
|L000234.J39.set_stipple|

;      for ( i = 1<<l2pattern_width; i < 1<<l2pixel_width; i++ )
        MOV     r0, r3
        MOV     r5, r0
        CMPS    lr, r5
        BLE     |L000278.J44.set_stipple|
        RSB     r5, r6, #5
        MOV     r7, r1, ASL r5
        ADD     r5, r12, #PaletteStipple

;      {
|L000254.J43.set_stipple|

;         stipple[ i + (j << (5-l2bpp)) ] =
;            stipple[ i - (1<<l2pattern_width) + (j << (5-l2bpp)) ];
        SUB     r9, r0, r3
        MOV     r8, r7
        ADD     r9, r9, r7
        LDRB    r9, [r5, r9, ASL #2]
        ADD     r8, r8, r0
        STRB    r9, [r5, r8, ASL #2]

;      }
        ADD     r0, r0, #1
        CMPS    lr, r0
        BGT     |L000254.J43.set_stipple|
|L000278.J44.set_stipple|

;   }
        ADD     r1, r1, #1
        LDR     r0, [sp, #8]
        CMPS    r0, r1
        BGT     |L000234.J39.set_stipple|

; }
|L000288.J37.set_stipple|

; The pattern has fewer than 8 rows: repeat it down to row 7
; if ( l2pattern_height < 3 )
        CMPS    r2, #3
        BGE     |L0002f4.J62.set_stipple|

; {
;   for ( j = 1<<l2pattern_height; j < 8; j++ )
        MOV     r1, r4, ASL r2
        CMPS    r1, #8
        BGE     |L0002f4.J62.set_stipple|
; r3 = 1<<l2pixel_width, and r11 is reused as the constant 5
        MOV     r3, r4, ASL r11
        MOV     r11, #5
;   {
|L0002r3.J51.set_stipple|

;      for ( i = 0; i < 1<<l2pixel_width; i++ )
        MOV     r0, #0
        CMPS    r3, r0
        BLE     |L0002e8.J56.set_stipple|
; lr = source row offset, r5 = destination row offset
        MOV     r5, r1
        MOV     lr, r4, ASL r2
        SUB     lr, r1, lr
        SUB     r7, r11, r6
        MOV     lr, lr, ASL r7
        MOV     r5, r5, ASL r7
        ADD     r7, r12, #PaletteStipple

;      {
|L0002cc.J55.set_stipple|

;         stipple[ i + (j << (5-l2bpp)) ] =
;            stipple[ i + ((j - (1<<l2pattern_height)) << (5-l2bpp)) ];
        ADD     r8, lr, r0
        LDRB    r8, [r7, r8, ASL #2]
        ADD     r9, r5, r0
        STRB    r8, [r7, r9, ASL #2]

;      }
        ADD     r0, r0, #1
        CMPS    r3, r0
        BGT     |L0002cc.J55.set_stipple|
|L0002e8.J56.set_stipple|
        ADD     r1, r1, #1
        CMPS    r1, #8
        BLT     |L0002r3.J51.set_stipple|
;   }
; }
|L0002f4.J62.set_stipple|
        ADD     sp, sp, #16
        Pull    "r4,r5,r6,r7,r8,r9,r11,pc",, ^

      ]
        END