; Copyright 1996 Acorn Computers Ltd
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;     http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;
; > Sources.SprTrans

; Stack-relative workspace for the transformed sprite plotter. The block is
; claimed with "SUB sp, sp, #trns_spr_vars_end" and released with the matching
; ADD, so every field below is addressed relative to SP. Code that has pushed
; further words must add the pushed size to the field offset (e.g. "+ 3*4").
;
; The seven words from trns_spr_X_x0_y to trns_spr_lineptr are fetched with a
; single LDMIA into trns_X_x0_y, trns_Y_x0_y, trns_inc_X_x, trns_inc_Y_x,
; trns_inc_X_y, trns_inc_Y_y, trns_line_ptr (R3-R9). LDM fills registers in
; ascending number from ascending addresses, so the declaration order here
; MUST match that register order: inc_X_y (R7) has to precede inc_Y_y (R8).

                                ^       0,SP
trns_spr_xcoords                #       16      ; Four x coordinates
trns_spr_ycoords                #       16      ; Four y coordinates
trns_comp_spr_left              #       4       ; Sprite left hand edge (bottom 16 bits)
trns_comp_spr_start             #       4       ; Sprite start (accounting for internal coord block top)
trns_comp_spr_byte_width        #       4       ; Sprite byte width << (3-sprite bpp)
trns_comp_spr_height            #       4       ; Sprite height (top 16 bits) and right hand edge (bottom 16)
trns_comp_spr_ttr               #       4       ; Translation table (if required)
trns_comp_spr_masko             #       4       ; Sprite mask offset from image << (3-sprite bpp)
trns_comp_ecf_ora               #       4       ; ECF OR word
trns_comp_ecf_eor               #       4       ; ECF EOR word
trns_codebuffer                 #       4       ; Pointer to codebuffer
trns_spr_X_x0_y                 #       4       ; Sprite X,Y at top coordinate of area
trns_spr_Y_x0_y                 #       4       ;  in 16.16 fixed point
trns_spr_inc_X_x                #       4       ; Sprite increments
trns_spr_inc_Y_x                #       4       ;   ( change induced by single
trns_spr_inc_X_y                #       4       ;     increments in screen x,y on
trns_spr_inc_Y_y                #       4       ;     sprite X,Y )
trns_spr_lineptr                #       4       ; Line to output onto
; The plotter keeps four edge records of seven words each (28 bytes) followed
; by a single -1 terminator word: 4*28+4 = 116 bytes, which fits in the
; 6*4*4 + 4*6 = 120 bytes reserved by the next two fields.
trns_spr_edgeblock              #       6*4*4   ; Edge blocks, in format as below
trns_spr_edgeblock_end          #       4*6     ; -1, to denote end of edge block
trns_ecf_ptr                    #       4       ; Ecf pointer
trns_masking_word               #       4       ; Masking word for > eight bit per pixel
trns_comp_mask_offset           #       4       ; used to point at 1bpp mask data
trns_comp_spr_mask_width        #       4       ; 1bpp mask equivalent of spr_width
trns_comp_mask_base             #       4       ; 1bpp mask adjustment to mask data
trns_spr_vars_end               *       :INDEX:@
; The edge blocks are stored in a similar fashion to Draw Quick Fill, and the code is mainly a copy
; of that. Each edge record occupies SEVEN words (28 bytes); the list of four records is terminated
; by a word of -1. The actual layout of the blocks is as follows:
;   Offset 0  : Flag word. Top two bits specify direction of line in the X-axis (01=positive,11=negative)
;               Bottom two bits specify whether edge is active (trns_activated set), not yet
;               active (00), or dead (trns_deactivated set)
;   The remaining words are dependent on the flags. For active edges:
;   Offset 4  : Target Y coordinate in pixels (pixel row of the lower end, plus one). The line is
;               deactivated once the current scan line is below this Y coordinate
;   Offset 8  : ABS(deltaX) for the line, in 256ths of a pixel
;   Offset 12 : ABS(deltaY) for the line, in 256ths of a pixel
;   Offset 16 : Bresenham error value for the pixel
;   Offset 20 : Current X coordinate of the point (in pixels)
;   Offset 24 : Crossing X coordinate for the current scan line (written by the "move on" pass)
;   For inactive edges:
;   Offset 4  : Lower X coordinate (256ths of pixel)
;   Offset 8  : Lower Y coordinate
;   Offset 12 : Upper X coordinate
;   Offset 16 : Upper Y coordinate

trns_activated          *       2_0001
trns_deactivated        *       2_0010

; Register names. Several names share a register because the compiled inner
; loop and the surrounding set-up code use the registers for different things.

trns_xsize              RN      0       ; Top 16 bits only. Used throughout the compiled loop
trns_spr_left           RN      0       ; Bottom 16 bits only. Used throughout the compiled loop
trns_scr_lx             RN      0       ; Used outside the loop
trns_spr_start          RN      1       ; Used throughout the loop
trns_scr_rx             RN      1       ; Used outside the loop
trns_offset             RN      2       ; Used throughout the loop
trns_scr_y              RN      2       ; Used outside the loop
trns_X                  RN      3       ; Used throughout the loop
trns_X_x0_y             RN      3       ; Used outside the loop
trns_Y                  RN      4       ; Used throughout the loop
trns_Y_x0_y             RN      4       ; Used outside the loop
trns_inc_X_x            RN      5       ; Used throughout the loop & outside the loop
trns_inc_Y_x            RN      6       ; Used throughout the loop & outside the loop
trns_byte_width         RN      7       ; Used throughout the loop
trns_inc_X_y            RN      7       ; Used outside the loop
trns_spr_height         RN      8       ; Top 16 bits only. Used throughout the compiled loop
trns_spr_right          RN      8       ; Bottom 16 bits only. Used throughout the compiled loop
trns_inc_Y_y            RN      8       ; Used outside the loop
trns_out_ptr            RN      9       ; Used throughout the loop
trns_line_ptr           RN      9       ; Used outside the loop
trns_out_word           RN      10      ; Used throughout the loop
trns_vertex_ptrs        RN      10      ; Used outside the loop
trns_out_mask           RN      11      ; Used throughout the loop
trns_dummy11            RN      11      ; (dummy register - used outside the loop)
trns_out_x              RN      12      ; Used on entry to the compiled loop
trns_workspace_ptr      RN      12      ; Workspace pointer
trns_dummy12            RN      12      ; (dummy register - used everywhere)
trns_in_pixel           RN      14      ; Used in the middle to end of loop
trns_dummy14            RN      14      ; (dummy register - used everywhere)

        GBLA    ldmreg
        GBLA    ldmreg3
        GBLS    ldmreg2

; TrnsAsm: copy $size bytes of code/data at $label to the output pointer in
; R10 (post-incremented), six words at a time via R4-R9. Corrupts R4-R9.
        MACRO
$l      TrnsAsm $label,$size,$cc
ldmreg  SETA    $size:SHR:2
ldmreg3 SETA    ldmreg+3
ldmreg2 SETS    "$ldmreg3":RIGHT:1
$l
        [ ldmreg>6
        ADR$cc  R4,$label
        ; No writeback here: R4 is in the register list, and LDM with
        ; writeback to a base register that is also loaded is UNPREDICTABLE.
        ; R4 is re-derived with ADR by the recursive invocation anyway.
        LDM$cc.IA R4,{R4-R9}
        STM$cc.IA R10!,{R4-R9}
        TrnsAsm ($label+6*4),($size-6*4),$cc
        |
        [ ldmreg>1
        ADR$cc.L R4,$label
        LDM$cc.IA R4,{R4-R$ldmreg2}
        STM$cc.IA R10!,{R4-R$ldmreg2}
        |
        LDR$cc  R4,$label
        STR$cc  R4, [R10], #4
        ]
        ]
        MEND

; TrnsAsmReg: as TrnsAsm, but the source address is already in $reg and
; at most six words are copied. Corrupts R4 upwards.
        MACRO
$l      TrnsAsmReg $reg,$size,$cc
ldmreg  SETA    $size:SHR:2
ldmreg3 SETA    ldmreg+3
ldmreg2 SETS    "$ldmreg3":RIGHT:1
$l
        LDM$cc.IA $reg,{R4-R$ldmreg2}
        STM$cc.IA R10!,{R4-R$ldmreg2}
        MEND

; TrnsBranch: $reg holds a target address; build a branch instruction with
; opcode bits $op that, when placed at R10, jumps to it, and store it at
; [R10], post-incrementing R10. (target - R10 - 8) is converted to a 24-bit
; word offset by the LSL #6 / LSR #8 pair.
        MACRO
$l      TrnsBranch $reg,$op
        SUB     $reg,$reg,R10
        SUB     $reg,$reg,#8
        MOV     $reg,$reg,LSL#6
        MOV     $reg,$reg,LSR#8
        ORR     $reg,$reg,#$op
        STR     $reg,[R10],#4
        MEND

; DivRem2: unsigned divide by shift-and-subtract. On exit $rc = $ra DIV $rb,
; $ra = remainder; $rtemp is corrupted.
        MACRO
$label  DivRem2 $rc, $ra, $rb, $rtemp
$label
        [ debugtr
        TEQ     $rb,#0
        SWIEQ   OS_BreakPt
        ]
        MOV     $rtemp, $rb
        CMP     $rtemp, $ra, LSR #1
01
        MOVLS   $rtemp, $rtemp, LSL #1
        CMPLS   $rtemp, $ra, LSR #1
        BLS     %BT01
        MOV     $rc, #0
02
        CMP     $ra, $rtemp
        SUBCS   $ra, $ra, $rtemp
        ADC     $rc, $rc, $rc
        MOV     $rtemp, $rtemp, LSR #1
        CMP     $rtemp, $rb
        BCS     %BT02
        MEND

        [ usemull
        ! 0, "Using SMULL - StrongARM or later only"
        ]

; SSmultD: signed 32x32 -> 64 bit multiply, $rh:$rl = $ra * $rb.
; Uses SMULL when usemull is set (R14 is corrupted if $ra clashes with the
; result pair), otherwise calls arith_SSmultD with an inline argument word.
        MACRO
        SSmultD $ra,$rb,$rl,$rh
        ; Asserts to check requirements always meet both options
        ASSERT  $rh = $rl + 1
        ASSERT  $ra <= R8
        ASSERT  $rb <= R8
        ASSERT  $rl <= R7
        [ usemull
        [ $ra = $rl :LOR: $ra = $rh
        ! 0, "Register clash avoided in SSmultD"
        MOV     R14, $ra
        SMULL   $rl,$rh,R14,$rb
        |
        SMULL   $rl,$rh,$ra,$rb
        ]
        |
        BL      arith_SSmultD
        DCB     $ra,$rb,$rl,0
        ]
        MEND

        MakeSpriteErrorBlock BadFlags,,BadFlgs
        MakeSpriteErrorBlock BadCoordBlock,,BadCBlk
        MakeSpriteErrorBlock BadSourceRectangle,,BadRect
        MakeSpriteErrorBlock BadTransformation,,BadTfrm

; Error exits. Each builds its error and leaves through exitbiggie. They must
; only be reached with the stack as it was just after the entry Push.

badcoordblock
        ADR     R0, ErrorBlock_BadCoordBlock
        addr    r1, Title
        BL      copy_error_one          ; Always sets the V bit
        B       exitbiggie

nullareasource
        ADR     R0, ErrorBlock_BadSourceRectangle
        addr    r1, Title
        BL      copy_error_one          ; Always sets the V bit
        B       exitbiggie

badtransformation
        ADR     R0, ErrorBlock_BadTransformation
        addr    r1, Title
        BL      copy_error_one          ; Always sets the V bit
        B       exitbiggie

        GBLA    flg_matrix
        GBLA    flg_coordblock
flg_matrix      SETA    1       ; R3 bit 0: when set the code below uses R6 as the
                                ;   destination coordinates directly; when clear it
                                ;   passes R6 to Draw_TransformPath as a matrix
flg_coordblock  SETA    2       ; R3 bit 1: R4 -> source rectangle

; ---------------------------------------------------------------------------
; Go_PlotMaskTransformed / Go_PutSpriteTransformed
;
; Plot a sprite (or its mask) through an arbitrary linear transformation.
;
; In:   R2    sprite name or pointer (resolved by findsprite)
;       R3    flags (only flg_matrix and flg_coordblock may be set)
;       R4 -> source rectangle {x0,y0,x1,y1} if flg_coordblock is set
;       R5    GCOL action in bits 0-3 (further flags above when flagbit)
;       R6 -> matrix or destination coordinates
;       R7 -> translation table (0 or -1 for none)
;       (Go_PlotMaskTransformed forces R5 = 8 and R7 = 0)
; Out:  R1-R9 preserved; V clear on success. Errors leave via exitbiggie.
;
; The routine validates its inputs, works out the four destination corners in
; 256ths of a pixel, derives the per-pixel sprite increments from the inverse
; of the transformation, compiles a row plotter, and then scan-converts the
; destination parallelogram from the top down using Draw-style edge records.
; ---------------------------------------------------------------------------

Go_PlotMaskTransformed
        Push    "R1-R9,LR"
        ; Pretend its a GCOL 8 - use sprite's mask if it has one
        MOV     R5,#8
        ; No colour translation
        MOV     R7,#0
        B       %FT01

Go_PutSpriteTransformed
        Push    "R1-R9,LR"
01
        Debug   tr,"Draw sprite: R0,R1,R2 =",R0,R1,R2
        Debug   tr,"Coords, gcol, &scale, &ttr =",R3,R4,R5,R6,R7

        CLRPSR  I_bit, R14              ; re-enable interrupts

        [ flagbit
        MOV     R14, R5, LSR #4
        STR     R14, trns_flags2
        [ widetrans
        BICS    R14, R14, #flg2_ignorettr+flg2_widetrans + flg2_ditheron
        |
        BICS    R14, R14, #flg2_ignorettr + flg2_ditheron
        ]
        ADRNE   R0, ErrorBlock_BadFlags
        addr    r1, Title, NE
        BLNE    copy_error_one          ; Always sets the V bit
        BVS     exitbiggie
        ]

        AND     R5,R5,#&0F              ; only bottom 4 bits are interesting

        BICS    R14, R3, #flg_matrix:OR:flg_coordblock ; Check for legal flags
        ADRNE   R0, ErrorBlock_BadFlags
        addr    r1, Title, NE
        BLNE    copy_error_one          ; Always sets the V bit
        BVS     exitbiggie

        ; see if reason code indicated a sprite name or sprite pointer in R2
        BL      findsprite              ; R2 --> sprite
        MOVVC   R1,R2                   ; now R1 --> sprite

        ; read input/output mode variables
        BLVC    readvduvars
        LDRVC   R0,[R1,#spMode]         ; get sprite's original mode
        MOVVC   R14,R0,LSR #27
        STRVC   R14,save_spr_type       ; and derive the sprite type from it
        BLVC    readspritevars
        BVS     exitbiggie

        [ ignore_ttr
        ; check whether it has a palette - if doing <16 to >8 we may use it in preference
        ; to the translation table
        MOV     R0, #0
        STR     R0, trns_palette

        LDR     R0,[R1,#spImage]
        CMP     R0,#SpriteCBsize
        BEQ     trns_has_no_palette
        LDR     R14,[R1,#spTrans]
        CMP     R14,#SpriteCBsize
        BEQ     trns_has_no_palette

        ;validate it (to exclude 8bpp without full palettes)
        ;test is that palette size should be 8*ncolours

        ;find the lower of the sprite start and mask start
        CMP     R0,R14
        MOVCS   R0,R14
        SUB     R0,R0,#SpriteCBsize
        MOV     R0,R0,LSR #3            ;divide by 8 for number of palette entries
        MOV     R14,#1
        LDR     R9,save_inbpp
        MOV     R14,R14,ASL R9
        CMP     R14,R0
        BNE     trns_has_no_palette

        [ flagbit
        LDR     R14, trns_flags2
        TST     R14, #flg2_ignorettr
;       BNE     %FT01
;       [ med01867
;       CMP     R9, #8
;       BNE     trns_has_no_palette     ;restrict it to 8bpp full palette only
;       ]
        BEQ     trns_has_no_palette
01
        |
;       [ med01867
;       CMP     R9, #8
;       BNE     trns_has_no_palette     ;restrict it to 8bpp full palette only
;       ]
        B       trns_has_no_palette
        ]

        ; The palette is taken from the address directly after the sprite
        ; control block. (A redundant load of spTrans used to sit here; its
        ; result was overwritten by the ADD, so it has been removed.)
        ADD     R14,R1,#SpriteCBsize
        STR     R14, trns_palette

trns_has_no_palette
        ]

        ; validate supplied translation table (if any)
        CMP     R7,R7,ASR #31           ; documented as <=0 being none - now accept 0 or -1
        MOVEQS  R7,#0                   ; 0 ==> no translation
        BLNE    checktrans
        BVS     exitbiggie              ; fault it because it failed
;
        [ ignore_ttr
        ; if doing a sprite of <16bpp to >8bpp, and it has a palette, change the ttr pointer
        ; to point at the palette data instead of the ttr data. Note that the two are different
        ; formats, so there is also a different plotting routine to include too...
        LDR     R14, BPP                ; output bpp
        CMP     R14, #16
        MOVCC   R14, #0
        STRCC   R14, trns_palette
        BCC     %FT45

        LDR     R14, save_inbpp
        CMP     R14, #16
        MOVCS   R14, #0
        STRCS   R14, trns_palette
        BCS     %FT45

        LDR     R14, trns_palette
        TEQ     R14, #0
        MOVNE   R7,R14
        MOVEQ   R14,#0
        STREQ   R14, trns_palette       ;only non-zero if going to use this

        ; trns_palette doubles as a pointer to the palette up this far, and then becomes a
        ; compilation flag for the macro generation (with the value being passed in as the
        ; ttr address)
45
        ]

        STR     R7,ColourTTR
        CMP     R7,#0
        BNE     trns_notrans
;
        LDRB    R14,spritecode          ; R14 = bottom 8 bits of reason code
        TEQ     R14,#SpriteReason_PlotMaskTransformed
        LDRNE   R14,save_inbpp
        LDRNE   R0,BPP
        TEQNE   R14,R0                  ; OK if same bpp or mask plotting
        ;however, don't error if going 16>32 or 32>16
        BEQ     trns_notrans            ;dispose of the equal case
        CMP     R0,#16
        CMPEQ   R14,#32
        BEQ     trns_notrans
        CMP     R0,#32
        CMPEQ   R14,#16
        BEQ     trns_notrans
        B       errtrans

trns_notrans
        ; adapt mode variables intelligently (account for double pixel modes)
        ; From here on x coordinates are held scaled up by Log2bpc-Log2bpp.
        LDR     R8, Log2bpc
        LDR     R9, Log2bpp
        SUB     R8, R8, R9
        LDR     R0, gwx0
        MOV     R0, R0, ASL R8
        STR     R0, gwx0
        LDR     R0, gwx1
        ADD     R0, R0, #1
        MOV     R0, R0, ASL R8
        STR     R0, gwx1

        ; Valid registers: R1 - ptr to sprite?, R3 - flags, R4 -> coordinate block, R5 GCOL, R6->matrix/coords, R7->ttr
;       Debug   tr,"Read VDU and sprite:",#gwx0,#gwy0,#gwx1,#gwy1,#orgx,#orgy
;       Debug   tr,"Pointer to sprite:",R1

        ; Get coordinate block
        LDR     R2, save_inlog2bpp
        TST     R3, #flg_coordblock
        BNE     %FT01

        ; (whole sprite R0 - left, R8 - top y, R9 - right, R14 - bottom)
        LDR     R14, save_spr_type
        CMP     R14, #0
        LDREQ   R0, [R1, #spLBit]
        MOVEQ   R0, R0, ASR R2
        MOVNE   R0, #0                  ; new format sprites don't have any lh wastage
        LDR     R8, [R1, #spHeight]
        ADD     R8, R8, #1
        LDR     R9, [R1, #spRBit]
        RSB     R9, R9, #31
        LDR     R4, [R1, #spWidth]
        ADD     R4, R4, #1
        RSB     R9, R9, R4, LSL#5
        MOV     R9, R9, ASR R2
        MOV     R14, #0
        B       %FT02

01
        ; Check area passed in
        LDMIA   R4, {R0,R8,R9,R14}

        ; If using a matrix then clip source rectangle to sprite
        TST     R3, #flg_matrix
        BNE     %FT03

        CMP     R8, #0
        MOVLT   R8, #0
        CMP     R14, #0
        MOVLT   R14, #0
        LDR     R4, [R1, #spHeight]
        ADD     R4, R4, #1
        CMP     R8, R4
        MOVGT   R8, R4
        CMP     R14, R4
        MOVGT   R14, R4
        CMP     R0, #0
        MOVLT   R0, #0
        CMP     R9, #0
        MOVLT   R9, #0
        LDR     R4, [R1, #spLBit]
        ADD     R0, R0, R4, LSR R2
        ADD     R9, R9, R4, LSR R2
        LDR     R4, [R1, #spRBit]
        RSB     R4, R4, #31
        LDR     R10, [R1, #spWidth]
        ADD     R10, R10, #1
        RSB     R4, R4, R10, LSL#5
        CMP     R0, R4, LSR R2
        MOVGT   R0, R4, LSR R2
        CMP     R9, R4, LSR R2
        MOVGT   R9, R4, LSR R2
        CMP     R0, R9
        CMPNE   R8, R14
        BEQ     nullareasource
        B       %FT02

03
        ; If not using a matrix then check valid source area
        CMP     R0, R9
        CMPNE   R8, R14
        BEQ     nullareasource
        CMP     R8, #0
        CMPGE   R14, #0
        BLT     badcoordblock
        LDR     R4, [R1, #spHeight]
        ADD     R4, R4, #1
        CMP     R8, R4
        CMPLE   R14, R4
        BGT     badcoordblock
        CMP     R0, #0
        CMPGE   R9, #0
        BLT     badcoordblock
        LDR     R4, [R1, #spLBit]
        ADD     R0, R0, R4, LSR R2
        ADD     R9, R9, R4, LSR R2
        LDR     R4, [R1, #spRBit]
        RSB     R4, R4, #31
        LDR     R10, [R1, #spWidth]
        ADD     R10, R10, #1
        RSB     R4, R4, R10, LSL#5
        CMP     R0, R4, LSR R2
        CMPLE   R9, R4, LSR R2
        BGT     badcoordblock

02
        TST     R3, #flg_matrix
        BNE     coords_passed_in

        ; Valid registers: R0,R8,R9,R14 - source rect, R1 - ptr to sprite, R5 GCOL, R6->matrix/coords, R7->ttr

        ; Set up input path for draw - move to R0,R8, move to R9,R14, end path
        Push    "R0,R1,R2,R3,R4,R5,R7,R8,R9,R10,R11,R12,R14"
        SUB     sp, sp, #64             ; path buffer (10 words used)
        Push    "R12"                   ; R12 is used as the path pointer below
        LDR     R11, inlog2px
        ADD     R11, R11, #8
        MOV     R0, R0, ASL R11
        MOV     R9, R9, ASL R11
        LDR     R11, inlog2py
        ADD     R11, R11, #8
        MOV     R8, R8, ASL R11
        MOV     R14, R14, ASL R11
        ADD     R12, SP, #4
        MOV     R10, R12
        MOV     R11, #2
        STMIA   R10!, {R11}             ; 2, x0, y0
        STMIA   R10!, {R0,R8,R11}       ; 2, x1, y0
        STMIA   R10!, {R9}
        STMIA   R10!, {R8,R11}          ; 2, x0, y1
        MOV     R11, #0
        STMIA   R10!, {R0,R14}
        STMIA   R10!, {R11}             ; 0 = end of path

        ; Transform the path
        ; The X (error-returning) form is required: the error exit below
        ; depends on the SWI returning with V set rather than raising the
        ; error through the error handler with this routine's stack in use.
        MOV     R0, R12
        MOV     R1, #0
        MOV     R2, R6
        MOV     R3, #0
        SWI     XDraw_TransformPath
        ADDVS   sp, sp, #64+8           ; drop R12, the buffer and stacked R0 (keep error ptr)
        Pull    "R1,R2,R3,R4,R5,R7,R8,R9,R10,R11,R12,R14",VS
        BVS     exitbiggie

        ; Recover R6
        ADD     R10, R12, #4
        LDMIA   R10!, {R0,R3,R4,R6,R8,R9,R11,R14}
        Pull    "R12"
        ADR     R10, save_outoffset
        STMIA   R10!, {R0,R3,R6,R8}
        ADD     R6, R6, R11             ; fourth corner = p1 + p2 - p0
        ADD     R8, R8, R14
        SUB     R6, R6, R0
        SUB     R8, R8, R3
        STMIA   R10!,{R6,R8,R11,R14}
        ADR     R6, save_outoffset
        ADD     sp, sp, #64
        Pull    "R0,R1,R2,R3,R4,R5,R7,R8,R9,R10,R11,R12,R14"

        ; Make Y values offset from the top of the sprite, not the bottom
coords_passed_in
        LDR     R2, [R1, #spHeight]
        ADD     R2, R2, #1
        SUB     R8, R2, R8
        SUB     R14, R2, R14

        ; Get R4 = distance from top of sprite, R2 = height of sprite, and set R8,R14 to be offset from R4
        SUBS    R2, R8, R14
        RSBMI   R2, R2, #0
        MOVMI   R4, R8
        MOVPL   R4, R14
        SUB     R8, R8, R4
        SUB     R14, R14, R4

        ; Store height, left, right of sprite for inner loop
        SUB     sp, sp, #trns_spr_vars_end
        CMP     R9, R0
        ORRGT   R10, R9, R2, LSL#16
        ORRLE   R10, R0, R2, LSL#16
        STR     R10, trns_comp_spr_height ; Height in top 16 bits, right side in bottom 16 bits
        STRGT   R0, trns_comp_spr_left  ; Left side in bottom 16 bits (measurements in pixels)
        STRLE   R9, trns_comp_spr_left  ; Left side in bottom 16 bits (measurements in pixels)

        ; Add R4*sprite_byte_width to sprite_pixel_data_start to give top of sprite, and store for loop
        LDR     R2,[r1,#spMode]
        Debug   ag,"spMode is",R2
        LDR     R2, [R1, #spImage]
        LDR     R10, [R1, #spWidth]
        Debug   ag,"spWidth is",R10
        Debug   ag,"sprite is at",R1
        ADD     R10, R10, #1
        LSL     R10, 2
        MLA     R11, R10, R4, R2
        ADD     R11, R11, R1
        STR     R11, trns_comp_spr_start

        ; and we have to do the same for the mask data now
        ; for now we just save the R4 value and deal with it once we have the mask
        ; equivalent of spWidth computed for the mask
        STR     R4, trns_comp_mask_base

        ; Work out byte width << 3-input_bpp, and store for loop
        LDR     R4, save_inlog2bpp
        CMP     R4, #4
        BCC     %FT89

        SUB     R4, R4, #3
        MOV     R11, R10, LSL #3
        MOV     R10, R10, LSR R4
        B       %FT88
89
        RSB     R4, R4, #3
        MOV     R11, R10, LSL #3
        MOV     R10, R10, LSL R4
88
        STR     R10, trns_comp_spr_byte_width

        ; Round the width up to a multiple of 32 for the 1bpp mask row width
        ANDS    R11, R10, #&1F
        MOVNE   R11, #32
        ADD     R11, R11, R10
        BIC     R11, R11, #&1F
        STR     R11, trns_comp_spr_mask_width ; used only for 1bpp masks

        ;the mask_base was used earlier for the number of rows to go into the
        ;data, so now we turn that into a byte offset
        LDR     R10, trns_comp_mask_base ; recover row number
        MUL     R10, R11, R10           ; convert to offset (bits)
        MOV     R10, R10, LSR #3        ; convert to bytes
        STR     R10, trns_comp_mask_base

        ; Get mask offset and store ttr pointer (clear mask GCOL bit if there is no mask)
        STR     R7, trns_comp_spr_ttr

        LDR     R10, BPP
        SUB     R10, R10,#1
        MOV     R11, #1:SHL:31
        MOV     R11, R11, ASR R10
        STR     R11, trns_masking_word  ; suitable mask for pixels
        Debug   tr,"trns_masking_word",R11

        LDR     R10, [R1, #spTrans]
        LDR     R11, [R1, #spImage]
        Debug   ag,"trans and image offsets",R10,R11
        SUBS    R11, R10, R11
        BICEQ   R5, R5, #8              ; no mask, so can't do a plot with b3 set
        Debug   ag,"trans minus image",R11

        LDR     R4, save_inlog2bpp
        CMP     R4, #4
        RSBCC   R4, R4, #3
        SUBCS   R4, R4, #3
        MOVCC   R11, R11, LSL R4
        MOVCS   R11, R11, LSR R4

        ; R4 is finished with now, so used as a working register for the mask stuff
        LDR     R10,[R1,#spTrans]       ; re-fetch offset to mask
        ADD     R10,R10,R1              ; turn into absolute address
        LDR     R4, trns_comp_mask_base ; fetch byte offset within 1bpp mask data
        ADD     R10,R10,R4              ; include it
        STR     R10,trns_comp_mask_base ; and store back the final result
        Debug   ag,"mask_base is",R10

        STR     R11, trns_comp_spr_masko
        Debug   ag,"stored masko as",R11

        Debug   tr,"Coordinate block:",R0,R8,R9,R14
        Debug   tr,"Mask offset:",#trns_comp_spr_masko
        Debug   tr,"Flags, GCOL, &Screen block, &TTR",R3, R5, R6, R7
        Debug   tr,"Height/right, left, spr_start, spr_byte_width",#trns_comp_spr_height,#trns_comp_spr_left,#trns_comp_spr_start,#trns_comp_spr_byte_width

        ; Copy the coordinate block
        ; Valid registers: R0,R8,R9,R14 - source rect, R1 - ptr to sprite, R5 GCOL, R6->matrix/coords, R7->ttr

        ; copy coordinates, adding graphics origin
        Push    "R1,R5,R7,R12"
        Push    "R0,R8,R9,R14"
        ADR     R0, trns_spr_xcoords + 4*8 ; eight words have been pushed above the workspace

        ; get O.S. to pixel shift (account for double pixel modes)
        LDR     R1, log2px
        LDR     R2, Log2bpp
        ADD     R1, R1, R2
        LDR     R2, Log2bpc
        SUB     R1, R1, R2
        LDR     R2, log2py

        ; get graphics origin
        LDR     R3, orgx
        LDR     R4, orgy

        ; get coordinates
        LDMIA   R6, {R5,R6,R7,R8,R9,R10,R11,R12}

        ; x coords - the destination must be a parallelogram: x1 + x3 - x0 = x2
        ADD     R14, R7, R11
        SUB     R14, R14, R5
        CMP     R14, R9
        Pull    "R0,R8,R9,R14",NE
        Pull    "R1,R5,R7,R12",NE
        ADDNE   sp, sp, #trns_spr_vars_end
        BNE     badtransformation
        ADD     R5, R5, R3, LSL#8
        ADD     R7, R7, R3, LSL#8
        ADD     R9, R9, R3, LSL#8
        ADD     R11, R11, R3, LSL#8
        MOV     R5, R5, ASR R1
        MOV     R7, R7, ASR R1
        MOV     R9, R9, ASR R1
        MOV     R11, R11, ASR R1
        STMIA   R0!, {R5,R7,R9,R11}

        ; y coordinates - same parallelogram test: y1 + y3 - y0 = y2
        ; This must be an unconditional CMP. Execution only gets here with Z
        ; set by the x test and nothing since then alters the flags, so a
        ; CMPNE would never execute and the y test would silently be skipped.
        ADD     R14, R8, R12
        SUB     R14, R14, R6
        CMP     R14, R10
        Pull    "R0,R8,R9,R14",NE
        Pull    "R1,R5,R7,R12",NE
        ADDNE   sp, sp, #trns_spr_vars_end
        BNE     badtransformation
        ADD     R6, R6, R4, LSL#8
        ADD     R8, R8, R4, LSL#8
        ADD     R10, R10, R4, LSL#8
        ADD     R12, R12, R4, LSL#8
        MOV     R6, R6, ASR R2
        MOV     R8, R8, ASR R2
        MOV     R10, R10, ASR R2
        MOV     R12, R12, ASR R2
        STMIA   R0!, {R6,R8,R10,R12}

        Debug   tr,"Transformed coords are:",R5,R6,R7,R8
        Debug   tr,"Transformed coords are:",R9,R10,R11,R12

        ; Get determinant
        SUB     R9, R7, R5              ; x1
        SUB     R10, R11, R5            ; x2
        SUB     R11, R8, R6             ; y1
        SUB     R12, R12, R6            ; y2
        Debug   tr,"x1,x2,y1,y2",R9,R10,R11,R12

        ; R4,5 = x1 * y2
        MOV     R0, R9
        MOV     R1, R12
        SSmultD R0,R1,R4,R5

        ; R6,7 = x2 * y1
        MOV     R0, R10
        MOV     R1, R11
        SSmultD R0,R1,R6,R7

        ; R4,R5 = x1*y2 - x2*y1 (48.16 precision)
        SUBS    R4, R4, R6
        SBC     R5, R5, R7
        Debug   tr,"R4,R5,R6,R7:",R4,R5,R6,R7

        ; R4,R5 = x1*y2 - x2*y1 / 4 (48.14 precision)
        MOV     R4, R4, LSR#2
        ORR     R4, R4, R5,LSL#30
        MOV     R5, R5, ASR#2

        ; R6,R7 = 1 / (x1*y2 - x2*y1) (16.48 precision)
        TEQ     R4, #0
        TEQEQ   R5, #0
        MOVEQ   R6, #0
        MOVEQ   R7, #0
        BEQ     trns_division_by_zero
        MOV     R0, #0
        MOV     R1, #&40000000
        Debug   tr,"Dividing:",R0,R1,R4,R5
        mextralong_divide R6,R7,R0,R1,R4,R5,R2,R3,R8
        Debug   tr,"Gives:",R6,R7

trns_division_by_zero
        ; Reload the source rectangle (x0,y0,x1,y1) stacked above and form
        ; its signed width and height
        LDMFD   sp, {R0,R1,R2,R3}
        SUBS    R2, R2, R0
;       RSBMI   R2, R2, #0
        SUBS    R3, R3, R1
;       RSBMI   R3, R3, #0

        ; spr_inc_X_x = det*y2 [ * xsize ]
        MOV     R0, R12
        MOV     R1, R2
        BL      produce_increment
        MOV     R12, R4

        ; spr_inc_Y_x = det*-y1 [ * ysize ]
        RSB     R0, R11, #0
        MOV     R1, R3
        BL      produce_increment
        MOV     R11, R4

        ; spr_inc_X_y = det*-x2 [ * xsize ]
        RSB     R0, R10, #0
        MOV     R1, R2
        BL      produce_increment
        MOV     R10, R4

        ; spr_inc_Y_y = det*x1 [ * ysize ]
        MOV     R0, R9
        MOV     R1, R3
        BL      produce_increment
        MOV     R9, R4

        ; Store increments
        STR     R12, trns_spr_inc_X_x + 8*4
        STR     R11, trns_spr_inc_Y_x + 8*4
        STR     R10, trns_spr_inc_X_y + 8*4
        STR     R9, trns_spr_inc_Y_y + 8*4

        Pull    "R0,R8,R9,R14"
        Pull    "R1,R5,R7,R12"
        Debug   tr,"Coordinate block:",R0,R8,R9,R14

        ; thats all the horrid fixed point stuff out of the way

        ; Find the top y coordinate
        ADR     R3, trns_spr_ycoords
        LDMIA   R3!, {R4,R6,R10,R11}
        CMP     R6, R4
        MOVGT   R4, R6
        CMP     R10, R4
        MOVGT   R4, R10
        CMP     R11, R4
        MOVGT   R4, R11
        MOV     R2, R8

        ; Valid registers: R5 GCOL, R7->ttr, R4 - maximum ycoordinate of area

        ; Get top y coordinate of centre of line being drawn in R1
        SUB     R1, R4, #128
        MOV     R1, R1, ASR#8
        LDR     R3, gwy1
        CMP     R1, R3
        MOVGT   R1, R3
        STR     R1, save_ycoord
        Debug   tr,"Top coordinate on screen (PIXELS) is:",R1

        ; Get address of line on screen
        LDR     R3, ywindlimit
        SUB     R3, R3, R1
        LDR     R8, screenstart
        LDR     R6, linelength
        MLA     R6, R3, R6, R8
        STR     R6, trns_spr_lineptr

        ; Get ECF pointer
        MOV     R6, #VduDriverWorkSpace + BgEcfOraEor
        AND     R3, R3, #7
        ADD     R6, R6, R3,LSL#3
;       Debug   tr, "Ecf pointer is:",R6
        STR     R6, trns_ecf_ptr

        ;spr_X_x0_y% += ((screen_y%-y(0)) * (!asm_spr_inc_X_y)) / 256
        ; screen_y%-y(0) = R6.R4
        MOV     R1, R1, ASL#8
        ADD     R1, R1, #128
        LDR     R4, trns_spr_ycoords
        SUB     R4, R1, R4
;       Debug   tr,"scry%-y0:",R4
        MOV     R6, R4, ASR#16
        BIC     R4, R4, R6, LSL#16

        ; inc_X_y = R8.R9
        LDR     R14, trns_spr_inc_X_x
;       Debug   tr,"IncXx",R14
        LDR     R14, trns_spr_inc_X_y
;       Debug   tr,"incXy,Xx0",R14,R0
        MOV     R8, R14, ASR#16
        BIC     R9, R14, R8, LSL#16

        ; multiply
        MUL     R10, R9, R6
        MLA     R10, R8, R4, R10
        MUL     R11, R6, R8
        MUL     R14, R9, R4
        MOV     R14, R14, LSR#8
        ADD     R10, R14, R10,LSL #8
        ADD     R0, R10, R0,LSL #16
        ADD     R0, R0, R11,LSL#24
        Debug   tr,"Xx0y:",R0

        ;spr_Y_x0_y% += ((screen_y%-y(0)) * (!asm_spr_inc_Y_y)) / 256
        ; inc_Y_y = R8.R9
        LDR     R14, trns_spr_inc_Y_y
        Debug   tr,"incYy=",R14
        MOV     R8, R14, ASR#16
        BIC     R9, R14, R8, LSL#16
        MUL     R10, R9, R6
        MLA     R10, R8, R4, R10
        MUL     R11, R6, R8
        MUL     R14, R9, R4
        MOV     R14, R14, LSR#8
        ADD     R10, R14, R10,LSL #8
        ADD     R2, R10, R2,LSL#16
        ADD     R2, R2, R11,LSL#24

        ; Valid registers: R0,R2 - X_x0_y/Y_x0_y, R5 GCOL, R7->ttr
        STR     R0, trns_spr_X_x0_y
        STR     R2, trns_spr_Y_x0_y
        Debug   tr,"Top corner (w.r.t. sprite) is:",R0,R2

        ; Now compile the code
        BL      compile_transform_code
        ADR     R10, codebuffer
        STR     R10, trns_codebuffer
        Debug   tr,"Code compiled:",R10

        ; Now, set up the edges in the edge block
        ; Each record is written as {flags=0, lowerX, lowerY, upperX, upperY}
        ; followed by two unused words (skipped by the ADD #8).
        ADR     R0, trns_spr_edgeblock
        MOV     R1, #0
        ADR     R2, trns_spr_xcoords
        LDMIA   R2, {R2,R3,R4,R5, R6,R7,R8,R9}

        ; Edge 0->1
        CMP     R6, R7
        STMGTIA R0!,{R1,R3,R7}
        STMGTIA R0!,{R2,R6}
        STMLEIA R0!,{R1,R2,R6}
        STMLEIA R0!,{R3,R7}

        ; Edge 1->2
        ADD     R0, R0, #8
        CMP     R7, R8
        STMGTIA R0!,{R1,R4,R8}
        STMGTIA R0!,{R3,R7}
        STMLEIA R0!,{R1,R3,R7}
        STMLEIA R0!,{R4,R8}

        ; Edge 0->3
        ADD     R0, R0, #8
        CMP     R6, R9
        STMGTIA R0!,{R1,R5,R9}
        STMGTIA R0!,{R2,R6}
        STMLEIA R0!,{R1,R2,R6}
        STMLEIA R0!,{R5,R9}

        ; Edge 3->2
        ADD     R0, R0, #8
        CMP     R9, R8
        STMGTIA R0!,{R1,R4,R8}
        STMGTIA R0!,{R5,R9}
        STMLEIA R0!,{R1,R5,R9}
        STMLEIA R0!,{R4,R8}

        ; Terminate the list with -1
        ADD     R0, R0, #8
        MVN     R1, #0
        STR     R1, [R0]

;;      Debug   tr,"Active edge list:",#trns_spr_edgeblock,#trns_spr_edgeblock+4,#trns_spr_edgeblock+8,#trns_spr_edgeblock+12,#trns_spr_edgeblock+16,#trns_spr_edgeblock+20,#trns_spr_edgeblock+24
;;      Debug   tr,"Active edge list:",#trns_spr_edgeblock+28,#trns_spr_edgeblock+32,#trns_spr_edgeblock+36,#trns_spr_edgeblock+40,#trns_spr_edgeblock+44,#trns_spr_edgeblock+48,#trns_spr_edgeblock+52
;;      Debug   tr,"Active edge list:",#trns_spr_edgeblock+56,#trns_spr_edgeblock+60,#trns_spr_edgeblock+64,#trns_spr_edgeblock+68,#trns_spr_edgeblock+72,#trns_spr_edgeblock+76,#trns_spr_edgeblock+80
;;      Debug   tr,"Active edge list:",#trns_spr_edgeblock+84,#trns_spr_edgeblock+88,#trns_spr_edgeblock+92,#trns_spr_edgeblock+96,#trns_spr_edgeblock+100,#trns_spr_edgeblock+104,#trns_spr_edgeblock+108

        ; Get top Y coordinate
        LDR     R1, save_ycoord
        MOV     R11, #3

        ; Start loop - looping until all edges are deactivated or below Y coord
00
        LDR     R14, gwy0
        CMP     R1, R14
        BLT     trns_completed_drawing

        ; Activate any new edges, deactivate old ones (R11 = count of deactivated edges)
        ADR     R10, trns_spr_edgeblock
01
        LDMIA   R10, {R0,R5,R6,R7,R8,R9,R14}

        ; Checked all the edges?
        CMN     R0, #1
        BEQ     activated_all_edges

        ; If dead then ignore
        TST     R0, #trns_deactivated
        BNE     edge_inactive

        ; If active then should we deactivate it?
        TST     R0, #trns_activated
        BNE     edge_active

        ; Test for activation
        SUB     R14, R8, #128
        CMP     R1, R14, ASR #8
        BGT     edge_inactive

        ; Activate an edge
        SUB     R8, R8, #128
        SUB     R6, R6, #128
        ORR     R0, R0, #trns_activated

        ; Calculate R3=ABS(deltaX) and R4=ABS(deltaY).
        SUBS    R3,R7,R5
        RSBLT   R3,R3,#0
        SUB     R4,R8,R6                ;Must be correct sign already - lines are sorted wrt Y coords.

        ; Record direction of line as 1 or -1 in top two bits of flags
        ORR     R0,R0,#&40000000        ;line goes rightwards
        ORRGE   R0,R0,#&80000000        ;Processor Status preserved from above - GE if line goes leftwards

        ; Now calculate target Y co-ordinate - the line must stop after this Y value is reached.
        MOV     R2,R6,ASR #8
        ADD     R2,R2,#1
        CMP     R1, R2
        MOVLT   R0, #trns_deactivated
        BLT     edge_inactive

        ; Valid registers: R0-flags, R1-current Y, R2-target Y, R3,R4-dX,dY,
        ;                  R7,R8-Upper x,y, R10-store for edge, R11-count of deactivated edges

        ; Now get Bresenham error in R6.
        AND     R14,R7,#&FF
        MOV     R7,R7,ASR #8
        ADD     R6,R3,R4                ;ABS(deltaX)+ABS(deltaY)
        CMP     R6,#&80000000:SHR:8
        BLO     out_qfill_spbres

        ; Only do complicated stuff if simple stuff will overflow
out_qfill_dpbres
        Push    "R0,R4,R5,R7"

        ; R7=-1,C=1 for leftward lines, R7=0,C=0 for rightward lines.
        MOVS    R7,R0,ASR #32

        ; Get R0=real sub-X coordinate on screen (R14=subpixel now)
        SUB     R0,R14,#128
        RSBCS   R0,R0,#0

        ; R4,R5 = R0*dY , R6,R7=R4,R5 -1 if leftward,+0 i rightward
        SSmultD R0,R4,R4,R5             ;subpixelX * ABS(deltaY) into R4,R5
        ADDS    R6,R4,R7                ;Accumulate into R6,R7
        ADC     R7,R5,R7

        ; R6,R7 += R0*dX
        AND     R0,R8,#255
        SSmultD R0,R3,R4,R5
        ADDS    R6,R4,R6
        ADC     R7,R5,R7

        ; Change error to full pixel instead of 256ths pixels
        MOV     R6,R6,LSR #8
        ORR     R6,R6,R7,LSL #24
        Pull    "R0,R4,R5,R7"
        B       out_qfill_bresdone

        ; Do simple stuff
out_qfill_spbres
        ; R6=-1,C=1 for leftward lines, R6=0,C=0 for rightward lines.
        MOVS    R6,R0,ASR #32

        ; Get R0=real sub-X coordinate on screen (R14=subpixel now)
        SUB     R14,R14,#128
        RSBCS   R14,R14,#0
        MLA     R6,R14,R4,R6            ;Accumulate subpixelX * ABS(deltaY)
        AND     R14,R8,#255
        MLA     R6,R14,R3,R6            ;Accumulate subpixelY * ABS(deltaX)
        MOV     R6,R6,ASR #8            ;Change units to full pixels

        ; Now R6=Bresenham value
out_qfill_bresdone
        MOV     R8,R8,ASR #8

        ; Now advance the edge until we're on the right scan line and the Bresenham
        ; value is negative.
        CMP     R1,R2                   ;Don't bother if we're below target Y
        BLT     out_qfill_doneclip
        SUBS    R8,R8,R1                ;Must set GT or EQ
        BLGT    out_qfill_fastclip
        CMP     R6,#0                   ;So this can only happen if R1 >= R2
out_qfill_Xcliploop
        ADDGE   R7,R7,R0,ASR #30        ;Advance in X direction as far as
        SUBGES  R6,R6,R4                ;  possible
        BGE     out_qfill_Xcliploop
out_qfill_doneclip
        ADD     R6,R6,R3

        ; Store flags,targetY,deltaX,deltaY
        STMIA   R10!,{R0,R2,R3,R4}
        STMIA   R10!, {R6,R7,R8}
        B       edge_no_store

edge_active
        ; If now below it then kill it
        CMP     R1, R5                  ; Check for deactivating the edge
        MOVLT   R0, #trns_deactivated
        SUBLT   R11, R11, #1
        ADD     R8,R8,R6                ;Adjust Bresenham value for Y move

edge_inactive
        STMIA   R10!, {R0,R5,R6,R7,R8,R9,R14}
edge_no_store
        B       %BT01

activated_all_edges
        CMP     R11, #0
        BLT     trns_completed_drawing

;;      Debug   tr,"Active edge list:",#trns_spr_edgeblock,#trns_spr_edgeblock+4,#trns_spr_edgeblock+8,#trns_spr_edgeblock+12,#trns_spr_edgeblock+16,#trns_spr_edgeblock+20,#trns_spr_edgeblock+24
;;      Debug   tr,"Active edge list:",#trns_spr_edgeblock+28,#trns_spr_edgeblock+32,#trns_spr_edgeblock+36,#trns_spr_edgeblock+40,#trns_spr_edgeblock+44,#trns_spr_edgeblock+48,#trns_spr_edgeblock+52
;;      Debug   tr,"Active edge list:",#trns_spr_edgeblock+56,#trns_spr_edgeblock+60,#trns_spr_edgeblock+64,#trns_spr_edgeblock+68,#trns_spr_edgeblock+72,#trns_spr_edgeblock+76,#trns_spr_edgeblock+80
;;      Debug   tr,"Active edge list:",#trns_spr_edgeblock+84,#trns_spr_edgeblock+88,#trns_spr_edgeblock+92,#trns_spr_edgeblock+96,#trns_spr_edgeblock+100,#trns_spr_edgeblock+104,#trns_spr_edgeblock+108
;;      Debug   tr,"Number of active/not yet activated edges:",R11

        ; Move on all active lines
        ADR     R10, trns_spr_edgeblock
01
        LDMIA   R10!, {R0,R2,R3,R4,R6,R7,R8}
        CMN     R0, #1
        BEQ     trns_draw_line
        TST     R0, #trns_activated
        BEQ     %BT01

        ; R0-flags, R2 - target Y, R3 - deltaX, R4-deltaY
        ; R6-Bresenham error value, R7-current X
        SUB     R8,R7,R0,ASR #31        ;Calculate crossing X
        CMP     R1,R2                   ;Stop if already below target
        BLT     trns_deactivate_2
;       CMP     R4,R6,ASR #3            ;Use fast code?
;       BLLE    out_qfill_fasthoriz
        CMP     R6,#0                   ;Are we still below the edge?
02
        ADDGE   R7,R7,R0,ASR #30        ;Make X moves until we're above the
        SUBGES  R6,R6,R4                ;  edge, adjusting Bresenham value
        BGE     %BT02                   ;  for them
        STMDB   R10, {R0,R2,R3,R4,R6,R7,R8}
        B       %BT01

trns_deactivate_2
        MOV     R0, #trns_deactivated
        STMDB   R10, {R0,R2,R3,R4,R6,R7,R8}
        B       %BT01

        ; Now find the pair of lines which are active
trns_draw_line
;;      Debug   tr,"Active edge list:",#trns_spr_edgeblock,#trns_spr_edgeblock+4,#trns_spr_edgeblock+8,#trns_spr_edgeblock+12,#trns_spr_edgeblock+16,#trns_spr_edgeblock+20,#trns_spr_edgeblock+24
;;      Debug   tr,"Active edge list:",#trns_spr_edgeblock+28,#trns_spr_edgeblock+32,#trns_spr_edgeblock+36,#trns_spr_edgeblock+40,#trns_spr_edgeblock+44,#trns_spr_edgeblock+48,#trns_spr_edgeblock+52
;;      Debug   tr,"Active edge list:",#trns_spr_edgeblock+56,#trns_spr_edgeblock+60,#trns_spr_edgeblock+64,#trns_spr_edgeblock+68,#trns_spr_edgeblock+72,#trns_spr_edgeblock+76,#trns_spr_edgeblock+80
;;      Debug   tr,"Active edge list:",#trns_spr_edgeblock+84,#trns_spr_edgeblock+88,#trns_spr_edgeblock+92,#trns_spr_edgeblock+96,#trns_spr_edgeblock+100,#trns_spr_edgeblock+104,#trns_spr_edgeblock+108
;;      Debug   tr,"Number of active/not yet activated edges:",R11

        ADR     R10, trns_spr_edgeblock
01
        ; First active edge: its crossing X (seventh word) goes to R7
        LDMIA   R10!, {R0,R2,R3,R4,R5,R6,R7}
        CMN     R0, #1
        BEQ     trns_completed_drawing
        TST     R0, #1
        BEQ     %BT01
01
        ; Second active edge: its crossing X goes to R8
        LDMIA   R10!, {R0,R2,R3,R4,R5,R6,R8}
        CMN     R0, #1
        BEQ     trns_completed_drawing
        TST     R0, #1
        BEQ     %BT01

        ; R7,R8 are x coords to plot between at current Y
        Push    "R1,R11,R12"
        MOV     trns_scr_y, R1
        MOV     trns_scr_lx, R7
        MOV     trns_scr_rx, R8
        ASSERT  trns_scr_lx<>R6
        ASSERT  trns_scr_y<>R6
        ASSERT  trns_scr_y<>R7

        ; Fetch this row's ECF OR/EOR words and step the ECF pointer,
        ; wrapping after eight rows (workspace offsets are +3*4 for the Push)
        LDR     R14, trns_ecf_ptr + 3*4
        LDMIA   R14!, {trns_X_x0_y, trns_Y_x0_y}
        STR     trns_X_x0_y, trns_comp_ecf_ora + 3*4
        STR     trns_Y_x0_y, trns_comp_ecf_eor + 3*4
        CMP     R14, #VduDriverWorkSpace + BgEcfOraEor+64
        MOVGE   R14, #VduDriverWorkSpace + BgEcfOraEor
        STR     R14, trns_ecf_ptr + 3*4

        ; Seven consecutive workspace words are loaded into R3-R9 in
        ; ascending register order.
        ADR     R14, trns_spr_X_x0_y + 3*4
        LDMIA   R14, {trns_X_x0_y,trns_Y_x0_y,trns_inc_X_x,trns_inc_Y_x,trns_inc_X_y,trns_inc_Y_y,trns_line_ptr}
;;      Debug   tr,"Calling plotting routine: lx,rx,ty",trns_scr_lx, trns_scr_rx, trns_scr_y
;;      Debug   tr,"incXx,Yx,Xy,Yy,lineptr",trns_inc_X_x,trns_inc_Y_x,trns_inc_X_y,trns_inc_Y_y,trns_line_ptr
;;      Debug   tr,"X0Y0",trns_X_x0_y,trns_Y_x0_y
        BL      plot_and_calculate_row
        Pull    "R1,R11,R12"

        ; Step down one scan line: next screen row, and move the sprite
        ; position for screen x=0 by the per-row increments
        LDR     R14, linelength
        LDR     trns_line_ptr, trns_spr_lineptr
        ADD     trns_line_ptr, trns_line_ptr, R14
        STR     trns_line_ptr, trns_spr_lineptr
        LDR     trns_X_x0_y, trns_spr_X_x0_y
        LDR     trns_Y_x0_y, trns_spr_Y_x0_y
        LDR     trns_inc_X_y, trns_spr_inc_X_y
        LDR     trns_inc_Y_y, trns_spr_inc_Y_y
        SUB     trns_X_x0_y, trns_X_x0_y, trns_inc_X_y
        SUB     trns_Y_x0_y, trns_Y_x0_y, trns_inc_Y_y
        STR     trns_X_x0_y, trns_spr_X_x0_y
        STR     trns_Y_x0_y, trns_spr_Y_x0_y
        SUB     R1, R1, #1
        B       %BT00

trns_completed_drawing
        ; Update changed coordinate block?
        LDR     R14, changedbox
        LDR     R0, [R14], #4
        TST     R0, #1
        BEQ     trns_no_changed_box

        ; Get R6 = top y, R4 = bottom y
        LDR     R6, save_ycoord
        MOV     R4, R1

        ; Check top y was above gwy0, else no change to box
        LDR     R0, gwy0
        CMP     R6, R0
        BLT     trns_no_changed_box

        ; Find R3 = left x, R5 = right x
        ADR     R0, trns_spr_xcoords
        LDMIA   R0, {R3,R7,R8,R9}
        MOV     R5, R3
        CMP     R3, R7
        MOVGT   R3, R7
        CMP     R3, R8
        MOVGT   R3, R8
        CMP     R3, R9
        MOVGT   R3, R9
        CMP     R5, R7
        MOVLT   R5, R7
        CMP     R5, R8
        MOVLT   R5, R8
        CMP     R5, R9
        MOVLT   R5, R9

        ; Now convert from 256ths pixels to pixels, and clip to graphics window
        ADD     R3, R3, #128
        MOV     R3, R3, ASR #8
        ADD     R5, R5, #128
        MOV     R5, R5, ASR #8
        LDR     R0, gwx0
        LDR     R1, gwx1
        CMP     R3, R0
        MOVLT   R3, R0
        CMP     R5, R1
        MOVGT   R5, R1
        CMP     R3, R5
        BGT     trns_no_changed_box

        ; Unbodge double pixels
        ; The x coordinates were scaled UP by Log2bpc-Log2bpp on entry, so
        ; they are scaled back DOWN here. SUBS (not SUB) is needed so that
        ; the MOVNEs test this difference and not the CMP R3,R5 above.
        LDR     R7, Log2bpc
        LDR     R8, Log2bpp
        SUBS    R7, R7, R8
        MOVNE   R3, R3, LSR R7
        MOVNE   R5, R5, LSR R7
        ADD     R4, R4, #1

        ; Get original box and update it
        LDMIA   R14, {R0,R1,R2,R7}
        CMP     R0, R3
        MOVGT   R0, R3
        CMP     R1, R4
        MOVGT   R1, R4
        CMP     R2, R5
        MOVLT   R2, R5
        CMP     R7, R6
        MOVLT   R7, R6
        STMIA   R14, {R0,R1,R2,R7}

trns_no_changed_box
        ADD     sp, sp, #trns_spr_vars_end
        CLRV
        Pull    "R1-R9,PC"
; -------------------------------------------------------------
; - Routine to get increment in sprite due to x or y movement -
; -------------------------------------------------------------
; R0   - x or y distance
; R1   - size of sprite (width or height)
; R6,7 - 64 bit determinant (16.48 format)
; Result in R4
;
; R0-R3 and R5-R12 are stacked on entry and restored on exit, so only R4
; (and the flags) change.
;
; Method: the signs are stripped first (determinant negated if negative,
; R0 negated if negative, and R1 negated if the product of the two signs is
; negative), then |R0| is multiplied by |determinant| as 16 bit digits, and
; finally the four result digits are each multiplied by R1 and recombined.
;
; Digit naming used by the stage comments below (as set up by the code):
;   0 = low 16 bits of |R0| (kept in R0),   2 = high 16 bits (kept in R2)
;   4,5 = high,low 16 bits of the determinant high word (R4,R5)
;   7,6 = high,low 16 bits of the determinant low word  (R7,R6)
; NOTE(review): "L"/"U" in those comments presumably mean the lower/upper
; 16 bits of the named partial product - confirm.

produce_increment
        Push    "R0-R3,R5-R12,R14"

        Debug   tr,"Inputs:",R0,R1,R6,R7

        ; First split R6/7 into R4-R7
        MOVS    R3, R7, ASR #32         ; R3 = 0 or -1 (sign of determinant)
        BEQ     %FT01
        RSBS    R6, R6, #0              ; negative: negate the 64 bit value
        RSC     R7, R7, #0
01
        MOV     R4, R7, LSR #16
        BIC     R5, R7, R4, LSL #16
        MOV     R7, R6, LSR #16
        BIC     R6, R6, R7, LSL #16

        ; Now check sign of R0, R3 = sign of product
        ; (the ASR #32 also leaves bit 31 of R0 in the carry flag)
        EORS    R3, R3, R0, ASR #32
        ; If R3<0 (i.e. product<0) then R1=-R1
        RSBMI   R1, R1, #0
        ; If R0<0 then R0=-R0
        RSBCS   R0, R0, #0

        ; Split R0
        MOV     R2, R0, LSR #16
        BIC     R0, R0, R2, LSL #16

        Debug   tr,"Premultiply values:R0,R2,R1,R3:",R0,R2,R1,R3
        Debug   tr,"Premultiply values:R4,R5,R7,R6:",R4,R5,R7,R6

        ; Now produce R8 = U0.6 + L2.6 + L0.7, R6 = U2.6 + U0.7 (inc. carries)
        MUL     R8, R0, R6
        MUL     R6, R2, R6
        MUL     R3, R0, R7
        ADDS    R3, R3, R6
        MOV     R6, R3, LSR #16
        ADDCS   R6, R6, #&10000         ; carry out of the 32 bit sum
        ADDS    R8, R8, R3, LSL #16
        MOV     R8, R8, LSR #16         ; (MOV leaves C from the ADDS intact)
        ADC     R6, R6, #0

        Debug   tr,"Stage 1:R6,R8",R6,R8

        ; Now produce R9 = L6 + L2.7 + L0.5, R7 = U6 + U2.7 + U0.5 (inc carries)
        MUL     R9, R0, R5
        MUL     R7, R2, R7
        ADDS    R3, R7, R9
        MOV     R7, R3, LSR #16
        ADDCS   R7, R7, #&10000
        ADD     R7, R7, R6, LSR #16
        MOV     R9, R3, LSL #16
        ADDS    R9, R9, R6, LSL #16
        MOV     R9, R9, LSR #16
        ADC     R7, R7, #0

        Debug   tr,"Stage 2:R7,R9",R7,R9

        ; Now produce R10 = L7 + L2.5 + L0.4, R5 = U7 + U2.5 + U0.4 (inc. carries)
        MUL     R10, R0, R4
        MUL     R5, R2, R5
        ADDS    R3, R5, R10
        MOV     R5, R3, LSR #16
        ADDCS   R5, R5, #&10000
        ADD     R5, R5, R7, LSR #16
        MOV     R10, R3, LSL #16
        ADDS    R10, R10, R7, LSL #16
        MOV     R10, R10, LSR #16
        ADC     R5, R5, #0

        Debug   tr,"Stage 3:R10,R5",R10,R5

        ; Now produce R11 = L5 + L2.4
        MLA     R11, R2, R4, R5
        MOV     R3, R11, LSR #16
        BIC     R11, R11, R3, LSL #16   ; keep only the low 16 bits

        ; Now R11.R10.R9.R8 (sixteen bits each) = 256/Det*Size in 32.32 form
        ; So get R1 * R11.R10.R9.R8 into R4
        MUL     R8, R1, R8
        MUL     R9, R1, R9
        MUL     R10, R1, R10
        MUL     R11, R1, R11
        MOV     R4, R8, ASR #24
        ADD     R4, R4, R9, ASR #8
        ADD     R4, R4, R10, ASL #8
        ADD     R4, R4, R11, ASL #24
        ;bug fix, round up if necessary
        ; (bit 7 of R9 is the most significant bit discarded by the ASR #8)
        TST     R9,#&80
        ADDNE   R4,R4,#1

        Debug   tr,"Output:R4",R4

        Pull    "R0-R3,R5-R12,PC"

; -----------------------------------
; - DrQfill routines required above -
; -----------------------------------

; Fast Y clipping routine
; Advances an edge by several Y steps in one go.
; As used by the code below:
;   R0 - sign selects the X direction (PL: X increases, MI: X decreases)
;   R3 - deltaX,  R4 - deltaY,  R8 - number of Y steps
;   R6 - Bresenham value,  R7 - X co-ordinate
; On exit R6 and R7 are updated and R8 holds the number of X steps taken
; (0 if the edge has not been crossed); R1 and R2 are preserved.

out_qfill_fastclip
        Push    "R1,R2,R7,LR"           ;R7 is borrowed below as the high word
        MOV     R7,R6,ASR #31           ;Sign-extend Bresenham value
        SSmultD R8,R3,R1,R2             ;(no. Y steps) * deltaX into R1,R2
        ADDS    R6,R6,R1                ;Accumulate into Bresenham value
        ADCS    R7,R7,R2
        MOVMI   R8,#0                   ;Check for still being to the right
        BMI     out_qfill_fastclipdone  ; of the edge
        BL      arith_DSdivS            ;Divide by deltaY
        DCB     R6,R4,R8,0              ;(dividend R6/R7, divisor R4, quotient R8)
        SSmultD R8,R4,R1,R2             ;(no. X steps) * deltaY into R1,R2
        SUB     R6,R6,R1                ;Not interested in high word!
out_qfill_fastclipdone
        Pull    "R1,R2,R7,LR"           ;R7 is the X co-ordinate again
        TEQ     R0,#0                   ;Move X co-ord. in right direction
        ADDPL   R7,R7,R8
        SUBMI   R7,R7,R8
        MOV     PC,LR

; Subroutine to advance an edge horizontally fast (i.e. using long division
;   rather than division by repeated subtraction!)
; Updates R6 and R7, corrupts R2 and R3, preserves flags & other registers
; NOTE(review): the TEQ below alters N and Z and the return is MOV (not MOVS)
; PC,LR, so the "preserves flags" claim above looks stale - confirm.

out_qfill_fasthoriz
        DivRem  R2,R6,R4,R3
        TEQ     R0,#0                   ;sign of R0 selects the X direction
        ADDPL   R7,R7,R2
        SUBMI   R7,R7,R2
        MOV     PC,LR

        [ :LNOT:usemull

; Subroutine to multiply two single precision signed numbers together and
;   get a double precision result.
; The word following the BL should contain
;   the numbers of the two operand registers in its bottom two bytes and the
;   number of the register to take the ls part of the result in the next byte.
;   The ms part of the result will go into the next register.
;   This routine will only work on registers R0-R8.
;
; The operands are read from, and the results written to, the register image
; stacked by the Push below, so the final Pull delivers the results.

arith_SSmultD
        Push    "R0-R8"                 ;REMEMBER: need to stack R0-R8 so they can hold results
        RSB     R8,PC,PC                ;get embedded PSR flags into R8 - 26/32-bit mode neutral
        LDRB    R0,[R14,-R8]!           ;Get first operand; R14 := (R14 - PSR)
        LDR     R0,[R13,R0,LSL #2]
        LDRB    R1,[R14,#1]             ;Get second operand
        LDR     R1,[R13,R1,LSL #2]
        MOV     R4,R0,LSR #16           ;Split first operand into halves
        BIC     R3,R0,R4,LSL #16
        MOVS    R6,R1,LSR #16           ;Split second operand into halves
        BIC     R5,R1,R6,LSL #16
        MUL     R2,R3,R5                ;Produce low partial product
        MUL     R3,R6,R3                ;And middle partial products
        MUL     R5,R4,R5
        MULNE   R6,R4,R6                ;And high partial product
                                        ; (skipped when R6 is already 0)
        ADDS    R3,R3,R5                ;Add middle partial products, dealing
        ADDCS   R6,R6,#&10000           ; with overflow
        ADDS    R2,R2,R3,LSL #16        ;Add middle partial product sum into
        ADC     R6,R6,R3,LSR #16        ; result
        TEQ     R0,#0                   ;Add cross products of operands and
        SUBMI   R6,R6,R1                ; operand sign extensions into
        TEQ     R1,#0                   ; result
        SUBMI   R6,R6,R0
        LDRB    R0,[R14,#2]             ;Store the result on the stack, to be
        ADD     R0,R13,R0,LSL #2        ; picked up by the correct registers
        STMIA   R0,{R2,R6}
        Pull    "R0-R8"
        ADD     PC,R14,#4               ;Skip the argument word on return (don't use ADDS!)

        ]

; Subroutine to divide a double precision unsigned number by a single
;   precision unsigned number, yielding a single precision unsigned result.
;   The word following the BL should contain the number of the register holding
;   the ls part of the dividend in its bottom byte; the ms part of the dividend
;   is in the next register. The next byte of the word contains the number of
;   the divisor register, and the next byte the number of the register in which
;   to deposit the quotient.
;   This routine will only work on registers R0-R8.
; It assumes that the
;   divisor is not zero, and that the quotient will not overflow.
;
; As with the multiply routine, operands are read from and the quotient is
; written to the register image stacked by the Push below.

arith_DSdivS
        Push    "R0-R8"
        RSB     R8,PC,PC                ;get embedded PSR flags into R8 - 26/32-bit mode neutral
        LDRB    R0,[R14,-R8]!           ;Get first operand; R14 := (R14 - PSR)
        ADD     R0,R13,R0,LSL #2
        LDMIA   R0,{R0,R1}              ;R0 = ls word, R1 = ms word of dividend
        LDRB    R2,[R14,#1]             ;Get second operand
        LDR     R2,[R13,R2,LSL #2]
        MOV     R3,#1                   ;Init. quotient with a sentinel bit
arith_DSdivS_loop
        ADDS    R0,R0,R0                ;Shift a bit up into the ms half of
        ADC     R1,R1,R1                ; the dividend
        CMP     R1,R2                   ;Do trial subtraction, producing
        SUBCS   R1,R1,R2                ; result bit in C
        ADCS    R3,R3,R3                ;Result bit into result, then loop
        BCC     arith_DSdivS_loop       ; unless sentinel bit shifted out
        LDRB    R0,[R14,#2]             ;Store the result on the stack, to be
        STR     R3,[R13,R0,LSL #2]      ; picked up by the correct registers
        Pull    "R0-R8"
        ADD     PC,R14,#4               ;Skip the argument word on return (don't use ADDS!)

; ---------------------------------
; - The code which is compiled in -
; ---------------------------------
; The trnslp_* fragments from here on are templates: the compiler routine
; copies selected fragments into the code buffer.  Their sizes and relative
; positions are relied on by that routine (see the ASSERTs), so instructions
; must not be added, removed or reordered without updating it.

compiled_routine_stacked * calc_row_stacked + 1*4 - 4; Number of bytes stacked since main loop

; Start of routine
; Saves the return address, primes the output word with a marker in its top
; bit, clears the output mask, and returns at once if trns_xsize <= 0.
trnslp_strt
        Push    "R14"
        MOV     trns_out_word, #&80000000
        MOV     trns_out_mask, #0
        CMP     trns_xsize, #0
        Pull    "PC",LE
trnslp_strt_size * .-trnslp_strt
        ASSERT  (trnslp_strt_size) = 5*4

; Get address of lefthand x on screen (dependent on out_bpp)
; One four-instruction entry per output depth; the o1 entry is padded with a
; no-op (ANDEQ R0,R0,R0) so that every entry is the same size.
trnslp_getaddr_o1
        ADD     trns_out_ptr, trns_out_ptr, trns_out_x, LSR# 3  ;(0.1.2.3 - 8.4.2.1 out_bpp)
        BIC     trns_out_ptr, trns_out_ptr, #3
        AND     trns_out_x, trns_out_x, #31                     ;(3.7.15.31 - 8.4.2.1 out_bpp)
        ANDEQ   R0,R0,R0
trnslp_getaddr_o2
        ADD     trns_out_ptr, trns_out_ptr, trns_out_x, LSR# 2  ;(0.1.2.3 - 8.4.2.1 out_bpp)
        BIC     trns_out_ptr, trns_out_ptr, #3
        MOV     trns_out_x, trns_out_x, LSL#1
        AND     trns_out_x, trns_out_x, #30                     ;(3.7.15.31 - 8.4.2.1 out_bpp)
trnslp_getaddr_o4
        ADD     trns_out_ptr, trns_out_ptr, trns_out_x, LSR# 1  ;(0.1.2.3 - 8.4.2.1 out_bpp)
        BIC     trns_out_ptr, trns_out_ptr, #3
        MOV     trns_out_x, trns_out_x, LSL#2
        AND     trns_out_x, trns_out_x, #28                     ;(3<<3.7<<2.15<<1.31<<0 - 8.4.2.1 out_bpp)
trnslp_getaddr_o8
        ADD     trns_out_ptr, trns_out_ptr, trns_out_x          ;(0.1.2.3 - 8.4.2.1 out_bpp)
        BIC     trns_out_ptr, trns_out_ptr, #3
        MOV     trns_out_x, trns_out_x, LSL#3
        AND     trns_out_x, trns_out_x, #24                     ;(3.7.15.31 - 8.4.2.1 out_bpp)
trnslp_getaddr_o16
        ADD     trns_out_ptr, trns_out_ptr, trns_out_x, LSL #1
        BIC     trns_out_ptr, trns_out_ptr, #3
        MOV     trns_out_x, trns_out_x, LSL #4
        AND     trns_out_x, trns_out_x, #16
trnslp_getaddr_o24
        ADD     trns_out_ptr, trns_out_ptr, trns_out_x, LSL #2
        BIC     trns_out_ptr, trns_out_ptr, #3
        MOV     trns_out_x, trns_out_x, LSL#5
        AND     trns_out_x, trns_out_x, #0      ; one pixel per word: bit offset is always 0
trnslp_getaddr_size * .-trnslp_getaddr_o24
        ASSERT  (.-trnslp_getaddr_o1) = 6*trnslp_getaddr_size  ; six equally sized entries

; Get pixel and macro word shifted to align with lefthand pixel inside screen word
trnslp_getwam
        MOV     trns_out_word, trns_out_word, LSR trns_out_x
trnslp_getwam_size * .-trnslp_getwam
        ASSERT  (trnslp_getwam_size) = 1*4

; Start of loop
; Converts the 16.16 sprite position (trns_X, trns_Y) into trns_offset:
; the row (integer part of trns_Y) is clamped to 0..height-1 and multiplied
; by trns_byte_width, X is clamped against trns_spr_left/trns_spr_right, and
; the integer part of the clamped X is added.
; The three pieces are copied one after another; flags set at the end of one
; piece are consumed by the first instruction of the next.
trnslp_stloop1
        MOVS    trns_offset, trns_Y, ASR#16
        MOVMI   trns_offset, #0
        MOV     trns_dummy14, trns_spr_height, LSR#16
        CMP     trns_dummy14, trns_Y, ASR#16
trnslp_stloop1_size * .-trnslp_stloop1
trnslp_stloop2
        SUBLE   trns_offset, trns_dummy14, #1
        MUL     trns_offset, trns_byte_width, trns_offset
        MOV     trns_dummy14, trns_X
        CMP     trns_X, trns_spr_left, LSL #16
trnslp_stloop2_size * .-trnslp_stloop2
trnslp_stloop3
        MOVLT   trns_dummy14, trns_spr_left, LSL #16
        CMP     trns_X, trns_spr_right, LSL #16
        MOVGE   trns_dummy14, trns_spr_right, LSL #16
        SUBGE   trns_dummy14, trns_dummy14, #1
        ADD     trns_offset, trns_offset, trns_dummy14, LSR #16
trnslp_stloop3_size * .-trnslp_stloop3

; Start of loop (1bpp mask)
; As above, but additionally forms row * trns_comp_spr_mask_width in
; trns_dummy12 for the separate mask data.
trnslp_nmstloop1
        MOVS    trns_offset, trns_Y, ASR#16
        MOVMI   trns_offset, #0
        MOV     trns_dummy14, trns_spr_height, LSR#16
        CMP     trns_dummy14, trns_Y, ASR#16
trnslp_nmstloop1_size * .-trnslp_nmstloop1
trnslp_nmstloop2
        SUBLE   trns_offset, trns_dummy14, #1
        LDR     trns_dummy12, trns_comp_spr_mask_width + compiled_routine_stacked
        MUL     trns_dummy12, trns_offset, trns_dummy12
        MUL     trns_offset, trns_byte_width, trns_offset
        MOV     trns_dummy14, trns_X
trnslp_nmstloop2_size * .-trnslp_nmstloop2
; Clamp X against the sprite's left and right edges (1bpp mask variant)
trnslp_nmstloop3
        CMP     trns_X, trns_spr_left, LSL #16
        MOVLT   trns_dummy14, trns_spr_left, LSL #16
        CMP     trns_X, trns_spr_right, LSL #16
        MOVGE   trns_dummy14, trns_spr_right, LSL #16
        SUBGE   trns_dummy14, trns_dummy14, #1
trnslp_nmstloop3_size * .-trnslp_nmstloop3
; Add the integer part of the clamped X to both the image offset and the
; mask offset, and save the mask offset in the stack frame for the mask
; reading code to pick up later.
trnslp_nmstloop4
        ADD     trns_offset, trns_offset, trns_dummy14, LSR #16
        ADD     trns_dummy12, trns_dummy12, trns_dummy14, LSR #16
        STR     trns_dummy12, trns_comp_mask_offset + compiled_routine_stacked
trnslp_nmstloop4_size * .-trnslp_nmstloop4

; Shift pixel and mask words by out_bpp
; One two-instruction entry per output depth.  The MOVS leaves the last bit
; shifted out of trns_out_word in the carry flag, which the following
; conditional (CS) fragments test.
trnslp_shfwam_o1
        MOVS    trns_out_word, trns_out_word, LSR#1
        MOV     trns_out_mask, trns_out_mask, LSR#1
trnslp_shfwam_o2
        MOVS    trns_out_word, trns_out_word, LSR#2
        MOV     trns_out_mask, trns_out_mask, LSR#2
trnslp_shfwam_o4
        MOVS    trns_out_word, trns_out_word, LSR#4
        MOV     trns_out_mask, trns_out_mask, LSR#4
trnslp_shfwam_o8
        MOVS    trns_out_word, trns_out_word, LSR#8
        MOV     trns_out_mask, trns_out_mask, LSR#8
trnslp_shfwam_o16
        MOVS    trns_out_word, trns_out_word, LSR#16
        MOV     trns_out_mask, trns_out_mask, LSR#16
trnslp_shfwam_o24
        ;MOVS    trns_out_word, trns_out_word
        ;MOV     trns_out_mask, trns_out_mask
        ;achieve the same effect as a LSR#32 would do, if such existed!
; Body of trnslp_shfwam_o24: leaves trns_out_word = 0 with the carry flag set
        MOV     trns_out_word,#1
        MOVS    trns_out_word,trns_out_word,LSR #1
trnslp_shfwam_size * .-trnslp_shfwam_o24
        ASSERT  (.-trnslp_shfwam_o1) = trnslp_shfwam_size*6

;can't do the 32 bit case in two instructions, so here's the last one
trnslp_shfwam_o24_2
        MOV     trns_out_mask, #0

; Read pixel from sprite (dependent on in_bpp)
; Six entries of five words each (trnslp_readpx_size).  Entries with fewer
; real instructions are padded with no-ops (ANDEQ/ANDNE R0,R0,R0);
; real instructions per entry: i1=4, i2=5, i4=5, i8=1, i16=3, i32=1.
trnslp_readpx_i1
        LDRB    trns_dummy14, [trns_spr_start, trns_offset, LSR #3]     ;(0.1.2.3 - 8.4.2.1 in_bpp)
        AND     trns_dummy12, trns_offset, #7                           ;(0.1.3.7 - 8.4.2.1 in_bpp)
        MOV     trns_dummy14, trns_dummy14, LSR trns_dummy12
        AND     trns_dummy14, trns_dummy14, #&1                         ;(&FF.&F.&3.&1 - 8.4.2.1 in_bpp)
trnslp_readpx_i1_size * .-trnslp_readpx_i1
        ANDEQ   R0,R0,R0
trnslp_readpx_i2
        LDRB    trns_dummy14, [trns_spr_start, trns_offset, LSR #2]     ;(0.1.2.3 - 8.4.2.1 in_bpp)
        AND     trns_dummy12, trns_offset, #3                           ;(0.1.3.7 - 8.4.2.1 in_bpp)
        MOV     trns_dummy12, trns_dummy12, LSL#1                       ;(3.2.1.0 - 8.4.2.1 in_bpp)
        MOV     trns_dummy14, trns_dummy14, LSR trns_dummy12
        AND     trns_dummy14, trns_dummy14, #&3                         ;(&FF.&F.&3.&1 - 8.4.2.1 in_bpp)
trnslp_readpx_i4
        LDRB    trns_dummy14, [trns_spr_start, trns_offset, LSR #1]     ;(0.1.2.3 - 8.4.2.1 in_bpp)
        AND     trns_dummy12, trns_offset, #1                           ;(0.1.3.7 - 8.4.2.1 in_bpp)
        MOV     trns_dummy12, trns_dummy12, LSL#2                       ;(3.2.1.0 - 8.4.2.1 in_bpp)
        MOV     trns_dummy14, trns_dummy14, LSR trns_dummy12
        AND     trns_dummy14, trns_dummy14, #&F                         ;(&FF.&F.&3.&1 - 8.4.2.1 in_bpp)
trnslp_readpx_i8
        LDRB    trns_dummy14, [trns_spr_start, trns_offset]
        ANDEQ   R0,R0,R0
        ANDNE   R0,R0,R0
        ANDNE   R0,R0,R0
        ANDEQ   R0,R0,R0
trnslp_readpx_i16
        ;note - this will be word or half word aligned
        LDR     trns_dummy14, [trns_spr_start, trns_offset, LSL #1]
        MOV     trns_dummy14, trns_dummy14, LSL #16     ; keep only the bottom 16 bits
        MOV     trns_dummy14, trns_dummy14, LSR #16
        ANDNE   R0,R0,R0
        ANDEQ   R0,R0,R0
trnslp_readpx_i32
        LDR     trns_dummy14, [trns_spr_start, trns_offset, LSL #2]
        ANDEQ   R0,R0,R0
        ANDNE   R0,R0,R0
        ANDNE   R0,R0,R0
        ANDEQ   R0,R0,R0
trnslp_readpx_size * trnslp_readpx_i8-trnslp_readpx_i4
        ASSERT  (.-trnslp_readpx_i1) = trnslp_readpx_size*6

; Translate pixel through ttr
; Byte-wide table entries
trnslp_transpx
        LDR     trns_dummy12, trns_comp_spr_ttr + compiled_routine_stacked
        LDRB    trns_dummy14, [trns_dummy12, trns_dummy14]
trnslp_transpx_size * .-trnslp_transpx
        ASSERT  (trnslp_transpx_size) = 2*4

; Word-wide table entries
trnslp_transpx1
        LDR     trns_dummy12, trns_comp_spr_ttr + compiled_routine_stacked
        LDR     trns_dummy14, [trns_dummy12, trns_dummy14, LSL #2]
trnslp_transpx1_size * .-trnslp_transpx1

        [ ignore_ttr
; Table with eight bytes per entry; the word fetched is shifted right by 8
trnslp_transpx2
        LDR     trns_dummy12, trns_comp_spr_ttr + compiled_routine_stacked
        LDR     trns_dummy14, [trns_dummy12, trns_dummy14, LSL #3]
        MOV     trns_dummy14, trns_dummy14, LSR #8
        ;now in correct form for 32bpp
trnslp_transpx2_size * .-trnslp_transpx2
        ;use trnslp_munge_32to16 if doing 16bpp
        ]

; Look a 15 bit colour up in a byte table whose address is held in the
; second word of the block that trns_comp_spr_ttr points to
trnslp_use32K
        MOV     trns_dummy14,trns_dummy14,LSL #17       ; clear all but the bottom 15 bits
        MOV     trns_dummy14,trns_dummy14,LSR #17
        LDR     trns_dummy12,trns_comp_spr_ttr + compiled_routine_stacked
        ;fetch the table address
        LDR     trns_dummy12,[trns_dummy12,#4]
        ; trns_dummy12 is correct at this point....
        LDRB    trns_dummy14,[trns_dummy12,trns_dummy14]
trnslp_use32K_size * .-trnslp_use32K

; Copy pixel into output word (dependent on out_bpp)
; The shift amount is patched by the compiler routine (SetLsl) before the
; instruction is stored in the code buffer.
trnslp_setpx
        ORR     trns_out_word, trns_out_word, trns_dummy14, LSL #xxx
trnslp_setpx_size * .-trnslp_setpx
        ASSERT  (trnslp_setpx_size) = 1*4

; If sprite has no mask, then ORR set bits into the output mask word
trnslp_sprnomask_o1
        ORR     trns_out_mask, trns_out_mask, #&80000000
trnslp_sprnomask_o2
        ORR     trns_out_mask, trns_out_mask, #&C0000000
trnslp_sprnomask_o4
        ORR     trns_out_mask, trns_out_mask, #&F0000000
trnslp_sprnomask_o8
        ORR     trns_out_mask, trns_out_mask, #&FF000000
trnslp_sprnomask_size * .-trnslp_sprnomask_o8
        ASSERT  (.-trnslp_sprnomask_o1) = trnslp_sprnomask_size*4

; Deeper output modes take the mask bits from trns_masking_word instead
trnslp_sprnomask16or24
        LDR     trns_dummy14, trns_masking_word +compiled_routine_stacked
        ORR     trns_out_mask, trns_out_mask, trns_dummy14
trnslp_sprnomask16or24_size * .-trnslp_sprnomask16or24
        ASSERT  trnslp_sprnomask16or24_size = 4*2

; If sprite has a mask then read it in (also uses trnslp_readpx_i, above)
trnslp_sprmask
        LDR     trns_dummy14, trns_comp_spr_masko + compiled_routine_stacked
; Second instruction of trnslp_sprmask: step trns_offset on by the mask offset
        ADD     trns_offset, trns_offset, trns_dummy14
trnslp_sprmask_size * .-trnslp_sprmask
        ASSERT  (trnslp_sprmask_size) = 2*4

; If sprite has a 1BPP mask then read it in
; (note: no longer uses trnslp_readpx_i1 - it needs a different base address and must
;  preserve the original one by avoiding trns_spr_start (R1) )
; The mask bit offset loaded here is the value stored by trnslp_nmstloop4.
trnslp_new_sprmask
        ;
        LDR     trns_dummy12, trns_comp_mask_base + compiled_routine_stacked
        LDR     trns_offset, trns_comp_mask_offset + compiled_routine_stacked
        LDRB    trns_dummy14,[trns_dummy12, trns_offset, LSR #3]
trnslp_new_sprmask_size * .-trnslp_new_sprmask
; Extract the single mask bit from the byte just read
trnslp_new_sprmask2
        AND     trns_dummy12, trns_offset, #7
        MOV     trns_dummy14, trns_dummy14, LSR trns_dummy12
        AND     trns_dummy14, trns_dummy14, #1
trnslp_new_sprmask2_size * .-trnslp_new_sprmask2

; Setup valid mask pixel into mask word
; One two-instruction entry per output depth up to 8bpp
trnslp_setmask_o1
        TEQ     trns_dummy14, #0                        ;Is the mask pixel set?
        ORRNE   trns_out_mask, trns_out_mask, #&80000000
trnslp_setmask_o2
        TEQ     trns_dummy14, #0                        ;Is the mask pixel set?
        ORRNE   trns_out_mask, trns_out_mask, #&C0000000
trnslp_setmask_o4
        TEQ     trns_dummy14, #0                        ;Is the mask pixel set?
        ORRNE   trns_out_mask, trns_out_mask, #&F0000000
trnslp_setmask_o8
        TEQ     trns_dummy14, #0                        ;Is the mask pixel set?
        ORRNE   trns_out_mask, trns_out_mask, #&FF000000
trnslp_setmask_size * .-trnslp_setmask_o8
        ASSERT  (.-trnslp_setmask_o1) = trnslp_setmask_size*4

trnslp_setmask16or24
        TEQ     trns_dummy14, #0                        ;Is the mask pixel set?
; Rest of trnslp_setmask16or24: OR in trns_masking_word if the mask pixel is set
        LDRNE   trns_dummy14, trns_masking_word +compiled_routine_stacked
        ORRNE   trns_out_mask, trns_out_mask, trns_dummy14
trnslp_setmask16or24_size * .-trnslp_setmask16or24
        ASSERT  (.-trnslp_setmask16or24) = 3*4

; Cope with 1:1 mapping in the mask and screen mode
; The shift amount is patched by the compiler routine (SetLsl) before the
; instruction is stored in the code buffer.
trnslp_setmask2
        ORR     trns_out_mask, trns_out_mask, trns_dummy14, LSL #xxx
trnslp_setmask2_size * .-trnslp_setmask2
        ASSERT  (trnslp_setmask2_size) = 1*4

; Read the screen word
; From here to the end of the loop the fragments are conditional on the
; carry flag; trns_offset is reused to hold the screen word.
trnslp_readscrn
        LDRCS   trns_offset, [trns_out_ptr]
        ANDCS   trns_out_word, trns_out_word, trns_out_mask
trnslp_readscrn_size * .-trnslp_readscrn
        ASSERT  (trnslp_readscrn_size) = 2*4

        [ AvoidScreenReads
trnslp_readscrn0
        MVNS    trns_offset, trns_out_mask      ; Z set iff out_mask is all 1s
        LDRHI   trns_offset, [trns_out_ptr]     ; load if C set & Z clear
        ANDCS   trns_out_word, trns_out_word, trns_out_mask
trnslp_readscrn0_size * .-trnslp_readscrn0
        ]

; Effect the screen word depending on the GCOL action
; Eight entries of two words each, indexed by the GCOL action; entries that
; need fewer instructions are padded with no-ops (ANDEQ R0,R0,R0).
trnslp_gcol
        ; GCOL 0 - plot direct
        BICCS   trns_offset, trns_offset, trns_out_mask
        EORCS   trns_offset, trns_offset, trns_out_word
        ; GCOL 1 - OR with screen
        ORRCS   trns_offset, trns_offset, trns_out_word
        ANDEQ   R0,R0,R0
        ; GCOL 2 - AND with screen
        EORCS   trns_out_word, trns_out_word, trns_out_mask
        BICCS   trns_offset, trns_offset, trns_out_word
        ; GCOL 3 - EOR with screen
        EORCS   trns_offset, trns_offset, trns_out_word
        ANDEQ   R0,R0,R0
        ; GCOL 4 - Invert screen
        EORCS   trns_offset, trns_offset, trns_out_mask
        ANDEQ   R0, R0, R0
        ; GCOL 5 - Do nothing
        ANDEQ   R0,R0,R0
        ANDEQ   R0,R0,R0
        ; GCOL 6 - AND with NOT colour
        BICCS   trns_offset, trns_offset, trns_out_word
        ANDEQ   R0,R0,R0
        ; GCOL 7 - ORR with NOT colour
        EORCS   trns_out_word, trns_out_word, trns_out_mask
        ORRCS   trns_offset, trns_offset, trns_out_word
trnslp_gcol_size * 2*4
        ASSERT  (.-trnslp_gcol) = trnslp_gcol_size * 8

; Effect the screen word according to the mask and ECF
; Within the mask: screen = (screen OR ecf_ora) EOR ecf_eor
trnslp_plotmask
        LDRCS   trns_dummy12, trns_comp_ecf_ora + compiled_routine_stacked
        LDRCS   trns_out_word, trns_comp_ecf_eor + compiled_routine_stacked
        ANDCS   trns_dummy12, trns_dummy12, trns_out_mask
        ANDCS   trns_out_word, trns_out_word, trns_out_mask
        ORRCS   trns_offset, trns_offset, trns_dummy12
        EORCS   trns_offset, trns_offset, trns_out_word
trnslp_plotmask_size * .-trnslp_plotmask
        ASSERT  (trnslp_plotmask_size) = 6*4

; End of the loop - store the screen word and move on a screen pixel
; The pixel count lives in the top 16 bits of trns_xsize, hence the &10000
; step; the compiler routine appends a BGE back to the loop start after this.
trnslp_eoloop
        STRCS   trns_offset, [trns_out_ptr], #4
        MOVCS   trns_out_word, #&80000000       ; re-prime the marker bit
        ADD     trns_X, trns_X, trns_inc_X_x
        ADD     trns_Y, trns_Y, trns_inc_Y_x
        SUB     trns_xsize, trns_xsize, #&10000
        CMP     trns_xsize, #&10000
trnslp_eoloop_size * .-trnslp_eoloop
        ASSERT  (trnslp_eoloop_size) = 6*4

; Finished row so store last compiled output word and mask, if necessary
; (an output word still equal to the bare marker holds no pixels: just return)
trnslp_lastword
        CMP     trns_out_word, #&80000000
        LDMEQFD sp!, {PC}
trnslp_lastword_size * .-trnslp_lastword
        ASSERT  (trnslp_lastword_size) = 2*4

; Stored last output word and mask on screen, so exit
trnslp_lastword2
        STR     trns_offset, [trns_out_ptr]
        LDMFD   sp!, {PC}
trnslp_lastword2_size * .-trnslp_lastword2
        ASSERT  (trnslp_lastword2_size) = 2*4

; Expand a 15 bit colour in trns_dummy14 (written "LR" in the bit diagrams)
; to 8 bits per component.  Held as three pieces that are copied one after
; another; R0 and R2 are used as scratch and preserved.
trnslp_munge_16to321
        Push    "R0,R2"
                                                ;      fedcba9876543210 fedcba9876543210
                                                ; LR =                  0bbbbbgggggrrrrr
        MOV     R0,trns_dummy14,LSR #10         ; R0 =                            0bbbbb
        MOV     R2,R0,LSL #19                   ; R2 =        0bbbbb000 0000000000000000
        AND     R0,trns_dummy14,#&3E0           ; R0 =                  000000ggggg00000
        ORR     R2,R2,R0,LSL #6                 ; R2 =        0bbbbb000 ggggg00000000000
trnslp_m1632_size1 * .-trnslp_munge_16to321
trnslp_munge_16to322
        MOV     R0,trns_dummy14,LSL #27         ; R0 = rrrrr00000000000 0000000000000000
        ORR     R2,R2,R0,LSR #24                ; R2 =        0bbbbb000 ggggg000rrrrr000
        ;now copy the top three bits of each colour component into the bottom three
        MOV     R0,#&E0                         ;avoid an LDR for speed
        ORR     R0,R0,R0,LSL #8
trnslp_m1632_size2 * .-trnslp_munge_16to322
trnslp_munge_16to323
        ORR     R0,R0,R0,LSL #8                 ; R0 = 0000000011100000 1110000011100000
        AND     R0,R0,R2                        ; R0 = 00000000bbb00000 ggg00000rrr00000
        ORR     trns_dummy14,R2,R0,LSR #5       ; LR = 00000000bbbbbbbb ggggggggrrrrrrrr
        Pull    "R0,R2"
trnslp_m1632_size3 * .-trnslp_munge_16to323

; Reduce an 8 bit per component colour in trns_dummy14 to 15 bits.  Held as
; two pieces that are copied one after another; R0 and R2 are scratch.
trnslp_munge_32to161
        Push    "R0,R2"
                                                ;      fedcba9876543210 fedcba9876543210
                                                ; LR = 00000000bbbbbbbb ggggggggrrrrrrrr
; (remainder of trnslp_munge_32to161: keep the top five bits of each component)
        AND     R0,trns_dummy14,#&F80000        ; R0 = 00000000bbbbb000 0000000000000000
        MOV     R2,R0,LSR #9                    ; R2 =                  0bbbbb0000000000
        AND     R0,trns_dummy14,#&F800          ; R0 = 0000000000000000 ggggg00000000000
trnslp_m3216_size1 * .-trnslp_munge_32to161
trnslp_munge_32to162
        ORR     R2,R2,R0,LSR #6                 ; R2 =                  0bbbbbggggg00000
        AND     R0,trns_dummy14,#&F8            ; R0 = 0000000000000000 00000000rrrrr000
        ORR     trns_dummy14,R2,R0,LSR #3       ; LR =                  0bbbbbgggggrrrrr
        Pull    "R0,R2"
trnslp_m3216_size2 * .-trnslp_munge_32to162

; ---------------------------------------------
; - Routine to compile the code specified by  -
; - the GCOL action, in_bpp, out_bpp          -
; - translation table, and plotmask/putsprite -
; - options                                   -
; ---------------------------------------------
; As used by the code below:
;   R5 - GCOL action and mask flag (masked with mc_gcol:OR:mc_hasmask)
;   R7 - non-zero if a colour translation table is to be used
; Inside the routine:
;   R11 - "macro word" describing the routine wanted; if it matches the
;         stored macroword the existing compiled code is reused
;   R0  - log2 of input bpp (save_inlog2bpp), R1 - log2 of output bpp (Log2bpp)
;   R10 - next free word in the code buffer
;   R2, R3 - scratch
; The TrnsAsm/TrnsAsmReg macros copy a template fragment to [R10]; several
; sequences below rely on them leaving the condition flags untouched.

compile_transform_code
        Push    "LR"
        ; R11 = GCOL value (including mask bit)
        AND     R11,R5,#mc_gcol:OR:mc_hasmask
        ; Set mc_ttr if colour translation table is to be used
        CMP     R7,#0
        ORRNE   R11,R11,#mc_ttr
        ; Set mc_plotmask if plotting the mask, clear if putting a sprite
        LDR     R14,spritecode
        AND     R14,R14,#&FF
        TEQ     R14,#SpriteReason_PlotMaskTransformed
        ORREQ   R11,R11,#mc_plotmask

        ; Store the sprite type, so that new masks make a difference
        LDR     R0, save_spr_type
        ORR     R11, R11, R0, LSL #mcb_sprtype

        ; Set in_bpp, out_bpp
        LDR     R0,save_inbpp
        ORR     R11,R11,R0,LSL #mcb_inbpp
        LDR     R1,BPP
        ORR     R11,R11,R1,LSL #mcb_outbpp
        ; Set bit denoting transformed sprite, rather than scaled sprite (uses same area as Sprite_PutSpriteScaled)
        ORR     R11, R11, #mc_transformed
        ; Test with old compiled routine
        LDR     R14,macroword
        TEQ     R11,R14
        BEQ     trns_compiling_complete
        ;
        STR     R11,macroword

        ; compile the code - R0=in_bpp, R1=out_bpp
        LDR     R0, save_inlog2bpp
        LDR     R1, Log2bpp
        ADR     R10, codebuffer

        Debug   mc,"Transformation code starts at",R10

        ; Start of loop
        TrnsAsm trnslp_strt, trnslp_strt_size

        ; Get start pixel on the screen line of lefthand x coord (4 instr)
        ADRL    R2, trnslp_getaddr_o1
        ADD     R2, R2, R1, LSL#4               ; 16 bytes per table entry
        TrnsAsmReg R2, trnslp_getaddr_size
        ASSERT  trnslp_getaddr_size = 4*4

        ; Shift start words (pixel and mask) according to pixel offset
        TrnsAsm trnslp_getwam, trnslp_getwam_size
; loop
        ; Store address on stack for the loop
        Push    "R10"

        ; If using a 1bpp mask we need to work out a different offset for the mask too
        ; (NE below = sprite type field non-zero and mc_hasmask set)
        MOVS    R2, R11, LSR #mcb_sprtype
        TSTNE   R11, #mc_hasmask

        ; First 15 words of the loop (to find byte offset in sprite from sprite X,Y coords)
        TrnsAsm trnslp_stloop1, trnslp_stloop1_size ,EQ
        TrnsAsm trnslp_stloop2, trnslp_stloop2_size ,EQ
        TrnsAsm trnslp_stloop3, trnslp_stloop3_size ,EQ
        TrnsAsm trnslp_nmstloop1, trnslp_nmstloop1_size ,NE
        TrnsAsm trnslp_nmstloop2, trnslp_nmstloop2_size ,NE
        TrnsAsm trnslp_nmstloop3, trnslp_nmstloop3_size ,NE
        TrnsAsm trnslp_nmstloop4, trnslp_nmstloop4_size ,NE

        ; Shift pixel and mask words by output bpp (2 instructions)
        ADRL    R2, trnslp_shfwam_o1
        ADD     R2, R2, R1, LSL#3               ; 8 bytes per table entry
        TrnsAsmReg R2, trnslp_shfwam_size
        ASSERT  trnslp_shfwam_size = 2*4
        CMP     R1, #5
        LDREQ   R2, trnslp_shfwam_o24_2
        STREQ   R2, [R10], #4                   ; need three insts for 32bpp

        ; If putting the sprite (i.e. not plotting the mask)
        TST     R11, #mc_plotmask
        BNE     trns_compile_plotmask

        ; Compile reading of pixel (4,5,5,1 instructions - each input store 5 instructions)
        ADRL    R2, trnslp_readpx_i1
        ADD     R2, R2, R0, LSL#4               ; R0*16 + R0*4 = 20 bytes per table entry
        ADD     R2, R2, R0, LSL#2
        TrnsAsmReg R2, trnslp_readpx_size
        BL      trnslp_chopexcess               ; drop the entry's padding words
        ASSERT  trnslp_readpx_size = 5*4

        ; if doing 16->32 and 32->16 then build in appropriate munging code
        CMP     R1,#4
        BCC     %FT07
        CMP     R0,#4
        BCC     %FT07
        CMP     R0,R1
        BEQ     %FT07

        ; input and output are both >8bpp and are different
        CMP     R0,#4 ;inbpp
        ADREQ   R2,trnslp_munge_16to321
        TrnsAsmReg R2, trnslp_m1632_size1, EQ
        ADREQ   R2,trnslp_munge_16to322
        TrnsAsmReg R2, trnslp_m1632_size2, EQ
        ADREQ   R2,trnslp_munge_16to323
        TrnsAsmReg R2, trnslp_m1632_size3, EQ

        ADRNE   R2,trnslp_munge_32to161
        TrnsAsmReg R2, trnslp_m3216_size1, NE
        ADRNE   R2,trnslp_munge_32to162
        TrnsAsmReg R2, trnslp_m3216_size2, NE

07
        ; If there is a translation table ...
; Translation stage of the pixel path (R0 = log2 input bpp, R1 = log2 output
; bpp, R11 = macro word, R10 = next free word in the code buffer)
        TST     R11, #mc_ttr
        ; then translate the pixel
        BEQ     %FT01

        ;if inbpp and outbpp are both >8bpp ignore the translation table
        CMP     R1,#4
        BCC     %FT09
        CMP     R0,#4
        BCS     %FT08
09
        ;if inbpp is 16 or 32 and outbpp is 8 or below we need a CTrans
        ;32K entry table for colour matched. This will already have been validated
        CMP     R1,#4
        BCS     %FT07                   ;output depth is >8bpp so skip this
        CMP     R0,#4
        BCC     %FT07                   ;input depth is <16bpp so skip this

        ;ok - we definitely need a 32K table here. If it is 32bpp
        ;we also need to munge down to 16bpp before using the table
        ; (flags are still those of CMP R0,#4 above: NE here means R0 is not 4)
        ADRNE   R2,trnslp_munge_32to161
        TrnsAsmReg R2, trnslp_m3216_size1, NE
        ADRNE   R2,trnslp_munge_32to162
        TrnsAsmReg R2, trnslp_m3216_size2, NE

        ADRL    R2,trnslp_use32K
        TrnsAsmReg R2, trnslp_use32K_size
        B       %FT08
07

; First check to see if the output depth is greater than 8 bit per pixel,
; if it is then compile in the new translation functions which use
; a word array, rather than a byte array!

        [ ignore_ttr
        CMP     R1,#4
        BCC     %FT29                   ;not if output bpp is <16bpp
        CMP     R0,#4
        BCS     %FT29                   ;or input bpp is >8bpp

        LDR     R2,trns_palette
        TEQ     R2,#0
        BEQ     %FT29

        TrnsAsm trnslp_transpx2, trnslp_transpx2_size
        ;if output is 16bpp now need to munge it down as well
        CMP     R1,#5
        ADRNE   R2,trnslp_munge_32to161
        TrnsAsmReg R2, trnslp_m3216_size1, NE
        ADRNE   R2,trnslp_munge_32to162
        TrnsAsmReg R2, trnslp_m3216_size2, NE
        B       %FT28
29
        CMP     R1,#4
        TrnsAsm trnslp_transpx, trnslp_transpx_size, LT         ; byte table
        TrnsAsm trnslp_transpx1, trnslp_transpx1_size, GE       ; word table
28
        |
        CMP     R1,#4
        TrnsAsm trnslp_transpx, trnslp_transpx_size, LT         ; byte table
        TrnsAsm trnslp_transpx1, trnslp_transpx1_size, GE       ; word table
        ]

01
08
        ; Copy pixel into output pixel word (ORR out_word, out_word, pixel,LSL#32-out_bpp)
        LDR     R2, trnslp_setpx
        MOV     R3, #1
        MOV     R3, R3, LSL R1          ; R3 = out_bpp
        RSB     R3, R3, #32             ; R3 = 32 - out_bpp
        SetLsl  R2, R3                  ; patch the shift amount into the instruction
        STR     R2, [R10], #4
        ASSERT  (trnslp_setpx_size = 4)

        ; Assemble mask word
trns_compile_plotmask
        ; If in GCOL 0-7,9,11,13, then assume the mask is whole sprite
        AND     R2, R11, #15                    ; R2 = GCOL in range 0-15
        ADD     R2, R2, #1                      ; R2 = GCOL +1
        MOV     R3, #2_1101010100000000         ; Bits set are 8, 10, 12, 14, 15
        MOVS    R3, R3, LSR R2                  ; Move bit (R2-1) of R3 into the carry flag
        ; CC if GCOL 0-7,9,11,13 (or no mask, in which case GCOL is in range 0-7)

;;-----------------------------------------------------------------------------
;; CC if no mask / CS has a mask so attempt to assemble in the function
;; which is correct for this depth, for 1,2,4 or 8 bit per pixel we can
;; simply compile in the ORR with a suitable mask, for 16 or 24 bit per pixel
;; we need to faff around and generate a new workspace word which
;; contains the correct mask.
;;-----------------------------------------------------------------------------

        BCS     trns_compile_mask

        CMP     R1, #4                  ; is the depth sensible?
        ADRCCL  R2, trnslp_sprnomask_o1
        ADDCC   R2, R2, R1,LSL#2        ; 4 bytes per table entry
        TrnsAsmReg R2, trnslp_sprnomask_size,CC
        ADRCSL  R2, trnslp_sprnomask16or24
        TrnsAsmReg R2, trnslp_sprnomask16or24_size,CS

        B       trns_compile_nomask

        ; Sprite has mask and it is used (GCOL 8,10,12,14,15)
trns_compile_mask
        MOVS    R2, R11, LSR #mcb_sprtype       ; non-zero sprite type: 1bpp mask code
        BEQ     trns_old_mask

        TrnsAsm trnslp_new_sprmask, trnslp_new_sprmask_size
        TrnsAsm trnslp_new_sprmask2, trnslp_new_sprmask2_size
        B       trns_any_mask

trns_old_mask
        TrnsAsm trnslp_sprmask, trnslp_sprmask_size

        ; Compile reading of mask pixel (4,5,5,1 instructions - each input store 5 instructions)
        ADRL    R2, trnslp_readpx_i1
        ADD     R2, R2, R0, LSL#4               ; R0*16 + R0*4 = 20 bytes per table entry
        ADD     R2, R2, R0, LSL#2
        TrnsAsmReg R2, trnslp_readpx_size
        BL      trnslp_chopexcess               ; drop the entry's padding words
        ASSERT  trnslp_readpx_size = 5*4

        TEQ     R0,R1                   ;Check for 1:1 mapping?
; Mask stage (old style mask): input and output depth equal means the mask
; pixel just read can be ORRed straight into the output mask
        BEQ     trns_compile_mask1to1

trns_any_mask
        ; Copy mask byte into output mask, first check for non-zero then orr in special case
        CMP     R1,#4
        ADRCCL  R2,trnslp_setmask_o1
        ADDCC   R2,R2,R1,LSL #3         ;Each section is 8 bytes (2 words)
        TrnsAsmReg R2,trnslp_setmask_size,CC
        ASSERT  trnslp_setmask_size = 2*4
        ADRCSL  R2,trnslp_setmask16or24
        TrnsAsmReg R2,trnslp_setmask16or24_size,CS

        B       trns_compile_nomask

        ; Copy pixel into output pixel word (ORR out_mask, out_mask, mask_pixel,LSL#32-out_bpp)
trns_compile_mask1to1
        LDR     R2, trnslp_setmask2
        MOV     R3, #1
        MOV     R3, R3, LSL R1          ; R3 = out_bpp
        RSB     R3, R3, #32             ; R3 = 32 - out_bpp
        SetLsl  R2, R3                  ; patch the shift amount into the instruction
        STR     R2, [R10], #4
        ASSERT  (trnslp_setmask2_size = 4)

trns_compile_nomask
        ; Compile code to read the screen word
        [ AvoidScreenReads
        ; If action code 0 (solid), use alternate code that avoids a load if the
        ; mask is solid.
        ANDS    R2, R11, #7
        TrnsAsm trnslp_readscrn0, trnslp_readscrn0_size, EQ
        TrnsAsm trnslp_readscrn, trnslp_readscrn_size, NE
        |
        TrnsAsm trnslp_readscrn, trnslp_readscrn_size
        ]

        ; Test for plotting mask - if so compile ECF code, else GCOL code
        TST     R11, #mc_plotmask
        BNE     trns_compile_ecf_store

        ; Store the GCOL action code (2,1,2,1,1,0,1,2 instructions, stored as 2 instructions in input)
        ADRL    R3, trnslp_gcol
        ANDS    R2, R11, #7
        ADD     R3, R3, R2,LSL#3        ; 8 bytes per table entry
        TrnsAsmReg R3, trnslp_gcol_size
        CMPNE   R2, #2                  ; actions other than 0, 2 and 7 use at
        CMPNE   R2, #7                  ;  most one instruction: drop a word
        SUBNE   R10, R10, #4
        CMP     R2, #5                  ; action 5 uses none: drop the other too
        SUBEQ   R10, R10, #4
        ASSERT  trnslp_gcol_size = 2*4

        B       trns_compiled_gcol

        ; Handle the mask and ECF
trns_compile_ecf_store
        TrnsAsm trnslp_plotmask, trnslp_plotmask_size

        ; Store the end of loop code
trns_compiled_gcol
        TrnsAsm trnslp_eoloop, trnslp_eoloop_size
        Pull    "R2"                    ; R2 = address of the start of the loop
        TrnsBranch R2, BGE

        ; Store the code for storing the last word
        TrnsAsm trnslp_lastword, trnslp_lastword_size
; loop
        Push    "R10"

        ; Shift pixel and mask words by output bpp (2 instructions)
        ADRL    R2, trnslp_shfwam_o1
        ADD     R2, R2, R1, LSL#3
        TrnsAsmReg R2, trnslp_shfwam_size
        ASSERT  trnslp_shfwam_size = 2*4

        ; Loop until words are aligned
        Pull    "R2"
        TrnsBranch R2, BCC

        ; Compile code to read the screen word
        TrnsAsm trnslp_readscrn, trnslp_readscrn_size

        ; Test for plotting mask - if so compile ECF code, else GCOL code
        TST     R11, #mc_plotmask
        BNE     trns_compile_ecf_store2

        ; Store the GCOL action code (2,1,2,1,1,0,1,2 instructions, stored as 2 instructions in input)
        ADRL    R3, trnslp_gcol
        ANDS    R2, R11, #7
        ADD     R3, R3, R2,LSL#3        ; 8 bytes per table entry
        TrnsAsmReg R3, trnslp_gcol_size
        CMPNE   R2, #2                  ; trim unused words, as for the main loop
        CMPNE   R2, #7
        SUBNE   R10, R10, #4
        CMP     R2, #5
        SUBEQ   R10, R10, #4
        ASSERT  trnslp_gcol_size = 2*4

        B       trns_compiled_gcol2

        ; Handle the mask and ECF
trns_compile_ecf_store2
        TrnsAsm trnslp_plotmask, trnslp_plotmask_size

        ; Compile code to store the word and exit
trns_compiled_gcol2
        TrnsAsm trnslp_lastword2, trnslp_lastword2_size

        ; Code compiled.
        Debug   mc,"Transformation code ends at",r10

        ; Tell the OS that the code between the start of the buffer and R10
        ; has been modified.  The start is taken from the same address that
        ; emission began at (ADR R10, codebuffer).  The SP-relative
        ; trns_codebuffer slot must not be used here without compensating for
        ; the 16 bytes pushed by this routine ("LR" plus "r0-r2"): unadjusted,
        ; it reads a different word of the caller's frame.
        Push    "r0-r2"
        MOV     r2, r10
        ADR     r1, codebuffer
        MOV     r0, #1
        SWI     XOS_SynchroniseCodeAreas
        Pull    "r0-r2"

trns_compiling_complete
        Pull    "PC"

; Trim the padding from the trnslp_readpx_i entry just copied.
; On entry R0  = log2 of the input bpp (selects which entry was copied)
;          R10 = next free word, just after the full five word entry
; On exit  R10 = next free word, just after the entry's last real instruction
; Corrupts the flags.
; Real instructions per entry: i1=4, i2=5, i4=5, i8=1, i16=3, i32=1
trnslp_chopexcess
        ;this routine reduces the various readpx_i segments down to the real
        ;size rather than five words
        TEQ     R0, #0
        SUBEQ   R10, R10, #4            ; 1bpp:  4 real + 1 pad
        TEQ     R0, #3
        SUBEQ   R10, R10, #16           ; 8bpp:  1 real + 4 pad
        TEQ     R0, #4
        SUBEQ   R10, R10, #8            ; 16bpp: 3 real + 2 pad
        TEQ     R0, #5
        SUBEQ   R10, R10, #16           ; 32bpp: 1 real + 4 pad
        MOV     R15,LR

calc_row_stacked * 4*4    ; Number of bytes stacked in plot_and_calculate_row

; Plot one row: order and clip the x extent, work out the sprite position of
; the leftmost pixel, load the registers the compiled code expects and jump
; to the compiled code.  Returns without plotting if the clipped extent is
; empty (left >= right).
; Stack variables are addressed with calc_row_stacked added to allow for the
; words stacked since the frame was set up.
plot_and_calculate_row
        Push    "R14"
        CMP     trns_scr_lx, trns_scr_rx
        Swap    trns_scr_lx, trns_scr_rx, GT    ; make lx the lower x
        LDR     trns_dummy14, gwx0
        CMP     trns_scr_lx, trns_dummy14
        MOVLT   trns_scr_lx, trns_dummy14       ; clip left to gwx0
        LDR     trns_dummy14, gwx1
        CMP     trns_scr_rx, trns_dummy14
        MOVGT   trns_scr_rx, trns_dummy14       ; clip right to gwx1
        CMP     trns_scr_lx, trns_scr_rx
        Pull    "PC",GE

        ; Get sprite offsets for left of current y line
        ; First get dummy11 = scr_lx - x(0)
        LDR     trns_dummy11, trns_spr_xcoords + calc_row_stacked
;        Debug   tr,"X,Y(x0,y0), incXx, incYy",trns_X_x0_y,trns_Y_x0_y,trns_inc_X_x,trns_inc_Y_x
;        Debug   tr,"coords:",trns_scr_lx, trns_scr_rx,trns_scr_y,trns_dummy11
        MOV     trns_dummy11, trns_dummy11, ASR#8       ; 256ths of a pixel -> pixels
        SUB     trns_dummy11, trns_scr_lx, trns_dummy11
        ; X = ((screen_lx%-x(0)) * (inc_X_x)) + X_x0_y
        MLA     trns_X, trns_dummy11, trns_inc_X_x, trns_X_x0_y
        ; Y =
        ; ((screen_lx%-x(0)) * (spr_inc_Y_x)) + Y_x0_y
        MLA     trns_Y, trns_inc_Y_x, trns_dummy11, trns_Y_x0_y

        ; Get the x posn and x size for the plot
        SUB     trns_dummy14, trns_scr_rx, trns_scr_lx
        ; trns_scr_lx shares R0 with trns_xsize/trns_spr_left (see the RN
        ; definitions at the head of the file), so it is copied out before
        ; the LDMIA below overwrites R0
        MOV     trns_out_x, trns_scr_lx

        ; Load the compile routine registers - HERE
        ; (four consecutive frame words starting at trns_comp_spr_left; the
        ;  base register is itself one of the registers loaded)
        ADR     trns_spr_start, trns_comp_spr_left + calc_row_stacked
        LDMIA   trns_spr_start, {trns_xsize, trns_spr_start, trns_byte_width, trns_spr_height}
        ; pixel count goes into the top 16 bits, above the sprite left edge
        ORR     trns_xsize, trns_xsize, trns_dummy14,LSL#16

        ; Plot the row (call compiled code)
        ; R14 is restored first so the compiled code returns directly to this
        ; routine's caller; one word fewer is now stacked, hence the -4
        Pull    "R14"
        LDR     PC, trns_codebuffer + calc_row_stacked -4

        END