Commit 5c69acca authored by Robert Sprowson's avatar Robert Sprowson
Browse files

Bulk search & replace to use 8b headers.

Highlighted that last_dc_val should have been a JCOEF * not an int *.
Still works - amazing!

Version 1.38, 1.35.2.1. Tagged as 'SprExtend-1_38-1_35_2_1'
......@@ -56,7 +56,7 @@ h_l RN 2 ; used in huff_decode - same as temp3
MACRO
HUFF_DECODE_SETUP $tbl
; set up the specific table pointers for ac or dc huff table
; $tbl is a HUFF_TBL*.
; $tbl is a JHUFF_TBL*.
ADD h_maxcode,$tbl,#huff_tbl_maxcode
ADD h_shortcut,$tbl,#huff_tbl_shortcut
ADD h_huffval,$tbl,#huff_tbl_huffval
......@@ -221,16 +221,15 @@ huff_decode_loop$lab
; --------------------------------------------------------------------
EXPORT asm_huff_decode_blocks
asm_huff_decode_blocks
;LOCAL void
;huff_decode_blocks (decompress_info_ptr cinfo, JBLOCK block,
; HUFF_TBL *dctbl, HUFF_TBL *actbl,
; QUANT_TBL_PTR quanttbl, int *last_dc_val, int nblocks)
;huff_decode_blocks (j_decompress_ptr cinfo, JBLOCK block,
; JHUFF_TBL *dctbl, JHUFF_TBL *actbl,
; JQUANT_TBL *quanttbl, JCOEF *last_dc_val, int nblocks)
; r0 = cinfo
; r1 = block pointer
; r2 = HUFF_TBL* dctbl
; r3 = HUFF_TBL* actbl
; [sp] = quanttbl
; [sp,#4] = int *last_dc_val
; r2 = JHUFF_TBL *dctbl
; r3 = JHUFF_TBL *actbl
; [sp,#0] = quanttbl
; [sp,#4] = JCOEF *last_dc_val
; [sp,#8] = int nblocks
; save registers
......@@ -283,7 +282,7 @@ huff_block_clear
huff_anotherblock ; loop round to here nblocks times
; Set up huffman decoding for the DC component.
LDR h_temp,[sp,#2*4] ; HUFF_TBL* dctbl (DC table pointer)
LDR h_temp,[sp,#2*4] ; JHUFF_TBL* dctbl (DC table pointer)
HUFF_DECODE_SETUP h_temp ; set maxcode,huffval,shortcut
; Handle the DC component
......@@ -310,7 +309,7 @@ huff_dc_0
; That's the DC value done.
; Set up huffman decoding for the AC components.
LDR h_temp,[sp,#3*4] ; HUFF_TBL* actbl (AC table pointer)
LDR h_temp,[sp,#3*4] ; JHUFF_TBL* actbl (AC table pointer)
HUFF_DECODE_SETUP h_temp ; set maxcode,huffval,shortcut
; The loop that does AC components, once round for each non-zero component.
......@@ -380,16 +379,15 @@ huff_zag_end
; --------------------------------------------------------------------
EXPORT asm_huff_skip_blocks
asm_huff_skip_blocks
;LOCAL void
;huff_skip_blocks (decompress_info_ptr cinfo, JBLOCK block,
; HUFF_TBL *dctbl, HUFF_TBL *actbl,
; QUANT_TBL_PTR quanttbl, int *last_dc_val, int nblocks)
;huff_skip_blocks (j_decompress_ptr cinfo, JBLOCK block,
; JHUFF_TBL *dctbl, JHUFF_TBL *actbl,
; JQUANT_TBL *quanttbl, JCOEF *last_dc_val, int nblocks)
; r0 = cinfo
; r1 = block pointer (UNUSED)
; r2 = HUFF_TBL* dctbl
; r3 = HUFF_TBL* actbl
; [sp] = quanttbl (UNUSED)
; [sp,#4] = int *last_dc_val
; r2 = JHUFF_TBL *dctbl
; r3 = JHUFF_TBL *actbl
; [sp,#0] = quanttbl (UNUSED)
; [sp,#4] = JCOEF *last_dc_val
; [sp,#8] = int nblocks
; This routine is very similar to huff_decode_blocks, except that
; we do not actually output the block - we simply skip forward that far
......@@ -415,7 +413,7 @@ asm_huff_skip_blocks
huff_skip_anotherblock ; loop round to here nblocks times
; Set up huffman decoding for the DC component.
LDR h_temp,[sp,#2*4] ; HUFF_TBL* dctbl (DC table pointer)
LDR h_temp,[sp,#2*4] ; JHUFF_TBL* dctbl (DC table pointer)
HUFF_DECODE_SETUP h_temp ; set maxcode,huffval,shortcut
; Handle the DC component
......@@ -435,7 +433,7 @@ huff_skip_dc_0
; That's the DC value done.
; Set up huffman decoding for the AC components.
LDR h_temp,[sp,#3*4] ; HUFF_TBL* actbl (AC table pointer)
LDR h_temp,[sp,#3*4] ; JHUFF_TBL* actbl (AC table pointer)
HUFF_DECODE_SETUP h_temp ; set maxcode,huffval,shortcut
; The loop that does AC components, once round for each non-zero component.
......
......@@ -266,7 +266,7 @@ $rc._odd_shortcut
;; ------------------------------------------------------------------------
;; Test proc - procedure to do a 1-D DCT
;; ------------------------------------------------------------------------
;; extern void dct_1d(decompress_info_ptr cinfo, int *data);
;; extern void dct_1d(j_decompress_ptr cinfo, int *data);
;asm_dct_1_d
; STMDB sp!,{r0-r12,lr} ; save state
......@@ -290,7 +290,7 @@ $rc._odd_shortcut
; r2=count
EXPORT asm_j_rev_dct
asm_j_rev_dct ; extern void asm_j_rev_dct(decompress_info_ptr cinfo, DCTBLOCK data, int count);
asm_j_rev_dct ; extern void asm_j_rev_dct(j_decompress_ptr cinfo, JBLOCK data, int count);
CMP r2,#0 ; if count=0, do nothing
MOVLE pc,lr
......
......@@ -13,11 +13,11 @@
GBLS Module_ComponentPath
Module_MajorVersion SETS "1.38"
Module_Version SETA 138
Module_MinorVersion SETS ""
Module_Date SETS "23 Dec 2010"
Module_ApplicationDate SETS "23-Dec-10"
Module_MinorVersion SETS "1.35.2.1"
Module_Date SETS "04 Jan 2011"
Module_ApplicationDate SETS "04-Jan-11"
Module_ComponentName SETS "SprExtend"
Module_ComponentPath SETS "mixed/RiscOS/Sources/Video/Render/SprExtend"
Module_FullVersion SETS "1.38"
Module_HelpVersion SETS "1.38 (23 Dec 2010)"
Module_FullVersion SETS "1.38 (1.35.2.1)"
Module_HelpVersion SETS "1.38 (04 Jan 2011) 1.35.2.1"
END
......@@ -5,19 +5,19 @@
*
*/
#define Module_MajorVersion_CMHG 1.38
#define Module_MinorVersion_CMHG
#define Module_Date_CMHG 23 Dec 2010
#define Module_MinorVersion_CMHG 1.35.2.1
#define Module_Date_CMHG 04 Jan 2011
#define Module_MajorVersion "1.38"
#define Module_Version 138
#define Module_MinorVersion ""
#define Module_Date "23 Dec 2010"
#define Module_MinorVersion "1.35.2.1"
#define Module_Date "04 Jan 2011"
#define Module_ApplicationDate "23-Dec-10"
#define Module_ApplicationDate "04-Jan-11"
#define Module_ComponentName "SprExtend"
#define Module_ComponentPath "mixed/RiscOS/Sources/Video/Render/SprExtend"
#define Module_FullVersion "1.38"
#define Module_HelpVersion "1.38 (23 Dec 2010)"
#define Module_FullVersion "1.38 (1.35.2.1)"
#define Module_HelpVersion "1.38 (04 Jan 2011) 1.35.2.1"
#define Module_LibraryVersionInfo "1:38"
/* Copyright 2011 Castle Technology Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* c.PutScaled - the bitblit compiler for PutSpriteScaled/PlotMaskScaled */
#include <stdarg.h>
#include <string.h>
#include <stdio.h>
#include "swis.h"
#include "commondefs.h"
#include "jpeglib.h"
#include "putscaled.h"
#include "C:Global.h.Sprite"
/**************************************************************************
* *
* Macros. *
* *
**************************************************************************/
/* Predicates describing the current plot. Each one expands in place, so a
 * pointer named wp (assembler workspace) and/or ws (C workspace) must be in
 * scope wherever it is used.
 */
#define SOURCE_32_BIT (wp->save_inlog2bpp == 5)
#define SOURCE_16_BIT (wp->save_inlog2bpp == 4)
#define SOURCED_16_BIT (wp->save_inlog2bpc == 4) /* like SOURCE_16_BIT but includes 16-bit double-pixels */
#define SOURCE_MASK (ws->masked)
#define SOURCE_BPPMASK (ws->mask1bpp)
#define SOURCE_TABLE ((wp->ColourTTR != 0) || (wp->trns_palette != 0)) /* either a translation table or a palette was supplied */
#define DPIXEL_INPUT (wp->save_inlog2bpp != wp->save_inlog2bpc) /* input bpp and bpc differ, i.e. double-pixel input */
#define DPIXEL_OUTPUT (wp->BPP != wp->BPC) /* output bpp and bpc differ, i.e. double-pixel output */
#define PLOTMASK ((wp->spritecode & 255) == SpriteReason_PlotMaskScaled) /* only the low byte of spritecode is compared */
#define DEST_32_BIT (wp->BPP == 32)
#define DEST_16_BIT (wp->BPP == 16)
#define DEST_1_BIT (wp->BPC == 1)
#define DESTD_16_BIT (wp->BPC == 16) /* like DEST_16_BIT but includes 16-bit double-pixels */
/**************************************************************************
* *
* Low-level debugging output. *
* *
**************************************************************************/
/* Two flavours of the tracing helpers:
 *  - DEBUG builds route everything to the do_* routines pulled in from tracing.c.
 *  - Other builds compile tracing away entirely; assert() remains active and
 *    calls exit_erl() with the error code and source line when its condition fails.
 * tracef() takes a single macro argument; call sites separate the format
 * arguments with the `_` token rather than a literal comma.
 */
#ifdef DEBUG
#define tracef(args) do_sprintf(0, args)
#define assert(x, y) do_assert(__LINE__, x, y, NULL)
#define newline() tracef("\n"); /* NB: carries its own trailing semicolon */
#define comment(ws,text) do_comment(text)
#define IFDEBUG(a) a
#include "tracing.c"
#else
#define tracef(args) /* Nothing */
#define assert(x, y) {if (!(x)) exit_erl(y, __LINE__);} /* NB: a braced block, not a do/while(0) */
#define newline() /* Nothing */
#define comment(ws,text) /* Nothing */
#define IFDEBUG(a) /* Nothing */
#endif
/**************************************************************************
* *
* JPEG handling. *
* *
**************************************************************************/
#ifdef ASMjpeg
#include "rojpeg.c"
#endif
/**************************************************************************
* *
* C Workspace declarations. *
* *
**************************************************************************/
/* Code buffers */
#define NBUFFERS 8 /* Number of code buffers */
#define BUFSIZE 256 /* words per buffer */
/* One slot holding a compiled routine together with the parameters that were
 * baked into it. check_workspace() marks every slot empty (key_word == -1)
 * when the workspace is first initialised.
 */
typedef struct
{
int key_word; /* descriptor for this code, or -1 if empty */
int xadd; /* precise scale factors compiled into this code */
int xdiv;
int yadd;
int ydiv;
int outoffset; /* output row offset compiled into this code */
int code[BUFSIZE]; /* the code itself */
} code_buffer;
/* Visit every code buffer in the workspace; needs ws in scope. */
#define FOR_EACH_BUFFER(ptr) for (ptr = &ws->buffers[0]; ptr < &ws->buffers[NBUFFERS]; ptr++)
/* Labels - there's one of these for each label in the source we generate.
 * Both pointers are cleared by compile_buffer_init(). define_label() sets def
 * and patches the instruction at ref if one is pending; branch() records ref
 * when it is asked to branch to a label that has no def yet.
 */
typedef struct
{
int *def; /* where the label is, or 0 if not yet defined. */
int *ref; /* a reference to the label, to be filled in when it's defined. */
#ifdef DEBUG
char *name; /* textual name of the label - same as field name */
#endif
} label;
/* Each label must be added as a field to this structure.
 * FOR_EACH_LABEL walks a label pointer from FIRST_LABEL to LAST_LABEL
 * inclusive, so every field between those two must be of type label.
 */
typedef struct
{
#define FIRST_LABEL loop_y_repeat
label loop_y_repeat;
#ifdef TESTDEBUG
label test1;
label test2;
#endif
label loop_x_enter;
label loop_x_repeat;
label loop_x_exit;
label l_masked;
label loop_put_pixel_repeat;
label loop_put_masked_repeat;
label y_loop;
label y_loop_enter;
label y_loop_exit;
label loop_delay;
label x_evenstart;
label x_oddmask;
label x_aligned_loop;
label x_aligned_enter;
label x_alignmask1;
label x_alignmask2;
label x_misaligned;
label x_misaligned_loop;
label x_misaligned_enter;
label x_misalignmask1;
label x_misalignmask2;
label x_2atatime_exit;
label x_lastmask;
label loop_x_exit1;
label loop_x_exitskip;
label loop1;
label loop2;
label plot_loopa;
label plot_loop1;
label plot_loop1a;
label plot_loop1b;
label plot_loop1c;
label plot_loop2;
label plot_loop3;
label plot_loop4;
label plot_loop4a;
label plot_loop4b;
label plot_loop4c;
label last;
#define LAST_LABEL last
/* If you add a label, also give it a name in check_workspace - the DEBUG
 * build asserts that every label has been named. */
} labels_rec;
/* Visit every label in the workspace; needs ws in scope. */
#define FOR_EACH_LABEL(ptr) for (ptr = &ws->labels.FIRST_LABEL; ptr <= &ws->labels.LAST_LABEL; ptr++)
/* L(name): pointer to the named label in the workspace; needs ws in scope. */
#define L(name) (&(ws->labels.name))
/* Register names - one for each register name (the register numbers are allocated at compile time).
 * compile_buffer_init() resets regno to -1 for every name; set_regname()
 * assigns the real number.
 */
typedef struct
{
int regno; /* the physical register number */
#ifdef DEBUG
char *name; /* the name, for trace output */
#endif
} regname;
/* Each register name must be added as a field to this structure.
 * FOR_EACH_REGISTER_NAME walks a regname pointer from FIRST_REGISTER to
 * LAST_REGISTER inclusive, so every field between those two must be a regname.
 * In DEBUG builds check_workspace() also asserts that each one has been given
 * a textual name.
 */
typedef struct
{
#define FIRST_REGISTER r_pixel
regname r_pixel;
regname r_inptr;
regname r_inshift;
regname r_inword;
regname r_maskinptr;
regname r_maskinword;
regname r_maskinshift;
regname r_masko;
regname r_temp1;
regname r_temp2;
regname r_c1632;
regname r_oditheradd;
regname r_blockroutine;
regname r_ecfindex;
regname r_bgcolour;
regname r_fetchroutine;
regname r_outptr;
regname r_outword;
regname r_outshift;
regname r_table;
regname r_xsize;
regname r_xcount;
regname r_ysize;
regname r_ycount;
regname r_inoffset;
regname r_maskinoffset;
regname r_in_pixmask; /* only used by 2-at-a-time loop */
regname r1;
regname r2;
regname r3;
regname wp;
regname sp;
regname lr;
regname pc;
#define LAST_REGISTER pc
} regnames_rec;
/* Visit every register name in the workspace; needs ws in scope. */
#define FOR_EACH_REGISTER_NAME(ptr) for (ptr = &ws->regnames.FIRST_REGISTER; ptr <= &ws->regnames.LAST_REGISTER; ptr++)
/* R(reg) yields the physical register number assigned to a register name and
 * asserts that one has actually been assigned (regno is -1 until set_regname
 * runs). It expands in place and needs ws in scope.
 * The disabled variant below additionally traced the offending register's name.
 */
#if 0
#define R(reg) rr(&ws->regnames.reg)
static int rr(regname *r)
{
/* Makes code bigger, lots of string clashes - rats! */
if (r->regno == -1) tracef("Register %s not defined\n" _ r->name);
assert(r->regno != -1, ERROR_FATAL);
return r->regno;
}
#else
#define R(reg) rr(ws->regnames.reg.regno)
static int rr(int r)
{
/* Assert that the register is at least set */
assert(r != -1, ERROR_FATAL);
return r;
}
#endif
/* The structure containing all workspace - essentially our static variables.
 * check_code and check_code2 bracket the structure: check_workspace() writes
 * CHECK_CODE into both on first use and asserts on every call that the
 * trailing one is still intact.
 */
#define CHECK_CODE 123456789
typedef struct
{
/* Initialisation */
int check_code; /* equals CHECK_CODE once check_workspace has initialised us */
/* Code buffer management */
int build_buffer; /* Buffer currently being built, or next to build */
int *compile_base; /* first word of the buffer being compiled into */
int *compile_ptr; /* where to put next instruction */
int *compile_lim; /* one past the last usable word (compile_base + BUFSIZE) */
/* Label control and allocation */
labels_rec labels; /* each label, and where it is in the generated code */
/* Register control and allocation */
regnames_rec regnames; /* physical assignment of each register name */
int next_free_reg; /* allocator of physical registers, as they are needed. */
BOOL leave_r12_alone; /* Leave assembler 'wp' in place during compiled code */
int gcol; /* GCOL action */
BOOL masked; /* whether to use mask */
BOOL mask1bpp; /* whether mask is 1bpp mask */
int odither; /* If 0, then there's no ordered dither. If non-0, number of bits - 1 being truncated by dither. */
#if 0
int odither_eorvalue; /* value for eor alternation along a line */
int odither_shift; /* offset of two-bit dither value in r_oditheradd */
#endif
/* Assemble-time constants */
int in_bpp;
int in_bpc; /* Same as bpp unless double-pixel, in which case double bpp */
int in_pixmask;
int mask_bpp;
int mask_bpc;
int mask_pixmask;
int out_l2bpp; /* not provided in wp */
int out_l2bpc; /* ditto */
int out_pixmask; /* mask for one pixel */
int out_dpixmask;
int out_ppw; /* pixels per word */
int out_l2ppw;
BOOL cal_table_simple; /* If true, a simple table lookup is possible */
/* Space for compiled code, near the end so most field accesses have only a small offset. */
code_buffer buffers[NBUFFERS];
/* Check for workspace overwritten */
int check_code2;
} workspace;
static void check_workspace(workspace *ws)
/* Basic validity checks, and initialise if this is the first time.
 * "First time" is detected by check_code not yet holding CHECK_CODE. On that
 * path both guard words are written, the round-robin buffer index is reset and
 * every code buffer is marked empty. DEBUG builds additionally attach a
 * textual name to every label and register name and assert that none was
 * missed. On every call the trailing guard word is asserted to be intact.
 */
{
assert(ws != 0, ERROR_NO_MEMORY);
if (ws->check_code != CHECK_CODE)
{
code_buffer *p;
tracef("Initialising workspace.\n");
ws->check_code = CHECK_CODE;
ws->check_code2 = CHECK_CODE;
ws->build_buffer = 0;
FOR_EACH_BUFFER(p) p->key_word = -1;
#ifdef DEBUG
{
label *l;
/* Set up textual names of all the labels */
FOR_EACH_LABEL(l) l->name = 0;
/* LN stringises the field name, so the trace name always matches the field */
#define LN(lname) ws->labels.lname.name = #lname;
LN(loop_y_repeat)
#ifdef TESTDEBUG
LN(test1)
LN(test2)
#endif
LN(loop_x_enter)
LN(loop_x_repeat)
LN(loop_x_exit)
LN(l_masked)
LN(loop_put_pixel_repeat)
LN(loop_put_masked_repeat)
LN(y_loop)
LN(y_loop_enter)
LN(y_loop_exit)
LN(loop_delay)
LN(x_evenstart)
LN(x_oddmask)
LN(x_aligned_loop)
LN(x_aligned_enter)
LN(x_alignmask1)
LN(x_alignmask2)
LN(x_misaligned)
LN(x_misaligned_loop)
LN(x_misaligned_enter)
LN(x_misalignmask1)
LN(x_misalignmask2)
LN(x_2atatime_exit)
LN(x_lastmask)
LN(loop_x_exit1)
LN(loop_x_exitskip)
LN(loop1)
LN(loop2)
LN(plot_loopa)
LN(plot_loop1)
LN(plot_loop1a)
LN(plot_loop1b)
LN(plot_loop1c)
LN(plot_loop2)
LN(plot_loop3)
LN(plot_loop4)
LN(plot_loop4a)
LN(plot_loop4b)
LN(plot_loop4c)
LN(last)
/* Check he's got them all */
FOR_EACH_LABEL(l) assert(l->name != 0, ERROR_FATAL);
}
{
regname *r;
/* Same scheme for the register names: clear, name each one, then verify */
FOR_EACH_REGISTER_NAME(r) r->name = 0;
#define RNN(rname) ws->regnames.rname.name = #rname;
RNN(r_pixel)
RNN(r_inptr)
RNN(r_inshift)
RNN(r_inword)
RNN(r_maskinptr)
RNN(r_maskinword)
RNN(r_maskinshift)
RNN(r_masko)
RNN(r_temp1)
RNN(r_temp2)
RNN(r_c1632)
RNN(r_oditheradd)
RNN(r_blockroutine)
RNN(r_ecfindex)
RNN(r_bgcolour)
RNN(r_fetchroutine)
RNN(r_outptr)
RNN(r_outword)
RNN(r_outshift)
RNN(r_table)
RNN(r_xsize)
RNN(r_xcount)
RNN(r_ysize)
RNN(r_ycount)
RNN(r_inoffset)
RNN(r_maskinoffset)
RNN(r_in_pixmask)
RNN(r1)
RNN(r2)
RNN(r3)
RNN(wp)
RNN(sp)
RNN(lr)
RNN(pc)
FOR_EACH_REGISTER_NAME(r) assert(r->name != 0, ERROR_FATAL);
ws->leave_r12_alone = FALSE;
}
#endif
}
/* Runs on every call: catches the workspace having been overwritten */
assert(ws->check_code2 == CHECK_CODE, ERROR_FATAL);
}
#ifdef DEBUG
/* Trace the fields of the assembler workspace, one per line, each followed by
 * a short description of its meaning. DEBUG builds only; output only.
 */
static void dump_asm_workspace(asm_workspace *wp)
{
/* Oddly spaced out to allow it to be easily lined up with the structure definition */
tracef("Assembler workspace at %x:\n" _ wp);
tracef("save_outoffset=%i %t32. byte offset between output rows - SUBTRACT for next row.\n" _ wp->save_outoffset);
tracef("save_inoffset=%i %t32. byte offset between input rows - SUBTRACT for next row.\n" _ wp->save_inoffset);
tracef("save_inptr=0x%x %t32. word address of input pixels.\n" _ wp->save_inptr);
tracef("save_outptr=0x%x %t32. address of word containing first output pixel.\n" _ wp->save_outptr);
tracef("save_ydiv=%i %t32. subtracter value for y scale.\n" _ wp->save_ydiv);
tracef("save_yadd=%i %t32. adder value for y scale.\n" _ wp->save_yadd);
tracef("save_ysize=%i %t32. number of output rows.\n" _ wp->save_ysize);
tracef("save_ycount=%i %t32. total of ymag/ydiv sum, for y scale factor\n" _ wp->save_ycount);
newline();
tracef("save_inshift=%i %t32. bit shift of first pixel.\n" _ wp->save_inshift);
tracef("save_xsize=%i %t32. number of output pixels per row.\n" _ wp->save_xsize);
tracef("save_xcount=%i %t32. total of xmag/xdiv sum, for x scale factor\n" _ wp->save_xcount);
tracef("save_ecfptr=0x%x %t32. ECF pointer - only useful if plotting the mask.\n" _ wp->save_ecfptr);
tracef("save_ecflimit=0x%x %t32. ECF limit - only useful if plotting the mask.\n" _ wp->save_ecflimit);
tracef("save_xdiv=%i %t32. subtracter value for x scale.\n" _ wp->save_xdiv);
tracef("save_xadd=%i %t32. adder value for x scale\n" _ wp->save_xadd);
newline();
tracef("save_masko=%i %t32. if not 1bpp mask then this is mask data offset from inptr. Otherwise...\n" _ wp->save_masko);
tracef("save_xcoord=%i %t32. pixel x coordinate of first output pixel.\n" _ wp->save_xcoord);
tracef("save_ycoord=%i %t32. pixel y coordinate of first output pixel.\n" _ wp->save_ycoord);
tracef("save_xmag=%i %t32. adder value for x scale?\n" _ wp->save_xmag);
tracef("save_ymag=%i %t32. adder value for y scale?\n" _ wp->save_ymag);
newline();
tracef("save_inlog2bpp=%i %t32. log 2 bits per pixel of input.\n" _ wp->save_inlog2bpp);
tracef("save_inlog2bpc=%i %t32. log 2 bits per character of input (only different for double-pixels).\n"
_ wp->save_inlog2bpc);
tracef("save_mode=%i (>>27 = %i) %t32. mode number/pointer of sprite - 1bpp sprites have hi bits set.\n" _ wp->save_mode _ wp->save_mode >> 27);
newline();
tracef("save_maskinshift=%i %t32. initial bit shift within mask word.\n" _ wp->save_maskinshift);
tracef("save_maskinptr=0x%x %t32. word address of mask (or 0 if there isn't one).\n" _ wp->save_maskinptr);
tracef("save_maskinoffset=%i %t32. byte offset between mask rows - SUBTRACT for next row.\n" _ wp->save_maskinoffset);
newline();
tracef("BPP=%i %t32. bits per pixel of output.\n" _ wp->BPP);
tracef("BPC=%i %t32. bits per character of output (only different for double pixels).\n" _ wp->BPC);
tracef("ColourTTR=0x%x %t32. translation table or palette.\n" _ wp->ColourTTR);
tracef("trns_palette=0x%x %t32. if non-0 ignore TTR and use this palette instead.\n" _ wp->trns_palette);
tracef("spritecode=%i (& 255 = %i) %t32. SpriteOp - 52 for PutSpriteScaled, 50 for PlotMaskScaled.\n" _ wp->spritecode _ wp->spritecode & 255);
tracef("bgcolour=%i %t32. Background colour (only valid if plotting the mask)\n" _ wp->bgcolour);
newline();
}
#endif
#ifdef TESTDEBUG
static void dump_workspace(workspace *ws)
{
code_buffer *p;
tracef("Dumping workspace.\n");
#define DUMPINT(field) tracef("%s = %i.\n" _ #field _ ws->field);
DUMPINT(build_buffer)
FOR_EACH_BUFFER(p) tracef("buffer->keyword = %i.\n" _ p->key_word);
}
#endif
/**************************************************************************
* *
* Low-level instruction generation. *
* *
**************************************************************************/
/* Condition codes
 * These occupy the top four bits of an instruction word. EQ and "always" are
 * deliberately swapped here so that an opcode built with no condition at all
 * (top nibble 0) means "always": ins() rewrites a top nibble of 0xf to 0 and a
 * top nibble of 0 to 0xe before storing the word. All other values are stored
 * unchanged.
 */
#define EQ 0xf0000000 /* It's 0 really - frigged so that 0 can be 'always' - the usual case. */
#define NE 0x10000000
#define CS 0x20000000
#define CC 0x30000000
#define MI 0x40000000
#define PL 0x50000000
#define VS 0x60000000
#define VC 0x70000000
#define HI 0x80000000
#define LS 0x90000000
#define GE 0xa0000000
#define LT 0xb0000000
#define GT 0xc0000000
#define LE 0xd0000000
#define AL 0xe0000000
#define NV 0xDONOTUSE /* not a valid constant: any use of NV fails to compile */
/* Branches */
#define B 0x0a000000
#define BL 0x0b000000
#define B_OFFSET_MASK 0x00ffffff /* and with this for negative offsets */
/* ALU ops
 * Each instruction macro emits one word through ins() and needs ws in scope.
 * `rest` is OR'd straight into the opcode, so it carries the second operand
 * plus any condition code and/or S flag. `str` is the assembler text used for
 * the trace listing. TST/TEQ/CMP/CMN always set S and take no destination;
 * MOV/MVN take no first operand.
 */
#define S (1<<20)
#define AND(dst,op1,rest,str) ins(ws,(0x0 << 21) | DSTR(dst) | OP1R(op1) | (rest), str)
#define EOR(dst,op1,rest,str) ins(ws,(0x1 << 21) | DSTR(dst) | OP1R(op1) | (rest), str)
#define SUB(dst,op1,rest,str) ins(ws,(0x2 << 21) | DSTR(dst) | OP1R(op1) | (rest), str)
#define RSB(dst,op1,rest,str) ins(ws,(0x3 << 21) | DSTR(dst) | OP1R(op1) | (rest), str)
#define ADD(dst,op1,rest,str) ins(ws,(0x4 << 21) | DSTR(dst) | OP1R(op1) | (rest), str)
#define ADC(dst,op1,rest,str) ins(ws,(0x5 << 21) | DSTR(dst) | OP1R(op1) | (rest), str)
#define SBC(dst,op1,rest,str) ins(ws,(0x6 << 21) | DSTR(dst) | OP1R(op1) | (rest), str)
#define RSC(dst,op1,rest,str) ins(ws,(0x7 << 21) | DSTR(dst) | OP1R(op1) | (rest), str)
#define TST(op1,rest,str) ins(ws,(0x8 << 21) | S | OP1R(op1) | (rest), str)
#define TEQ(op1,rest,str) ins(ws,(0x9 << 21) | S | OP1R(op1) | (rest), str)
#define CMP(op1,rest,str) ins(ws,(0xa << 21) | S | OP1R(op1) | (rest), str)
#define CMN(op1,rest,str) ins(ws,(0xb << 21) | S | OP1R(op1) | (rest), str)
#define ORR(dst,op1,rest,str) ins(ws,(0xc << 21) | DSTR(dst) | OP1R(op1) | (rest), str)
#define MOV(dst,rest,str) ins(ws,(0xd << 21) | DSTR(dst) | (rest), str)
#define BIC(dst,op1,rest,str) ins(ws,(0xe << 21) | DSTR(dst) | OP1R(op1) | (rest), str)
#define MVN(dst,rest,str) ins(ws,(0xf << 21) | DSTR(dst) | (rest), str)
/* Bare opcode fields, for building a word by hand and passing it to ins() */
#define ADD_OPCODE (0x4 << 21)
#define SUB_OPCODE (0x2 << 21)
#define MOV_OPCODE (0xd << 21)
/* Operand field builders */
#define DSTR(x) ((x) << 12) /* destination - ignored by TST/TEQ/CMP/CMN */
#define OP1R(x) ((x) << 16) /* first operand */
#define OP2R(x) ((x) << 0) /* if !IMM */
#define IMM(x) ((x) | (1<<25)) /* an 8-bit unsigned field */
#define IMMROR(x) ((x) << 7) /* an EVEN number to rotate right IMM by */
#define LSLI(x) (((x) << 7) | 0x00) /* 5-bit immed shift applied to OP2R */
#define LSRI(x) (((x) << 7) | 0x20)
#define ASRI(x) (((x) << 7) | 0x40)
#define RORI(x) (((x) << 7) | 0x60)
#define LSLR(x) (((x) << 8) | 0x10) /* shift register applied to OP2R */
#define LSRR(x) (((x) << 8) | 0x30)
#define ASRR(x) (((x) << 8) | 0x50)
#define RORR(x) (((x) << 8) | 0x70)
/* Load and store ops
 * Unlike the ALU macros these only build an opcode value; the caller ORs in
 * one of the addressing-mode macros below and passes the result to ins().
 */
#define LDR(reg,basereg) (0x04100000 | ((reg) << 12)| ((basereg) << 16))
#define STR(reg,basereg) (0x04000000 | ((reg) << 12)| ((basereg) << 16))
#define LDRB(reg,basereg) (0x04500000 | ((reg) << 12)| ((basereg) << 16))
#define STRB(reg,basereg) (0x04400000 | ((reg) << 12)| ((basereg) << 16))
#define WRITEBACK (1 << 21)
#define ADDOFFSET (1 << 23) /* else subtract */
#define PREADD (1 << 24) /* else post */
#define OFFSET(x) (PREADD | ADDOFFSET | (x)) /* normal simple index */
#define NEGOFFSET(x) (PREADD | (x)) /* subtract offset */
#define PREINC(x) (WRITEBACK | ADDOFFSET | PREADD | (x))
#define PREDEC(x) (WRITEBACK | PREADD | (x))
#define POSTINC(x) (ADDOFFSET | (x)) /* The manual says, do not set WRITEBACK if doing post-addition */
#define POSTDEC(x) ((x)) /* writeback will always occur; setting the bit gives LDRT/LDRBT instead */
/* Block-transfer and SWI opcodes. Like LDR/STR these only build an opcode
 * value; the caller ORs in the register list (one bit per register) and any
 * condition code, then passes the word to ins().
 * Macro arguments are parenthesised so that expression arguments such as
 * LDMIA(a | b) or SWI(cond ? x : y) encode correctly.
 */
#define PUSH (0x08000000 | (13<<16) /* register 13 */ \
                         | (1<<21) /* write-back */ \
                         | (1<<24) /* add offset before transfer */)
#define POP (0x08000000 | (13<<16) /* register 13 */ \
                        | (1<<20) /* load from memory */ \
                        | (1<<21) /* write-back */ \
                        | (1<<23) /* add, not subtract */ )
#define LDMIA(reg) (0x08000000 | ((reg)<<16) /* register to load from */ \
                               | (1<<20) /* load from memory */ \
                               | (1<<23) /* add, not subtract */ )
#define STMIA(reg) (0x08000000 | ((reg)<<16) /* register to store to */ \
                               | (1<<23) /* add, not subtract */ )
/* Supervisor call */
#define SWI(swino) (0x0F000000 | (swino))
/* Indexed load - LSL shift assumed - writeback or negative not covered */
#define INDEX(reg, shift) ((1<<25) | OFFSET(0) | OP2R(reg) | LSLI(shift))
/* Offset in assembler workspace: byte distance of a field from WP_FIRST_FIELD,
 * wrapped as a positive pre-indexed OFFSET(). Needs wp in scope. */
#define WP_OFFSET(field) OFFSET(((char*)&(wp->field)) - ((char*)&(wp->WP_FIRST_FIELD)))
/* Define an assembler register. Needs ws in scope. NB: the expansion already
 * ends in a semicolon, and some call sites rely on that. */
#define RN(name,no,describe) set_regname(ws, &ws->regnames.name, no, describe);
#ifdef DEBUG
static void ldm_reg_list(workspace *ws, char *a, int regmask, BOOL lastname)
/* Build, in a, the comma-separated register list that goes between the curly
 * brackets of an LDM/STM instruction in the trace listing. regmask has one bit
 * per physical register. Several register names can map to the same physical
 * register: with lastname FALSE the first matching name is used, with lastname
 * TRUE the last one wins (eg. the y-loop name rather than the x-loop name).
 * Asserts if a register in the mask has no name at all. The caller must supply
 * a buffer large enough for the result.
 */
{
  int phys;

  a[0] = 0;
  for (phys = 0; phys < 16; phys++) /* for each physical register */
  {
    regname *candidate;
    char *tail;
    BOOL matched = FALSE;

    if ((regmask & (1 << phys)) == 0) continue; /* not in the list */

    tail = a + strlen(a); /* where this register's text will start */
    FOR_EACH_REGISTER_NAME(candidate)
    {
      if (candidate->regno != phys) continue;
      *tail = 0; /* a later match replaces an earlier one */
      if (a[0] != 0) strcat(tail, ",");
      strcat(tail, candidate->name);
      matched = TRUE;
      if (!lastname) break;
    }
    assert(matched, ERROR_FATAL);
  }
}
#endif
#ifdef DEBUG
static void ins(workspace *ws, int w, char *description)
#else
#define ins(ws,w,description) do_ins(ws,w)
static void do_ins(workspace *ws, int w)
#endif
/* Append one instruction word to the code being compiled.
 * In debugging builds an assembler listing is produced as well; it can be fed
 * through objasm and the result compared with the opcodes generated here.
 * Columns of assembler output:
 * addressX opcodeXX label opcodes regs comment
 * ^0 ^10 ^20 ^28 ^36 ^68
 */
{
  unsigned int cond = (unsigned int) w >> 28; /* top nibble = condition field */

  /* The EQ and AL encodings are swapped in our #defines so that "no condition
   * given" means always; put them back the way the ARM expects. Every other
   * condition is already correct.
   */
  if (cond == 0xf)
    w &= 0x0fffffff; /* EQ code */
  else if (cond == 0)
    w |= 0xe0000000; /* AL code */

  tracef("%x %x %t28.%s\n" _
         (ws->compile_ptr - ws->compile_base) * sizeof(int) _
         w _ description); /* pseudo-assembler format of output */

  /* Refuse to run off the end of the buffer */
  assert(ws->compile_ptr < ws->compile_lim, ERROR_NO_MEMORY);
  *ws->compile_ptr = w;
  ws->compile_ptr++;
}
#ifdef DEBUG
#define DEFINE_LABEL(lab,describe) define_label(ws, L(lab), describe);
static void define_label(workspace *ws, label *lab, char *description)
#else
#define DEFINE_LABEL(lab,describe) define_label(ws, L(lab));
static void define_label(workspace *ws, label *lab)
#endif
/* Pin a label to the current output position. If a branch to this label was
 * compiled earlier, go back and fill in its offset field now. Defining the
 * same label twice is a fatal error.
 */
{
  int *pending; /* earlier branch still waiting for its offset, if any */
  int patched;

  assert(lab->def == 0, ERROR_FATAL);
  lab->def = ws->compile_ptr;
  tracef("%t20.%s%t68.; %s\n" _ lab->name _ description);

  pending = lab->ref;
  if (pending == 0) return; /* nothing was waiting on this label */

  /* Branch offsets are in words, relative to the branch address plus two words */
  patched = *pending | (B_OFFSET_MASK & (lab->def - (pending + 2)));
  tracef("%t20.; Zapping forward ref instruction at %x to be %x.\n" _
         sizeof(int) * (pending - ws->compile_base) _ patched);
  *pending = patched;
  lab->ref = 0;
}
#ifdef DEBUG
static void branch(workspace *ws, unsigned int opcode, label *lab, char *description)
#else
#define branch(ws,opcode,lab,description) do_branch(ws,opcode,lab)
static void do_branch(workspace *ws, unsigned int opcode, label *lab)
#endif
/* Emit a branch to lab. opcode is B or BL with any condition code already
 * OR'd in. A label that is already defined gets its offset immediately;
 * otherwise the branch is emitted with a zero offset and remembered in
 * lab->ref for define_label to patch. Only one outstanding forward reference
 * per label is supported - a second one is a fatal error.
 */
{
  if (lab->def != 0)
  {
    /* Backward reference: the target is known, so encode the word offset now */
    int displacement;

    assert(lab->ref == 0, ERROR_FATAL);
    displacement = B_OFFSET_MASK & (lab->def - (ws->compile_ptr + 2));
    ins(ws, opcode | displacement, description);
    return;
  }

  /* Forward reference */
#ifdef DEBUG
  if (lab->ref != 0)
    tracef("Already referenced at 0x%x\n" _ sizeof(int) * (lab->ref - ws->compile_base));
#endif
  assert(lab->ref == 0, ERROR_FATAL); /* Check for two forward refs to same label */
  lab->ref = ws->compile_ptr;
  ins(ws, opcode, description); /* offset field left as 0 until the label is defined */
}
#ifdef DEBUG
static void set_regname(workspace *ws, regname *r, int regno, char *describe)
#else
#define set_regname(ws,r,regno,describe) do_set_regname(ws,r,regno)
static void do_set_regname(workspace *ws, regname *r, int regno)
#endif
/* Bind a register name to a physical register. A regno of -1 asks for the
 * next register not yet handed out; any other value is taken as the specific
 * register to use. Running out of registers (beyond r12), or being handed r12
 * while it is reserved for the module workspace pointer, is a fatal error.
 */
{
  if (regno == -1)
  {
    /* Hand out the next free one, which must be in 0..12 */
    regno = ws->next_free_reg++;
    assert(0 <= regno && regno <= 12, ERROR_FATAL); /* Check for register overflow */
    if (regno == 12) assert(!ws->leave_r12_alone, ERROR_FATAL);
  }

  r->regno = regno;
  tracef("%t20.%s%t27 RN %t36.%i %t68.; %s\n" _ r->name _ r->regno _ describe);
}
static void align16(asm_workspace *wp, workspace *ws)
/* Align next instruction to quadword boundary.
 * Emits no-op instructions (MOV r_pixel,r_pixel) until the output pointer is
 * a multiple of 16 bytes. r_pixel must already have a physical register
 * assigned, and the padding counts against the code buffer limit like any
 * other instruction. wp is unused.
 *
 * The mask test is parenthesised explicitly: != binds more tightly than &, so
 * the unparenthesised form tested bit 0 of the pointer only and never padded.
 */
{
  UNUSED(wp);
  while ((((int) ws->compile_ptr) & 15) != 0)
    MOV(R(r_pixel), OP2R(R(r_pixel)), "MOV r_pixel,r_pixel ; align to 16-byte boundary");
}
#if defined(DEBUG_TML) && defined(DEBUG)
static void write_reg(workspace *ws, regname *reg)
/* Spool the register to the TML hardware.
 * Generates code that saves r0,r1,r10,r11,r14, copies the register into r1 and
 * then passes its four bytes, lowest first, to SWI HostFS_WriteC before
 * restoring the saved registers.
 */
{
  static char *const fetch_text[4] =
  {
    "MOV r0,r1",
    "MOV r0,r1 LSR #8",
    "MOV r0,r1 LSR #16",
    "MOV r0,r1 LSR #24"
  };
  const int saved_regs = (1<<10) | (1<<11) | 1 | (1<<1) | (1<<14);
  int byte;

  comment(ws, "Write Register to TML card");
  tracef("Register to be output is... %s\n" _ reg->name);
  ins(ws, PUSH | saved_regs, "STMDB sp!,{r0,r1,r10,r11,r14} ; prepare to call SWI");
  ins(ws, MOV_OPCODE | DSTR(1) | OP2R(reg->regno), "MOV r1,r_somereg");

  for (byte = 0; byte < 4; byte++)
  {
    /* The lowest byte needs no shift at all; the others are shifted down into place */
    int shift = (byte == 0) ? 0 : LSRI(8 * byte);

    ins(ws, MOV_OPCODE | DSTR(0) | OP2R(1) | shift, fetch_text[byte]);
    AND(0, 0, IMM(0xff), "AND r0,r0,#255 ");
    ins(ws, SWI(HostFS_WriteC), "SWI HostFS_WriteC ; convert r1 value");
  }

  ins(ws, POP | saved_regs, "LDMIA sp!,{r0,r1,r10,r11,r14} ; restore after calling SWI");
  comment(ws, "");
}
#endif
static void compile_buffer_init(asm_workspace *wp, workspace *ws)
/* Get ready to compile a new routine. Output goes to the buffer selected by
 * ws->build_buffer (buffers are reused round-robin, not LRU). All labels
 * become undefined, all register names become unassigned, and the fixed
 * registers r1-r3, wp, sp, lr and pc are bound to their physical numbers.
 */
{
  code_buffer *target = &ws->buffers[ws->build_buffer];
  label *lab;
  regname *reg;

  /* The output window is the whole code array of the chosen buffer */
  ws->compile_base = target->code;
  ws->compile_ptr = target->code;
  ws->compile_lim = target->code + BUFSIZE;

  /* Forget every label and register assignment from the previous compilation */
  FOR_EACH_LABEL(lab)
  {
    lab->def = 0;
    lab->ref = 0;
  }
  FOR_EACH_REGISTER_NAME(reg) reg->regno = -1;
  ws->next_free_reg = 0; /* allocate registers from 0 */

  /* Listing preamble (debugging builds only) */
  tracef("Compile buffer initialised.\n");
  tracef("%t20; Blitting code for %s, scale factors %i:%i,%i:%i outoffset %x\n" _
         (PLOTMASK ? "PlotMaskScaled" : "PutSpriteScaled") _
         target->xadd - target->xdiv _ target->xdiv _ target->yadd _ target->ydiv _ wp->save_outoffset);
  tracef("%t20; gcol action=%i in-bpp=%i out-bpp=%i in-dpix=%s out-dpix=%s mask=%s 1bppmask=%s palette=%s table=%s\n" _
         ws->gcol _ (1<<wp->save_inlog2bpp) _ wp->BPP _
         whether(DPIXEL_INPUT) _ whether(DPIXEL_OUTPUT) _
         whether(SOURCE_MASK) _ whether(SOURCE_BPPMASK) _
         whether(wp->trns_palette != 0) _ whether(wp->ColourTTR != 0));
  tracef("%t20.; Generated by compiler of (%s %s)\n" _ __DATE__ _ __TIME__);
  comment(ws, "Get register and workspace definitions, turn on listing");
  tracef("%t28.GET w.GenHdr\n");
  tracef("%t28.OPT 1\n");

  /* Registers whose physical number is fixed (RN supplies its own semicolon) */
  RN(r1, 1, "r1")
  RN(r2, 2, "r2")
  RN(r3, 3, "r3")
  RN(wp, 12, "workspace pointer")
  RN(sp, 13, "stack pointer")
  RN(lr, 14, "link register")
  RN(pc, 15, "program counter")

  /* by default, compiled code does not have module workspace pointer */
  ws->leave_r12_alone = FALSE;
  UNUSED(wp); /* only the trace output reads wp */
}
static void compile_buffer_done(workspace *ws)
/* Wrap up a compilation: close the listing, advance the round-robin buffer
 * index, and (debugging builds) verify that no branch is still waiting for
 * its label. Afterwards ws->compile_base is the entry point of the new code.
 */
{
#ifdef DEBUG
  label *lab;
#endif

  tracef("%t28.END\n");
  tracef("Compile buffer done, %i words generated.\n" _ ws->compile_ptr - ws->compile_base);

  /* Choose the buffer the next compilation will overwrite, wrapping at the end */
  if (++ws->build_buffer >= NBUFFERS) ws->build_buffer = 0;

#ifdef DEBUG
  /* Every forward reference must have been resolved by a DEFINE_LABEL */
  FOR_EACH_LABEL(lab)
  {
    if (lab->ref != 0) tracef("Unresolved reference to label %s at %x\n" _ lab->name _ sizeof(int) * (lab->ref - ws->compile_base));
    assert(lab->ref == 0, ERROR_FATAL);
  }
#endif
}
/**************************************************************************
* *
* Test main entry sequence, low level IO and code generation. *
* *
**************************************************************************/
#ifdef TESTDEBUG
blitter putscaled_compiler(asm_workspace *wp, workspace *ws, workspace *ws_end, int gcol)
/* TESTDEBUG build only: exercise the low-level output routines, report the
 * workspace sizes, and then assemble one instance of each instruction form
 * so that the trace listing can be compared against a real assembler.
 *
 * NOTE(review): the function is declared to return a blitter but no value is
 * returned on any path, and gcol is never read - confirm what the test
 * harness expects before relying on the result.
 */
{
  /* Test low-level output */
  writes(0, "Hello there!\nhi!\n");
  writech(0, 'X');
  writehex(0, 0x5732abcd, 8);
  writech(0, '_');
  tracef("Test tracef, esc:%%, string:'%s', char:'%c', int:'%i', hex:'%x'.\n",
         "hello", 'X', 1234567, 0x6789abcd);

  /* Check that the assembler has an adequate opinion of our workspace needs. */
  tracef("wp=%x ws=%x ws_end=%x.\n" _ wp _ ws _ ws_end);
  tracef("Size of assembler workspace: %i.\n" _ ((char*)ws) - ((char*)wp));
  tracef("Size of C workspace: %i.\n" _ ((char*)ws_end) - ((char*)ws));
  assert(ws_end > ws, ERROR_NO_MEMORY);
  check_workspace(ws);
  dump_workspace(ws);

  compile_buffer_init(wp, ws);

  /* Check compilation of all instruction forms. The resulting trace output can then
   * have its binary details stripped, be run through objasm, and the resulting listings
   * compared to check the bit patterns generated.
   */
  ADD(5, 6, GT | OP2R(7), "ADDGT r5,r6,r7");
  branch(ws, B, L(loop_y_exit), "B loop_y_exit");
  ORR(2, 3, S | IMM(23), "ORRS r2,r3,#23");
  DEFINE_LABEL(test1, "test label")
  EOR(1, 2, EQ | IMM(255) | IMMROR(10), "EOREQ r1,r2,#(255:ROR:10)");
  comment(ws, "This is a comment");
  DEFINE_LABEL(test2, "test label")
  branch(ws, B + NE, L(loop_y_repeat), "BNE loop_y_repeat");
  branch(ws, BL + EQ, L(loop_y_repeat), "BLEQ loop_y_repeat");
  CMP(8, OP2R(9) | LSLI(12), "CMP r8,r9,LSL #12");
  CMP(8, OP2R(9) | ASRR(6), "CMP r8,r9,ASR r6");
  MOV(3, OP2R(4) | RORI(1), "MOV r3,r4,ROR #1");
  /* The destination register and operand belong inside one MOV() call. */
  MOV(R(pc), OP2R(R(lr)), "MOV pc,lr");
  ins(ws, LDR(8,3) | OFFSET(249), "LDR r8,[r3,#249]");
  ins(ws, LDR(8,3) | OFFSET(0), "LDR r8,[r3]");
  ins(ws, STR(1,2) | GT + NEGOFFSET(12), "STRGT r1,[r2,#-12]");
  ins(ws, LDRB(1,2) | PREINC(4), "LDRB r1,[r2,#4]!");
  ins(ws, STRB(6,7) | POSTINC(4), "STRB r6,[r7],#4");
  ins(ws, LDRB(1,2) | PREDEC(4), "LDRB r1,[r2,#-4]!");
  ins(ws, STRB(6,7) | POSTDEC(4), "STRB r6,[r7],#-4");
  ins(ws, LDRB(8,3) | INDEX(4,0), "LDRB r8,[r3,r4]");
  ins(ws, LDR(8,3) | INDEX(4,2), "LDR r8,[r3,r4,LSL #2]");
  ins(ws, PUSH | GT | (1<<4) | (1<<5) | (1<<6), "STMGTDB sp!,{r4,r5,r6}");
  ins(ws, POP | (1<<4) | (1<<5) | (1<<6), "LDMIA sp!,{r4,r5,r6}");
  ins(ws, POP | (1<<4) | (1<<5) | (1<<6) | (1<<R(pc)),"LDMIA sp!,{r4,r5,r6,pc}");
  {
    char a[256];
    char a2[256];
    int regmask = (1<<13) | (1<<15); /* pretty arbitrary silly one actually */

    /* Build the register list text once and reuse it for both block transfers. */
    ldm_reg_list(ws, a, regmask, FALSE);
    do_sprintf(a2, "LDMIA lr,{%s}", a);
    ins(ws, LDMIA(lr) | regmask, a2);
    /* The text must name lr, the base register actually encoded by STMIA(lr). */
    do_sprintf(a2, "STMIA lr,{%s}", a);
    ins(ws, STMIA(lr) | regmask, a2);
  }
  branch(ws, BL + EQ, &ws->labels.loop_y_repeat, "BLEQ loop_y_repeat");

  compile_buffer_done(ws);
  writes(0, "Exit.\n");
}
#else
/**************************************************************************
* *
* Bitblit: Evaluate conditions. *
* *
**************************************************************************/
static BOOL simple_x_scale(asm_workspace *wp, workspace *ws)
/* TRUE when the plot is 1:1 along x and is plain enough for the
 * 2-at-a-time inner loop; FALSE otherwise.
 */
{
  /* The scale must be 1:1 (save_xadd less save_xdiv equal to save_xdiv). */
  if (wp->save_xadd - wp->save_xdiv != wp->save_xdiv)
    return FALSE;

  /* Otherwise we MIGHT have to omit the first pixel, which the 1:1 code doesn't allow for. */
  if (wp->save_xdiv > wp->save_xcount)
    return FALSE;

  /* The 2-at-a-time loop doesn't allow for PLOTMASK - not important enough. */
  if (PLOTMASK)
    return FALSE;

  /* The 2-at-a-time loop doesn't allow for any gcol but 0 - not important enough. */
  if (ws->gcol != 0)
    return FALSE;

  /* Dithering CAN be done, but the code sequences get awfully big so it is cut out for now. */
  if (ws->odither)
    return FALSE;

  return TRUE;
}
static BOOL x_block_move(asm_workspace *wp, workspace *ws)
/* TRUE when a row can be plotted by simply moving a block of bits:
 * 1:1 along x, identical bits per character on both sides, gcol action 0,
 * and no mask, translation table or cal_table to apply.
 */
{
  if (!simple_x_scale(wp, ws))
    return FALSE;
  if (wp->BPC != (1<<wp->save_inlog2bpc))
    return FALSE;
  if (ws->gcol != 0)
    return FALSE;
  if (SOURCE_MASK)
    return FALSE;
  if (SOURCE_TABLE)
    return FALSE;
  if (wp->cal_table != 0)
    return FALSE;
  return TRUE;
}
static BOOL simple_y_scale(asm_workspace *wp, workspace *ws)
/* Report whether the plot is 1:1 along y, i.e. the saved y adder and
 * y divisor are equal. ws is accepted only for symmetry with simple_x_scale.
 */
{
  BOOL one_to_one = (wp->save_yadd == wp->save_ydiv);

  UNUSED(ws);
  return one_to_one;
}
static int palette_is_grey(int *palette, int entries)
/* Classify a palette by scanning its first 'entries' words.
 *
 * An entry counts as grey when its top three bytes are all equal; the
 * bottom byte is ignored.
 *
 * Returns 0 if any entry is not grey,
 *         2 if every entry is grey and entry i has grey level i,
 *         1 if every entry is grey but the levels are not that ramp.
 * An empty palette (entries <= 0) returns 2.
 */
{
  int index;
  int ascending = 1;

  for (index = 0; index < entries; index++)
  {
    /* Work on an unsigned copy: entries with the top bit set are negative
     * as int, and right-shifting a negative int is implementation-defined.
     */
    unsigned int entry = (unsigned int) palette[index];

    /* XOR with itself shifted down a byte compares byte 1 with byte 2 and
     * byte 2 with byte 3 in a single step.
     */
    if (((entry ^ (entry >> 8)) & 0xffff00u) != 0)
      return 0;

    /* Byte 1 holds the grey level; a ramp has level == position. */
    if (((entry & 0xff00u) >> 8) != (unsigned int) index)
      ascending = 0;
  }

  return ascending ? 2 : 1;
}
/**************************************************************************
* *
* Bitblit: Register allocation. *
* *
**************************************************************************/
static void ptrs_rn(asm_workspace *wp, workspace *ws)
/* Declare the pointer registers, which must be visible in both the x-loop and the y-loop.
 * Only r_pixel is given a fixed register number here; the other names are declared
 * with -1, which presumably asks for the next free register counting up from 0
 * (xloop_rn asserts r_inptr is 0 and r_outptr is 1 in the block-move case).
 * The order of the declarations therefore decides which register each name gets.
 */
{
/* r_pixel is always needed, and need not be saved between loops.
 * So, we put it in r14 to remove the need for the register allocator
 * to worry about r14.
 */
RN(r_pixel, 14, "fetched and translated pixel")
/* In most cases there are not enough registers, and the control of
 * the outer (y) loop requires swapping two 'banks' of registers.
 * inptr, outptr (and maskinptr if it exists) are always registers
 * r0, r1, r2, and they are visible when the y registers are swapped in.
 */
RN(r_inptr, -1, PLOTMASK ? "ECF pattern pointer" : "input word pointer")
RN(r_outptr, -1, "word pointer to output")
/* A separate mask pointer is only declared for a BPP mask or for PlotMask. */
if (SOURCE_BPPMASK || PLOTMASK) RN(r_maskinptr, -1, "mask input word pointer")
if (ws->odither) RN(r_oditheradd, -1, "ordered dither offset value")
/* The initial dither add value needs to be changed for every output line,
 * so it helps to have r_oditheradd visible in the y loop
 */
}
static void xloop_rn(asm_workspace *wp, workspace *ws)
/* Declare the remaining registers used by the x-loop (those not already declared
 * by ptrs_rn). Two shapes exist: the block-move case, whose register numbers must
 * match what the hand-written block routine expects, and the general case, where
 * registers are declared only for the features this particular plot needs.
 * The declaration order matters, since it decides the register numbers handed out.
 */
{
int need_temps = 0; /* set to 1 or 2 if temp1 and temp2 are needed */
if (x_block_move(wp, ws))
{
/* X loop is very very simple, and communicates with machine-code block-shift routine. */
RN(r_inshift, -1, "Number of (most sig) bits of first input word to transfer, in 1..32")
RN(r_outshift, -1, "Number of (most sig) bits of first output word to fill, in 1..32")
RN(r_xsize, -1, "Number of bits to transfer per row")
RN(r_blockroutine, -1, "Block transfer routine")
/* Those registers had better be the same ones as the assembler code is expecting! */
assert(ws->regnames.r_inptr.regno == 0, ERROR_FATAL);
assert(ws->regnames.r_outptr.regno == 1, ERROR_FATAL);
assert(ws->regnames.r_inshift.regno == 2, ERROR_FATAL);
assert(ws->regnames.r_outshift.regno == 3, ERROR_FATAL);
assert(ws->regnames.r_xsize.regno == 4, ERROR_FATAL);
}
else
{
/* Normal case - declare whatever other registers are needed for fetching and translating pixels. */
if (PLOTMASK)
RN(r_inword, -1, "ECF pattern input word")
else if (!SOURCE_32_BIT) /* if not 32-bit source */
{
RN(r_inshift, -1, "bit shift of current pixel LSL #27")
RN(r_inword, -1, "current input word")
}
if (SOURCE_MASK)
{
RN(r_maskinword, -1, "current mask word")
/* A BPP mask (or PlotMask) is walked with its own shift; otherwise the mask
 * is reached through a fixed offset from the sprite data.
 */
if (SOURCE_BPPMASK || PLOTMASK)
RN(r_maskinshift, -1, "bit shift of current mask pixel")
else
RN(r_masko, -1, "offset of mask data from sprite data")
}
/* need_temps is still 0 at this point, so the first test below always passes. */
if ( need_temps == 0
&& (ws->gcol != 0)
&& DEST_32_BIT /* use in save_pixel */
)
need_temps = 1;
if (PLOTMASK)
{
RN(r_ecfindex, -1, "index into ECF pattern")
RN(r_bgcolour, -1, "background plotting colour")
}
else
{
if (SOURCE_TABLE || wp->cal_table) RN(r_table, -1, "translation table or palette")
{
/* Work out whether we need 16->32 or 32->16 transformations, with their temp registers
 * So, mirror the structure of translate_pixel
 */
int pixl2bpp = wp->save_inlog2bpp;
if ((wp->trns_palette != 0) && (wp->BPP != 16)) pixl2bpp = 5;
if (pixl2bpp == 5 && wp->BPP != 32) need_temps = 2;
if (pixl2bpp == 4 && wp->BPP == 32)
{
need_temps = 2;
RN(r_c1632, -1, "constant for 16->32 transformation")
}
}
/* An exact integer magnification greater than 4 also wants one temporary. */
if ( need_temps == 0
&& (wp->save_xmag % wp->save_xdiv) == 0
&& (wp->save_xmag / wp->save_xdiv) > 4 /* used in optimised scale up */
)
need_temps = 1;
}
/* Declare whatever registers needed for saving the new pixel
 * into the current destination pixel.
 */
if (!DEST_32_BIT)
{
RN(r_outword, -1, "current output word")
RN(r_outshift, -1, "bit shift of current pixel in current output word LSL 27")
}
if (wp->save_inlog2bpp <= 3 && simple_x_scale(wp, ws))
/* going to use 2-at-a-time loop - if 16bpp or more, don't need this register. */
RN(r_in_pixmask, -1, "pixel mask for 2-at-a-time loop")
/* Declare whatever registers are needed for control of
 * horizontal scaling. For some simple cases no scaling registers
 * are needed.
 */
RN(r_xsize, -1, "number of output pixels per row")
if (!simple_x_scale(wp, ws)) /* not 1:1 scale */
RN(r_xcount, -1, "total for x scale")
/* Adder and subtractor values become constants in the code. */
}
/* The temporaries are shuffled to the end, so that if r12 (the assembler wp) is used then
 * it does not get loaded before the y loop variables are initialised.
 */
if (need_temps >= 1) RN(r_temp1, -1, "temp1 for pixel transformation temporary values")
if (need_temps >= 2) RN(r_temp2, -1, "temp2 for pixel transformation temporary values")
/* MAX POSSIBLE REQUIREMENT - 13, if vcount stuff not done.
 * It may appear 15, but temp1 and temp2 are only needed if one of src/dst
 * is 32bpp, in which case we save elsewhere.
 * >>> AH not so, they are also needed if a palette is used, in which case
 * the source can be fewer bpp. Ooops. Can we ever overflow? Not sure.
 */
}
static int yloop_rn_count(asm_workspace *wp, workspace *ws)
/* Predict the number of registers that yloop_rn is going to declare,
 * by applying the same conditions without declaring anything.
 */
{
  /* r_ysize and r_inoffset are always declared. */
  int count = 2;

  count += (wp->save_yadd != wp->save_ydiv) ? 1 : 0; /* r_ycount */
  count += (SOURCE_BPPMASK || PLOTMASK) ? 1 : 0;     /* r_maskinoffset */
  count += (wp->is_it_jpeg) ? 1 : 0;                 /* r_fetchroutine */

  return count;
}
static void yloop_rn(asm_workspace *wp, workspace *ws)
/* Declare the registers that steer the vertical loop. They form a
 * 'bank' of their own, separate from the registers of the central loop.
 * At most five registers are declared.
 */
{
  RN(r_ysize, -1, "number of output rows")

  /* A running y total is only needed when the scale is not 1:1.
   * Adder and subtractor values become constants in the code.
   */
  if (!simple_y_scale(wp, ws))
  {
    RN(r_ycount, -1, "total for y scale")
  }

  RN(r_inoffset, -1, "byte offset between input rows.")

  if (SOURCE_BPPMASK || PLOTMASK)
  {
    RN(r_maskinoffset, -1, "byte offset between mask rows.")
  }

  if (wp->is_it_jpeg)
  {
    RN(r_fetchroutine, -1, "routine for getting row of decompressed JPEG data.")
  }
}
/**************************************************************************
* *
* Bitblit: Register initialisation. *
* *
**************************************************************************/
/* Loading a constant index from the workspace pointer.
 * LDR_WP(reg, value) emits, through ins(), an LDR of register name 'reg' from the
 * wp register at WP_OFFSET(value); the trace text is built by stringifying both
 * arguments. The macro carries its own terminating semicolon, so call sites are
 * written without one (and can sit directly in front of an 'else').
 */
#define LDR_WP(reg,value) ins(ws, LDR(R(reg),R(wp)) + WP_OFFSET(value), \
"LDR " #reg "," #value);
/* LDR_WP_C is LDR_WP with a trailing comment in the trace text. In DEBUG builds
 * the text is formatted into a local 256-byte buffer (so the expansion is a braced
 * block); in other builds the same instruction is emitted with 0 in place of the text.
 * 'comment' must be a string literal, since it is pasted onto the format string.
 */
#ifdef DEBUG
#define LDR_WP_C(reg,value, comment) \
{ \
char a[256]; \
do_sprintf(a, "LDR " #reg "," #value " %t40.; " comment); \
ins(ws, LDR(R(reg),R(wp)) + WP_OFFSET(value), a); \
}
#else
#define LDR_WP_C(reg,value, comment) ins(ws, LDR(R(reg),R(wp)) + WP_OFFSET(value), 0);
#endif
/* Loading a constant index from a register.
 * LDR_INDEX(destreg, indexreg, offset, comment) emits an LDR of 'destreg' from
 * 'indexreg' at OFFSET(offset). As with LDR_WP_C, the descriptive text is only
 * built in DEBUG builds and the expansion supplies its own terminator.
 */
#ifdef DEBUG
#define LDR_INDEX(destreg,indexreg,offset,comment) \
{ \
char a[256]; \
do_sprintf(a, "LDR " #destreg ",[" #indexreg ", #%i] %t40.; " comment, offset); \
ins(ws, LDR(R(destreg),R(indexreg)) | OFFSET(offset), a); \
}
#else
#define LDR_INDEX(destreg,indexreg,offset,comment) ins(ws, LDR(R(destreg),R(indexreg)) | OFFSET(offset), 0);
#endif
static void get_in_shift(asm_workspace *wp, workspace *ws)
/* Used within fetch_pixel_init, to load r_inshift. The complication is
 * that if this is JPEG data then the save_inshift value was not calculated,
 * because SpriteExtend assembler stuff thought this was 32bit data. This
 * only matters if JPEG is being made to produce 8bpp or 16bpp data.
 *
 * In every case the generated code leaves in r_inshift the count of bits
 * still to be taken from the first input word; callers apply any further
 * shifting themselves.
 */
{
if (wp->is_it_jpeg && wp->save_inlog2bpp != 5)
{
/* JPEG source narrower than 32bpp: derive the shift from the input x coordinate. */
LDR_WP_C(r_inshift, in_x, "input x coord (JPEG input data)")
if (wp->save_inlog2bpp == 4)
{
/* 16bpp: even x gives 2 halfwords (32 bits), odd x gives 1 halfword (16 bits). */
AND(R(r_inshift), R(r_inshift), S | IMM(1), "ANDS r_inshift,r_inshift,#1 ; halfword offset (0 or 1)");
MOV(R(r_inshift), EQ | IMM(2), "MOVEQ r_inshift,#2 ; halfword offset (1 or 2)");
MOV(R(r_inshift), OP2R(R(r_inshift)) | LSLI(4), "MOV r_inshift,r_inshift,LSL #4 ; 16/32 bit offset");
}
else /* wp->save_inlog2bpp == 3 */
{
/* 8bpp: x modulo 4 selects the byte, giving 4, 3, 2 or 1 bytes remaining. */
AND(R(r_inshift), R(r_inshift), S | IMM(3), "ANDS r_inshift,r_inshift,#3 ; byte offset as 0/1/2/3");
RSB(R(r_inshift), R(r_inshift), IMM(4), "RSB r_inshift,r_inshift,#4 ; byte offset as 4/3/2/1");
MOV(R(r_inshift), OP2R(R(r_inshift)) | LSLI(3), "MOV r_inshift,r_inshift,LSL #3 ; 8/16/24/32 bit offset");
}
}
else
{
/* Ordinary source: the saved initial shift is subtracted from 32. */
LDR_WP_C(r_inshift, save_inshift, "input initial shift")
RSB(R(r_inshift), R(r_inshift), IMM(32), "RSB r_inshift,r_inshift,#32 ; pixels of first word to transfer, in 1..32");
}
}
static void fetch_pixel_init(asm_workspace *wp, workspace *ws)
/* Initialise whatever registers are needed for fetching and translating
 * pixels.
 *
 * The code emitted here runs once, before the loops: it loads the input
 * pointer, then either the block-move registers or, in the general case,
 * the input shift, the mask registers, the translation table pointer, the
 * 2-at-a-time pixel mask, the 16->32 constant and the ordered dither seed,
 * each only when the matching register was declared for this plot.
 */
{
/* The input word pointer */
if (PLOTMASK)
{
LDR_WP_C(r_inptr, save_ecflimit, "base of ECF pattern")
}
else if (wp->is_it_jpeg)
{
LDR_WP_C(r_inptr, in_y, "initial y coordinate (for JPEG data)")
}
else /* normal data source for PutSpriteScaled */
{
LDR_WP_C(r_inptr, save_inptr, "input word pointer")
}
/* all other registers re fetching input data */
if (x_block_move(wp, ws))
{
/* Prepare for machine code core to inner loop */
#if 0
LDR_WP_C(r_inshift, save_inshift, "input initial shift (for block move)")
RSB(R(r_inshift), R(r_inshift), IMM(32), "RSB r_inshift,r_inshift,#32 ; pixels of first word to transfer, in 1..32");
#else
get_in_shift(wp, ws);
#endif
LDR_WP(r_blockroutine, ccompiler_bitblockmove)
}
else
{
/* initialise r_inptr */
if (PLOTMASK)
{
LDR_WP(r_ecfindex, save_ecfptr) /* byte index into ECF pattern, not rounded */
AND(R(r_pixel), R(r_ecfindex), IMM(0x18), "AND r_pixel,r_ecfindex,#&18 ; extract initial row offset in ECF");
ADD(R(r_inptr), R(r_inptr), OP2R(R(r_pixel)), "ADD r_inptr,r_inptr,r_pixel ; and add to initial ECF row address");
LDR_WP(r_bgcolour, bgcolour) /* background colour pixel */
}
else
{
/* r_inword and r_inshift */
if (!SOURCE_32_BIT) /* if not 32-bit source */
{
/* r_inword not initialised yet, done in inner loop */
#if 0
LDR_WP(r_inshift, save_inshift)
RSB(R(r_inshift), R(r_inshift), IMM(32), "RSB r_inshift,r_inshift,#32 ; pixels still to shift");
#else
get_in_shift(wp, ws);
#endif
MOV(R(r_inshift), OP2R(R(r_inshift)) | LSLI(27), "MOV r_inshift,r_inshift,LSL #27 ; keep up at top end of register");
}
}
/* mask registers */
if (SOURCE_MASK)
{
if (SOURCE_BPPMASK || PLOTMASK)
{
LDR_WP(r_maskinshift, save_maskinshift)
if (SOURCE_BPPMASK)
{
LDR_WP(r_maskinptr, save_maskinptr)
}
else /* PLOTMASK and not BPPMASK */
{
/* The mask pointer is the sprite data pointer plus the mask offset. */
LDR_WP_C(r_maskinptr, save_inptr, "mask pointer for PlotMaskScaled")
LDR_WP(r_pixel, save_masko) /* temp use of r_pixel */
ADD(R(r_maskinptr), R(r_maskinptr), OP2R(R(r_pixel)),"ADD r_maskinptr,r_maskinptr,r_pixel ; mask pointer (for PlotMask)");
}
RSB(R(r_maskinshift), R(r_maskinshift), IMM(32), "RSB r_maskinshift,r_maskinshift,#32 ; pixels still to shift");
MOV(R(r_maskinshift),
OP2R(R(r_maskinshift)) | LSLI(27), "MOV r_maskinshift,r_maskinshift,LSL #27 ; keep up at top end of register");
}
else
LDR_WP(r_masko, save_masko)
}
/* translation registers: cal_table takes precedence, then the palette, then ColourTTR */
if (wp->cal_table) LDR_WP(r_table, cal_table)
else if (wp->trns_palette != 0) LDR_WP(r_table, trns_palette)
else
{
if (wp->ColourTTR != 0)
{
LDR_WP(r_table, ColourTTR)
if ( wp->BPP <= 8 /* 256 colours or less on output */
&& wp->save_inlog2bpp >= 4 /* thousands or millions of input colours */
)
{
ins(ws, LDR(R(r_table), R(r_table)) | OFFSET(4), "LDR r_table,[r_table,#4] ; load base of 32K table");
}
}
}
/* Same condition as the one under which xloop_rn declares r_in_pixmask. */
if (wp->save_inlog2bpp <= 3 && simple_x_scale(wp, ws))
MOV(R(r_in_pixmask), IMM(ws->in_pixmask), "MOV r_in_pixmask,#in_pixmask ; for use in 2-at-a-time loop");
/* temp1 and temp2 need no initialisation. */
if (ws->regnames.r_c1632.regno != -1) /* Generate binary constant 0000000011100000 1110000011100000 */
{
MOV(R(r_c1632), IMM(0xe0), "MOV r_c1632,#&e0 ; 0000000000000000 0000000011100000");
ORR(R(r_c1632), R(r_c1632),OP2R(R(r_c1632))|LSLI(8),"ORR r_c1632,r_c1632,r_c1632,LSL #8 ; 0000000000000000 1110000011100000");
ORR(R(r_c1632), R(r_c1632),OP2R(R(r_c1632))|LSLI(8),"ORR r_c1632,r_c1632,r_c1632,LSL #8 ; 0000000011100000 1110000011100000");
}
if (ws->odither)
{
/* We use ordered dither to attempt to increase the output resolution by almost two bits.
 * This only happens for a 16bpp or 32bpp source that's being truncated somewhat.
 * A square of output pixels has the following binary addition values:
 * 11 01
 * 00 10
 * These values are added to the value of each of R/G/B, just before those values are
 * truncated or looked up in a table, shifted so that we add to the bits which are
 * just about to be discarded.
 * We keep the value to add in r_oditheradd.
 * To proceed along the x axis we EOR by 10 every output pixel.
 * We must also EOR by 01 every line.
 * The starting value must be aligned with the origin of the output.
 */
comment(ws, "Compute initial dither addition value - bit 0 changes every y, bit 1 every x");
LDR_WP(r_pixel, save_xcoord)
AND(R(r_pixel), R(r_pixel), IMM(1), "AND r_pixel,r_pixel,#1 ; least sig bit of x, for dither");
LDR_WP(r_oditheradd, save_ycoord)
AND(R(r_oditheradd), R(r_oditheradd), IMM(1), "AND r_oditheradd,r_oditheradd,#1 ; least sig bit of y, for dither");
EOR(R(r_pixel),R(r_pixel),OP2R(R(r_oditheradd)), "EOR r_pixel,r_pixel,r_oditheradd ; if we start Y off on an odd footing, invert x as well");
ORR(R(r_oditheradd), R(r_oditheradd),
OP2R(R(r_pixel)) | LSLI(1), "ORR r_oditheradd,r_oditheradd,r_pixel,LSL #1 ; dither add value");
/* The dither should start based on the current ECF offset */
/* r_pixel is used as scratch: it is set to &1000 and then used as the base
 * for reading the two kernel variables at fixed addresses &11FC and &11F8.
 */
MOV(R(r_pixel),IMM(0x10) | IMMROR(24), "MOV r_pixel,#&1000 ; prepare to get ECFYOffset");
LDR_INDEX(r_pixel,r_pixel,0x1FC,"get kernel variable ECFYOffset from &11FC")
TST(R(r_pixel),IMM(1), "TST r_pixel,#1 ; is Y ECF offset odd?");
EOR(R(r_oditheradd),R(r_oditheradd),NE | IMM(3), "EORNE r_oditheradd,r_oditheradd,#3 ; if so, change ordered dither origin to match");
MOV(R(r_pixel),IMM(0x10) | IMMROR(24), "MOV r_pixel,#&1000 ; prepare to get ECFShift");
LDR_INDEX(r_pixel,r_pixel,0x1F8,"get kernel variable ECFShift from &11F8")
TST(R(r_pixel),IMM(wp->BPP), "TST r_pixel,#out_bpp ; is ECF Shift an odd number of pixels?");
EOR(R(r_oditheradd),R(r_oditheradd),NE | IMM(2), "EORNE r_oditheradd,r_oditheradd,#2 ; if so, change ordered dither origin to match");
/* Shift the dither value to the top of the register. */
{
IFDEBUG(char a[256];)
IFDEBUG(do_sprintf(a, "MOV r_oditheradd,r_oditheradd,LSL #%i %t40; shift to top of word", 23 + ws->odither);)
MOV(R(r_oditheradd), OP2R(R(r_oditheradd)) | LSLI(23 + ws->odither), a);
}
}
}
newline();
}
static void save_pixel_init(asm_workspace *wp, workspace *ws)
/* Initialise whatever registers are needed for saving the new pixel
 * into the current destination pixel.
 *
 * The output pointer is always loaded. In the block-move case r_outshift
 * receives the number of bits of space left in the first output word
 * (1..32). In the general case, for outputs narrower than 32bpp, the same
 * quantity is additionally shifted up by 27 to sit at the top of the
 * register; PlotMask also has its ECF index zeroed.
 */
{
  LDR_WP(r_outptr, save_outptr)
  if (x_block_move(wp, ws))
  {
    /* Very simple inner loop */
    LDR_WP_C(r_pixel, save_xcoord, "get initial output x coord in pixels") /* Measured in pixels */
    AND(R(r_outshift), R(r_pixel), IMM(ws->out_ppw-1), "AND r_outshift,r_pixel,#out_ppw-1 ; pix offset of start");
    MOV(R(r_outshift),OP2R(R(r_outshift)) | LSLI(ws->out_l2bpc), "MOV r_outshift,r_outshift,LSL #out_l2bpc ; bit offset of start, in 0..31");
    RSB(R(r_outshift), R(r_outshift), IMM(32), "RSB r_outshift,r_outshift,#32 ; pixels of space, in 1..32");
  }
  else
  {
    /* Normal cases */
    /* r_pixel is borrowed to hold the starting x coordinate. */
    if (PLOTMASK || !DEST_32_BIT)
      LDR_WP_C(r_pixel, save_xcoord, "output x coord measured in pixels")
    if (PLOTMASK)
    {
      /* An immediate operand is passed bare, as everywhere else in this file;
       * OP2R() is the wrapper for register operands.
       */
      MOV(R(r_ecfindex), IMM(0), "MOV r_ecfindex, #0 ; should always be 0 ?");
#if 0
      AND(R(r_ecfindex), R(r_pixel), IMM(ws->out_ppw), "AND r_ecfindex,r_pixel,#out_ppw ; pixels into ECF pattern");
      /* Convert from pixels, to byte offset into ECF line - either 0 or 4 */
      if (ws->out_l2ppw > 2) /* > 4 output pixels per word */
        MOV(R(r_ecfindex), OP2R(R(r_ecfindex))
            | LSRI(ws->out_l2ppw - 2), "MOV r_ecfindex,r_ecfindex,LSR #out_l2ppw-2 ; convert to byte offset");
      if (ws->out_l2ppw < 2) /* < 4 output pixels per word (ie 2 or 1) */
        MOV(R(r_ecfindex), OP2R(R(r_ecfindex))
            | LSLI(2 - ws->out_l2ppw), "MOV r_ecfindex,r_ecfindex,LSL #2-out_l2ppw ; convert to byte offset");
#endif
    }
    if (!DEST_32_BIT)
    {
      /* Pixel position within the word -> bit position -> bits remaining,
       * then moved to the top five bits of the register.
       */
      AND(R(r_outshift), R(r_pixel), IMM(ws->out_ppw-1), "AND r_outshift,r_pixel,#out_ppw-1 ; pixel offset of start");
      MOV(R(r_outshift),OP2R(R(r_outshift)) | LSLI(ws->out_l2bpc),"MOV r_outshift,r_outshift,LSL #out_l2bpc ; bit offset of start");
      RSB(R(r_outshift), R(r_outshift), IMM(32), "RSB r_outshift,r_outshift,#32 ; pixels still to rotate");
      MOV(R(r_outshift), OP2R(R(r_outshift)) | LSLI(27), "MOV r_outshift,r_outshift,LSL #27 ; up at the top");
    }
  }
}
static void xloop_init(asm_workspace *wp, workspace *ws)
/* Emit the loads for the registers that steer horizontal scaling.
 * The row size is always loaded; the running x total is loaded only when
 * the scale is not 1:1.
 */
{
  LDR_WP(r_xsize, save_xsize)

  if (!simple_x_scale(wp, ws))
  {
    /* Scaled plot: fetch the initial x total. */
    if (!(ws->odither) || !(SOURCE_16_BIT))
    {
      LDR_WP(r_xcount, save_xcount)
    }
    else
    {
      /* Dithered 16-bit source: the total goes into r_pixel instead.
       * Changed by (GPS) to fix register spill bug.
       */
      LDR_WP(r_pixel, save_xcount)
    }
  }

  if (x_block_move(wp, ws))
  {
    /* The block mover counts bits, not pixels. */
    MOV(R(r_xsize), OP2R(R(r_xsize)) | LSLI(ws->out_l2bpc), "MOV r_xsize,r_xsize,LSL #out_l2bpc ; size in bits");
  }
}
static void yloop_init(asm_workspace *wp, workspace *ws)
/* Emit the loads for the registers that steer the vertical loop.
 * These registers belong to their own 'bank', separate from the registers
 * of the central loop.
 */
{
  if (wp->is_it_jpeg)
  {
    LDR_WP_C(r_fetchroutine, fetchroutine, "routine to call to get JPEG data line")
  }

  LDR_WP(r_ysize, save_ysize)

  if (!simple_y_scale(wp, ws))
  {
    /* Scaled along y: fetch the initial y total. */
    LDR_WP(r_ycount, save_ycount)
  }

  if (!PLOTMASK)
  {
    if (!wp->is_it_jpeg)
    {
      LDR_WP(r_inoffset, save_inoffset)
    }
    else
    {
      /* We could save this register, but there's not all that much point - simpler to code like this. */
      MOV(R(r_inoffset),IMM(1), "MOV r_inoffset,#1 ; JPEG coord offset on input");
    }
  }

  /* Row step for the mask: its own offset for a BPP mask, the sprite's for PlotMask. */
  if (SOURCE_BPPMASK)
  {
    LDR_WP(r_maskinoffset, save_maskinoffset)
  }
  else if (PLOTMASK)
  {
    LDR_WP(r_maskinoffset, save_inoffset)
  }
}
/**************************************************************************
* *
* Bitblit: Pixel loading, translation, saving. *
* *
**************************************************************************/
static void fetch_pixel_unmasked(asm_workspace *wp, workspace *ws)
/* Assuming no mask, get the next input pixel and put it in r_pixel. This is separated
 * from fetch_pixel for the case of scaling up an ordered dither, where the same input
 * pixel is repeatedly fetched and translated.
 *
 * NOTE(review): in the PLOTMASK case only the 16-bit destination branch writes
 * r_pixel; the 32-bit branch loads r_inword and r_bgcolour instead, and the branch
 * for narrower outputs emits nothing (its code is commented out). Confirm that the
 * rest of the PlotMask path expects this.
 */
{
if (PLOTMASK)
{
comment(ws, "Fetch an ECF pixel");
if (DEST_32_BIT)
{
/* Load the ECF word at r_ecfindex into r_inword and the word after it into
 * r_bgcolour; r_ecfindex is stepped forward and back again, so it ends unchanged.
 */
ins(ws, LDR(R(r_inword), R(r_inptr))
| INDEX(R(r_ecfindex), 0), "LDR r_inword,[r_inptr,r_ecfindex] 2222");
ADD(R(r_ecfindex), R(r_ecfindex),
IMM(4), "ADD r_ecfindex,r_ecfindex,#4 5t453");
ins(ws, LDR(R(r_bgcolour), R(r_inptr))
| INDEX(R(r_ecfindex), 0), "LDR r_bgcolour,[r_inptr,r_ecfindex] ; load next EOR word of ECF222");
SUB(R(r_ecfindex), R(r_ecfindex),
IMM(4), "SUB r_ecfindex,r_ecfindex,#4 1212");
}
else
{
if (DEST_16_BIT)
{
/* Shift up then down by 16 to keep only the bottom halfword of r_inword. */
MOV(R(r_pixel), OP2R(R(r_inword)) | LSLI(16), "MOV r_pixel,r_inword,LSL #16 ; fetch 16 bit ECF pattern pixel");
MOV(R(r_pixel), OP2R(R(r_pixel)) | LSRI(16), "MOV r_pixel,r_pixel,LSR #16");
}
else
{
// AND(R(r_pixel), R(r_inword), IMM(ws->out_pixmask),"AND r_pixel,r_inword,#out_pixmask ; fetch the pixel from the ECF pattern");
// AND(R(r_pixel), R(r_pixel), OP2R(R(r_bgcolour)), "AND r_pixel,r_pixel,r_bgcolour ; turn it into a background colour pixel");
}
}
}
else
{
comment(ws, "Fetch a source pixel");
if (SOURCE_32_BIT)
/* A 32-bit pixel is a whole word, read straight from the input pointer. */
ins(ws, LDR(R(r_pixel), R(r_inptr)) | OFFSET(0), "LDR r_pixel,[r_inptr]");
else if (SOURCE_16_BIT)
{
/* Shift up then down by 16 to keep only the bottom halfword of r_inword. */
MOV(R(r_pixel), OP2R(R(r_inword)) | LSLI(16), "MOV r_pixel,r_inword,LSL #16 ; fetch 16 bit pixel");
MOV(R(r_pixel), OP2R(R(r_pixel)) | LSRI(16), "MOV r_pixel,r_pixel,LSR #16");
/* >>> Maybe we can leave it in the top 16 bits, and get by? Not yet. */
}
else
{
/* Narrower pixels are masked out of the current input word. */
AND(R(r_pixel), R(r_inword), IMM(ws->in_pixmask), "AND r_pixel,r_inword,#in_pixmask ; fetch the pixel");
}
}
}
static BOOL fetch_pixel(asm_workspace *wp, workspace *ws, label *l_masked)
/* Emit code that fetches the current pixel into r_pixel. When the source
 * has a mask, the mask is tested first and transparent pixels branch out
 * to l_masked. The result tells the caller whether such a branch was
 * emitted (non-zero) or not (FALSE).
 */
{
#ifdef DEBUG
  char a[256];
#endif

  if (!SOURCE_MASK)
  {
    /* No mask: nothing can ever branch to l_masked. */
    fetch_pixel_unmasked(wp, ws);
    return FALSE;
  }

  /* Bit 0 of the current mask word clear means "transparent": skip the pixel. */
  TST(R(r_maskinword), IMM(1), "TST r_maskinword,#1");
  IFDEBUG(do_sprintf(a, "BEQ %s", l_masked->name);)
  branch(ws, B | EQ, l_masked, a);

  fetch_pixel_unmasked(wp, ws);
  return SOURCE_MASK;
}
static BOOL fetch_pixel2(asm_workspace *wp, workspace *ws, label *l_masked)
/* Check the mask, fetch the pixel after the current one. You are assured
 * that no word of input need be loaded between these two. If the pixel is
 * transparent then branch out to l_masked. Return TRUE if the branch could be
 * taken, else FALSE.
 *
 * Only used by the 2-at-a-time loop, which PlotMask never uses.
 */
{
#ifdef DEBUG
  char a[256];
#endif
  assert(!PLOTMASK, ERROR_FATAL); /* Doesn't do 2-at-a-time loop */
  if (SOURCE_MASK) /* Test the second pixel of mask */
  {
    if (SOURCE_BPPMASK) /* we may have reached the end of mask word if not doing an aligned plot */
    {
      /* Step the mask on by one pixel, reloading the mask word if it ran out,
       * so that the second pixel's mask bit ends up in bit 0.
       */
      MOV(R(r_maskinword), OP2R(R(r_maskinword))
          | RORI(ws->mask_bpp), "x"/*MOV r_maskinword,r_maskinword,ROR #mask_bpp"*/);
      SUB(R(r_maskinshift),R(r_maskinshift),
          S | IMM(ws->mask_bpp*2) | IMMROR(6), "x"/*SUBS r_maskinshift,r_maskinshift,#mask_bpp:SHL:27"*/);
      ins(ws, LDR(R(r_maskinword), R(r_maskinptr))
          | EQ | WRITEBACK | OFFSET(4), "x"/* "LDREQ r_maskinword,[r_maskinptr,#4]! ; load more mask pixels (inc2)"*/);
      TST(R(r_maskinword), IMM(1), "TST r_maskinword,#1");
    }
    else
    {
      /* The second pixel's mask bit sits mask_bpc bits above the first's. */
      TST(R(r_maskinword),
          ws->mask_bpc < 8
          ? IMM(1 << ws->mask_bpc)
          : IMM(1) | IMMROR(32 - ws->mask_bpc), "TST r_maskinword,#1:SHL:mask_bpc");
    }
    /* Format the branch text for BOTH mask layouts: the buffer is handed to
     * branch() whichever test was emitted above, so it must never be left
     * uninitialised.
     */
    IFDEBUG(do_sprintf(a, "BEQ %s", l_masked->name);)
    branch(ws, B | EQ, l_masked, a);
  }
  comment(ws, "Fetch the source pixel after the current one");
  if (SOURCE_32_BIT)
    ins(ws, LDR(R(r_pixel), R(r_inptr)) | OFFSET(4), "LDR r_pixel,[r_inptr,#4]");
  else if (SOURCE_16_BIT)
  {
    /* The second 16-bit pixel is the top halfword of the input word. */
    MOV(R(r_pixel), OP2R(R(r_inword)) | LSRI(16), "MOV r_pixel,r_inword,LSR #16");
    /* >>> Getting it into top 16bits harder in this case! */
  }
  else
    AND(R(r_pixel), R(r_in_pixmask),
        OP2R(R(r_inword)) | LSRI(ws->in_bpc), "AND r_pixel,r_in_pixmask,r_inword,LSR #in_bpc"
        " ; fetch the next pixel");
  return SOURCE_MASK;
}
#ifdef DEBUG
static void add_ordered_dither_gun(asm_workspace *wp, workspace *ws, int bits_per_gun, int offset, char *gun)
#else
#define add_ordered_dither_gun(a,b,c,d,e) do_add_ordered_dither_gun(a,b,c,d)
static void do_add_ordered_dither_gun(asm_workspace *wp, workspace *ws, int bits_per_gun, int offset)
#endif
/* Helper for add_ordered_dither: handle a single colour gun.
 * 'offset' is the bit position, counted from bit 0, at which this gun's
 * field starts; the gun name is only wanted for the DEBUG trace text.
 */
{
  /* Shifting left by this amount puts the top of the gun's field at bit 31. */
  int shift = 32 - bits_per_gun - offset;
  IFDEBUG(char a[256];)

  UNUSED(wp);

  /* Trial addition: carry set means adding the dither value would overflow the gun. */
  IFDEBUG(do_sprintf(a, "CMN r_oditheradd,r_pixel,LSL #%i %t40; will the %s value overflow?", shift, gun);)
  CMN(R(r_oditheradd), OP2R(R(r_pixel)) | LSLI(shift), a);

  /* Only when there was no carry is the dither value really added in. */
  IFDEBUG(do_sprintf(a, "ADDCC r_pixel,r_pixel,r_oditheradd,LSR #%i %t40; if not, add.", shift);)
  ADD(R(r_pixel), R(r_pixel), CC | OP2R(R(r_oditheradd)) | LSRI(shift), a);
}
static void add_ordered_dither(asm_workspace *wp, workspace *ws, int bits_per_gun)
/* Emit code adding the ordered dither value to each gun of r_pixel.
 *
 * bits_per_gun is 5 or 8. Each of R/G/B in r_pixel gets
 * r_oditheradd >> (32-bits_per_gun) added to it, except that a gun which
 * would overflow is left alone, so the additions 'stick' at the top value.
 *
 * The results are about to be truncated, so the low bits of each answer
 * hardly matter: a gun currently at 254 is never added to, and that is
 * acceptable.
 */
{
  if (!ws->odither) /* turn off for now */
    return;

  comment(ws, "Add current value for ordered dither");

  /* Highest field first: blue, then green, then red at bit 0. */
  add_ordered_dither_gun(wp, ws, bits_per_gun, 2*bits_per_gun, "blue");
  add_ordered_dither_gun(wp, ws, bits_per_gun, 1*bits_per_gun, "green");
  add_ordered_dither_gun(wp, ws, bits_per_gun, 0, "red");
  newline();
}
static void translate_pixel(asm_workspace *wp, workspace *ws)
/* Translate r_pixel from being a source pixel, to being a destination pixel.
 *
 * Emits the instructions that convert r_pixel in place. The local pixl2bpp
 * tracks the log2 bits-per-pixel of the value held in r_pixel as each
 * conversion stage is emitted; it starts at wp->save_inlog2bpp and is
 * asserted to equal ws->out_l2bpp once all stages have been generated.
 */
{
int pixl2bpp = wp->save_inlog2bpp;
if (PLOTMASK)
{
/* For the AND plot action the three instructions below leave the low bits of
 * r_pixel alone and set every bit from bit wp->BPP upwards (shift the pixel
 * to the top, force bit 31, arithmetic-shift back down).
 */
if ((ws->gcol & 7) == 2) /* AND plot action */
{
MOV(R(r_pixel), OP2R(R(r_pixel)) | LSLI(31-(wp->BPP)), "MOV r_pixel, r_pixel, LSL 31-out_bpp ;a");
ORR(R(r_pixel), R(r_pixel), IMM(2) | IMMROR(2), "ORR r_pixel,r_pixel,#&80000000 ;a");
MOV(R(r_pixel), OP2R(R(r_pixel)) | ASRI(31-(wp->BPP)), "MOV r_pixel, r_pixel, ASR 31-out_bpp ;a");
}
return; /* No more transformation necessary */
}
/* Dither on 8 bits per gun when the source is 32bpp, otherwise on 5 */
if (ws->odither) add_ordered_dither(wp, ws, pixl2bpp == 5 ? 8 : 5); /* do ordered dither */
comment(ws, "Perform any0 transformation necessary");
/* Stage 1: source palette lookup through r_table, indexed by r_pixel */
if (wp->trns_palette != 0)
{
assert(pixl2bpp <= 3, ERROR_FATAL);
if (wp->BPP == 16)
{
/* One word per entry; the result is treated as a 16bpp value */
ins(ws, LDR(R(r_pixel), R(r_table))
| INDEX(R(r_pixel), 2), "LDR r_pixel,[r_table, r_pixel, LSL #2] ; 16bpp palette lookup");
pixl2bpp = 4;
}
else
{
/* Entries are 8 bytes apart (index scaled by LSL #3); the first word is
 * shifted down 8 bits to give a 32bpp value.
 */
ins(ws, LDR(R(r_pixel), R(r_table))
| INDEX(R(r_pixel), 3), "LDR r_pixel,[r_table, r_pixel, LSL #3] ; palette lookup");
MOV(R(r_pixel), OP2R(R(r_pixel)) | LSRI(8), "MOV r_pixel,r_pixel,LSR #8 ; BBGGRR00 to 00BBGGRR ");
pixl2bpp = 5;
}
}
/* Stage 2: 32bpp value but not a 32bpp destination - reduce to a 16bpp value
 * by keeping the top 5 bits of each gun. Skipped for JPEG input with
 * dither_truecolour bit 1 set unless the destination is 16bpp.
 */
if (pixl2bpp == 5 && wp->BPP != 32 && !(wp->is_it_jpeg && (wp->dither_truecolour & 2) && (wp->BPP != 16))) /* all we can do is truncate to 4, as a first stage. */
{
/* if (wp->BPP == 16 && ws->odither) add_ordered_dither(wp, ws, 8); */
comment(ws, " Taken from munge32to16 fedcba9876543210 fedcba9876543210");
comment(ws, " r_pixel = 00000000bbbbbbbb ggggggggrrrrrrrr");
AND(R(r_temp1),R(r_pixel),IMM(0xf8) | IMMROR(16), "AND r_temp1,r_pixel,#&F80000 ; r_temp1 = 00000000bbbbb000 0000000000000000");
MOV(R(r_temp2), OP2R(R(r_temp1)) | LSLI(7), "MOV r_temp2,r_temp1,LSL #7 ; r_temp2 = 0bbbbb0000000000 0000000000000000");
AND(R(r_temp1), R(r_pixel), IMM(0xf8)|IMMROR(24), "AND r_temp1,r_pixel,#&f800 ; r_temp1 = 0000000000000000 ggggg00000000000");
ORR(R(r_temp2),R(r_temp2),OP2R(R(r_temp1))|LSLI(10),"ORR r_temp2,r_temp2,r_temp1,LSL #10 ; r_temp2 = 0bbbbbggggg00000");
AND(R(r_temp1), R(r_pixel), IMM(0xf8), "AND r_temp1,r_pixel,#&F8 ; r_temp1 = 0000000000000000 00000000rrrrr000");
ORR(R(r_pixel),R(r_temp2),OP2R(R(r_temp1))|LSLI(13),"ORR r_pixel,r_temp2,r_temp1,LSL #13 ; r_pixel = 0bbbbbgggggrrrrr");
MOV(R(r_pixel),OP2R(R(r_pixel)) | LSRI(16), "MOV r_pixel,r_pixel,LSR #16 ; result in bottom half");
/* >>> check re keeping 16bit r_pixel at the top */
pixl2bpp = 4;
}
/* Stage 3: 16bpp value going to a 32bpp destination - widen each 5-bit gun
 * to 8 bits, replicating its top three bits into the new low three bits.
 */
if (pixl2bpp == 4 && wp->BPP == 32) /* pad out to 32bpp */
{
MOV(R(r_pixel),OP2R(R(r_pixel)) | LSLI(16), "MOV r_pixel,r_pixel,LSL #16 ; input in top half");
/* >>> check re keeping 16bit r_pixel at the top */
comment(ws, " ; fedcba9876543210 fedcba9876543210");
comment(ws, " ; r_pixel = 0bbbbbgggggrrrrr");
MOV(R(r_temp1), OP2R(R(r_pixel)) | LSRI(26), "MOV r_temp1,r_pixel,LSR #26 ; r_temp1 = 0bbbbb");
MOV(R(r_temp2), OP2R(R(r_temp1)) | LSLI(19), "MOV r_temp2,r_temp1,LSL #19 ; r_temp2 = 0bbbbb000 0000000000000000");
AND(R(r_temp1), R(r_pixel), IMM(0x3E) | IMMROR(12), "AND r_temp1,r_pixel,#&03E00000 ; r_temp1 = 000000ggggg00000");
ORR(R(r_temp2), R(r_temp2),OP2R(R(r_temp1))|LSRI(10),"ORR r_temp2,r_temp2,r_temp1,LSR #10 ; r_temp2 = 0bbbbb000 ggggg00000000000");
MOV(R(r_temp1), OP2R(R(r_pixel)) | LSLI(11), "MOV r_temp1,r_pixel,LSL #11 ; r_temp1 = rrrrr00000000000 0000000000000000");
ORR(R(r_temp2), R(r_temp2),OP2R(R(r_temp1))|LSRI(24),"ORR r_temp2,r_temp2,r_temp1,LSR #24 ; r_temp2 = 0bbbbb000 ggggg000rrrrr000");
comment(ws, "Now copy the top three bits of each colour component into the bottom three");
comment(ws, " ; r_c1632 = 0000000011100000 1110000011100000");
AND(R(r_temp1), R(r_temp2), OP2R(R(r_c1632)), "AND r_temp1,r_temp2,r_c1632 ; r_temp1 = 00000000bbb00000 ggg00000rrr00000");
ORR(R(r_pixel), R(r_temp2),OP2R(R(r_temp1))|LSRI(5), "ORR r_pixel,r_temp2,r_temp1,LSR #5 ; r_pixel = 00000000bbbbbbbb ggggggggrrrrrrrr");
pixl2bpp = 5;
}
/* Stage 4: final conversion to the destination depth */
/* Translation table lookup */
if (wp->ColourTTR != 0)
{
comment(ws, "We have a translation table.");
if (ws->out_l2bpp <= 3) /* ie BPP <= 8 */
{
assert(pixl2bpp <= 4, ERROR_FATAL); /* up to 32K entries in byte table */
/* if (pixl2bpp == 4 && ws->odither) add_ordered_dither(wp, ws, 5); */
ins(ws, LDRB(R(r_pixel), R(r_table)) | INDEX(R(r_pixel), 0), "LDRB r_pixel,[r_table, r_pixel] ; byte table lookup");
}
else
{
assert(pixl2bpp <= 3, ERROR_FATAL); /* up to 256 entries in word table */
ins(ws, LDR(R(r_pixel), R(r_table)) | INDEX(R(r_pixel), 2), "LDR r_pixel,[r_table, r_pixel, LSL #2] ; word table lookup");
/* >>> with 16bpp that could be in the top half? Not sure... */
}
pixl2bpp = ws->out_l2bpp; /* we've finished */
}
else if (wp->is_it_jpeg && (wp->dither_truecolour & 2))
{
/* bottom n bits of word contains colour number we want... */
pixl2bpp = ws->out_l2bpp; /* we've finished */
comment(ws, "JPEG error diffusion should have done all the work!");
}
else if (pixl2bpp == 4 && ws->out_l2bpp < 4)
{
/* Hack for JPEG data in RISC OS 3
* r_pixel is a 16bpp colour value at the moment, but we have no lookup table for the 16->1/2/4/8 transition
* For 1/2/4bpp we use the top bits of red as the grey level. From a JPEG source this will work
* fine, as the JPEG will have noticed that the output is mono and simply produced greyscale
* output.
*/
comment(ws, "Colour truncation without lookup table.\n");
if (ws->out_l2bpp == 0) /* 1bpp */
{
comment(ws, "Creating 0 or 1 from 0bbbbbgg gggrrrrr");
TST(R(r_pixel), IMM(16), "TST r_pixel,#16 ; test hi bit of R");
MOV(R(r_pixel), IMM(1), "MOV r_pixel,#1 ; black");
MOV(R(r_pixel), NE | IMM(0), "MOVNE r_pixel,#0 ; white");
pixl2bpp = 0;
}
else if (ws->out_l2bpp == 1) /* 2bpp */
{
comment(ws, "Creating 0,1,2 or 3 from 0bbbbbgg gggrrrrr");
MOV(R(r_pixel), OP2R(R(r_pixel)) | LSRI(3), "MOV r_pixel,r_pixel,LSR #3 ; hi 2 bits of R");
AND(R(r_pixel), R(r_pixel), IMM(3), "AND r_pixel,r_pixel,#3 ; mask off everything else");
RSB(R(r_pixel), R(r_pixel), IMM(3), "RSB r_pixel,r_pixel,#3 ; change to 0->white, 3->black");
pixl2bpp = 1;
}
/* NOTE(review): this is a plain 'if' rather than 'else if'. It is harmless
 * because the out_l2bpp tests in this chain are mutually exclusive.
 */
if (ws->out_l2bpp == 2) /* 4bpp */
{
comment(ws, "Creating wimp colour in 0..7 from 0bbbbbgg gggrrrrr");
MOV(R(r_pixel), OP2R(R(r_pixel)) | LSRI(2), "MOV r_pixel,r_pixel,LSR #2 ; hi 3 bits of R");
AND(R(r_pixel), R(r_pixel), IMM(7), "AND r_pixel,r_pixel,#7 ; mask off everything else");
RSB(R(r_pixel), R(r_pixel), IMM(7), "RSB r_pixel,r_pixel,#7 ; change to 0->white, 7->black");
pixl2bpp = 2;
}
else if (ws->out_l2bpp == 3) /* 8bpp from 16bpp true colour, no lookup table - only for use on RISC OS 3.10 */
{
/* Get the top two bits of each gun. The organisation is:
* bit 0 - tint 0
* bit 1 - tint 1
* bit 2 - red 2
* bit 3 - blue 2
* bit 4 - red 3 (high)
* bit 5 - green 2
* bit 6 - green 3 (high)
* bit 7 - blue 3 (high)
*/
comment(ws, "Creating bggrbrtt from 0bbbbbgg gggrrrrr");
/* Making the tint - derived from the lo 3 bits of each of R, G and B. The
* three 3-bit fields are added together and the sum multiplied by 5 (the ADD
* with LSL #2 below); the final ORR shifts that right by 5 to leave the
* 2-bit tint in bits 0-1. The lo bits are involved in case they produce a
* useful carry.
*/
AND(R(r_temp1), R(r_pixel), IMM(0x1C) | IMMROR(24), "AND r_temp1,r_pixel,#&1C00 ; bottom 3 bits of B");
MOV(R(r_temp2), OP2R(R(r_temp1)) | LSRI(10), "MOV r_temp2,r_temp1,LSR #10 ; at bottom of temp2");
AND(R(r_temp1), R(r_pixel), IMM(0xE0), "AND r_temp1,r_pixel,#&E0 ; bottom 3 bits of G");
ADD(R(r_temp2), R(r_temp2), OP2R(R(r_temp1)) | LSRI(5), "ADD r_temp2,r_temp2,r_temp1,LSR #5 ; add to bottom B bits");
AND(R(r_temp1), R(r_pixel), IMM(0x07), "AND r_temp1,r_pixel,#&07 ; bottom 3 bits of R");
ADD(R(r_temp2), R(r_temp2), OP2R(R(r_temp1)), "ADD r_temp2,r_temp2,r_temp1 ; add to bottom B+G bits");
ADD(R(r_temp2), R(r_temp2), OP2R(R(r_temp2)) | LSLI(2), "ADD r_temp2,r_temp2,r_temp2,LSL #2 ; (lo R+G+B)*5 (< 128)");
/* The hi bits are just done by extracting from the 16bpp value. This takes ages! */
MOV(R(r_temp1), IMM(0), "MOV r_temp1,#0 ; building result pixel for hi bits");
/* Top bits of B */
TST(R(r_pixel), IMM(64) | IMMROR(24), "TST r_pixel,#&4000 ; test top bit of B");
ORR(R(r_temp1), R(r_temp1), NE | IMM(128), "ORRNE r_temp1,r_temp1,#128 ; bit 7 = top bit of B");
TST(R(r_pixel), IMM(32) | IMMROR(24), "TST r_pixel,#&2000 ; test next bit of B");
ORR(R(r_temp1), R(r_temp1), NE | IMM(8), "ORRNE r_temp1,r_temp1,#8 ; bit 3 = next bit of B");
/* Top bits of G */
TST(R(r_pixel), IMM(2) | IMMROR(24), "TST r_pixel,#&200 ; test top bit of G");
ORR(R(r_temp1), R(r_temp1), NE | IMM(64), "ORRNE r_temp1,r_temp1,#64 ; bit 6 = top bit of G");
TST(R(r_pixel), IMM(1) | IMMROR(24), "TST r_pixel,#&100 ; test next bit of G");
ORR(R(r_temp1), R(r_temp1), NE | IMM(32), "ORRNE r_temp1,r_temp1,#32 ; bit 5 = next bit of G");
/* Top bits of R */
TST(R(r_pixel), IMM(16), "TST r_pixel,#&10 ; test top bit of R");
ORR(R(r_temp1), R(r_temp1), NE | IMM(16), "ORRNE r_temp1,r_temp1,#16 ; bit 4 = top bit of R");
TST(R(r_pixel), IMM(8), "TST r_pixel,#&08 ; test next bit of R");
ORR(R(r_temp1), R(r_temp1), NE | IMM(4), "ORRNE r_temp1,r_temp1,#4 ; bit 2 = next bit of R");
ORR(R(r_pixel), R(r_temp1), OP2R(R(r_temp2)) | LSRI(5), "ORR r_pixel,r_temp1,r_temp2,LSR #5 ; combine hi bits and tint");
pixl2bpp = 3;
}
}
assert(pixl2bpp == ws->out_l2bpp, ERROR_FATAL); /* If this hasn't happened, we haven't completed the transformation. */
/* Same bit-setting sequence as in the PLOTMASK case above: for the AND plot
 * action, set every bit of r_pixel from bit wp->BPP upwards. Not done when
 * the pixel is a full 32-bit word.
 */
if (((ws->gcol & 7) == 2) && (pixl2bpp != 5)) /* AND plot action which did something stupid for 32bpp (GPS)*/
{
MOV(R(r_pixel), OP2R(R(r_pixel)) | LSLI(31-(wp->BPP)), "MOV r_pixel, r_pixel, LSL 31-out_bpp");
ORR(R(r_pixel), R(r_pixel), IMM(2) | IMMROR(2), "ORR r_pixel,r_pixel,#&80000000 ");
MOV(R(r_pixel), OP2R(R(r_pixel)) | ASRI(31-(wp->BPP)), "MOV r_pixel, r_pixel, ASR 31-out_bpp");
}
comment(ws, "r_pixel is now a destination pixel.");
/* Double-pixel output: duplicate the pixel into the adjacent wp->BPP bits */
if (DPIXEL_OUTPUT)
ORR(R(r_pixel), R(r_pixel), OP2R(R(r_pixel)) | LSLI(wp->BPP), "ORR r_pixel,r_pixel,r_pixel,LSL #out_bpp:SHR:1 ; double pixel output");
newline();
}
static void save_pixel(asm_workspace *wp, workspace *ws)
/* Save the new pixel into the current destination pixel.
 *
 * Emits the code that combines r_pixel with the destination according to
 * ws->gcol. For a 32-bit destination the combination is done directly on
 * memory at [r_outptr]; otherwise it is done in r_outword, which is written
 * back elsewhere. When plotting the mask (PLOTMASK) the destination is instead
 * ORed with r_inword and then EORed with r_bgcolour.
 */
/* Recall GCOL actions:
* 0 -> overwrite old pixel
* 1 -> OR with old pixel
* 2 -> AND with old pixel
* 3 -> EOR with old pixel
* 4 -> invert old pixel
* 5 -> do nothing
* 6 -> AND old pixel with NOT of new pixel
* 7 -> OR old pixel with NOT of new pixel
*/
{
comment(ws, "Put the pixel in the output stream.");
if (PLOTMASK)
{
if (DEST_32_BIT)
{
/* Read-modify-write of the destination word: (dest OR r_inword) EOR r_bgcolour */
ins(ws, LDR(R(r_pixel), R(r_outptr)) | OFFSET(0), "LDR r_pixel,[r_outptr] ;bkah");
ORR(R(r_pixel), R(r_inword), OP2R(R(r_pixel)), "ORR r_pixel,r_inword,r_pixel ; 1OR gcol action");
EOR(R(r_pixel), R(r_bgcolour), OP2R(R(r_pixel)), "EOR r_pixel,r_bgcolour,r_pixel ; 1EOR gcol action");
ins(ws, STR(R(r_pixel), R(r_outptr)) | OFFSET(0), "STR r_pixel,[r_outptr] ;blaq5h");
}
else
{
if (DEST_16_BIT)
{
/* A 16-bit mask cannot be an immediate, so the low halfword of each
 * pattern word is isolated with an LSL #16 / LSR #16 pair.
 */
MOV(R(r_pixel), OP2R(R(r_inword)) | LSLI(16), "MOV r_pixel,r_inword,LSL #16 ; fetch 16 bit ECF pattern pixel44 99");
MOV(R(r_pixel), OP2R(R(r_pixel)) | LSRI(16), "MOV r_pixel,r_pixel,LSR #16 ; 4444444");
ORR(R(r_outword), R(r_outword), OP2R(R(r_pixel)), "ORR r_outword,r_outword,r_pixel ; ECF OR mask44 99");
MOV(R(r_pixel), OP2R(R(r_bgcolour)) | LSLI(16), "MOV r_pixel,r_bgcolour,LSL #16 ; fetch 16 bit ECF pattern pixel 4499");
MOV(R(r_pixel), OP2R(R(r_pixel)) | LSRI(16), "MOV r_pixel,r_pixel,LSR #16 ;449");
EOR(R(r_outword), R(r_outword), OP2R(R(r_pixel)), "EOR r_outword,r_outword,r_pixel ; ECF EOR mask 4499");
}
else
{
AND(R(r_pixel), R(r_inword), IMM(ws->out_pixmask), "1AND r_pixel,r_inword,#out_pixmask ; blah blah");
ORR(R(r_outword), R(r_outword), OP2R(R(r_pixel)), "ORR r_outword,r_outword,r_pixel ; ECF OR mask");
AND(R(r_pixel), R(r_bgcolour), IMM(ws->out_pixmask), "1AND r_pixel,r_bgcolour,#out_pixmask jthjg");
EOR(R(r_outword), R(r_outword), OP2R(R(r_pixel)), "EOR r_outword,r_outword,r_pixel ; ECF EOR mask");
}
}
}
else
{
if (DEST_32_BIT)
{
if (ws->gcol != 0) /* Not just a simple store operation */
{
/* Load the old pixel, combine into r_temp1, store it back */
ins(ws, LDR(R(r_temp1), R(r_outptr)) | OFFSET(0), "LDR r_temp1,[r_outptr]");
switch(ws->gcol)
{
/* Cases 7 and 6 invert r_pixel and then deliberately fall through to the
 * plain OR / AND case that follows each of them.
 */
case 7: MVN(R(r_pixel), OP2R(R(r_pixel)), "MVN r_pixel,r_pixel ; OR with neg action");
/* fall through */
case 1: ORR(R(r_temp1), R(r_pixel), OP2R(R(r_temp1)), "ORR r_temp1,r_pixel,r_temp1 ; OR gcol action"); break;
case 6: MVN(R(r_pixel), OP2R(R(r_pixel)), "MVN r_pixel,r_pixel ; AND with neg action");
/* fall through */
case 2: AND(R(r_temp1), R(r_pixel), OP2R(R(r_temp1)), "AND r_temp1,r_pixel,r_temp1 ; AND gcol action"); break;
case 3: EOR(R(r_temp1), R(r_pixel), OP2R(R(r_temp1)), "EOR r_temp1,r_pixel,r_temp1 ; EOR gcol action"); break;
case 4: MVN(R(r_temp1), OP2R(R(r_temp1)), "MVN r_temp1,r_temp1 ; neg gcol action"); break;
/* case 5: is a NOP */
}
ins(ws, STR(R(r_temp1), R(r_outptr)) | OFFSET(0), "STR r_temp1,[r_outptr]");
if ((ws->gcol == 7) || (ws->gcol == 6)) /* put r_pixel back as we found it */
MVN(R(r_pixel), OP2R(R(r_pixel)), "1MVN r_pixel,r_pixel ; Put r_pixel back");
}
else
{
ins(ws, STR(R(r_pixel), R(r_outptr)) | OFFSET(0), "STR r_pixel,[r_outptr]");
}
}
else
{
/* Sub-word destination: the pixel is combined into r_outword. For actions
 * 6 and 7 r_pixel is inverted within the pixel mask first and restored
 * after the switch. A 16-bit mask takes two EOR/BIC instructions (low byte
 * then high byte).
 */
if (ws->gcol == 6 || ws->gcol == 7) /* and/or with NOT of incoming pixel */
{
if (DESTD_16_BIT)
{
EOR(R(r_pixel), R(r_pixel), IMM(255), "1EOR r_pixel,r_pixel,#0x00ff ; act with NOT of input pixel");
EOR(R(r_pixel), R(r_pixel), IMM(255) | IMMROR(24), "1EOR r_pixel,r_pixel,#0xff00");
}
else
EOR(R(r_pixel), R(r_pixel), IMM(ws->out_dpixmask), "1EOR r_pixel,r_pixel,#out_dpixmask ; act with NOT of input pixel");
}
switch (ws->gcol)
{
case 0:
/* Overwrite: clear the old pixel bits (only needed with a mask), then OR in */
if (SOURCE_MASK) /* if no mask, the pixels are clear already */
{
if (DESTD_16_BIT)
{
BIC(R(r_outword), R(r_outword), IMM(255), "BIC r_outword,r_outword,#0x00ff");
BIC(R(r_outword), R(r_outword), IMM(255) | IMMROR(24), "BIC r_outword,r_outword,#0xff00");
}
else
BIC(R(r_outword), R(r_outword), IMM(ws->out_dpixmask), "BIC r_outword,r_outword,#out_dpixmask");
}
/* fall through */
case 7:
case 1: ORR(R(r_outword), R(r_outword), OP2R(R(r_pixel)), "ORR r_outword,r_outword,r_pixel ; gcol action"); break;
case 6:
case 2: AND(R(r_outword), R(r_outword), OP2R(R(r_pixel)), "AND r_outword,r_outword,r_pixel ; AND gcol action"); break;
case 3: EOR(R(r_outword), R(r_outword), OP2R(R(r_pixel)), "EOR r_outword,r_outword,r_pixel ; EOR gcol action"); break;
case 4: if (DESTD_16_BIT)
{
EOR(R(r_outword), R(r_outword), IMM(255), "EOR r_outword,r_outword,#0x00ff ; negate existing pixel");
EOR(R(r_outword), R(r_outword), IMM(255) | IMMROR(24),"EOR r_outword,r_outword,#0xff00");
}
else
EOR(R(r_outword), R(r_outword), IMM(ws->out_dpixmask),"EOR r_outword,r_outword,#out_dpixmask ; negate existing pixel");
break;
case 5: comment(ws, "no GCOL action"); break;
}
if (ws->gcol == 6 || ws->gcol == 7) /* put r_pixel back as we found it in case scaling > 1:1! */
{
if (DESTD_16_BIT)
{
EOR(R(r_pixel), R(r_pixel), IMM(255), "EOR r_pixel,r_pixel,#0x00ff ; put r_pixel back as it was");
EOR(R(r_pixel), R(r_pixel), IMM(255) | IMMROR(24), "EOR r_pixel,r_pixel,#0xff00 ; put r_pixel back as it was");
}
else
EOR(R(r_pixel), R(r_pixel), IMM(ws->out_dpixmask), "EOR r_pixel,r_pixel,#out_dpixmask ; put r_pixel back as it was");
}
}
}
}
static void save_pixel_opt(asm_workspace *wp, workspace *ws)
/* Pixel store used by the optimised >5 scaling code.
 *
 * Emits code that clears the current pixel's bits in r_outword and then ORs
 * r_pixel into them. No other GCOL action is handled here.
 */
{
  if (!(DESTD_16_BIT))
  {
    BIC(R(r_outword), R(r_outword), IMM(ws->out_dpixmask), "5BIC r_outword,r_outword,#out_dpixmask");
  }
  else
  {
    /* A 16-bit mask is cleared as two byte-sized immediates: low, then high */
    BIC(R(r_outword), R(r_outword), IMM(255),              "3BIC r_outword,r_outword,#0x00ff");
    BIC(R(r_outword), R(r_outword), IMM(255) | IMMROR(24), "4BIC r_outword,r_outword,#0xff00");
  }

  ORR(R(r_outword), R(r_outword), OP2R(R(r_pixel)), "6ORR r_outword,r_outword,r_pixel ; gcol action");
}
static void save_pixel2(asm_workspace *wp, workspace *ws)
/* Emit code storing r_pixel into the pixel that follows the current
 * destination pixel. Only the overwrite action (gcol 0) is supported; this
 * is asserted.
 */
{
  comment(ws, "Put the pixel in the output stream, one after the 'current' pixel.");

  /* Current limitation */
  assert(ws->gcol == 0, ERROR_FATAL);

  if (DEST_32_BIT)
  {
    /* The next pixel is simply the next word in memory */
    ins(ws, STR(R(r_pixel), R(r_outptr)) | OFFSET(4), "STR r_pixel,[r_outptr,#4]");
    return;
  }

  /* Sub-word destination: the next pixel lives wp->BPC bits up in r_outword.
   * Its old contents are cleared first only when a mask is in use.
   */
  if (SOURCE_MASK)
  {
    if (wp->BPC == 16) /* DEST_16_BIT but includes double-pixel 256-colour mode 10 too */
    {
      BIC(R(r_outword), R(r_outword), IMM(255) | IMMROR(16), "BIC r_outword,r_outword,#0x00ff0000");
      BIC(R(r_outword), R(r_outword), IMM(255) | IMMROR(8),  "BIC r_outword,r_outword,#0xff000000");
    }
    else if (wp->BPC == 1)
    {
      /* IMMROR arg must be an even number, so the 1-bit case is spelt out */
      BIC(R(r_outword), R(r_outword), IMM(2), "BIC r_outword,r_outword,#out_dpixmask:SHL:out_bpc");
    }
    else
    {
      BIC(R(r_outword), R(r_outword),
          IMM(ws->out_dpixmask) | IMMROR(32 - wp->BPC), "BIC r_outword,r_outword,#out_dpixmask:SHL:out_bpc");
    }
  }

  ORR(R(r_outword), R(r_outword),
      OP2R(R(r_pixel)) | LSLI(wp->BPC), "ORR r_outword,r_outword,r_pixel,LSL #out_bpc");
}
/**************************************************************************
* *
* Bitblit: Advancing the current pixel. *
* *
**************************************************************************/
static void fetch_pixel_inc(asm_workspace *wp, workspace *ws)
/* Emit code that advances the source position by one pixel.
 *
 * 32-bit sources just step r_inptr by a word. Narrower sources rotate
 * r_inword by one pixel and step the shift counter r_inshift; the counter is
 * held scaled up by 27 bits so that it wraps to zero (setting Z) when a word
 * is exhausted, which triggers the conditional load of the next source word.
 *
 * The mask, if any, is advanced too: either in lock-step with the pixel data
 * (mask at the same depth, reached via r_masko) or with its own pointer and
 * shift counter (SOURCE_BPPMASK, or when plotting the mask itself).
 */
{
  comment(ws, "Advance source pointer");
  if (!PLOTMASK) /* The ECF pattern remains aligned to the destination */
  {
    if (SOURCE_32_BIT)
    {
      ADD(R(r_inptr), R(r_inptr), IMM(4), "ADD r_inptr,r_inptr,#4");
    }
    else
    {
      MOV(R(r_inword), OP2R(R(r_inword)) | RORI(ws->in_bpc), "MOV r_inword,r_inword,ROR #in_bpc");
      /* Logical AND: both operands are truth values */
      if (SOURCE_MASK && !(SOURCE_BPPMASK))
      {
        MOV(R(r_maskinword), OP2R(R(r_maskinword)) |
                             RORI(ws->in_bpc), "MOV r_maskinword,r_maskinword,ROR #in_bpc");
      }
      /* (in_bpc*2) rotated right by 6 is in_bpc << 27 */
      SUB(R(r_inshift), R(r_inshift),
          S | IMM(ws->in_bpc*2) | IMMROR(6), "SUBS r_inshift,r_inshift,#in_bpc:SHL:27 ; auto-resets itself to 0");
      ins(ws, LDR(R(r_inword), R(r_inptr))
              | EQ | WRITEBACK | OFFSET(4), "LDREQ r_inword,[r_inptr,#4]!");
    }
  }
  if (SOURCE_MASK)
  {
    if (SOURCE_BPPMASK || PLOTMASK)
    {
      /* Mask has its own word, shift counter and pointer */
      MOV(R(r_maskinword), OP2R(R(r_maskinword))
                           | RORI(ws->mask_bpp), "MOV r_maskinword,r_maskinword,ROR #mask_bpp");
      SUB(R(r_maskinshift),R(r_maskinshift),
          S | IMM(ws->mask_bpp*2) | IMMROR(6),"SUBS r_maskinshift,r_maskinshift,#mask_bpp:SHL:27 ; auto-resets itself to 0");
      ins(ws, LDR(R(r_maskinword), R(r_maskinptr))
              | EQ | WRITEBACK | OFFSET(4), "LDREQ r_maskinword,[r_maskinptr,#4]!");
    }
    else
    {
      /* Mask word is reloaded on the same EQ condition as the pixel word
       * above, from r_inptr offset by r_masko.
       */
      assert(!SOURCE_32_BIT, ERROR_FATAL);
      ins(ws, LDR(R(r_maskinword),
              R(r_inptr)) | EQ | INDEX(R(r_masko), 0), "LDREQ r_maskinword,[r_inptr,r_masko]");
    }
  }
}
static void fetch_pixel_inc2(asm_workspace *wp, workspace *ws)
/* Emit code that advances the source position by two pixels - only used in
 * the 2-at-a-time optimised loop.
 *
 * 32-bit sources step r_inptr by two words; 16-bit sources always load the
 * next word (two pixels per word); narrower sources rotate r_inword by two
 * pixels and step r_inshift by two pixels' worth, loading a new word when the
 * counter wraps to zero.
 */
{
  comment(ws, "Advance source pointer by two pixels");
  if (SOURCE_32_BIT)
  {
    ADD(R(r_inptr), R(r_inptr), IMM(8), "ADD r_inptr,r_inptr,#8 ; past 2 32-bit pixels");
  }
  else if (SOURCED_16_BIT)
  {
    /* Two pixels per word - assured of loading a new word */
    ins(ws, LDR(R(r_inword), R(r_inptr))
            | WRITEBACK | OFFSET(4), "LDR r_inword,[r_inptr,#4]! ; past 2 16-bit pixels");
  }
  else
  {
    MOV(R(r_inword), OP2R(R(r_inword)) | RORI(ws->in_bpc*2), "MOV r_inword,r_inword,ROR #in_bpc*2");
    /* Logical AND: both operands are truth values */
    if (SOURCE_MASK && !(SOURCE_BPPMASK))
    {
      MOV(R(r_maskinword), OP2R(R(r_maskinword)) |
                           RORI(ws->in_bpc*2), "MOV r_maskinword,r_maskinword,ROR #in_bpc*2");
    }
    /* in_bpc rotated right by 4 is in_bpc << 28, i.e. two pixels' worth */
    SUB(R(r_inshift), R(r_inshift),
        S | IMM(ws->in_bpc) | IMMROR(4), "SUBS r_inshift,r_inshift,#in_bpc:SHL:28 ; auto-resets itself to 0");
    ins(ws, LDR(R(r_inword), R(r_inptr))
            | EQ | WRITEBACK | OFFSET(4), "LDREQ r_inword,[r_inptr,#4]! ; load more input pixels (inc2)");
  }
  if (SOURCE_MASK)
  {
    if (SOURCE_BPPMASK)
    {
      /* NOTE(review): this steps the mask by a single mask_bpp, whereas the
       * pixel data above moves on by two pixels - confirm against the caller
       * that this is intended.
       */
      MOV(R(r_maskinword), OP2R(R(r_maskinword))
                           | RORI(ws->mask_bpp), "MOV r_maskinword,r_maskinword,ROR #mask_bpp");
      SUB(R(r_maskinshift),R(r_maskinshift),
          S | IMM(ws->mask_bpp*2) | IMMROR(6),"SUBS r_maskinshift,r_maskinshift,#mask_bpp:SHL:27");
      ins(ws, LDR(R(r_maskinword), R(r_maskinptr))
              | EQ | WRITEBACK | OFFSET(4), "LDREQ r_maskinword,[r_maskinptr,#4]! ; load more mask pixels (inc2)");
    }
    else
    {
      /* NOTE(review): the EQ condition relies on flags from the SUBS above,
       * which is not emitted on the SOURCED_16_BIT path - confirm that path
       * cannot reach here with a mask, or that the flags are set elsewhere.
       */
      assert(!SOURCE_32_BIT, ERROR_FATAL);
      ins(ws, LDR(R(r_maskinword), R(r_inptr))
              | EQ | INDEX(R(r_masko), 0), "LDREQ r_maskinword,[r_inptr,r_masko] ; load more mask pixels (inc2)");
    }
  }
}
static void odither_inc(asm_workspace *wp, workspace *ws, int xy)
/* Emit the per-output-pixel toggle of the ordered dither addition value.
 * xy == 0 for a step in x, 1 for a step in y.
 *
 * A single bit of r_oditheradd is flipped with EOR; the y toggle uses the
 * bit one place below the x toggle. Nothing is emitted when ordered dither
 * is off.
 */
{
  UNUSED(wp);

  if (!ws->odither)
    return;

  if (xy == 0)
  {
    EOR(R(r_oditheradd),R(r_oditheradd), IMM(1 << (ws->odither - xy)) | IMMROR(8),
        "EOR r_oditheradd,r_oditheradd,#odither_eorvalue ; alternate dither offset");
  }
  else
  {
    EOR(R(r_oditheradd),R(r_oditheradd), IMM(1 << (ws->odither - xy)) | IMMROR(8),
        "EOR r_oditheradd,r_oditheradd,#odither_eorvalue:SHR:1 ; alternate dither offset");
  }
}
#if 1
static void skip_current_output_words(asm_workspace *wp, workspace *ws)
/* Emit code that skips over masked-out output.
 * On entry to the generated code:
 *   r_xcount = output pixels to skip
 *   r_temp1  = pixels left in current word.
 *
 * For a 32-bit destination the output pointer is simply advanced and
 * r_xcount zeroed. Otherwise the current output word is completed and
 * stored, any whole words are stepped over, the next output word is loaded,
 * and r_xcount is left holding the pixels still to skip within that word.
 */
{
  comment(ws, "4Skipping masked words.");
  if (DEST_32_BIT)
  {
    ADD(R(r_outptr), R(r_outptr), OP2R(R(r_xcount)) | LSLI(2), "4~ADD r_outptr,r_outptr,r_xcount,LSL #2 ; skip 4*pixels bytes");
    MOV(R(r_xcount), IMM(0),                                   "41MOV r_xcount,#0");
  }
  else
  {
    /* Finish off the current word: rotate it round by the bits remaining and store it */
    SUB(R(r_xcount), R(r_xcount), OP2R(R(r_temp1)),                 "4~SUB r_xcount, r_xcount, r_temp1");
    MOV(R(r_temp1), OP2R(R(r_temp1)) | LSLI(ws->out_l2bpc),         "4~MOV r_temp1, r_temp1, LSL #out_log2bpc");
    MOV(R(r_outword), OP2R(R(r_outword)) | RORR(R(r_temp1)),        "4~MOV r_outword,r_outword,ROR r_temp1");
    ins(ws, STR(R(r_outword), R(r_outptr)) | POSTINC(4),            "4~STR r_outword,[r_outptr],#4");
    MOV(R(r_outshift), IMM(0),                                      "4~MOV r_outshift, #0");
    /* Step over any whole words, then pick up the word we land in */
    MOV(R(r_temp1), OP2R(R(r_xcount)) | S |LSRI(ws->out_l2ppw),     "4~~MOVS r_temp1,r_xcount,LSR #out_log2ppw ; whole words to skip");
    ADD(R(r_outptr), R(r_outptr), NE | OP2R(R(r_temp1)) | LSLI(2),  "4~ADDNE r_outptr,r_outptr,r_temp1,LSL #2 ; skip 4*pixels bytes");
    ins(ws, LDR(R(r_outword), R(r_outptr)) | OFFSET(0),             "4~~LDR r_outword,[r_outptr]");
    SUB(R(r_xcount), R(r_xcount),
        OP2R(R(r_temp1)) | LSLI(ws->out_l2ppw),                     "4~SUB r_xcount, r_xcount, r_temp1 LSL #out_log2ppw ; pixels left to skip");
  }
}
static void skip_some_pixels(asm_workspace *wp, workspace *ws)
/* Emit code that skips r_xcount pixels within the current output word:
 * r_outword is rotated by that many pixels' worth of bits, r_outshift is
 * reduced to match, and r_xcount is cleared.
 */
{
  UNUSED(wp);

  /* r_temp1 = number of bits to skip */
  MOV(R(r_temp1), OP2R(R(r_xcount)) | LSLI(ws->out_l2bpc),
      "2~~MOV r_temp1, r_xcount, LSL #out_log2bpc");
  MOV(R(r_outword), OP2R(R(r_outword)) | RORR(R(r_temp1)),
      "2~MOV r_outword,r_outword,ROR r_temp1");
  /* The shift counter is kept scaled up by 27 bits */
  SUB(R(r_outshift), R(r_outshift), OP2R(R(r_temp1)) | LSLI(27),
      "2~~SUB r_outshift,r_outshift,r_temp1,SHL #27");
  MOV(R(r_xcount), IMM(0),
      "31MOV r_xcount,#0");
}
#endif
static void save_pixel_inc(asm_workspace *wp, workspace *ws)
/* Emit code that advances the destination position by one pixel, then
 * toggles the ordered-dither value for a step in x.
 *
 * 32-bit destinations step r_outptr by a word. Narrower destinations rotate
 * r_outword by one pixel and step r_outshift (held scaled up by 27 bits);
 * when the counter wraps to zero the completed word is stored and the next
 * output word is prepared.
 *
 * When plotting the mask, the two ECF pattern words (r_inword, r_bgcolour)
 * are rotated along with r_outword; no reload of those words is emitted here.
 */
{
  comment(ws, "Advance destination pointer");
  if (DEST_32_BIT)
  {
    ADD(R(r_outptr), R(r_outptr), IMM(4), "ADD r_outptr,r_outptr,#4 323232");
  }
  else
  {
    MOV(R(r_outword), OP2R(R(r_outword)) | RORI(wp->BPC), "MOV r_outword,r_outword,ROR #out_bpc 545454");
    if (PLOTMASK)
    {
      MOV(R(r_inword), OP2R(R(r_inword)) | RORI(wp->BPC), "MOV r_inword,r_inword,ROR #out_bpc ; advance ECF pattern 5");
      MOV(R(r_bgcolour), OP2R(R(r_bgcolour)) | RORI(wp->BPC), "MOV r_bgcolour,r_bgcolour,ROR #out_bpc ; advance ECF eeyore pattern 5");
    }
    /* (BPC*2) rotated right by 6 is BPC << 27 */
    SUB(R(r_outshift), R(r_outshift),
        S | IMM(wp->BPC*2) | IMMROR(6), "SUBS r_outshift,r_outshift,#out_bpc:SHL:27 5");
    ins(ws, STR(R(r_outword), R(r_outptr)) | EQ | POSTINC(4),"STREQ r_outword,[r_outptr],#4 4");
    /* If entirely replacing pixels, no need to fetch the old ones.
     * The last word has to be patched up carefully, see x_loop.
     */
    if (ws->gcol == 0 && !SOURCE_MASK && !PLOTMASK)
    {
      MOV(R(r_outword), EQ | IMM(0), "MOVEQ r_outword,#0 ; setting pixels and no mask 4");
    }
    else
    {
      ins(ws, LDR(R(r_outword), R(r_outptr)) | EQ | OFFSET(0), "LDREQ r_outword,[r_outptr] 4");
    }
  }
  odither_inc(wp, ws, 0);
}
static void save_pixel_inc2(asm_workspace *wp, workspace *ws)
/* Emit code that advances the destination position by two pixels. Only used
 * in the optimised 2-at-a-time inner loop, where the caller guarantees that
 * gcol==0 and that no word store is needed between the two pixels.
 *
 * Ends with a single ordered-dither toggle; the toggle for the first of the
 * two pixels is assumed to have been emitted by the caller already.
 */
{
  comment(ws, "Advance destination pointer by two pixels");
  if (DEST_32_BIT)
  {
    ADD(R(r_outptr), R(r_outptr), IMM(8), "ADD r_outptr,r_outptr,#8");
  }
  else if (DESTD_16_BIT)
  {
    /* Two pixels per word, so a word is always completed here */
    ins(ws, STR(R(r_outword), R(r_outptr)) | POSTINC(4), "STR r_outword,[r_outptr],#4 ; store two pixels");
    if (SOURCE_MASK)
    {
      ins(ws, LDR(R(r_outword), R(r_outptr)) | OFFSET(0), "LDR r_outword,[r_outptr] ; load dest data (in case of mask)");
    }
    else
    {
      MOV(R(r_outword), IMM(0), "MOV r_outword,#0 ; setting pixels and no mask");
    }
  }
  else
  {
    MOV(R(r_outword), OP2R(R(r_outword)) | RORI(wp->BPC*2), "MOV r_outword,r_outword,ROR #out_bpc*2");
    /* BPC rotated right by 4 is BPC << 28, i.e. two pixels' worth */
    SUB(R(r_outshift), R(r_outshift),
        S | IMM(wp->BPC) | IMMROR(4), "SUBS r_outshift,r_outshift,#out_bpc:SHL:28");
    ins(ws, STR(R(r_outword), R(r_outptr)) | EQ | POSTINC(4),"STREQ r_outword,[r_outptr],#4 ; store pixels (inc2)");
    /* If entirely replacing pixels, no need to fetch the old ones.
     * The last word has to be patched up carefully, see x_loop.
     */
    if (SOURCE_MASK)
    {
      ins(ws, LDR(R(r_outword), R(r_outptr)) | EQ | OFFSET(0), "LDREQ r_outword,[r_outptr] ; get dest data (in case of mask)");
    }
    else
    {
      MOV(R(r_outword), EQ | IMM(0), "MOVEQ r_outword,#0 ; setting pixels and no mask (inc2)");
    }
  }
  odither_inc(wp, ws, 0); /* assume this has also been called once after the first pixel has been translated */
}
static void plot_current_output_words(asm_workspace *wp, workspace *ws, int scale)
/* Emit the optimised code that plots a run of identical output pixels.
 * On entry to the generated code:
 *   r_xcount = number of output pixels in the run (counted down as they are
 *              written)
 *   r_temp1  = pixels left in current word.
 *   r_pixel  = pixel to output.
 * scale selects, for 32-bit destinations, between a fully unrolled
 * conditional sequence (scale < 21) and a ten-at-a-time loop.
 */
{
int loop;
comment(ws, "2Optimised plotting of scaled sprite.");
if (DEST_32_BIT)
{
#if 1
/* First pixel is stored unconditionally; the SUBS leaves Z set once the run
 * is exhausted, which disables every later STRNE/SUBNES.
 */
ins(ws, STR(R(r_pixel), R(r_outptr)) | POSTINC(4), "32STR r_pixel,[r_outptr],#4");
SUB(R(r_xcount), R(r_xcount),
S | IMM(1), "14SUBS r_xcount,r_xcount,#1");
if (scale < 21)
{
/* Unrolled: up to scale-1 further conditional stores */
for (loop = 1;loop<scale;loop++)
{
ins(ws, STR(R(r_pixel), R(r_outptr)) | NE | POSTINC(4), "32STRNE r_pixel,[r_outptr],#4");
SUB(R(r_xcount), R(r_xcount),
S | NE | IMM(1), "14SUBNES r_xcount,r_xcount,#1");
}
}
else
{
/* Large scale: store ten pixels per loop while more than ten remain, then
 * mop up the last 0..10 with conditional stores.
 */
CMP(R(r_xcount), IMM(10), "CMP r_xcount, #10");
branch(ws, B | LE, L(plot_loop1b), "BLE plot_loop1b");
DEFINE_LABEL(plot_loop1a, "loop for every ten pixels")
for (loop = 0;loop<10;loop++)
{
ins(ws, STR(R(r_pixel), R(r_outptr)) | POSTINC(4), "32STR r_pixel,[r_outptr],#4");
}
SUB(R(r_xcount), R(r_xcount),
IMM(10), "14SUB r_xcount,r_xcount,#10");
CMP(R(r_xcount), IMM(10), "CMP r_xcount, #10");
branch(ws, B | GT, L(plot_loop1a), "BGT plot_loop1a");
DEFINE_LABEL(plot_loop1b, "branch here when LH side obscured")
CMP(R(r_xcount), IMM(0), "CMP r_xcount, #0");
for (loop = 0;loop<10;loop++)
{
ins(ws, STR(R(r_pixel), R(r_outptr)) | NE | POSTINC(4), "4STRNE r_pixel,[r_outptr],#4");
SUB(R(r_xcount), R(r_xcount),
S | NE | IMM(1), "16SUBNES r_xcount,r_xcount,#1");
}
}
#else
for (loop = 0;loop<scale;loop++)
ins(ws, STR(R(r_pixel), R(r_outptr)) | POSTINC(4), "32STR r_outword,[r_outptr],#4");
#endif
}
else
{
/* Sub-word destination, in three phases:
 *  1. fill the rest of the current output word pixel by pixel and store it;
 *  2. replicate r_pixel across a whole word and store it once per whole word;
 *  3. reload r_outword ready for any trailing partial word.
 */
SUB(R(r_xcount), R(r_xcount), OP2R(R(r_temp1)), "52SUB r_xcount, r_xcount, r_temp1");
/* Phase 1: loop body unrolled four times; each copy leaves via its own label
 * when r_outshift wraps to zero (word complete). The three labels are all
 * defined at the same place below.
 */
DEFINE_LABEL(plot_loop1, "1???")
save_pixel_opt(wp, ws);
MOV(R(r_outword), OP2R(R(r_outword)) | RORI(wp->BPC), "015MOV r_outword,r_outword,ROR #out_bpc");
SUB(R(r_outshift), R(r_outshift),
S | IMM(wp->BPC*2) | IMMROR(6), "7SUBS r_outshift,r_outshift,#out_bpc:SHL:27");
branch(ws, B | EQ, L(plot_loop1a), "BEQ plot_loop1a");
save_pixel_opt(wp, ws);
MOV(R(r_outword), OP2R(R(r_outword)) | RORI(wp->BPC), "115MOV r_outword,r_outword,ROR #out_bpc");
SUB(R(r_outshift), R(r_outshift),
S | IMM(wp->BPC*2) | IMMROR(6), "17SUBS r_outshift,r_outshift,#out_bpc:SHL:27");
branch(ws, B | EQ, L(plot_loop1b), "BEQ plot_loop1b");
save_pixel_opt(wp, ws);
MOV(R(r_outword), OP2R(R(r_outword)) | RORI(wp->BPC), "215MOV r_outword,r_outword,ROR #out_bpc");
SUB(R(r_outshift), R(r_outshift),
S | IMM(wp->BPC*2) | IMMROR(6), "27SUBS r_outshift,r_outshift,#out_bpc:SHL:27");
branch(ws, B | EQ, L(plot_loop1c), "BEQ plot_loop1c");
save_pixel_opt(wp, ws);
MOV(R(r_outword), OP2R(R(r_outword)) | RORI(wp->BPC), "315MOV r_outword,r_outword,ROR #out_bpc");
SUB(R(r_outshift), R(r_outshift),
S | IMM(wp->BPC*2) | IMMROR(6), "37SUBS r_outshift,r_outshift,#out_bpc:SHL:27");
branch(ws, B | NE, L(plot_loop1), "8BNE plot_loop1");
DEFINE_LABEL(plot_loop1a, "plot loop 1a - coz only one forward referance allowed")
DEFINE_LABEL(plot_loop1b, "plot loop 1b - coz only one forward referance allowed")
DEFINE_LABEL(plot_loop1c, "plot loop 1c - coz only one forward referance allowed")
ins(ws, STR(R(r_outword), R(r_outptr)) | POSTINC(4), "9STR r_outword,[r_outptr],#4");
/* Phase 2: r_temp1 = number of whole words; skipped entirely if zero */
MOV(R(r_temp1), OP2R(R(r_xcount)) | S |LSRI(ws->out_l2ppw), "0MOVS r_temp1,r_xcount,LSR #out_log2ppw ; whole words to skip");
branch(ws, B | EQ, L(plot_loop3), "1BEQ plot_loop3");
/* Double the populated width of r_pixel each time until it fills 32 bits */
for (loop = wp->BPP;loop<32;loop*=2)
ORR(R(r_pixel), R(r_pixel), OP2R(R(r_pixel)) | LSLI(loop), "2ORR r_pixel,r_pixel,r_pixel, LSL #somenumber");
DEFINE_LABEL(plot_loop2, "2???")
ins(ws, STR(R(r_pixel), R(r_outptr)) | POSTINC(4), "3STR r_pixel,[r_outptr],#4");
SUB(R(r_xcount), R(r_xcount),
IMM(ws->out_ppw), "4SUB r_xcount,r_xcount,#out_ppw");
SUB(R(r_temp1), R(r_temp1),
S | IMM(1), "5SUBS r_temp1,r_temp1,#1");
branch(ws, B | NE, L(plot_loop2), "6BNE plot_loop2");
/* Cut r_pixel back down to a single pixel again */
if (DESTD_16_BIT)
{
MOV(R(r_pixel), OP2R(R(r_pixel)) | LSLI(16), "7MOV r_pixel, r_pixel, LSL #16 ; whole words to skip");
MOV(R(r_pixel), OP2R(R(r_pixel)) | LSRI(16), "8MOV r_pixel, r_pixel, LSR #16 ; whole words to skip");
}
else
AND(R(r_pixel), R(r_pixel), IMM(ws->out_dpixmask), "9AND r_pixel,r_pixel,#dpix_mask");
/* Phase 3 */
DEFINE_LABEL(plot_loop3, "3???")
ins(ws, LDR(R(r_outword), R(r_outptr)) | OFFSET(0), "0LDR r_outword,[r_outptr]");
}
}
static void plot_some_pixels(asm_workspace *wp, workspace *ws)
/* Non complete word pixel plot.
 *
 * Emits a loop that writes r_pixel into r_outword r_xcount times, advancing
 * r_outword and r_outshift by one pixel each time. The loop body is unrolled
 * four times; each copy exits through its own label when r_xcount reaches
 * zero, and all three exit labels are defined at the same place at the end.
 * The generated code does not store r_outword.
 */
{
DEFINE_LABEL(plot_loop4, "4???")
/* Copy 1 of 4 */
save_pixel_opt(wp, ws);
MOV(R(r_outword), OP2R(R(r_outword)) | RORI(wp->BPC), "14MOV r_outword,r_outword,ROR #out_bpc");
SUB(R(r_outshift), R(r_outshift),
S | IMM(wp->BPC*2) | IMMROR(6), "15SUBS r_outshift,r_outshift,#out_bpc:SHL:27");
SUB(R(r_xcount), R(r_xcount),
S | IMM(1), "16SUBS r_xcount, r_xcount, #1");
branch(ws, B | EQ, L(plot_loop4a), "17BEQ plot_loop4a");
/* Copy 2 of 4 */
save_pixel_opt(wp, ws);
MOV(R(r_outword), OP2R(R(r_outword)) | RORI(wp->BPC), "214MOV r_outword,r_outword,ROR #out_bpc");
SUB(R(r_outshift), R(r_outshift),
S | IMM(wp->BPC*2) | IMMROR(6), "215SUBS r_outshift,r_outshift,#out_bpc:SHL:27");
SUB(R(r_xcount), R(r_xcount),
S | IMM(1), "216SUBS r_xcount, r_xcount, #1");
branch(ws, B | EQ, L(plot_loop4b), "17BEQ plot_loop4b");
/* Copy 3 of 4 */
save_pixel_opt(wp, ws);
MOV(R(r_outword), OP2R(R(r_outword)) | RORI(wp->BPC), "314MOV r_outword,r_outword,ROR #out_bpc");
SUB(R(r_outshift), R(r_outshift),
S | IMM(wp->BPC*2) | IMMROR(6), "315SUBS r_outshift,r_outshift,#out_bpc:SHL:27");
SUB(R(r_xcount), R(r_xcount),
S | IMM(1), "316SUBS r_xcount, r_xcount, #1");
branch(ws, B | EQ, L(plot_loop4c), "17BEQ plot_loop4c");
/* Copy 4 of 4: loops back to the top while pixels remain */
save_pixel_opt(wp, ws);
MOV(R(r_outword), OP2R(R(r_outword)) | RORI(wp->BPC), "414MOV r_outword,r_outword,ROR #out_bpc");
SUB(R(r_outshift), R(r_outshift),
S | IMM(wp->BPC*2) | IMMROR(6), "415SUBS r_outshift,r_outshift,#out_bpc:SHL:27");
SUB(R(r_xcount), R(r_xcount),
S | IMM(1), "416SUBS r_xcount, r_xcount, #1");
branch(ws, B | NE, L(plot_loop4), "17BNE plot_loop4");
DEFINE_LABEL(plot_loop4a, "plot loop 4a - coz only one forward referance allowed")
DEFINE_LABEL(plot_loop4b, "plot loop 4b - coz only one forward referance allowed")
DEFINE_LABEL(plot_loop4c, "plot loop 4c - coz only one forward referance allowed")
}
/**************************************************************************
* *
* Bitblit: Overall construction of the X loop. *
* *
**************************************************************************/
/* Shorthand for emitting "register += constant" / "register -= constant" via
 * arbitrary_add(), for constants that may not fit a single instruction.
 *   ADD_A  - add                       ADDS_A - add, S bit on the final instruction
 *   SUB_A  - subtract                  SUBS_A - subtract, S bit on the final instruction
 * 'reg' is the name of a field of ws->regnames (e.g. r_xcount); a variable
 * called 'ws' must be in scope where the macro is used.
 * Each expansion already ends in ';' - the call sites in this file are written
 * without a trailing semicolon and depend on that.
 */
#define ADD_A(reg,value) arbitrary_add(ws, TRUE, FALSE, &ws->regnames.reg, value);
#define ADDS_A(reg,value) arbitrary_add(ws, TRUE, TRUE, &ws->regnames.reg, value);
#define SUB_A(reg,value) arbitrary_add(ws, FALSE, FALSE, &ws->regnames.reg, value);
#define SUBS_A(reg,value) arbitrary_add(ws, FALSE, TRUE, &ws->regnames.reg, value);
static void arbitrary_add(workspace *ws, BOOL add, BOOL s, regname *r, int value)
/* Add/subtract an arbitrary constant to a register - could be more than 8 bits.
 *
 * ws    - code generation workspace the instructions are emitted into.
 * add   - TRUE to emit ADD, FALSE to emit SUB. A negative 'value' flips this.
 * s     - TRUE if the flags must be set by the operation. Only the last
 *         instruction of the sequence carries the S bit; for a zero constant
 *         a CMP r,#0 is emitted instead.
 * r     - the register, used as both source and destination.
 * value - the constant. Any int is accepted, including INT_MIN.
 *
 * The constant is split into chunks of 8 significant bits, each positioned by
 * an even shift, and one instruction is emitted per chunk using the
 * IMM/IMMROR operand fields.
 *
 * The magnitude is held as an unsigned value: negating INT_MIN as a signed int
 * overflows, and where that wraps the value stays negative so the original
 * loop never terminated (it kept emitting "#0" instructions).
 */
{
  IFDEBUG(char a[256];)
  unsigned int magnitude = (unsigned int)value;

  if (value < 0)
  {
    magnitude = 0u - magnitude;   /* well-defined for every int, unlike -value */
    add = !add;
  }

  if (magnitude == 0) /* special case with 0 constant */
  {
    if (s)
    {
      IFDEBUG(do_sprintf(a, "CMP %s,#0", r->name);)
      CMP(r->regno, IMM(0), a);
    }
    /* else, nothing */
  }
  else
  {
    int opcode = add ? ADD_OPCODE : SUB_OPCODE;
    int sopcode = s ? S : 0;
    int shift_it = 0;   /* total right-shift applied to 'magnitude' so far; always even */

    while (magnitude != 0)
    {
      BOOL last;
      int valuebyte;

      /* Discard low-order zero bit pairs so the next 8 significant bits sit in
       * bits 0..7. Only needed once the remainder no longer fits in a byte.
       */
      if (magnitude > 255)
        while ((magnitude & 3) == 0) {magnitude >>= 2; shift_it += 2;}

      valuebyte = (int)(magnitude & 0xff);
      magnitude &= 0xffffff00;
      last = magnitude == 0; /* the last instruction needed */

      IFDEBUG(
        do_sprintf(a,
          (last && sopcode ? "%sS%t8.%s,%s,#&%x" : "%s%t8.%s,%s,#&%x") _
          (add ? "ADD" : "SUB") _ r->name _ r->name _ (unsigned int)valuebyte << shift_it);)

      /* The rotate field is a rotate RIGHT, so a left shift of shift_it is
       * encoded as (32 - shift_it); the mask turns 32 back into 0.
       */
      ins(ws, opcode | (last ? sopcode : 0)
            | DSTR(r->regno) | OP1R(r->regno)
            | IMM(valuebyte) | IMMROR ((32 - shift_it) & 0x1e),
            a);
    }
  }
}
static void init_word_registers(asm_workspace *wp, workspace *ws)
/* Initialise inword, outword, maskinword from their respective pointers
 * and shift values.
 *
 * Emits code only; nothing is returned. Called from loop_x() before the
 * per-pixel loop is generated. In each case the word is loaded from its
 * pointer and rotated so that the current pixel sits in the least significant
 * bit(s); the rotation amount is derived from the top 5 bits of the matching
 * shift register (shift >> 27). r_pixel is used as scratch throughout.
 */
{
comment(ws, "Load initial values of word registers");
/* Set up inword */
if (!PLOTMASK) /* PLOTMASK case handled below, because helped by setting up r_outword */
{
/* No input word register is prepared here for 32 bit sources. */
if (!SOURCE_32_BIT)
{
ins(ws, LDR(R(r_inword), R(r_inptr)) | OFFSET(0), "LDR r_inword,[r_inptr] ; fetch first input pixels");
MOV(R(r_pixel), OP2R(R(r_inshift)) | LSRI(27), "MOV r_pixel,r_inshift,LSR #27 ; get real shift distance");
RSB(R(r_pixel), R(r_pixel), IMM(32), "RSB r_pixel,r_pixel,#32 ; temporary use of r_pixel");
MOV(R(r_inword), OP2R(R(r_inword)) | RORR(R(r_pixel)), "MOV r_inword,r_inword,ROR r_pixel "
"; current input pixel now in least sig bit[s]");
}
}
if (SOURCE_MASK) /* Set up maskinword */
{
if (SOURCE_BPPMASK || PLOTMASK)
{
/* Mask has its own pointer and shift register: compute its own rotation. */
ins(ws, LDR(R(r_maskinword), R(r_maskinptr)) | OFFSET(0), "LDR r_maskinword,[r_maskinptr] ; fetch first mask word");
MOV(R(r_pixel), OP2R(R(r_maskinshift)) | LSRI(27), "MOV r_pixel,r_maskinshift,LSR #27 ; get real shift distance");
RSB(R(r_pixel), R(r_pixel), IMM(32), "RSB r_pixel,r_pixel,#32 ; mask shift");
}
else
/* Mask word is addressed relative to the input pointer (r_inptr + r_masko).
 * The rotate below reuses the r_pixel value computed for r_inword above.
 * NOTE(review): that value is only computed when !SOURCE_32_BIT - confirm this
 * branch is never reached with a 32 bit source.
 */
ins(ws, LDR(R(r_maskinword),
R(r_inptr)) | INDEX(R(r_masko), 0), "LDR r_maskinword,[r_inptr,r_masko] ; fetch first mask word");
MOV(R(r_maskinword), OP2R(R(r_maskinword)) | RORR(R(r_pixel)),"MOV r_maskinword,r_maskinword,ROR r_pixel "
"; current mask pixel now in least sig bit[s]");
}
if (!DEST_32_BIT) /* Set up outword */
{
if (ws->gcol == 0 && !SOURCE_MASK && !PLOTMASK)
{
/* Faster in the inner loop, but the unneeded pixels must be cleared out first */
/* The existing word is only loaded when the shift distance is non-zero;
 * otherwise r_outword simply starts as 0.
 */
MOV(R(r_pixel), S | OP2R(R(r_outshift)) | LSRI(27), "MOVS r_pixel,r_outshift,LSR #27 ; get real shift distance");
ins(ws, NE | LDR(R(r_outword), R(r_outptr)) | OFFSET(0), "LDRNE r_outword,[r_outptr] ; load up output word");
MOV(R(r_outword), NE | OP2R(R(r_outword))
| LSLR(R(r_pixel)), "MOVNE r_outword,r_outword,LSL r_pixel "
"; set untouched pixels to correct places, clear the others");
MOV(R(r_outword), EQ | IMM(0), "MOVEQ r_outword,#0 ; if r_pixel=0, make them all clear");
}
else
{
/* General case: load the destination word and rotate it, keeping all bits. */
ins(ws, LDR(R(r_outword), R(r_outptr)) | OFFSET(0), "LDR r_outword,[r_outptr] ; load up output word");
MOV(R(r_pixel), OP2R(R(r_outshift)) | LSRI(27), "MOV r_pixel,r_outshift,LSR #27 ; get real shift distance");
RSB(R(r_pixel), R(r_pixel), IMM(32), "RSB r_pixel,r_pixel,#32 ; temp use of r_pixel");
MOV(R(r_outword), OP2R(R(r_outword)) | RORR(R(r_pixel)),"MOV r_outword,r_outword,ROR r_pixel "
"; current output pixel now in least sig bit[s]");
/* Set up inword from ECF pattern - uses r_pixel value */
if (PLOTMASK)
{
/* Two words are fetched at r_inptr+r_ecfindex and r_inptr+r_ecfindex+4 and
 * given the same rotation as r_outword. r_ecfindex is stepped by 4 and then
 * restored, so it is unchanged on exit.
 */
ins(ws, LDR(R(r_inword), R(r_inptr))
| INDEX(R(r_ecfindex), 0), "LDR r_inword,[r_inptr,r_ecfindex] ; get ECF pattern word");
MOV(R(r_inword), OP2R(R(r_inword)) | RORR(R(r_pixel)),"MOV r_inword,r_inword,ROR r_pixel 1 "
"; current ECF pixel now in least sig bit[s]");
ADD(R(r_ecfindex), R(r_ecfindex),
IMM(4), "ADD r_ecfindex,r_ecfindex,#4 ; to load EOR word 1");
ins(ws, LDR(R(r_bgcolour), R(r_inptr))
| INDEX(R(r_ecfindex), 0), "LDR r_bgcolour,[r_inptr,r_ecfindex] ;fetch next EOR word of ECF1");
SUB(R(r_ecfindex), R(r_ecfindex),
IMM(4), "SUB r_ecfindex,r_ecfindex,#4 ;blah1");
MOV(R(r_bgcolour), OP2R(R(r_bgcolour)) | RORR(R(r_pixel)),"MOV r_bgcolour,r_bgcolour,ROR r_pixel 1 ");
}
}
}
}
static void loop_x(asm_workspace *wp, workspace *ws)
/* The variables are set up - perform the inner loop that processes a
 * single line. Fall out of the bottom of the loop when complete.
 *
 * This generates code; it does not plot anything itself. One of the following
 * shapes is emitted, chosen at compile time:
 *   - x_block_move():   a call to an existing block-move routine;
 *   - simple_x_scale(): a 1:1 loop that handles two pixels per iteration;
 *   - ordered dither with x magnification (xadd - xdiv > xdiv): a loop that
 *     re-extracts and re-translates the source pixel for every output pixel;
 *   - exact integer magnification greater than 4 (no PLOTMASK, gcol 0): a
 *     loop that plots whole output words at a time;
 *   - otherwise: the general scaling loop.
 * For everything except the block move, init_word_registers() is emitted first
 * and, for destinations of fewer than 32 bits, the partially filled
 * r_outword is written back at loop_x_exit.
 */
{
/* Set by whichever branch is taken below; TRUE when fetch_pixel() generated a
 * branch to the supplied 'masked' label, which must then be defined.
 */
BOOL mask_possible;
comment(ws, "The inner loop: iterating along a row of pixels.");
if (x_block_move(wp, ws))
{
comment(ws, "Very simple inner loop - we use an existing block-move primitive");
MOV(R(lr), OP2R(R(pc)), "MOV lr,pc ; remember return address");
MOV(R(pc), OP2R(R(r_blockroutine)), "MOV pc,r_blockroutine ; block move");
/* It would be a little bit more efficient to do state saving here rather than inside the routine,
 * and so only save registers that need to be saved - not a big saving, and only per-line.
 */
}
else
{
init_word_registers(wp, ws);
if (simple_x_scale(wp, ws)) /* 1:1 scaling */
{
comment(ws, "1:1 scaling along x, so each source pixel is painted once");
#if 0
/* Disabled: the straightforward one-pixel-per-iteration loop, kept for reference. */
align16(wp, ws);
DEFINE_LABEL(loop_x_repeat, "Loop around for each source/dest pixel")
mask_possible = fetch_pixel(wp, ws, &ws->labels.l_masked);
translate_pixel(wp, ws);
save_pixel(wp, ws);
if (mask_possible) DEFINE_LABEL(l_masked, "This pixel masked out")
fetch_pixel_inc(wp, ws);
save_pixel_inc(wp, ws);
SUB(R(r_xsize), R(r_xsize), S | IMM(1), "SUBS r_xsize,r_xsize,#1");
branch(ws, B | NE, L(loop_x_repeat), "BNE loop_x_repeat");
#else
/* We generate a loop that does two pixels at a time, only advancing pointers, counts, shifts
 * etc. every two pixels. There are two versions of this loop, one where the in and out shifts
 * are 'in phase' (ie initially both even or both odd), one where they are out of phase. There
 * is also some initial stuff to get the outshift to be even if necessary when entering either
 * of these, and some final stuff to patch up the end.
 */
comment(ws, "Optimised 2-at-a-time loop");
if (!DEST_32_BIT)
{
/* Prologue: if the output starts on an odd pixel, do one pixel singly so the
 * paired loops always start on an even output pixel.
 */
TST(R(r_outshift), IMM(wp->BPC*2) | IMMROR(6), "TST r_outshift,#out_bpc:SHL:27 ; start at odd or even pixel shift?");
branch(ws, B | EQ, L(x_evenstart), "BEQ x_evenstart ; B if even");
comment(ws, "r_outshift an odd number of pixels - process just one of these");
mask_possible = fetch_pixel(wp, ws, &ws->labels.x_oddmask);
translate_pixel(wp, ws);
save_pixel(wp, ws);
if (mask_possible) DEFINE_LABEL(x_oddmask, "This pixel masked out")
fetch_pixel_inc(wp, ws);
save_pixel_inc(wp, ws);
SUB(R(r_xsize), R(r_xsize), S | IMM(1), "SUBS r_xsize,r_xsize,#1 ; count towards overall width");
branch(ws, B | EQ, L(loop_x_exit), "BEQ loop_x_exit ; check for just one pixel wide");
DEFINE_LABEL(x_evenstart, "r_outshift is an even number of pixels")
}
if (!SOURCE_32_BIT)
{
/* Choose between the aligned and misaligned loops from the input phase. */
TST(R(r_inshift), IMM(ws->in_bpc*2) | IMMROR(6), "TST r_inshift,#in_bpc:SHL:27 ; input at odd or even pixel shift?");
branch(ws, B | NE, L(x_misaligned), "BNE x_misaligned ; B if odd");
}
branch(ws, B, L(x_aligned_enter), "B x_aligned_enter ; else, in phase with output - start loop");
newline();
align16(wp, ws);
/* Aligned loop body: pixel 1 then pixel 2, with a single pointer/shift
 * advance (the ..._inc2 calls) per pair. The loop is entered at
 * x_aligned_enter, i.e. at the count test.
 */
DEFINE_LABEL(x_aligned_loop, "The 2-at-a-time inner loop, aligned case")
mask_possible = fetch_pixel(wp, ws, &ws->labels.x_alignmask1);
translate_pixel(wp, ws);
save_pixel(wp, ws);
if (mask_possible) DEFINE_LABEL(x_alignmask1, "First pixel masked out")
odither_inc(wp, ws, 0);
mask_possible = fetch_pixel2(wp, ws, &ws->labels.x_alignmask2);
translate_pixel(wp, ws);
save_pixel2(wp, ws);
if (mask_possible) DEFINE_LABEL(x_alignmask2, "Second pixel masked out")
fetch_pixel_inc2(wp, ws);
save_pixel_inc2(wp, ws);
DEFINE_LABEL(x_aligned_enter, "Entering the aligned 2-at-a-time inner loop")
SUB(R(r_xsize), R(r_xsize), S | IMM(2), "SUBS r_xsize,r_xsize,#2 ; done 2 pixels");
branch(ws, B | GE, L(x_aligned_loop), "BGE x_aligned_loop ; loop until 0 or 1 left");
if (!SOURCE_32_BIT)
{
branch(ws, B, L(x_2atatime_exit), "B x_2atatime_exit ; final patchup code");
newline();
DEFINE_LABEL(x_misaligned, "The 2-at-a-time inner loop, misaligned case, entry sequence")
/* A bit delicate - we have to prepare the input stream for an inc2 call,
 * by effectively winding it back by a pixel. We know this won't go back a word,
 * however, because r_inshift is an odd number of pixels.
 */
comment(ws, "Wind input stream back by a pixel");
/* NOTE(review): this sits inside the enclosing if (!SOURCE_32_BIT), so the
 * SUB branch below cannot be taken; only the else branch is ever emitted.
 */
if (SOURCE_32_BIT)
SUB(R(r_inptr), R(r_inptr), IMM(4), "SUB r_inptr,r_inptr,#4 ; wind back a pixel");
else
{
MOV(R(r_inword), OP2R(R(r_inword)) | LSLI(ws->in_bpp), "MOV r_inword,r_inword,LSL #in_bpp ; wind back a pixel");
ADD(R(r_inshift), R(r_inshift),
IMM(ws->in_bpp*2) | IMMROR(6), "ADD r_inshift,r_inshift,#in_bpp:SHL:27");
}
if (SOURCE_MASK)
{
/* Wind the mask stream back to match; the mask shift register is only
 * adjusted for a separate bit-per-pixel mask.
 */
MOV(R(r_maskinword), OP2R(R(r_maskinword))
| LSLI(ws->mask_bpp), "MOV r_maskinword,r_maskinword,LSL #mask_bpp");
if (SOURCE_BPPMASK)
ADD(R(r_maskinshift), R(r_maskinshift),
IMM(ws->mask_bpp*2) | IMMROR(6), "ADD r_maskinshift,r_maskinshift,#mask_bpp:SHL:27");
}
branch(ws, B, L(x_misaligned_enter), "B x_misaligned_enter ; start misaligned loop");
align16(wp, ws);
/* Misaligned loop body: the input is advanced between the two pixels and the
 * output after them, the reverse of the aligned loop.
 */
DEFINE_LABEL(x_misaligned_loop, "The 2-at-a-time inner loop, misaligned case")
mask_possible = fetch_pixel2(wp, ws, &ws->labels.x_misalignmask1);
translate_pixel(wp, ws);
save_pixel(wp, ws);
if (mask_possible) DEFINE_LABEL(x_misalignmask1, "A pixel masked out")
fetch_pixel_inc2(wp, ws);
odither_inc(wp, ws, 0);
mask_possible = fetch_pixel(wp, ws, &ws->labels.x_misalignmask2);
translate_pixel(wp, ws);
save_pixel2(wp, ws);
if (mask_possible) DEFINE_LABEL(x_misalignmask2, "Another pixel masked out")
save_pixel_inc2(wp, ws);
DEFINE_LABEL(x_misaligned_enter, "Entering the misaligned 2-at-a-time inner loop")
SUB(R(r_xsize), R(r_xsize), S | IMM(2), "SUBS r_xsize,r_xsize,#2 ; count towards overall size");
branch(ws, B | GE, L(x_misaligned_loop), "BGE x_misaligned_loop ; and loop until done");
/* Undo the one-pixel wind-back before joining the common exit code. */
fetch_pixel_inc(wp, ws);
newline();
DEFINE_LABEL(x_2atatime_exit, "Final patchup for 2-at-a-time inner loop")
}
else
newline();
/* Epilogue: r_xsize is now -2 or -1; add 2 back and paint the one left-over
 * pixel if there is one.
 */
ADD(R(r_xsize), R(r_xsize), S | IMM(2), "ADDS r_xsize,r_xsize,#2 ; up to 0 or 1");
branch(ws, B | EQ, L(loop_x_exit1), "BEQ loop_x_exit1 ; No last pixel to be done\n");
mask_possible = fetch_pixel(wp, ws, &ws->labels.x_lastmask);
translate_pixel(wp, ws);
save_pixel(wp, ws);
if (mask_possible) DEFINE_LABEL(x_lastmask, "Last pixel masked out")
fetch_pixel_inc(wp, ws);
save_pixel_inc(wp, ws);
DEFINE_LABEL( loop_x_exit1, "End of input pixel line (1)")
#endif
}
else
{
comment(ws, "Control of scaling along x");
if (ws->odither && wp->save_xadd - wp->save_xdiv > wp->save_xdiv)
{
/* If dithering and scaling we have to be very careful about where we do fetch_pixel_inc, because when replicating
 * a pixel we must repeatedly fetch_pixel it.
 */
/* The SUB before the label cancels the ADD at the top of the first iteration. */
SUB_A(r_xcount, wp->save_xadd)
DEFINE_LABEL( loop_x_repeat, "Loop around for each source pixel (ordered dither)")
ADD_A(r_xcount, wp->save_xadd) /*(GPS)*/
mask_possible = fetch_pixel(wp, ws, &ws->labels.l_masked);
SUBS_A(r_xcount, wp->save_xdiv) /* Stop dither from printing 1 too many pixels... (GPS) */
DEFINE_LABEL( loop_put_pixel_repeat, "Repeatedly paint and ordered-dither a source pixel");
translate_pixel(wp, ws);
save_pixel(wp, ws);
save_pixel_inc(wp, ws);
SUB(R(r_xsize), R(r_xsize), S | IMM(1), "SUBS r_xsize,r_xsize,#1 ; count output ordered dither pixels");
branch(ws, B | EQ, L(loop_x_exit), "BEQ loop_x_exit ; painted enough pixels");
/* We must not paint the same pixel repeatedly - we must reextract and retranslate it, otherwise
 * the dithering on scaled up pixels will not occur.
 */
fetch_pixel_unmasked(wp, ws); /* reextract the pixel into r_pixel */
SUBS_A(r_xcount, wp->save_xdiv) /* Decrement count (GPS) */
branch(ws, B | PL, L(loop_put_pixel_repeat), "BPL loop_put_pixel_repeat ; recalculate and repaint");
fetch_pixel_inc(wp, ws); /* moved by (GPS) */
branch(ws, B, L(loop_x_repeat), "B loop_x_repeat ; next input pixel");
}
else
{
if ( !PLOTMASK && (wp->save_xmag % wp->save_xdiv) == 0 && ((wp->save_xmag / wp->save_xdiv) > 4) && ws->gcol == 0)
/* do optimised code */
{
/* Exact integer magnification: each source pixel covers 'toskip' output pixels. */
register int toskip = wp->save_xmag / wp->save_xdiv;
tracef("in optimised scale\nxmag = %d, xdiv = %d, xmag mod xdiv = %d\n" _ wp->save_xmag _ wp->save_xdiv _ wp->save_xmag % wp->save_xdiv);
SUB_A(r_xcount, toskip)
DEFINE_LABEL( loop_x_repeat, "3Loop around for each source pixel")
TEQ(R(r_xsize), IMM(0), "3TEQ r_xsize, #0");
DEFINE_LABEL(loop_x_exitskip, "3Kludge to avoid multiple forward references");
branch(ws, B | EQ, L(loop_x_exit), "3BEQ loop_x_exit");
ADD_A(r_xcount, toskip)
mask_possible = fetch_pixel(wp, ws, &ws->labels.l_masked);
translate_pixel(wp, ws); /* If we're about to discard the pixel this is in fact wasted work - we could reorganise
 * this whole loop to improve that situation, but it doesn't really seem worthwhile, the gain
 * is not enormous.
 */
fetch_pixel_inc(wp, ws);
comment(ws, "3calculating number of times to plot pixel 1");
/* r_xsize -= r_xcount; if that goes negative, clamp: r_xsize = 0 and
 * r_xcount = the old r_xsize.
 */
MOV(R(r_temp1), OP2R(R(r_xsize)), "3MOV r_temp1, r_xsize ; store r_xsize");
SUB(R(r_xsize), R(r_xsize), S | OP2R(R(r_xcount)), "3SUBS r_xsize, r_xsize, r_xcount ; count output pixels");
MOV(R(r_xsize), MI | IMM(0), "3MOVMI r_xsize, #0 ");
MOV(R(r_xcount), MI | OP2R(R(r_temp1)), "3MOVMI r_xcount, r_temp1 ");
if (!DEST_32_BIT)
{
/* r_temp1 = number of pixels left in the current output word; if the run is
 * shorter than that, go straight to the partial-word plot at loop2.
 */
MOV(R(r_temp1), S | OP2R(R(r_outshift)) | LSRI(27), "3MOVS r_temp1, r_outshift, LSR #27");
MOV(R(r_temp1), EQ | IMM(32), "3MOVEQ r_temp1, #32 ; 0 in r_outshift => 32 bits left");
if (!DEST_1_BIT)
MOV(R(r_temp1), OP2R(R(r_temp1)) | LSRI(ws->out_l2bpc), "3MOV r_temp1, r_temp1, LSR #out_log2bpc");
CMP(R(r_xcount), OP2R(R(r_temp1)), "3CMP r_xcount, r_temp1");
branch(ws, B + LT, L(loop2), "3BLT loop2 ; end of this masked input pixel");
}
plot_current_output_words(wp, ws, toskip);
if (DEST_32_BIT)
{
branch(ws, B, L(loop_x_repeat), "11B loop_x_repeat ; end of this masked input pixel");
}
else
{
TEQ(R(r_xcount), IMM(0), "1TEQ r_xcount, #0");
branch(ws, B + EQ, L(loop_x_repeat), "1BEQ loop_x_repeat ; end of this masked input pixel");
DEFINE_LABEL(loop2, "Last word to plot")
plot_some_pixels(wp, ws);
branch(ws, B, L(loop_x_repeat), "1B loop_x_repeat ; end of this masked input pixel");
}
#if 0
/* Disabled: earlier version that unrolled one save per output pixel. */
int loop;
comment(ws, "Doing multiple plots of same pixel in line");
DEFINE_LABEL( loop_x_repeat, "Loop around for each source pixel")
CMN(R(pc), OP2R(R(pc)), "CMN pc, pc ; this will clear the Z flag");
DEFINE_LABEL(loop_x_exitskip, "Kludge to avoid multiple forward references");
branch(ws, B | EQ, L(loop_x_exit), "BEQ loop_x_exit");
mask_possible = fetch_pixel(wp, ws, &ws->labels.l_masked);
translate_pixel(wp, ws); /* If we're about to discard the pixel this is in fact wasted work - we could reorganise
 * this whole loop to improve that situation, but it doesn't really seem worthwhile, the gain
 * is not enormous.
 */
fetch_pixel_inc(wp, ws);
for (loop = 0;loop < (wp->save_xmag / wp->save_xdiv);loop++)
{
save_pixel(wp, ws);
save_pixel_inc(wp, ws);
SUB(R(r_xsize), R(r_xsize), S | IMM(1), "SUBS r_xsize,r_xsize,#1 ; count for each output pixel");
branch(ws, B | EQ, L(loop_x_exitskip), "BEQ loop_x_exitskip");
}
branch(ws, B , L(loop_x_repeat), "B loop_x_repeat ; discard this pixel");
#endif
}
else
{
/* >>> There's not all that much point in this being separate from the odither case - could really
 * abandon this one and use the dithering one all the time, with tiny variants. Not done.
 */
/* General scaling loop: r_xcount gains xadd per source pixel and loses xdiv per
 * output pixel; when it goes negative the next source pixel is fetched.
 */
SUB_A(r_xcount, wp->save_xadd)
DEFINE_LABEL( loop_x_repeat, "Loop around for each source pixel")
ADD_A(r_xcount, wp->save_xadd)
mask_possible = fetch_pixel(wp, ws, &ws->labels.l_masked);
translate_pixel(wp, ws); /* If we're about to discard the pixel this is in fact wasted work - we could reorganise
 * this whole loop to improve that situation, but it doesn't really seem worthwhile, the gain
 * is not enormous.
 */
fetch_pixel_inc(wp, ws);
DEFINE_LABEL(loop_put_pixel_repeat, "Loop around to repeatedly paint a source pixel");
SUBS_A(r_xcount, wp->save_xdiv)
branch(ws, B | MI, L(loop_x_repeat), "BMI loop_x_repeat ; discard this pixel");
save_pixel(wp, ws);
save_pixel_inc(wp, ws);
SUB(R(r_xsize), R(r_xsize), S | IMM(1), "SUBS r_xsize,r_xsize,#1 ; count for each output pixel");
branch(ws, B | NE, L(loop_put_pixel_repeat), "BNE loop_put_pixel_repeat");
branch(ws, B, L(loop_x_exit), "B loop_x_exit ; skip code for masked pixels");/* moved from next if (GPS) */
}
}
if (mask_possible)
{
/* Out-of-line code for a masked-out source pixel: advance the input and step
 * over the destination pixels it would have covered, without writing them.
 */
DEFINE_LABEL(l_masked, "This source pixel masked out")
/* Same condition as the optimised plotting loop above, so the two stay paired. */
if (!PLOTMASK && (wp->save_xmag % wp->save_xdiv) == 0 && ((wp->save_xmag / wp->save_xdiv) > 4) && ws->gcol == 0)
{
#if 1
fetch_pixel_inc(wp, ws);
comment(ws, "calculating number of times to plot pixel");
MOV(R(r_temp1), OP2R(R(r_xsize)), "@MOV r_xtemp1, r_xsize ; store r_xsize");
SUB(R(r_xsize), R(r_xsize), S | OP2R(R(r_xcount)), "@SUBS r_xsize, r_xsize, r_xcount ; count output pixels");
MOV(R(r_xsize), MI | IMM(0), "@MOVMI r_xsize, #0 ");
MOV(R(r_xcount), MI | OP2R(R(r_temp1)), "@MOVMI r_xcount, r_temp1 ");
if (!DEST_32_BIT)
{
MOV(R(r_temp1), S | OP2R(R(r_outshift)) | LSRI(27), "@@MOVS r_temp1, r_outshift, LSR #27");
MOV(R(r_temp1), EQ | IMM(32), "@@MOVEQ r_temp1, #32 ; 0 in r_outshift => 32 bits left");
if (!DEST_1_BIT)
MOV(R(r_temp1), OP2R(R(r_temp1)) | LSRI(ws->out_l2bpc), "@@MOV r_temp1, r_temp1, LSR #log2bpc");
CMP(R(r_xcount), OP2R(R(r_temp1)), "@@CMP r_xcount, r_temp1");
branch(ws, B + LT, L(loop1), "@@BLT loop1 ; end of this masked input pixel");
}
skip_current_output_words(wp, ws);
if (DEST_32_BIT)
{
branch(ws, B, L(loop_x_repeat), "1@B loop_x_repeat ; end of this masked input pixel");
}
else
{
TEQ(R(r_xcount), IMM(0), "1@TEQ r_xcount, #0");
branch(ws, B + EQ, L(loop_x_repeat), "1@BEQ loop_x_repeat ; end of this masked input pixel");
DEFINE_LABEL(loop1, "Last word to skip")
skip_some_pixels(wp, ws);
branch(ws, B, L(loop_x_repeat), "1@@B loop_x_repeat ; end of this masked input pixel");
}
#else
/* Disabled: unrolled skip, one save_pixel_inc per output pixel. */
int loop;
fetch_pixel_inc(wp, ws);
for (loop = 0;loop < (wp->save_xmag / wp->save_xdiv);loop++)
{
save_pixel_inc(wp, ws);
SUB(R(r_xsize), R(r_xsize), S | IMM(1), "SUBS r_xsize,r_xsize,#1 ; count output pixels");
branch(ws, B | EQ, L(loop_x_exitskip), "BEQ loop_x_exitskip");
}
branch(ws, B, L(loop_x_repeat), "B loop_x_repeat ; end of this masked input pixel");
#endif
}
else
{
/* General case: same counting as the painting loop but with no save_pixel.
 * When r_xsize reaches 0 this falls through to loop_x_exit below.
 */
fetch_pixel_inc(wp, ws);
DEFINE_LABEL(loop_put_masked_repeat, "Loop around to skip over dest pixels");
SUBS_A(r_xcount, wp->save_xdiv)
branch(ws, B | MI, L(loop_x_repeat), "BMI loop_x_repeat ; end of this masked input pixel");
save_pixel_inc(wp, ws);
SUB(R(r_xsize), R(r_xsize), S | IMM(1), "SUBS r_xsize,r_xsize,#1 ; count output pixels");
branch(ws, B | NE, L(loop_put_masked_repeat), "BNE loop_put_masked_repeat");
}
}
}
DEFINE_LABEL( loop_x_exit, "End of input pixel line")
newline();
if (!DEST_32_BIT)
{
/* Flush: r_outword may hold pixels not yet stored. r_outshift is reduced to a
 * plain bit count here (0 is treated as 32).
 */
comment(ws, "End of x loop - ensure any contents of r_outword are written out.");
MOV(R(r_outshift), S | OP2R(R(r_outshift)) | LSRI(27), "MOVS r_outshift,r_outshift,LSR #27 ; get real output shift distance");
MOV(R(r_outshift), EQ | IMM(32), "MOVEQ r_outshift,#32 "
"; number of useful new bits in r_outword");
if (ws->gcol == 0 && !SOURCE_MASK)
{
/* If setting pixels we must pick up the word we're about to
 * partially overwrite, and combine the new and old pixels.
 */
comment(ws, "The top 32-r_outshift bits of r_outword are new pixels.");
MOV(R(r_outword), OP2R(R(r_outword)) | LSRR(R(r_outshift)),"MOV r_outword,r_outword,LSR r_outshift ; get new pixels in correct place");
ins(ws, LDR(R(r_pixel), R(r_outptr)) | OFFSET(0), "LDR r_pixel,[r_outptr] ; temporary use of r_pixel");
RSB(R(r_outshift), R(r_outshift), IMM(32), "RSB r_outshift,r_outshift,#32");
MOV(R(r_pixel), OP2R(R(r_pixel)) | LSRR(R(r_outshift)), "MOV r_pixel,r_pixel,LSR r_outshift ; shift to clear out old pixels");
ORR(R(r_outword), R(r_outword),
OP2R(R(r_pixel)) | LSLR(R(r_outshift)), "ORR r_outword,r_outword,r_pixel, LSL r_outshift ; combine old and new");
ins(ws, STR(R(r_outword), R(r_outptr)) | OFFSET(0), "STR r_outword,[r_outptr] ; store updated word");
}
else
{
/* r_outword already contains the untouched destination bits: rotate it back
 * into position and store.
 */
MOV(R(r_outword), OP2R(R(r_outword)) | RORR(R(r_outshift)),"MOV r_outword,r_outword,ROR r_outshift");
ins(ws, STR(R(r_outword), R(r_outptr)) | OFFSET(0), "STR r_outword,[r_outptr]");
}
}
}
}
/**************************************************************************
* *
* Bitblit: Overall construction of the Y loop. *
* *
**************************************************************************/
static void loop_y(asm_workspace *wp, workspace *ws, j_decompress_ptr cinfo)
/* Overall control of the code and outer loop.
 *
 * wp    - plot parameters for the operation being compiled.
 * ws    - compiler workspace; derived constants (in_bpp, masks, odither, ...)
 *         are stored into it here and instructions are emitted through it.
 * cinfo - JPEG decompressor state; only dereferenced when wp->is_it_jpeg.
 *
 * Generates the complete routine: entry register save, register allocation,
 * initial values, the per-row loop (which contains the code from loop_x()),
 * y scaling control, and the exit sequence.
 *
 * If the x-loop and y-loop variables do not all fit in registers at once
 * ('onebank' FALSE) the y-loop registers are overlaid on the x-loop ones and
 * swapped via the stack around each row.
 */
{
/* Declare the registers */
int yrn;
int x_loop_save_mask;
int y_loop_save_mask = 0; /* only meaningful (and only used) when !onebank */
int ptrs_save_mask; /* r_inptr, r_outptr, and (if it exists) r_maskinptr */
int x_loop_save_size;
int ptrs_save_size;
BOOL onebank; /* one bank of registers, or two */
#ifdef DEBUG
char xregs[256];
char yregs[256];
char ptrregs[256];
char a[256];
#endif
/* Various useful constants not provided directly by wp. */
newline();
comment(ws, "Various useful constants");
if (DPIXEL_INPUT)
comment(ws, "Double-pixel input - pixels are not the same as double-pixels");
else
comment(ws, "Not double-pixel input - pixels are exactly the same as double-pixels");
ws->in_bpp = 1 << wp->save_inlog2bpp;
ws->in_bpc = 1 << wp->save_inlog2bpc;
/* Shifting an int by its full width is undefined, so 32 bit pixels get an
 * explicit all-ones mask rather than (1 << 32) - 1.
 */
ws->in_pixmask = ws->in_bpp >= 32 ? ~0 : (1 << ws->in_bpp) - 1;
tracef("%t20.in_bpp * %i %t68; bits per input pixel\n" _ ws->in_bpp);
tracef("%t20.in_bpc * %i %t68; bits per input double-pixel ('character')\n" _ ws->in_bpc);
if (ws->in_bpp <= 8) tracef("%t20.in_pixmask * %i %t68; input pixel mask\n" _ ws->in_pixmask);
if (SOURCE_MASK)
{
if (SOURCE_BPPMASK) /* a bit mask */
{
ws->mask_bpp = 1;
ws->mask_bpc = 1;
ws->mask_pixmask = 1;
}
else
{
/* Mask has the same geometry as the image data. */
ws->mask_bpp = ws->in_bpp;
ws->mask_bpc = ws->in_bpc;
ws->mask_pixmask = ws->in_pixmask;
}
tracef("%t20.mask_bpp * %i %t68; bits per mask pixel\n" _ ws->mask_bpp);
tracef("%t20.mask_bpc * %i %t68; bits per mask double-pixel\n" _ ws->mask_bpc);
tracef("%t20.mask_pixmask * %i %t68; mask pixel mask\n" _ ws->mask_pixmask);
}
else
comment(ws, "No input mask");
if (DPIXEL_OUTPUT)
comment(ws, "Double-pixel output - pixels are not the same as double-pixels");
else
comment(ws, "Not double-pixel output - pixels are exactly the same as double-pixels");
ws->out_l2ppw = 5 - ws->out_l2bpc;
ws->out_ppw = 1 << ws->out_l2ppw;
/* As for in_pixmask: avoid an undefined 32 bit shift for 32bpp output. */
ws->out_pixmask = wp->BPP >= 32 ? ~0 : (1 << wp->BPP) - 1;
ws->out_dpixmask = wp->BPC >= 32 ? ~0 : (1 << wp->BPC) - 1;
tracef("%t20.out_bpp * %i %t68; bits per output pixel\n" _ wp->BPP);
tracef("%t20.out_bpc * %i %t68; bits per output double-pixel\n" _ wp->BPC);
tracef("%t20.out_l2bpp * %i %t68; log base 2 of bits per output pixel\n" _ ws->out_l2bpp);
tracef("%t20.out_l2bpc * %i %t68; log base 2 of bits per output double-pixel\n" _ ws->out_l2bpc);
tracef("%t20.out_ppw * %i %t68; double-pixels per output word\n" _ ws->out_ppw);
tracef("%t20.out_l2ppw * %i %t68; log base 2 of double-pixels per output word\n" _ ws->out_l2ppw);
if (wp->BPC <= 8)
{
tracef("%t20.out_pixmask * %i %t68; output pixel mask\n" _ ws->out_pixmask);
tracef("%t20.out_dpixmask * %i %t68; output double-pixel mask\n" _ ws->out_dpixmask);
}
/* Setting up ordered dither, if required */
if ( !PLOTMASK /* if plotting sprite */
&& ws->in_bpp >= 16 /* from true colour source */
&& wp->BPP < ws->in_bpp /* and losing resolution */
&& (wp->dither_truecolour & 1)
&& !(wp->is_it_jpeg && (wp->dither_truecolour & 2))
)
{
tracef("in dither_truecolour = %x\n" _ wp->dither_truecolour);
comment(ws, "Ordered dither being used");
/* If not 0 then ws->odither is the number of bits - 1 being truncated from 8-bit source colour values */
if (wp->BPP == 16) /* dithering down from 32 bit to 16 bit */
ws->odither = 2;
else /* dithering down from 16 or 32 bit, to 1/2/4/8 bit. */
{
if (ws->out_l2bpp == 3) /* 8bpp */
{
if (wp->is_it_jpeg && cinfo->jpeg_color_space == JCS_GRAYSCALE)
ws->odither = 3; /* dither assuming 4 bits of grey represented */
else
ws->odither = 4; /* seems to work better for colour than 3, which is what you might expect if
 * you were assuming 4 bits of colour per gun. In other words, the tint is NOT
 * effective enough at representing the next two bits of colour output!
 * If the source is known to be greyscale then 3 is a better value.
 */
}
else
ws->odither = 6 - ws->out_l2bpp; /* 6, 5 or 4 for 2, 4, or 16 colour output (2, 4 or 8 grey level) */
}
tracef("%t20.odither_eorvalue * 1:SHL:(24+%i) %t68; value to EOR into r_oditheradd each pixel" _ ws->odither);
}
tracef("out dither_truecolour = %x\n" _ wp->dither_truecolour);
newline();
/* Entry sequence: 0x5fff selects r0-r12 and lr. */
ins(ws, PUSH | 0x5fff, "STMDB sp!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} ; save entry registers");
newline();
comment(ws, "Register declarations");
if (wp->is_it_jpeg)
{
ws->leave_r12_alone = TRUE;
comment(ws, "Leave r12 unallocated, it contains the assembler module workspace pointer");
}
/* Pointer registers are allocated first, so they occupy the lowest register
 * numbers and the mask below covers exactly them.
 */
ptrs_rn(wp, ws);
ptrs_save_mask = (1<<ws->next_free_reg) - 1;
IFDEBUG(ldm_reg_list(ws, ptrregs, ptrs_save_mask, FALSE);)
ptrs_save_size = SOURCE_BPPMASK || PLOTMASK ? 12 : 8;
if (ws->odither) ptrs_save_size += 4;
/* x-loop registers follow the pointer registers. */
xloop_rn(wp, ws);
x_loop_save_mask = ((1<<ws->next_free_reg) - 1) & ~ptrs_save_mask;
x_loop_save_size = 4 * ws->next_free_reg - ptrs_save_size; /* size in bytes, used right at the end */
/* Of the x-loop variables, no need to save r_inword/outword/maskinword/temp1/temp2 - set up every time round */
if (ws->regnames.r_inword.regno != -1) {x_loop_save_mask &= ~(1 << ws->regnames.r_inword.regno); x_loop_save_size -= 4;}
if (ws->regnames.r_outword.regno != -1) {x_loop_save_mask &= ~(1 << ws->regnames.r_outword.regno); x_loop_save_size -= 4;}
if (ws->regnames.r_maskinword.regno != -1) {x_loop_save_mask &= ~(1 << ws->regnames.r_maskinword.regno); x_loop_save_size -= 4;}
if (ws->regnames.r_temp1.regno != -1) {x_loop_save_mask &= ~(1 << ws->regnames.r_temp1.regno); x_loop_save_size -= 4;}
if (ws->regnames.r_temp2.regno != -1) {x_loop_save_mask &= ~(1 << ws->regnames.r_temp2.regno); x_loop_save_size -= 4;}
IFDEBUG(ldm_reg_list(ws, xregs, x_loop_save_mask, FALSE);)
/* Decide whether the y-loop registers fit alongside the x-loop ones. */
yrn = yloop_rn_count(wp, ws);
onebank = yrn + ws->next_free_reg + ws->leave_r12_alone <= 13;
comment(ws, onebank ? "The y loop variables will fit in registers too"
: "The y loop variables are overlaid on the x ones");
if (!onebank) ws->next_free_reg = 4; /* Overlay the x-loop register allocations - but not ptr registers */
yloop_rn(wp, ws);
if (!onebank) /* If two banks, be prepared to do LDM/STM for the y-loop bank */
{
y_loop_save_mask = ((1<<ws->next_free_reg) - 1) & 0xfffffff0; /* not regs 0..3 */
IFDEBUG(ldm_reg_list(ws, yregs, y_loop_save_mask, TRUE);)
}
newline();
comment(ws, "Load up initial values of x-loop variables");
fetch_pixel_init(wp, ws);
save_pixel_init(wp, ws);
xloop_init(wp, ws);
tracef("%t20.x_loop_save_size * %t28%i %t68.; Bytes of stack for x-loop variables\n" _ x_loop_save_size);
tracef("%t20.ptrs_save_size * %t28%i %t68.; Bytes of stack for ptr variables\n" _ ptrs_save_size);
comment(ws, "Save x-loop and pointer variables on the stack");
IFDEBUG(do_sprintf(a, "STMDB sp!,{%s,%s}", ptrregs,xregs);)
/* Added by (GPS) to get round spilled reg bug. */
if(ws->odither && SOURCE_16_BIT)
{
/* For this push only, r_pixel takes r_xcount's place in the register list;
 * the mask is put back afterwards so later loads use the normal list. The
 * number of registers pushed, and so x_loop_save_size, is unchanged.
 */
tracef("x_loop_save_mask = %x\n" _ x_loop_save_mask);
x_loop_save_mask &= ~(1<<(ws->regnames.r_xcount.regno));
x_loop_save_mask |= (1<<(ws->regnames.r_pixel.regno));
tracef("x_loop_save_mask = %x\n" _ x_loop_save_mask);
ins(ws, PUSH | x_loop_save_mask | ptrs_save_mask, a);
x_loop_save_mask |= (1<<(ws->regnames.r_xcount.regno));
x_loop_save_mask &= ~(1<<(ws->regnames.r_pixel.regno));
comment(ws, "r_pixel pushed instead of x-count");
#ifdef DEBUG
tracef("x_loop_save_mask = %x\n" _ x_loop_save_mask);
#endif
}
else
{
ins(ws, PUSH | x_loop_save_mask | ptrs_save_mask, a);
}
/* end added code...*/
newline();
comment(ws, "Load up initial values of y-loop variables");
yloop_init(wp, ws);
if(ws->odither && SOURCE_16_BIT)
{
/* Second half of the (GPS) workaround above. */
MOV(R(r_xcount), OP2R(R(r_pixel)), "MOV r_xcount,r_pixel ; set r_xcount to correct value");
}
if (!simple_y_scale(wp, ws)) /* If not simple scaling, might not paint the first row */
branch(ws, B, L(y_loop_enter), "B y_loop_enter ; enter the main loop");
/* Top of the y-loop */
newline();
DEFINE_LABEL(y_loop, "Loop around for each row")
if (!simple_y_scale(wp, ws))
{
comment(ws, "At this point the ptr registers have been updated but not saved");
IFDEBUG(do_sprintf(a, "STMIA sp,{%s}", ptrregs);)
ins(ws, STMIA(R(sp)) | ptrs_save_mask, a);
}
if (wp->is_it_jpeg)
{
/* For JPEG, r_inptr holds a row number rather than an address; the fetch
 * routine converts it, then the in_x offset is added in units of the source
 * pixel size and (for sub-word pixels) the result is word aligned.
 */
comment(ws, "r_inptr is the source y coord for JPEG data: convert to data pointer");
comment(ws, "fetchroutine uses r_inptr(=r0), r12. On output r_inptr=source result pointer");
MOV(R(lr), OP2R(R(pc)), "MOV lr,pc ; remember return address from fetchroutine");
MOV(R(pc), OP2R(R(r_fetchroutine)), "MOV pc,r_fetchroutine ; get source address");
LDR_WP_C(lr, in_x, "returned value is for base of line - add initial offset")
if (wp->save_inlog2bpp < 5)
{
if (wp->save_inlog2bpp == 3)
ADD(R(r_inptr),R(r_inptr),OP2R(R(lr)), "ADD r_inptr,r_inptr,lr ; add in_x as byte offset");
else
ADD(R(r_inptr),R(r_inptr),OP2R(R(lr)) | LSLI(1), "ADD r_inptr,r_inptr,lr,LSL#1 ; add in_x as halfword offset");
BIC(R(r_inptr),R(r_inptr),IMM(3), "BIC r_inptr,r_inptr,#3 ; r_inptr is a word pointer");
}
else
ADD(R(r_inptr),R(r_inptr),OP2R(R(lr)) | LSLI(2), "ADD r_inptr,r_inptr,lr,LSL#2 ; add in_x as word offset");
}
if (!onebank)
{
/* the x-loop variables are already set up, with inptr/outptr/maskinptr saved at new values */
/* lr is pointed at the saved x-loop block BEFORE the push moves sp. */
ADD(R(lr), R(sp), IMM(ptrs_save_size), "ADD lr,sp,#ptrs_save_size");
IFDEBUG(do_sprintf(a, "STMDB sp!,{%s} %t40; push y-loop variables", yregs);)
ins(ws, PUSH | y_loop_save_mask, a);
IFDEBUG(do_sprintf(a, "LDMIA lr,{%s} %t40; load x-loop variables", xregs);)
ins(ws, LDMIA(R(lr)) | x_loop_save_mask, a); /* Reload the x-loop variables */
}
newline();
/* Generate the inner loop. */
loop_x(wp, ws);
/* Suitable register 'bank' swapping. */
if (onebank)
{
IFDEBUG(do_sprintf(a, "LDMIA sp,{%s,%s} %t40; reload x-loop and ptr registers", ptrregs, xregs);)
ins(ws, LDMIA(R(sp)) | x_loop_save_mask | ptrs_save_mask, a);
}
else
{
IFDEBUG(do_sprintf(a, "LDMIA sp!,{%s} %t40; pop y-loop variables", yregs);)
ins(ws, POP | y_loop_save_mask, a);
newline();
comment(ws, "Reload pointers to the start of a row");
IFDEBUG(do_sprintf(a, "LDMIA sp,{%s} %t40; reload ptr registers", ptrregs);)
ins(ws, LDMIA(R(sp)) | ptrs_save_mask, a);
}
/* Control of scaling in the y direction */
if (simple_y_scale(wp, ws))
{
/* 1:1 - step both source and destination one row, save the pointers, loop. */
comment(ws, "1:1 scaling in y direction - each source row appears once");
if (!PLOTMASK)
{
if (wp->is_it_jpeg)
ADD(R(r_inptr), R(r_inptr), IMM(1), "ADD r_inptr,r_inptr,#1 ; inc y coord of input JPEG data");
else
SUB(R(r_inptr), R(r_inptr), OP2R(R(r_inoffset)), "SUB r_inptr,r_inptr,r_inoffset");
}
SUB_A(r_outptr,wp->save_outoffset) /*SUB r_outptr,r_outptr,#outoffset*/
odither_inc(wp, ws, 1); /* advance to next coord */
odither_inc(wp, ws, 0); /* ensure X coord phase alternates on alternate lines */
if (SOURCE_BPPMASK || PLOTMASK)
SUB(R(r_maskinptr), R(r_maskinptr),
OP2R(R(r_maskinoffset)), "SUB r_maskinptr,r_maskinptr,r_maskinoffset");
IFDEBUG(do_sprintf(a, "STMIA sp,{%s} %t40.; Save updated ptr registers", ptrregs);)
ins(ws, STMIA(R(sp)) | ptrs_save_mask, a);
SUB(R(r_ysize), R(r_ysize), S | IMM(1), "SUBS r_ysize,r_ysize,#1 ; decrement output pixel size");
branch(ws, B | GT, L(y_loop), "BGT y_loop");
}
else
{
/* Scaled: after each output row step the destination; r_ycount loses ydiv per
 * output row and gains ydiv+yadd each time the source advances a row.
 */
SUB(R(r_ysize), R(r_ysize), S | IMM(1), "SUBS r_ysize,r_ysize,#1");
branch(ws, B | LE, L(y_loop_exit), "BLE y_loop_exit");
SUB_A(r_outptr,wp->save_outoffset) /*SUB r_outptr,r_outptr,#outoffset*/
odither_inc(wp, ws, 1);
odither_inc(wp, ws, 0);
if (PLOTMASK)
{
comment(ws, "Advance ECF pointer");
LDR_WP(r_pixel, save_ecflimit); /*LDR r_pixel,save_ecflimit*/
CMP(R(r_inptr), OP2R(R(r_pixel)), "CMP r_inptr,r_pixel ; check for bottom of ECF");
ADD(R(r_inptr), R(r_inptr), EQ | IMM(64), "ADDEQ r_inptr,r_inptr,#64 ; and if reached, reset to top");
SUB(R(r_inptr), R(r_inptr), IMM(8), "SUB r_inptr,r_inptr,#8 ; points to base of current row of ECF");
}
comment(ws, "Control of scaling in y direction");
DEFINE_LABEL( y_loop_enter, "Initial entry into the loop")
SUBS_A(r_ycount, wp->save_ydiv) /*SUBS r_ycount,r_ycount,#ydiv*/
branch(ws, B | PL, L(y_loop), "BPL y_loop ; if count>=0 then B else next source row");
if (!PLOTMASK)
{
if (wp->is_it_jpeg)
ADD(R(r_inptr), R(r_inptr), IMM(1), "ADD r_inptr,r_inptr,#1 ; inc y coord of source JPEG data");
else
SUB(R(r_inptr), R(r_inptr), OP2R(R(r_inoffset)), "SUB r_inptr,r_inptr,r_inoffset ; next source row");
}
if (SOURCE_BPPMASK || PLOTMASK)
SUB(R(r_maskinptr), R(r_maskinptr),
OP2R(R(r_maskinoffset)), "SUB r_maskinptr,r_maskinptr,r_maskinoffset ; advance input mask pointer");
ADD_A(r_ycount, wp->save_ydiv + wp->save_yadd) /*ADD r_ycount,r_ycount,#(ydiv+yadd)*/
branch(ws, B, L(y_loop_enter), "B y_loop_enter ; reenter the main loop");
DEFINE_LABEL(y_loop_exit, "Exit from y loop")
}
newline();
comment(ws, "Discard workspace, restore registers, and exit");
/* Drop the saved pointer and x-loop variables, then undo the entry push. */
ADD(R(sp), R(sp), IMM(x_loop_save_size+ptrs_save_size), "ADD sp,sp,#x_loop_save_size+ptrs_save_size ; discard saved x-loop variables");
ins(ws, POP | 0x5fff, "LDMIA sp!,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} ; restore, exit");
MOV(R(pc), OP2R(R(lr)), "MOV pc, lr");
}
/**************************************************************************
* *
* Bitblit: The main compiler entry points. *
* *
**************************************************************************/
static blitter find_or_compile_code(asm_workspace *wp, workspace *ws, j_decompress_ptr cinfo)
/* Return a blitter for the plot described by the workspaces. A buffer that
 * was compiled earlier for the same option key, scale factors and output
 * offset is reused; otherwise new code is generated into
 * ws->buffers[ws->build_buffer] and the instruction cache is synchronised.
 * cinfo is only consulted when wp->is_it_jpeg is set.
 */
{
  code_buffer *buf;
  int key_word;

  /* Pack the three small numeric options into the low bits of the key. */
  key_word  = wp->save_inlog2bpp;       /* 0..2 */
  key_word += (ws->out_l2bpp << 3);     /* 3..5 */
  key_word += (ws->gcol << 6);          /* 6..8 */

  /* One flag bit for every yes/no choice the code generator looks at. */
  if (SOURCE_MASK)
  {
    key_word |= 1<<9;
  }
  if (SOURCE_BPPMASK)
  {
    key_word |= 1<<10;
  }
  if (wp->trns_palette != 0)
  {
    key_word |= 1<<11;
  }
  if (wp->ColourTTR != 0)
  {
    key_word |= 1<<12;
  }
  if (wp->BPP != wp->BPC)
  {
    key_word |= 1<<13;
  }
  if (wp->save_inlog2bpp != wp->save_inlog2bpc)
  {
    key_word |= 1<<14;
  }
  if (PLOTMASK)
  {
    key_word |= 1<<15;
  }
  if (wp->cal_table)
  {
    key_word |= 1<<18;
    if (ws->cal_table_simple)
    {
      key_word |= 1<<19;
    }
    if (wp->cal_table->tablecount == 3)
    {
      key_word |= 1<<20;
    }
  }
#ifdef ASMjpeg
  /* JPEG-only flags: source kind, greyscale colour space, dither options. */
  if (wp->is_it_jpeg)
  {
    key_word |= 1<<16;
    if (cinfo->jpeg_color_space == JCS_GRAYSCALE)
    {
      key_word |= 1<<17;
    }
    if (wp->dither_truecolour & 1)
    {
      key_word |= 1<<21;
    }
    if (wp->dither_truecolour & 2)
    {
      key_word |= 1<<22;
    }
  }
#endif

  tracef("Searching for compiled code for key_word=%x, scale=%i:%i,%i:%i outoffset=%x.\n" _
         key_word _ wp->save_xadd - wp->save_xdiv _ wp->save_xdiv _ wp->save_yadd _ wp->save_ydiv _ wp->save_outoffset);
  tracef("simple_x_scale=%s x_block_move=%s jpeg=%s calibration table=0x%x\n"
         _ whether(simple_x_scale(wp, ws))
         _ whether(x_block_move(wp, ws))
         _ whether(wp->is_it_jpeg)
         _ wp->cal_table);

  /* Reuse an earlier compilation when everything it depended on matches. */
  FOR_EACH_BUFFER(buf)
  {
    int same_x = (buf->xadd == wp->save_xadd) && (buf->xdiv == wp->save_xdiv);
    int same_y = (buf->yadd == wp->save_yadd) && (buf->ydiv == wp->save_ydiv);

    if (buf->key_word == key_word && same_x && same_y
        && buf->outoffset == wp->save_outoffset)
    {
      tracef("Found existing compiled code in buffer %x.\n" _ buf);
      return (blitter)buf->code;
    }
  }

  /* Nothing suitable: claim the build buffer and record its parameters. */
  buf = &ws->buffers[ws->build_buffer];
  buf->key_word  = -1; /* Not set unless we complete the compilation - see below */
  buf->xadd      = wp->save_xadd;
  buf->xdiv      = wp->save_xdiv;
  buf->yadd      = wp->save_yadd;
  buf->ydiv      = wp->save_ydiv;
  buf->outoffset = wp->save_outoffset;
  tracef("Compiler initialised for buffer at %x.\n" _ buf);

  /* Generate the code; the key is only stored once compilation finished. */
  compile_buffer_init(wp, ws);
  loop_y(wp, ws, cinfo);
  compile_buffer_done(ws);
  buf->key_word = key_word;

  /* Freshly written instructions - synchronise the I cache over the
   * buffer (the end address is inclusive) before anyone calls them.
   */
  _swix(OS_SynchroniseCodeAreas, _IN(0) | _IN(1) | _IN(2),
        1,
        (int)ws->compile_base,
        (int)ws->compile_base + ((BUFSIZE - 1 /* Inclusive */) * sizeof(int)));

  return (blitter)ws->compile_base;
}
blitter putscaled_compiler(asm_workspace *wp, workspace *ws, workspace *ws_end, int gcol)
/* Main entrypoint from the assembler.
 *
 * Checks the workspaces, derives the plot options from 'gcol', prepares the
 * JPEG decompressor when the source is JPEG, sanity-checks and simplifies
 * the scale factors, then returns the blitter from find_or_compile_code().
 *
 *   wp     - assembler workspace; several fields are adjusted in place
 *   ws     - C workspace
 *   ws_end - end of the C workspace, asserted to lie above ws
 *   gcol   - bits 0..2 give the GCOL action, bit 3 requests use of the mask
 *
 * wp->BPC and wp->save_inlog2bpc are overridden only while the code is
 * looked up/compiled and are put back before returning.
 */
{
  j_decompress_ptr cinfo = NULL;
  int i, j;
  blitter result;

  /* Check that the assembler has an adequate opinion of our workspace needs. */
  tracef("wp=%x ws=%x ws_end=%x.\n" _ wp _ ws _ ws_end);
  tracef("Size of assembler workspace: %i.\n" _ ((char*)ws) - ((char*)wp));
  tracef("Size of C workspace: %i.\n" _ ((char*)ws_end) - ((char*)ws));
  assert(ws_end > ws, ERROR_FATAL);
  check_workspace(ws);
  IFDEBUG(dump_asm_workspace(wp);)

  /* Split the combined gcol word into action and mask flags. */
  ws->gcol = gcol & 7;
  ws->masked = (gcol & 8) != 0;/* || PLOTMASK;*/
  ws->mask1bpp = ws->masked & (((wp->save_mode) >> 27) != SpriteType_Old);
  ws->odither = FALSE; /* Set more carefully later. */
  tracef("gcol=%i (& 7 = %i) %t32. GCOL action - 0 for plot, 1..7 for various others.\n" _ gcol _ gcol & 7);
  tracef("masked=%s %t32. whether to use mask.\n" _ whether(ws->masked));
  tracef("1bpp mask=%s %t32. whether mask is new format.\n" _ whether(ws->mask1bpp));

#ifdef ASMjpeg
  if (wp->is_it_jpeg)
  {
    sprite_header *s = wp->save_sprite;
    int *compress_id_word = (int*)((char*) s + s->image); /* The first word of the sprite data */
    char *jpeg_data;
    int jpeg_data_size, jpeg_ws_size;
    int opt, err, xmax;

    /* The pseudo-sprite carries -1, then data pointer, data size, workspace size. */
    assert(compress_id_word[0] == -1, ERROR_BAD_JPEG);
    tracef("This JPEG sprite was constructed by PutJPEGScaled\n");
    jpeg_data = (char*)compress_id_word[1];
    jpeg_data_size = compress_id_word[2];
    jpeg_ws_size = compress_id_word[3];
    check_jpeg_workspace(wp, jpeg_ws_size);
    cinfo = wp->jpeg_info_ptr;
    assert(wp->save_inlog2bpp == 5, ERROR_FATAL); /* 32bpp source */
    assert(!SOURCE_MASK, ERROR_FATAL); /* no mask */
    tracef("JPEG, initial source coords are %i,%i.\n" _ wp->in_x _ wp->in_y);
    if ((wp->save_mode >> 27) == 0)
    {
      /* Old-style mode - make sure no translation table present. */
      wp->ColourTTR = 0; /* >>>> mainly for JPEG on RO3 */
      wp->trns_palette = 0; /* >>>> mainly for JPEG on RO3 */
    }

    /* Deduce the decompression options */
    opt = jpeg_decompressor_opts(cinfo, wp);

    /* Reverse scaling calculation */
    xmax = wp->in_x + 2 + (wp->save_xsize * wp->save_xdiv) / (wp->save_xadd - wp->save_xdiv);
    if (xmax < 0) xmax = s->width; /* set safe xmax if reverse scale calculation overflowed */

    /* Initialise the decompressor */
    err = jpeg_scan_file(cinfo, jpeg_data, jpeg_data_size, wp->in_x, xmax, -1, -1, opt);
    assert(err == 0, ERROR_BAD_JPEG);

    /* Check the decompressor agreed with proposed output options */
    if (cinfo->error_argument1 & (jopt_OUTBPP_8 | jopt_OUTBPP_8YUV | jopt_OUTBPP_8GREY)) /* we asked for it, and we got it - 8bpp output pixels */
    {
      tracef("actually doing new shiny 8BPP plotting technique\n");
      wp->save_inlog2bpp = wp->save_inlog2bpc = 3;
      wp->ColourTTR = 0;
    }
    else
    {
      if (cinfo->error_argument1 & jopt_OUTBPP_16) /* we asked for it, and we got it - 16bpp output pixels */
        wp->save_inlog2bpp = wp->save_inlog2bpc = 4;
    }
  }
#endif

#ifdef DEBUG
  /* Additional mask tracing */
  if (PLOTMASK)
  {
    char *p;
    int *ecf = (int*) wp->save_ecflimit;

    tracef("Sprite data:\n");
    p = (char*) wp->save_inptr;
    for (i = 0; i < 16; i++)
    {
      tracef("%x" _ p);
      for (j = 0; j < 16; j++) tracef(" %2x" _ p[j]);
      newline();
      p -= wp->save_inoffset; /* step the byte pointer on to the next source row */
    }
    tracef("Mask data:\n");
    p = (char*) (SOURCE_BPPMASK ? wp->save_maskinptr : (int) wp->save_inptr + wp->save_masko);
    for (i = 0; i < 16; i++)
    {
      tracef("%x" _ p);
      for (j = 0; j < 16; j++) tracef(" %2x" _ p[j]);
      newline();
      p -= wp->save_inoffset;
    }
    tracef("ECF pattern:\n");
    for (i = 0; i <= 8; i++)
      tracef("%x: %c %x %x\n" _ ecf + 2*i _ (ecf+2*i == (int*)wp->save_ecfptr ? '>' : ' ') _ ecf[2*i] _ ecf[2*i + 1]);
  }
#endif

  if (wp->cal_table)
  {
    calibration_table *t = wp->cal_table;

    /* "Simple" means ideal black/white are the defaults and there is no post-process SWI. */
    ws->cal_table_simple = t->idealblack == 0 && t->idealwhite == 0xffffff00 && t->postprocessSWI == 0;
#ifdef DEBUG
    /* The table address is passed first so that it lines up with the leading
     * "0x%x"; without it every later field is shifted by one and the final
     * %s is handed something that is not a string.
     */
    tracef("Calibration table at 0x%x: version=%i idealblack=0x%x idealwhite=0x%x postprocessSWI=0x%x tablecount=%i simple=%s.\n"
           _ t _ t->version _ t->idealblack _ t->idealwhite _ t->postprocessSWI _ t->tablecount _ whether(ws->cal_table_simple));
    for (i = 0; i < 256; i++) tracef(" %i" _ t->redtable[i]); newline();
    if (t->tablecount == 3) for (i = 0; i < 256; i++) tracef(" %i" _ t->greentable[i]); newline();
    if (t->tablecount == 3) for (i = 0; i < 256; i++) tracef(" %i" _ t->bluetable[i]); newline();
#endif
    assert(wp->BPP == 32, ERROR_FATAL); /* only to 32 bit dest */
    assert(wp->save_inlog2bpp >= 4, ERROR_FATAL); /* only from 16 or 32 bit source */
    assert(!SOURCE_TABLE, ERROR_FATAL); /* there isn't room for a calibration table and another table - they share r_table */
    assert(t->version == 0, ERROR_FATAL); /* check version number of lookup table */
  }

  /* Compute l2bpp from BPP of output - all we're given. */
  i = 0;
  j = wp->BPP;
  while (j > 1)
  {
    j = j >> 1; i++;
  }
  ws->out_l2bpp = i;
  if (wp->BPP != wp->BPC) i++;
  ws->out_l2bpc = i;

  /* If using a palette, ignore any translation table */
  if (wp->trns_palette != 0) wp->ColourTTR = 0;

  /* Simplify scale factors - >>> is this useful? Helps spot 1:1 scaling I guess? */
  /* All four factors must be positive. The y loop subtracts ydiv from the
   * count and, on going negative, adds back ydiv+yadd, so a zero yadd would
   * leave the generated code unable to make progress.
   */
  assert(wp->save_xadd > 0, ERROR_FATAL);
  assert(wp->save_xdiv > 0, ERROR_FATAL);
  assert(wp->save_yadd > 0, ERROR_FATAL);
  assert(wp->save_ydiv > 0, ERROR_FATAL);
  /* Halve everything while all the related values remain even. */
  while ((wp->save_xadd & 1) == 0 &&
         (wp->save_xdiv & 1) == 0 &&
         (wp->save_xcount & 1) == 0 &&
         (wp->save_xmag & 1) == 0)
  {
    wp->save_xadd >>= 1; wp->save_xdiv >>= 1;
    wp->save_xcount >>= 1; wp->save_xmag >>=1;
  }
  while ((wp->save_yadd & 1) == 0 &&
         (wp->save_ydiv & 1) == 0 &&
         (wp->save_ycount & 1) == 0)
  {
    wp->save_yadd >>= 1; wp->save_ydiv >>= 1;
    wp->save_ycount >>= 1;
  }

  /* Look for unit translation table */
#ifdef DEBUG
  if (wp->ColourTTR != 0 && wp->BPP == (1<<wp->save_inlog2bpp)) /* only if table, and depth matches */
  {
    char *t = (char*) wp->ColourTTR;
    BOOL same = TRUE;
    int size = 1 << (1 << (wp->save_inlog2bpp == 5 ? 4 : wp->save_inlog2bpp));

    if (wp->save_xsize * wp->save_ysize > size) /* Unless huge table for tiny sprite */
    {
      for (i = 0; i < size; i++)
        if (t[i] != i) {same = FALSE; break;}
      if (same)
      {
        tracef("Unit translation table - discarded\n");
        wp->ColourTTR = 0;
        assert(0, ERROR_FATAL); /* These are now zapped by the assembler, so they shouldn't ever turn up. */
      }
    }
  }
#endif

  /* Precise handling of double-pixel modes by the surrounding code is still unclear to me!
   * When it enters this code bpc!=bpp can still be the case, but it seems that the actual
   * value of bpc is best ignored, it has all been frigged into the scale factors. Avoid
   * this issue for now, but note that we must set the values back afterwards because they
   * can be reused on the next sprite plot, if the source sprite mode word is the same.
   */
  i = wp->BPC;
  j = wp->save_inlog2bpc;
  wp->BPC = wp->BPP;
  wp->save_inlog2bpc = wp->save_inlog2bpp;
  result = find_or_compile_code(wp, ws, cinfo);
  wp->BPC = i;
  wp->save_inlog2bpc = j;
  return result;
}
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment