; Copyright 1996 Acorn Computers Ltd
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;     http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;
; Assembler decoding of huffman stream.

; 21-Sep-93 WRS - started.
; 29-Oct-93 WRS - handling of EOI marker added, usage found in ONE JPEG file
;                 among hundreds (xuxa2/jpg, I think it was).
; 09-Nov-93 WRS - tried a few speedups:
;                   write out the fill_her_up loop in main AC-fetch loop
;                   swap args of MUL to avoid late termination for -ve args
;                 could also try:
;                   zag array deserves a register (would save 1)
;                 But, no measurable effect so regressed to previous version.

; Structure/algorithm generally follows that of the Indep JPEG gp's release 4 code,
; except that an additional lookup table is used to decode huffman codes up to
; 8 bits big in a single step. Some of the comments in this assembler refer to
; variable names etc. in the IJG4 C code.

; Reading the huffman stream, and skipping over it without generating an answer,
; are separated into two routines to speed up the skipping operation.

        GET     jhdr.s

; Register allocation used throughout this file.
; Scratch registers. h_code and h_l are aliases that share r1 and r2 with
; h_temp2 and h_temp3; they are only live inside the slow-path huffman decode.
h_temp          RN      0
h_temp2         RN      1
h_code          RN      1               ; used in huff_decode - same as temp2
h_temp3         RN      2
h_l             RN      2               ; used in huff_decode - same as temp3

; State of the input bit stream.
h_inptr         RN      3               ; address of the next input byte
h_inword        RN      4               ; bit buffer, unread bits held at the TOP
h_incount       RN      5               ; number of unread bits in h_inword

; Pointers into the HUFF_TBL currently selected (DC or AC).
h_maxcode       RN      6               ; points into huff tables
h_huffval       RN      7               ; points into huff tables
h_shortcut      RN      8               ; points into huff tables

; Per-block state.
h_quanttbl      RN      9               ; quantisation table, addressed from its end
h_block         RN      10              ; block currently being written
h_k             RN      11              ; coefficient index, counts up from -63
h_topbit        RN      12              ; holds the constant &80000000

; --------------------------------------------------------------------
; Accessing a HUFF_TBL structure.
; HUFF_DECODE_SETUP $tbl
; Select a huffman table (AC or DC) for the decode macros.
; $tbl is a register holding a HUFF_TBL*. It is only read, so it may be
; any register other than the three pointer registers written here.
        MACRO
        HUFF_DECODE_SETUP $tbl
        ADD     h_maxcode,$tbl,#huff_tbl_maxcode        ; -> maxcode array
        ADD     h_shortcut,$tbl,#huff_tbl_shortcut      ; -> shortcut array
        ADD     h_huffval,$tbl,#huff_tbl_huffval        ; -> huffval array
        MEND

; -------------------------------------------------------------------
; Bit extraction.

        MACRO
        Huff_getbits $n
        ; Take the next $n bits from the stream and leave them, 'sign
        ; extended', in h_temp.
        ; On entry the h_incount unread bits sit at the TOP of h_inword.
        ; Potentially corrupts h_temp2, h_temp3, lr.
        ; $n is a register and CAN be h_temp (it is read for the last time
        ; before h_temp is written).
        ;
        ; The 'sign extension' required is the IJG one:
        ;   #define huff_EXTEND(x, s)  ((x) < (1 << ((s)-1)) ? \  ; semicolons to stop warnings
        ;                               (x) + (-1 << (s)) + 1 : \ ; only! Remove these three
        ;                               (x))                      ; if you want the macro!!!
        ; ie top bit set -> value stays unaltered
        ;    else        -> set all bits above and add 1
        ; (this is because we KNOW the top bit is significant, so simple sign
        ; extension would waste possible combinations.)
        ;
        ; Method: flip the top bit of the field, arithmetic-shift it down so a
        ; field whose top bit was clear comes out negative, flip that bit back
        ; in its new position, then add 1 to the negative results.
        CMP     h_incount,$n                    ; enough bits buffered?
        BLLT    huff_fill_her_up                ; no - top up h_inword first
        EOR     h_temp3,h_inword,h_topbit       ; copy of inword with top bit inverted
        RSB     h_temp2,$n,#32                  ; shift distance, 32-n
        MOV     h_inword,h_inword,LSL $n        ; discard the n bits from the buffer
        SUB     h_incount,h_incount,$n          ; and account for them
        MOVS    h_temp3,h_temp3,ASR h_temp2     ; field at the bottom, sign = inverted top bit
        EOR     h_temp,h_temp3,h_topbit,LSR h_temp2 ; undo the inversion of that bit
        ADDLT   h_temp,h_temp,#1                ; flags are still those of the MOVS: negative -> add one
        ; result is in h_temp.
        MEND

        MACRO
        Huff_skipbits $n
        ; Discard the next $n bits of the stream. Same bookkeeping as
        ; Huff_getbits, but no value is produced; used when skipping forward
        ; in the huffman stream.
        CMP     h_incount,$n                    ; enough bits buffered?
        BLLT    huff_fill_her_up                ; no - top up h_inword first
        MOV     h_inword,h_inword,LSL $n        ; discard the n bits from the buffer
        SUB     h_incount,h_incount,$n          ; and account for them
        MEND

; To fill up h_inword with at least 24 bits of input.
; DO NOT CALL if h_incount already >= 24.
; Corrupts h_temp2, h_temp3, lr but NOT h_temp.
; Each byte fetched is ORed in directly below the bits already held at the
; top of h_inword. A byte of 255 gets special treatment: see
; huff_check_stuff_byte.
huff_fill_her_up
        LDRB    h_temp3,[h_inptr],#1
        ADD     h_incount,h_incount,#8          ; get another 8 bits
        ORR     h_inword,h_inword,h_temp3,ROR h_incount ; combine into inword
        CMP     h_temp3,#255                    ; check for stuffed byte
        BEQ     huff_check_stuff_byte
huff_checked_stuff_byte
        CMP     h_incount,#24                   ; since we're here, do as many as poss.
        BLE     huff_fill_her_up
        MOV     pc,lr

; A 255 byte has just been added to h_inword. If a zero byte follows, the 255
; was data and the zero is simply dropped.
huff_check_stuff_byte
        LDRB    h_temp3,[h_inptr],#1
        CMP     h_temp3,#0
        BEQ     huff_checked_stuff_byte
        ; It COULD be a restart marker - FFs and then one of RST0..RST7, (&D0..&D7).
        ; If this is found then don't read it, but return from huff_fill_her_up without
        ; filling it as much as was requested.
        ; If this is a well-formed stream then we won't actually need more than there
        ; are before the restart marker appears.
        MOV     h_temp2,#255
        EOR     h_inword,h_inword,h_temp2,ROR h_incount ; remove the 255 from the input buffer.
        SUB     h_incount,h_incount,#8          ; remove the bits from the count in bits_left
        SUB     h_temp2,h_inptr,#2              ; remember start pos in input stream.
huff_check_restart_loop
        CMP     h_temp3,#255                    ; more 255s are legal at this point.
        LDREQB  h_temp3,[h_inptr],#1            ; if found, try next byte
        BEQ     huff_check_restart_loop         ; and go round
        CMP     h_temp3,#&D9                    ; EOI (end of image) marker?
        ANDNE   h_temp3,h_temp3,#&F8            ; else, only check top bits:
        CMPNE   h_temp3,#&D0                    ; is it a restart marker?
        MOVEQ   h_inptr,h_temp2                 ; if so, put h_inptr back on the 255, as if nothing had happened
        MOVEQ   pc,lr                           ; and return.
        ; Looks like bad data. There's just one other possibility, that
        ; we're at the very end of the input data and that this part will
        ; never actually be read (we are reading ahead a byte or two, after all).
        ; So, check for the end of the input buffer and if beyond it, keep quiet
        ; (trusting that the data will never be used).
        LDR     h_temp2,[sp,#0]                 ; ...cinfo
        LDR     h_temp2,[h_temp2,#cinfo_buffer_end] ; ...cinfo->buffer_end
        SUB     h_temp2,h_temp2,#8              ; in case buffer_end padded up to mul of 4
        CMP     h_temp2,h_inptr                 ; have we read past the end?
        MOVLO   pc,lr                           ; if so then return - we've got enough
                                                ; (addresses, so the comparison is unsigned)
        B       huff_bad_stuff_byte             ; panic exit, bad data

; -------------------------------------------------------------------
; Decoding the huffman stream.

; Decode a symbol from the input stream - answer in h_temp.
; Potentially corrupts h_temp2, h_temp3, lr.
; $lab makes the labels unique for each expansion; every Huff_decode $lab
; needs a matching Huff_decode_leftovers $lab somewhere in the file.
        MACRO
        Huff_decode $lab
        CMP     h_incount,#8
        BLLT    huff_fill_her_up                ; ensure h_inword has enough bits for a shortcut-derived answer
        ; try the shortcut table
        MOV     h_temp3,h_inword,LSR #24        ; get next 8 bits of input
        LDR     h_temp3,[h_shortcut,h_temp3,LSL #2] ; get shortcut value
        ; the shortcut value is in h_temp3 - top byte is number of bits, rest is value
        BIC     h_temp,h_temp3,#&ff000000       ; the answer in h_temp
        MOVS    h_temp3,h_temp3,ASR #24         ; the number of bits we've used
        BLT     huff_noshortcut$lab             ; the shortcut entry was -ve - no shortcut.
        SUB     h_incount,h_incount,h_temp3     ; set h_incount correctly
        MOV     h_inword,h_inword,LSL h_temp3   ; the bit source
huff_decodeexit$lab
        ; the value is in 'temp' - return.
        MEND

; The slow path of Huff_decode $lab, for codes longer than 8 bits. It is
; entered by a branch from the matching Huff_decode and branches back to
; huff_decodeexit$lab, so it must be expanded out of the line of execution.
        MACRO
        Huff_decode_leftovers $lab
huff_noshortcut$lab
        ; According to some stuff on the net, this happens for 3-4% of the huffman codes.
        ; the incoming value is at least 8 bits - could avoid going round the loop first 8 times,
        ; instead of setting h_code and h_l to 0.
        CMP     h_incount,#16
        BLLT    huff_fill_her_up                ; ensure h_inword has enough bits
        MOV     h_l,#8                          ; as though we've been round 8 times already
        MOV     h_code,h_inword,LSR #24         ; first 8 bits
        MOV     h_inword,h_inword,LSL #8        ; shift input by 8 bits - inword done at the end.
        ;
        ; don't use the shortcut table - all the way round the loop.
        ; MOV h_code,#0                         ; loop initialise
        ; MOV h_l,#0
huff_decode_loop$lab
        MOVS    h_inword,h_inword,LSL #1
        ADC     h_code,h_code,h_code            ; code = (code << 1) + get_bit(cinfo);
        ADD     h_l,h_l,#1                      ; l++;
        LDR     h_temp,[h_maxcode,h_l,LSL #2]   ; ...htbl->maxcode[l]
        CMP     h_code,h_temp                   ; while (code > htbl->maxcode[l])
        BGT     huff_decode_loop$lab            ; ... loop
        ; check for garbage input
        CMP     h_l,#16
        BGT     huff_badinput
        ; do the final table lookups.
        ADD     h_temp,h_shortcut,#huff_tbl_valptr-huff_tbl_shortcut ; get valptr table
        LDR     h_temp,[h_temp,h_l,LSL #2]      ; do lookup in valptr table
        ADD     h_temp,h_temp,h_code
        LDRB    h_temp,[h_huffval,h_temp]
        ; advance in the bit stream
        SUB     h_incount,h_incount,h_l
        ; we've finished, return to the main stream
        B       huff_decodeexit$lab
        MEND

; --------------------------------------------------------------------
asm_huff_decode_blocks
;LOCAL void
;huff_decode_blocks (decompress_info_ptr cinfo, JBLOCK block,
;                    HUFF_TBL *dctbl, HUFF_TBL *actbl,
;                    QUANT_TBL_PTR quanttbl, int *last_dc_val, int nblocks)
; r0 = cinfo
; r1 = block pointer
; r2 = HUFF_TBL* dctbl
; r3 = HUFF_TBL* actbl
; [sp] = quanttbl
; [sp,#4] = int *last_dc_val
; [sp,#8] = int nblocks
;
; Zeroes nblocks blocks of 64 words starting at 'block', then decodes one
; block of coefficients into each. Every coefficient is multiplied by its
; quantisation value and stored at its de-zigzagged position.
; *last_dc_val and the bit-stream state held in cinfo are updated.
; The nblocks argument on the caller's stack is used as the loop counter and
; is overwritten. All registers are preserved.

        ; save registers
        STMDB   sp!,{r0-r12,lr}
        ; args 0..3 are at sp
        ; args 4..6 are at [sp,#14*4..16*4]

        ; if nblocks<=0, return
        LDR     r2,[sp,#16*4]                   ; get nblocks
        CMP     r2,#0                           ; if <=0
        LDMLEIA sp!,{r0-r12,pc}                 ; then return

        ; paranoid search for nondeterministic results
        MOV     r3,#0
        MOV     lr,#0

        ; clear out the target block(s) - r1 is the block pointer
        LDR     r2,[sp,#16*4]                   ; nblocks
        MOV     r4,#0                           ; get ready to clear the block
        MOV     r5,#0
        MOV     r6,#0
        MOV     r7,#0
        MOV     r8,#0
        MOV     r9,#0
        MOV     r10,#0
        MOV     r11,#0
huff_block_clear
        ; 8 stores of 8 words each = one 64-word block
        STMIA   r1!,{r4,r5,r6,r7,r8,r9,r10,r11}
        STMIA   r1!,{r4,r5,r6,r7,r8,r9,r10,r11}
        STMIA   r1!,{r4,r5,r6,r7,r8,r9,r10,r11}
        STMIA   r1!,{r4,r5,r6,r7,r8,r9,r10,r11}
        STMIA   r1!,{r4,r5,r6,r7,r8,r9,r10,r11}
        STMIA   r1!,{r4,r5,r6,r7,r8,r9,r10,r11}
        STMIA   r1!,{r4,r5,r6,r7,r8,r9,r10,r11}
        STMIA   r1!,{r4,r5,r6,r7,r8,r9,r10,r11}
        SUBS    r2,r2,#1
        BNE     huff_block_clear

        ; Now scratch all registers except r0=h_temp=cinfo.

        ; now set up registers for the main procedure.
        ; r0=h_temp=cinfo
        LDR     h_inptr,[h_temp,#cinfo_next_input_byte]
        LDR     h_inword,[h_temp,#cinfo_get_buffer]
        LDR     h_incount,[h_temp,#cinfo_bits_left]
        LDR     h_quanttbl,[sp,#14*4]
        ADD     h_quanttbl,h_quanttbl,#64*4     ; indexed from the end, to make termination testing faster
        LDR     h_block,[sp,#4]                 ; block pointer, from the saved r1
        MOV     h_topbit,#&80000000

huff_anotherblock                               ; loop round to here nblocks times
        ; Set up huffman decoding for the DC component.
        LDR     h_temp,[sp,#2*4]                ; HUFF_TBL* dctbl (DC table pointer)
        HUFF_DECODE_SETUP h_temp                ; set maxcode,huffval,shortcut

        ; Handle the DC component
        Huff_decode 0                           ; h_temp = s = huff_DECODE(cinfo, dctbl);
        CMP     h_temp,#0                       ; if (s)
        BEQ     huff_dc_0
        Huff_getbits h_temp                     ; h_temp counts the bits, result in h_temp
                                                ; s = huff_EXTEND(get_bits(cinfo, s), s);
huff_dc_0
        ; h_temp holds the delta to the DC value for this block

        ; Create the absolute DC value
        LDR     h_temp3,[sp,#15*4]              ; int *last_dc_val
        LDR     h_temp2,[h_temp3]               ; ...*last_dc_val
        ADD     h_temp2,h_temp,h_temp2          ; s += *last_dc_val;
        STR     h_temp2,[h_temp3]               ; and write *last_dc_val back

        ; Now multiply by the quantisation value, and store in the block
        LDR     h_temp3,[h_quanttbl,#-64*4]     ; ...quanttbl[0]
        MUL     h_temp,h_temp2,h_temp3          ; ...s * quanttbl[0]
        ; >>>> hack for experimental DCT code, which wants shifted-up inputs
        ; MOV h_temp,h_temp,LSL #16
        STR     h_temp,[h_block]                ; block[0] = (JCOEF) (((JCOEF) s) * quanttbl[0]);
        ; That's the DC value done.

        ; Set up huffman decoding for the AC components.
        LDR     h_temp,[sp,#3*4]                ; HUFF_TBL* actbl (AC table pointer)
        HUFF_DECODE_SETUP h_temp                ; set maxcode,huffval,shortcut

        ; The loop that does AC components, once round for each non-zero component.
        ; h_k runs upwards from -63 and the loop ends when it is no longer
        ; negative; quanttbl and the zag table are both addressed from their ends.
        MOV     h_k,#-63                        ; index into block
huff_loop
        Huff_decode 1                           ; result in r (ie h_temp)
        MOV     h_temp2,h_temp,LSR #4           ; n = r >> 4;
        ANDS    h_temp,h_temp,#15               ; s = r & 15;
        BEQ     huff_else                       ; if (s) {
        ADD     h_k,h_k,h_temp2                 ;   k += n;
        Huff_getbits h_temp                     ;   s = huff_EXTEND(get_bits(cinfo, s), s); in h_temp
        LDR     h_temp2,[h_quanttbl,h_k,LSL #2] ;   ... quanttbl[k]
        MUL     h_temp,h_temp2,h_temp           ;   ... s * quanttbl[k]
        ADR     h_temp2,huff_zag_end
        LDRB    h_temp2,[h_temp2,h_k]           ;   ... ZAG[k]
        ; >>>> hack for experimental DCT code, which wants shifted-up inputs
        ; MOV h_temp,h_temp,LSL #16
        STR     h_temp,[h_block,h_temp2,LSL #2] ;   block[ZAG[k]] = (JCOEF) (((JCOEF) s) * quanttbl[k]);
        ADDS    h_k,h_k,#1
        ; CMP h_k,#63
        BLT     huff_loop
        B       huff_exit
huff_else                                       ; } else {
        CMP     h_temp2,#15                     ;   if (n != 15)
        BNE     huff_exit                       ;     break;
        ADDS    h_k,h_k,#16                     ;   k += 15; (and 1 for the for loop)
        ; CMP h_k,#63
        BLT     huff_loop
huff_exit

        ; That block is completed now.
        ; Have we done enough blocks? If not, do another.
        ADD     h_block,h_block,#64*4           ; advance block pointer
        LDR     h_temp,[sp,#16*4]               ; get nblocks
        SUBS    h_temp,h_temp,#1                ; decrement it
        STRNE   h_temp,[sp,#16*4]               ; if not zero, save new value
        BNE     huff_anotherblock               ; ...and loop.

        ; We are going to exit.
        ; Save the state of the huff stream, and return.
        LDR     h_temp,[sp]                     ; cinfo
        STR     h_inptr,[h_temp,#cinfo_next_input_byte] ; save state of input bit stream
        STR     h_inword,[h_temp,#cinfo_get_buffer]
        STR     h_incount,[h_temp,#cinfo_bits_left]
        LDMIA   sp!,{r0-r12,pc}                 ; exit

; --------------------------------------------------------------------
; The zig-zag reordering table - the coefficients are actually provided
; in a zig-zag ordering to maximise the length of runs of 0, this table
; descrambles this order.
huff_zag
        DCB     0,  1,  8, 16,  9,  2,  3, 10
        DCB     17, 24, 32, 25, 18, 11,  4,  5
        DCB     12, 19, 26, 33, 40, 48, 41, 34
        DCB     27, 20, 13,  6,  7, 14, 21, 28
        DCB     35, 42, 49, 56, 57, 50, 43, 36
        DCB     29, 22, 15, 23, 30, 37, 44, 51
        DCB     58, 59, 52, 45, 38, 31, 39, 46
        DCB     53, 60, 61, 54, 47, 55, 62, 63
huff_zag_end
        ; The decoder indexes this table backwards from huff_zag_end.
        DCB     0,  0,  0,  0,  0,  0,  0,  0   ;extra entries in case k>63
        DCB     0,  0,  0,  0,  0,  0,  0,  0

; --------------------------------------------------------------------
asm_huff_skip_blocks
;LOCAL void
;huff_skip_blocks (decompress_info_ptr cinfo, JBLOCK block,
;                  HUFF_TBL *dctbl, HUFF_TBL *actbl,
;                  QUANT_TBL_PTR quanttbl, int *last_dc_val, int nblocks)
; r0 = cinfo
; r1 = block pointer (UNUSED)
; r2 = HUFF_TBL* dctbl
; r3 = HUFF_TBL* actbl
; [sp] = quanttbl (UNUSED)
; [sp,#4] = int *last_dc_val
; [sp,#8] = int nblocks

; This routine is very similar to huff_decode_blocks, except that
; we do not actually output the block - we simply skip forward that far
; in the huffman stream, updating last_dc_val correctly.
; The nblocks argument on the caller's stack is used as the loop counter and
; is overwritten. All registers are preserved.

        ; save registers
        STMDB   sp!,{r0-r12,lr}
        ; args 0..3 are at sp
        ; args 4..6 are at [sp,#14*4..16*4]

        ; now set up registers for the main procedure.
        ; r0=h_temp=cinfo
        LDR     h_inptr,[h_temp,#cinfo_next_input_byte]
        LDR     h_inword,[h_temp,#cinfo_get_buffer]
        LDR     h_incount,[h_temp,#cinfo_bits_left]
        MOV     h_topbit,#&80000000

        ; if nblocks<=0, return
        LDR     h_temp2,[sp,#16*4]              ; get nblocks
        CMP     h_temp2,#0                      ; if <=0
        LDMLEIA sp!,{r0-r12,pc}                 ; then return

huff_skip_anotherblock                          ; loop round to here nblocks times
        ; Set up huffman decoding for the DC component.
        LDR     h_temp,[sp,#2*4]                ; HUFF_TBL* dctbl (DC table pointer)
        HUFF_DECODE_SETUP h_temp                ; set maxcode,huffval,shortcut

        ; Handle the DC component
        Huff_decode 2                           ; h_temp = s = huff_DECODE(cinfo, dctbl);
        CMP     h_temp,#0                       ; if (s)
        BEQ     huff_skip_dc_0
        Huff_getbits h_temp                     ; h_temp counts the bits, result in h_temp
                                                ; s = huff_EXTEND(get_bits(cinfo, s), s);
huff_skip_dc_0
        ; h_temp holds the delta to the DC value for this block

        ; Create the absolute DC value
        LDR     h_temp3,[sp,#15*4]              ; int *last_dc_val
        LDR     h_temp2,[h_temp3]               ; ...*last_dc_val
        ADD     h_temp2,h_temp,h_temp2          ; s += *last_dc_val;
        STR     h_temp2,[h_temp3]               ; and write *last_dc_val back
        ; ... and throw the value away
        ; That's the DC value done.

        ; Set up huffman decoding for the AC components.
        LDR     h_temp,[sp,#3*4]                ; HUFF_TBL* actbl (AC table pointer)
        HUFF_DECODE_SETUP h_temp                ; set maxcode,huffval,shortcut

        ; The loop that does AC components, once round for each non-zero component.
        MOV     h_k,#-63                        ; index into block
huff_skip_loop
        Huff_decode 3                           ; result in r (ie h_temp)
        MOV     h_temp2,h_temp,LSR #4           ; n = r >> 4;
        ANDS    h_temp,h_temp,#15               ; s = r & 15;
        BEQ     huff_skip_else                  ; if (s) {
        ADD     h_k,h_k,h_temp2                 ;   k += n;
        Huff_skipbits h_temp                    ;   s = huff_EXTEND(get_bits(cinfo, s), s); in h_temp
        ; ... and throw the value away.
        ADDS    h_k,h_k,#1
        BLT     huff_skip_loop
        B       huff_skip_exit
huff_skip_else                                  ; } else {
        CMP     h_temp2,#15                     ;   if (n != 15)
        BNE     huff_skip_exit                  ;     break;
        ADDS    h_k,h_k,#16                     ;   k += 15; (and 1 for the for loop)
        BLT     huff_skip_loop
huff_skip_exit

        ; That block is completed now.
        ; Have we done enough blocks? If not, do another.
        LDR     h_temp,[sp,#16*4]               ; get nblocks
        SUBS    h_temp,h_temp,#1                ; decrement it
        STRNE   h_temp,[sp,#16*4]               ; if not zero, save new value
        BNE     huff_skip_anotherblock          ; ...and loop.

        ; We are going to exit.
        ; Save the state of the huff stream, and return.
        LDR     h_temp,[sp]                     ; cinfo
        STR     h_inptr,[h_temp,#cinfo_next_input_byte] ; save state of input bit stream
        STR     h_inword,[h_temp,#cinfo_get_buffer]
        STR     h_incount,[h_temp,#cinfo_bits_left]
        LDMIA   sp!,{r0-r12,pc}                 ; exit

; Slow paths for the four Huff_decode expansions (0,1 in the decode routine,
; 2,3 in the skip routine). Placed here, after the exit, so that they are
; only ever reached by branching.
        Huff_decode_leftovers 0
        Huff_decode_leftovers 1
        Huff_decode_leftovers 2
        Huff_decode_leftovers 3

; -------------------------------------------------------------------
; Errors in the input stream.
; Should really construct an error message, for the moment do a panic exit.
; We reload the registers to get things like sl back, which gets us the
; module wp again.
;
; Both entry points below are reached with sp still pointing at the 14-word
; register block saved on entry to asm_huff_decode_blocks/asm_huff_skip_blocks.
; They leave through panic_exit with the entry registers restored and
; r0 = panic type (123 = bad huffman code, 124 = bad byte after a 255).

huff_badinput
        ; attempt to get details out there of what the situation was when we went wrong.
        STMDB   sp!,{r0-lr,pc}                  ; dump ALL the registers.
        MOV     r2,#123                         ; r2 = panic type
huff_panic_exit
        ; In: r2 = panic type, sp -> 16-word register dump, with the saved
        ; entry registers immediately above it.
        ADD     r3,sp,#16*4+4*4                 ; point at original saved registers, except r0-r3
        LDMIA   r3,{r4-r12}                     ; restore registers that C wants
        MOV     r4,r2                           ; keep the panic type across the call;
                                                ; relies on the C routine preserving r4 (APCS v1)
        LDR     r0,[r3,#-16]                    ; r0 = cinfo
        MOV     r1,sp                           ; r1 -> saved register block
        BL      assembler_panic                 ; call dump routine, written in C
        MOV     r0,r4                           ; exit code = panic type (was always 123)
        ADD     sp,sp,#16*4+4                   ; drop the dump, and skip the saved r0
        LDMIA   sp!,{r1-r12,lr}                 ; restore the rest of the entry registers
        B       panic_exit                      ; >>>> rationalise exit codes!

huff_bad_stuff_byte
        ; attempt to get details out there of what the situation was when we went wrong.
        STMDB   sp!,{r0-lr,pc}                  ; dump ALL the registers.
        MOV     r2,#124                         ; r2 = panic type
        B       huff_panic_exit                 ; join common code

        END