; Copyright 1996 Acorn Computers Ltd
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;     http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;
; Assembler decoding of huffman stream.

;   21-Sep-93 WRS - started.
;   29-Oct-93 WRS - handling of EOI marker added, usage found in ONE JPEG file
;                   among hundreds (xuxa2/jpg, I think it was).
;   09-Nov-93 WRS - tried a few speedups:
;                     write out the fill_her_up loop in main AC-fetch loop
;                     swap args of MUL to avoid late termination for -ve args
;                   could also try:
;                     zag array deserves a register (would save 1)
;                   But, no measurable effect so regressed to previous version.

; Structure/algorithm generally follows that of the Indep JPEG gp's release 4 code,
; except that an additional lookup table is used to decode huffman codes up to
; 8 bits big in a single step. Some of the comments in this assembler refer to
; variable names etc. in the IJG4 C code.

; Reading the huffman stream, and skipping over it without generating an answer,
; are separated into two routines to speed up the skipping operation.

        GET     jhdr.s

; Register allocation for the huffman decoder.
; h_code and h_l are aliases of h_temp2 and h_temp3 (see the last two lines),
; so they are only meaningful while those temporaries are otherwise unused.
h_temp     RN      0      ; scratch; holds the result of Huff_decode / Huff_getbits
h_temp2    RN      1      ; scratch
h_temp3    RN      2      ; scratch
h_inptr    RN      3      ; address of the next input byte to fetch
h_inword   RN      4      ; bit buffer: unread bits are held at the TOP of this word
h_incount  RN      5      ; number of unread bits currently in h_inword
h_maxcode  RN      6      ; points into huff tables
h_huffval  RN      7      ; points into huff tables
h_shortcut RN      8      ; points into huff tables
h_quanttbl RN      9      ; points just past the END of the quantisation table (base+64*4)
h_block    RN      10     ; output block currently being filled
h_k        RN      11     ; coefficient index, biased to run from -63 up towards 0
h_topbit   RN      12     ; constant &80000000, used by Huff_getbits
h_code     RN      1      ; used in huff_decode - same as temp2
h_l        RN      2      ; used in huff_decode - same as temp3

; --------------------------------------------------------------------
; Accessing a HUFF_TBL structure.

        MACRO
        HUFF_DECODE_SETUP $tbl
        ; set up the specific table pointers for ac or dc huff table
        ; $tbl is a HUFF_TBL*.
        ; Writes h_maxcode, h_shortcut and h_huffval only; the ADDs have no S
        ; suffix so the flags are left alone.
        ; $tbl is read by all three instructions, so it must not be h_maxcode
        ; or h_shortcut (they are overwritten before the last read).
        ADD     h_maxcode,$tbl,#huff_tbl_maxcode       ; -> tbl->maxcode[]
        ADD     h_shortcut,$tbl,#huff_tbl_shortcut     ; -> tbl->shortcut[]
        ADD     h_huffval,$tbl,#huff_tbl_huffval       ; -> tbl->huffval[]
        MEND

; -------------------------------------------------------------------
; Bit extraction.

        MACRO
        Huff_getbits $n
; get $n bits, put them into h_temp sign extended.
; there are h_incount bits left at the TOP of h_inword
; Potentially corrupts h_temp2, h_temp3, lr.
; $n CAN be h_temp.
; $n must be a register (it is used as a register-specified shift count and
; as an RSB operand) and must not be h_temp2 or h_temp3, which are overwritten
; before the last read of $n.
; h_topbit must hold &80000000.
; On exit h_incount has been reduced by $n and h_inword shifted up by $n bits.
;
; Having fetched the bits we must 'sign extend' them according to:
; #define huff_EXTEND(x, s)       ((x) < (1 << ((s)-1)) ? \             ; semicolons to stop warnings
;                                  (x) + (-1 << (s)) + 1 : \            ; only!  Remove these three
;                                  (x))                                 ; if you want the macro!!!
; ie top bit set -> stays unaltered
;             else, set all bits above and add 1
; (this is because we KNOW the top bit is significant, so simple sign extension
; would waste possible combinations.)
;
        CMP     h_incount,$n                           ; we're about to take the top n bits of inword
        BLLT    huff_fill_her_up                       ; make sure inword has at least n bits
        RSB     h_temp2,$n,#32                         ; get ready for shifts by 32-n
        SUB     h_incount,h_incount,$n                 ; update incount for what you're taking
        ; do the 'sign extend'
        EOR     h_temp3,h_inword,h_topbit              ; invert top bit
        MOV     h_inword,h_inword,LSL $n               ; get rid of those bits from inword
        MOVS    h_temp3,h_temp3,ASR h_temp2            ; now sign extend
        EOR     h_temp,h_temp3,h_topbit, LSR h_temp2   ; invert that bit again
        ; MOVS only updates N, Z and C - V is whatever the last compare left
        ; behind - so test the sign bit alone (MI) rather than N<>V (LT).
        ADDMI   h_temp,h_temp,#1                       ; if it was negative, add one.
        ; that's it - result in h_temp.
        MEND

        MACRO
        Huff_skipbits $n
; Exactly like Huff_getbits, but we are not interested in the answer.
; Used for skipping forward in the huffman stream.
; $n is a register holding the number of bits to discard.
; Updates h_incount and h_inword; h_temp is not written by this macro, but
; the refill call may corrupt h_temp2, h_temp3 and lr.
        CMP     h_incount,$n                           ; we're about to take the top n bits of inword
        BLLT    huff_fill_her_up                       ; make sure inword has at least n bits
        SUB     h_incount,h_incount,$n                 ; update incount for what you're taking
        MOV     h_inword,h_inword,LSL $n               ; get rid of those bits from inword
        MEND

; To fill up h_inword with at least 24 bits of input.
; DO NOT CALL if h_incount already >= 24.
; Corrupts h_temp2, h_temp3, lr but NOT h_temp.
;
; Call with BL. Bytes are fetched through h_inptr and merged in below the
; bits already held, looping for as long as h_incount <= 24.
; A byte of 255 followed by 0 is a stuffed 255: the 0 is dropped.
; A 255 followed by anything else is examined as a marker:
;   - RST0..RST7 or EOI: the 255 is taken back out of h_inword, h_inptr is put
;     back so that it addresses the 255 again, and we return WITHOUT having
;     filled as much as usual.
;   - anything else: if we have run past (buffer_end - 8) we return quietly,
;     otherwise control passes to huff_bad_stuff_byte and never comes back.
; The marker path reads cinfo from [sp,#0], so sp must still address the
; register frame pushed on entry to the block routines.
huff_fill_her_up
        LDRB    h_temp3,[h_inptr],#1
        ADD     h_incount,h_incount,#8                ; get another 8 bits
        ORR     h_inword,h_inword,h_temp3,ROR h_incount ; combine into inword
        CMP     h_temp3,#255                          ; check for stuffed byte
        BEQ     huff_check_stuff_byte
huff_checked_stuff_byte
        CMP     h_incount,#24                         ; since we're here, do as many as poss.
        BLE     huff_fill_her_up
        MOV     pc,lr

huff_check_stuff_byte
        LDRB    h_temp3,[h_inptr],#1                  ; byte following the 255
        CMP     h_temp3,#0
        BEQ     huff_checked_stuff_byte               ; 255,0 = a literal 255 - carry on filling

; It COULD be a restart marker - FFs and then one of RST0..RST7, (&D0..&D7).
; If this is found then don't read it, but return from huff_fill_her_up without
; filling it as much as was requested.
; If this is a well-formed stream then we won't actually need more than there
; are before the restart marker appears.
        MOV     h_temp2,#255
        EOR     h_inword,h_inword,h_temp2,ROR h_incount ; remove the 255 from the input buffer.
        SUB     h_incount,h_incount,#8                  ; remove the bits from the count in bits_left
        SUB     h_temp2,h_inptr,#2                      ; remember start pos in input stream.
huff_check_restart_loop
        CMP     h_temp3,#255                            ; more 255s are legal at this point.
        LDREQB  h_temp3,[h_inptr],#1                    ; if found, try next byte
        BEQ     huff_check_restart_loop                 ; and go round
        CMP     h_temp3,#&D9                            ; EOI (end of image) marker?
        ANDNE   h_temp3,h_temp3,#&F8                    ; else, only check top bits:
        CMPNE   h_temp3,#&D0                            ; is it a restart marker?
        MOVEQ   h_inptr,h_temp2                         ; if so, reset h_inptr as if nothing had happened
        MOVEQ   pc,lr                                   ; and return.

; Looks like bad data. There's just one other possibility, that
; we're at the very end of the input data and that this part will
; never actually be read (we are reading ahead a byte or two, after all).
; So, check for the end of the input buffer and if beyond it, keep quiet
; (trusting that the data will never be used).
        LDR     h_temp2,[sp,#0]                       ; ...cinfo
        LDR     h_temp2,[h_temp2,#cinfo_buffer_end]   ; ...cinfo->buffer_end
        SUB     h_temp2,h_temp2,#8                    ; in case buffer_end padded up to mul of 4
        CMP     h_temp2,h_inptr                       ; have we read past the end?
        ; These are addresses, so the comparison must be unsigned: a signed
        ; test gives the wrong answer when the two values lie either side of
        ; &80000000.
        MOVLO   pc,lr                                 ; if so then return - we've got enough
        B       huff_bad_stuff_byte                   ; panic exit, bad data

; -------------------------------------------------------------------
; Decoding the huffman stream.

; Decode a symbol from the input stream - answer in h_temp.
; Potentially corrupts h_temp2, h_temp3, lr.
; Requires h_shortcut (and, for the slow path, h_maxcode and h_huffval) to
; have been set up by HUFF_DECODE_SETUP.
; $lab makes the two labels used here unique per expansion; a matching
; "Huff_decode_leftovers $lab" must be expanded somewhere to provide the
; out-of-line slow path that this macro branches to.
        MACRO
        Huff_decode $lab
        CMP     h_incount,#8
        BLLT    huff_fill_her_up                   ; ensure h_inword has enough bits for a shortcut-derived answer

; try the shortcut table
; (one word per possible value of the next 8 input bits)
        MOV     h_temp3,h_inword,LSR #24           ; get next 8 bits of input
        LDR     h_temp3,[h_shortcut,h_temp3,LSL #2] ; get shortcut value
        ; the shortcut value is in h_temp3 - top byte is number of bits, rest is value
        BIC     h_temp,h_temp3,#&ff000000          ; the answer in h_temp
        MOVS    h_temp3,h_temp3,ASR #24            ; the number of bits we've used
        BLT     huff_noshortcut$lab                ; the shortcut entry was -ve - no shortcut.
        SUB     h_incount,h_incount,h_temp3        ; set h_incount correctly
        MOV     h_inword,h_inword,LSL h_temp3      ; the bit source
huff_decodeexit$lab
; the value is in 'temp' - return.
; (the slow path in Huff_decode_leftovers rejoins here)
        MEND

        MACRO
        Huff_decode_leftovers $lab
; Out-of-line slow path for "Huff_decode $lab": entered at huff_noshortcut$lab
; when the shortcut table has no entry for the next 8 bits, and leaves by
; branching back to huff_decodeexit$lab with the decoded symbol in h_temp.
; Uses h_code (= h_temp2) and h_l (= h_temp3); h_temp is also used as scratch
; inside the loop. Branches to huff_badinput, and does not return, if the code
; turns out to be longer than 16 bits.
huff_noshortcut$lab
        ; According to some stuff on the net, this happens for 3-4% of the huffman codes.
        ; the incoming value is at least 8 bits - could avoid going round the loop first 8 times,
        ; instead of setting h_code and h_l to 0.
        CMP     h_incount,#16
        BLLT    huff_fill_her_up                   ; ensure h_inword has enough bits
        MOV     h_l,#8                             ; as though we've been round 8 times already
        MOV     h_code,h_inword,LSR #24            ; first 8 bits
        MOV     h_inword,h_inword,LSL #8           ; shift input by 8 bits - inword done at the end.

;        ; don't use the shortcut table - all the way round the loop.
;        MOV     h_code,#0                          ; loop initialise
;        MOV     h_l,#0

huff_decode_loop$lab
        MOVS    h_inword,h_inword,LSL #1           ; next input bit drops out into the carry flag
        ADC     h_code,h_code,h_code               ; code = (code << 1) + get_bit(cinfo);
        ADD     h_l,h_l,#1                         ; l++;
        LDR     h_temp,[h_maxcode,h_l,LSL #2]      ; ...htbl->maxcode[l]
        CMP     h_code,h_temp                      ; while (code > htbl->maxcode[l])
        BGT     huff_decode_loop$lab               ;    ... loop
        ; NOTE(review): termination for garbage input relies on the maxcode
        ; table holding a large enough value at or beyond index 17 - confirm
        ; against the code that builds the table.

; check for garbage input
        CMP     h_l,#16
        BGT     huff_badinput

; do the final table lookups.
; NOTE(review): the IJG C code subtracts mincode[l] at this point; here code is
; added directly, so presumably the valptr table is built pre-biased - confirm.
        ADD     h_temp,h_shortcut,#huff_tbl_valptr-huff_tbl_shortcut ; get valptr table
        LDR     h_temp,[h_temp,h_l,LSL #2]        ; do lookup in valptr table
        ADD     h_temp,h_temp,h_code
        LDRB    h_temp,[h_huffval,h_temp]         ; the decoded symbol

; advance in the bit stream
; (h_inword has already been shifted by h_l bits in total: 8 up front plus one
; per trip round the loop, so only the count needs adjusting)
        SUB     h_incount,h_incount,h_l

; we've finished, return to the main stream
        B       huff_decodeexit$lab
        MEND

; --------------------------------------------------------------------
asm_huff_decode_blocks
;LOCAL void
;huff_decode_blocks (decompress_info_ptr cinfo, JBLOCK block,
;                    HUFF_TBL *dctbl, HUFF_TBL *actbl,
;                    QUANT_TBL_PTR quanttbl, int *last_dc_val, int nblocks)
;  r0 = cinfo
;  r1 = block pointer
;  r2 = HUFF_TBL* dctbl
;  r3 = HUFF_TBL* actbl
;  [sp] = quanttbl
;  [sp,#4] = int *last_dc_val
;  [sp,#8] = int nblocks
;
; Decodes nblocks consecutive blocks of 64 word-sized coefficients starting at
; the block pointer. Each block is zeroed first, then filled with dequantised
; coefficients in de-zigzagged order. *last_dc_val is updated for every block.
; The bit-stream state is read from cinfo on entry and written back on the
; normal exit. All of r0-r12 are restored on return.
; The caller's stacked nblocks argument is used as the loop counter and is
; overwritten.
; On corrupt input, control leaves via huff_badinput / huff_bad_stuff_byte
; instead of returning normally.

; save registers
        STMDB   sp!,{r0-r12,lr}
        ; args 0..3 are at sp
        ; args 4..6 are at [sp,#14*4..16*4]

; if nblocks<=0, return
        LDR     r2,[sp,#16*4]                    ; get nblocks
        CMP     r2,#0                            ; if <=0
        LDMLEIA sp!,{r0-r12,pc}                  ; then return

; paranoid search for nondeterministic results
        MOV     r3,#0
        MOV     lr,#0

; clear out the target block(s) - r1 is the block pointer
; (8 stores of 8 words = 64 words per block, one trip per block)
        LDR     r2,[sp,#16*4]           ; nblocks
        MOV     r4,#0                   ; get ready to clear the block
        MOV     r5,#0
        MOV     r6,#0
        MOV     r7,#0
        MOV     r8,#0
        MOV     r9,#0
        MOV     r10,#0
        MOV     r11,#0
huff_block_clear
        STMIA   r1!,{r4,r5,r6,r7,r8,r9,r10,r11}
        STMIA   r1!,{r4,r5,r6,r7,r8,r9,r10,r11}
        STMIA   r1!,{r4,r5,r6,r7,r8,r9,r10,r11}
        STMIA   r1!,{r4,r5,r6,r7,r8,r9,r10,r11}
        STMIA   r1!,{r4,r5,r6,r7,r8,r9,r10,r11}
        STMIA   r1!,{r4,r5,r6,r7,r8,r9,r10,r11}
        STMIA   r1!,{r4,r5,r6,r7,r8,r9,r10,r11}
        STMIA   r1!,{r4,r5,r6,r7,r8,r9,r10,r11}
        SUBS    r2,r2,#1
        BNE     huff_block_clear
; Now scratch all registers except r0=h_temp=cinfo.

; now set up registers for the main procedure.
; r0=h_temp=cinfo
        LDR     h_inptr,[h_temp,#cinfo_next_input_byte]
        LDR     h_inword,[h_temp,#cinfo_get_buffer]
        LDR     h_incount,[h_temp,#cinfo_bits_left]
        LDR     h_quanttbl,[sp,#14*4]            ; quanttbl argument
        ADD     h_quanttbl,h_quanttbl,#64*4      ; indexed from the end, to make termination testing faster
        LDR     h_block,[sp,#4]                  ; the block pointer passed in r1, reloaded from the frame
        MOV     h_topbit,#&80000000              ; constant needed by Huff_getbits

huff_anotherblock                                ; loop round to here nblocks times

; Set up huffman decoding for the DC component.
        LDR     h_temp,[sp,#2*4]                 ; HUFF_TBL* dctbl    (DC table pointer)
        HUFF_DECODE_SETUP h_temp                 ; set maxcode,huffval,shortcut

; Handle the DC component
        Huff_decode 0                            ; h_temp = s = huff_DECODE(cinfo, dctbl);
        CMP     h_temp,#0                        ; if (s)
        BEQ     huff_dc_0
        Huff_getbits h_temp                      ; h_temp counts the bits, result in h_temp
                                                 ; s = huff_EXTEND(get_bits(cinfo, s), s);
huff_dc_0
        ; h_temp holds the delta to the DC value for this block
        ; Create the absolute DC value
        LDR     h_temp3,[sp,#15*4]               ; int *last_dc_val
        LDR     h_temp2,[h_temp3]                ; ...*last_dc_val
        ADD     h_temp2,h_temp,h_temp2           ; s += *last_dc_val;
        STR     h_temp2,[h_temp3]                ; and write *last_dc_val back
        ; Now multiply by the quantisation value, and store in the block
        LDR     h_temp3,[h_quanttbl,#-64*4]      ; ...quanttbl[0]
        MUL     h_temp,h_temp2,h_temp3           ; ...s * quanttbl[0]

; >>>> hack for experimental DCT code, which wants shifted-up inputs
;        MOV     h_temp,h_temp,LSL #16

        STR     h_temp,[h_block]                 ; block[0] = (JCOEF) (((JCOEF) s) * quanttbl[0]);
        ; That's the DC value done.

; Set up huffman decoding for the AC components.
        LDR     h_temp,[sp,#3*4]                 ; HUFF_TBL* actbl    (AC table pointer)
        HUFF_DECODE_SETUP h_temp                 ; set maxcode,huffval,shortcut

; The loop that does AC components, once round for each non-zero component.
; h_k holds (k - 64), so that k = 1..63 corresponds to h_k = -63..-1 and the
; loop can stop as soon as an ADDS leaves h_k non-negative. The tables are
; therefore addressed from their END (h_quanttbl and huff_zag_end).
        MOV     h_k,#-63                         ; index into block
huff_loop
        Huff_decode 1                            ; result in r (ie h_temp)

        MOV     h_temp2,h_temp,LSR #4            ; n = r >> 4;
        ANDS    h_temp,h_temp,#15                ; s = r & 15;
        BEQ     huff_else                        ; if (s) {
        ADD     h_k,h_k,h_temp2                  ;   k += n;
        Huff_getbits h_temp                      ;   s = huff_EXTEND(get_bits(cinfo, s), s); in h_temp
        LDR     h_temp2,[h_quanttbl,h_k,LSL #2]  ;   ... quanttbl[k]
        MUL     h_temp,h_temp2,h_temp            ;   ... s * quanttbl[k]
        ADR     h_temp2,huff_zag_end
        LDRB    h_temp2,[h_temp2,h_k]            ;   ... ZAG[k]
                                                 ;   (if bad data pushed k past 63 this picks up
                                                 ;    one of the zero entries after huff_zag_end)

; >>>> hack for experimental DCT code, which wants shifted-up inputs
;        MOV     h_temp,h_temp,LSL #16

        STR     h_temp,[h_block,h_temp2,LSL #2]  ;   block[ZAG[k]] = (JCOEF) (((JCOEF) s) * quanttbl[k]);
        ADDS    h_k,h_k,#1                       ;   k++, setting flags for the loop test
;        CMP     h_k,#63
        BLT     huff_loop                        ;   still inside the block
        B       huff_exit
huff_else                                        ; } else {
        CMP     h_temp2,#15                      ;   if (n != 15)
        BNE     huff_exit                        ;     break;
        ADDS    h_k,h_k,#16                      ;   k += 15;     (and 1 for the for loop)
;        CMP     h_k,#63
        BLT     huff_loop
huff_exit
; That block is completed now.

; Have we done enough blocks? If not, do another.
        ADD     h_block,h_block,#64*4                    ; advance block pointer
        LDR     h_temp,[sp,#16*4]                        ; get nblocks
        SUBS    h_temp,h_temp,#1                         ; decrement it
        STRNE   h_temp,[sp,#16*4]                        ; if not zero, save new value
        BNE     huff_anotherblock                        ;           ...and loop.

; We are going to exit.
; Save the state of the huff stream, and return.
        LDR     h_temp,[sp]                              ; cinfo
        STR     h_inptr,[h_temp,#cinfo_next_input_byte]  ; save state of input bit stream
        STR     h_inword,[h_temp,#cinfo_get_buffer]
        STR     h_incount,[h_temp,#cinfo_bits_left]
        LDMIA   sp!,{r0-r12,pc}                          ; exit

; --------------------------------------------------------------------
; The zig-zag reordering table - the coefficients are actually provided
; in a zig-zag ordering to maximise the length of runs of 0, this table
; descrambles this order.
; Entry k gives the position within the 8x8 block of the k'th coefficient in
; the stream. The decoder addresses the table backwards from huff_zag_end using
; a negative index, so the 16 zero bytes that follow huff_zag_end are what
; gets read if corrupt data drives the index to 0..15.
huff_zag
        DCB      0,  1,  8, 16,  9,  2,  3, 10
        DCB     17, 24, 32, 25, 18, 11,  4,  5
        DCB     12, 19, 26, 33, 40, 48, 41, 34
        DCB     27, 20, 13,  6,  7, 14, 21, 28
        DCB     35, 42, 49, 56, 57, 50, 43, 36
        DCB     29, 22, 15, 23, 30, 37, 44, 51
        DCB     58, 59, 52, 45, 38, 31, 39, 46
        DCB     53, 60, 61, 54, 47, 55, 62, 63
huff_zag_end
        DCB      0,  0,  0,  0,  0,  0,  0,  0  ;extra entries in case k>63
        DCB      0,  0,  0,  0,  0,  0,  0,  0

; --------------------------------------------------------------------
asm_huff_skip_blocks
;LOCAL void
;huff_skip_blocks (decompress_info_ptr cinfo, JBLOCK block,
;                  HUFF_TBL *dctbl, HUFF_TBL *actbl,
;                  QUANT_TBL_PTR quanttbl, int *last_dc_val, int nblocks)
;  r0 = cinfo
;  r1 = block pointer             (UNUSED)
;  r2 = HUFF_TBL* dctbl
;  r3 = HUFF_TBL* actbl
;  [sp] = quanttbl                (UNUSED)
;  [sp,#4] = int *last_dc_val
;  [sp,#8] = int nblocks
; This routine is very similar to huff_decode_blocks, except that
; we do not actually output the block - we simply skip forward that far
; in the huffman stream, updating last_dc_val correctly.
; The bit-stream state is read from cinfo on entry and written back on the
; normal exit. All of r0-r12 are restored on return. The caller's stacked
; nblocks argument is used as the loop counter and is overwritten.
; On corrupt input, control leaves via huff_badinput / huff_bad_stuff_byte
; instead of returning normally.

; save registers
        STMDB   sp!,{r0-r12,lr}
        ; args 0..3 are at sp
        ; args 4..6 are at [sp,#14*4..16*4]

; now set up registers for the main procedure.
; r0=h_temp=cinfo
        LDR     h_inptr,[h_temp,#cinfo_next_input_byte]
        LDR     h_inword,[h_temp,#cinfo_get_buffer]
        LDR     h_incount,[h_temp,#cinfo_bits_left]
        MOV     h_topbit,#&80000000              ; constant needed by Huff_getbits

; if nblocks<=0, return
; (nothing has been modified yet, so there is no state to write back)
        LDR     h_temp2,[sp,#16*4]               ; get nblocks
        CMP     h_temp2,#0                       ; if <=0
        LDMLEIA sp!,{r0-r12,pc}                  ; then return

huff_skip_anotherblock                           ; loop round to here nblocks times

; Set up huffman decoding for the DC component.
        LDR     h_temp,[sp,#2*4]                 ; HUFF_TBL* dctbl    (DC table pointer)
        HUFF_DECODE_SETUP h_temp                 ; set maxcode,huffval,shortcut

; Handle the DC component
        Huff_decode 2                            ; h_temp = s = huff_DECODE(cinfo, dctbl);
        CMP     h_temp,#0                        ; if (s)
        BEQ     huff_skip_dc_0
        Huff_getbits h_temp                      ; h_temp counts the bits, result in h_temp
                                                 ; s = huff_EXTEND(get_bits(cinfo, s), s);
huff_skip_dc_0
        ; h_temp holds the delta to the DC value for this block
        ; Create the absolute DC value
        LDR     h_temp3,[sp,#15*4]               ; int *last_dc_val
        LDR     h_temp2,[h_temp3]                ; ...*last_dc_val
        ADD     h_temp2,h_temp,h_temp2           ; s += *last_dc_val;
        STR     h_temp2,[h_temp3]                ; and write *last_dc_val back
        ; ... and throw the value away
        ; That's the DC value done.

; Set up huffman decoding for the AC components.
        LDR     h_temp,[sp,#3*4]                 ; HUFF_TBL* actbl    (AC table pointer)
        HUFF_DECODE_SETUP h_temp                 ; set maxcode,huffval,shortcut

; The loop that does AC components, once round for each non-zero component.
; h_k holds (k - 64), so the loop ends when an ADDS leaves it non-negative.
        MOV     h_k,#-63                         ; index into block
huff_skip_loop
        Huff_decode 3                            ; result in r (ie h_temp)
        MOV     h_temp2,h_temp,LSR #4            ; n = r >> 4;
        ANDS    h_temp,h_temp,#15                ; s = r & 15;
        BEQ     huff_skip_else                   ; if (s) {
        ADD     h_k,h_k,h_temp2                  ;   k += n;
        Huff_skipbits h_temp                     ;   discard the s bits of coefficient value
                                                 ;   (no sign extension needed - nothing is stored)
        ADDS    h_k,h_k,#1                       ;   k++, setting flags for the loop test
        BLT     huff_skip_loop                   ;   still inside the block
        B       huff_skip_exit
huff_skip_else                                   ; } else {
        CMP     h_temp2,#15                      ;   if (n != 15)
        BNE     huff_skip_exit                   ;     break;
        ADDS    h_k,h_k,#16                      ;   k += 15;     (and 1 for the for loop)
        BLT     huff_skip_loop
huff_skip_exit
; That block is completed now.

; Have we done enough blocks? If not, do another.
        LDR     h_temp,[sp,#16*4]                        ; get nblocks
        SUBS    h_temp,h_temp,#1                         ; decrement it
        STRNE   h_temp,[sp,#16*4]                        ; if not zero, save new value
        BNE     huff_skip_anotherblock                   ;           ...and loop.

; We are going to exit.
; Save the state of the huff stream, and return.
        LDR     h_temp,[sp]                              ; cinfo
        STR     h_inptr,[h_temp,#cinfo_next_input_byte]  ; save state of input bit stream
        STR     h_inword,[h_temp,#cinfo_get_buffer]
        STR     h_incount,[h_temp,#cinfo_bits_left]
        LDMIA   sp!,{r0-r12,pc}                          ; exit

        ; Out-of-line slow paths for the four Huff_decode expansions above:
        ; 0 and 1 are the DC and AC decodes in asm_huff_decode_blocks,
        ; 2 and 3 are the DC and AC decodes in asm_huff_skip_blocks.
        Huff_decode_leftovers 0
        Huff_decode_leftovers 1
        Huff_decode_leftovers 2
        Huff_decode_leftovers 3

; -------------------------------------------------------------------
; Errors in the input stream.
; Should really construct an error message, for the moment do a panic exit.
; We reload the registers to get things like sl back, which gets us the
; module wp again.
;
; Both entry points must be reached with sp addressing the 14-word frame
; {r0-r12,lr} pushed on entry to asm_huff_decode_blocks / asm_huff_skip_blocks.
; They dump all sixteen registers below that frame, call
;       assembler_panic(cinfo, register dump, panic type)
; then unwind both the dump and the entry frame and branch to panic_exit with
; r0 = panic type (123 = bad huffman code, 124 = bad byte after a 255).
huff_badinput

        ; attempt to get details out there of what the situation was when we went wrong.
        STMDB   sp!,{r0-lr,pc}                           ; dump ALL the registers.
        MOV     r2,#123                                  ; r2 = panic type
huff_panic_exit
        ADD     r3,sp,#16*4+4*4                          ; point at original saved registers, except r0-r3
        LDMIA   r3,{r4-r12}                              ; restore registers that C wants
        LDR     r0,[r3,#-16]                             ; r0 = cinfo
        MOV     r1,sp                                    ; r1 -> saved register block
        ; r2 is not preserved across a procedure call, so keep a copy of the
        ; panic type in r4, which the callee must preserve. The entry value of
        ; r4 is reloaded from the frame below in any case.
        MOV     r4,r2
        BL      assembler_panic                          ; call dump routine, written in C
        ADD     sp,sp,#16*4                              ; set sp back to what it was

        ; Previously r0 was forced to 123 here, so a bad stuff byte (type 124)
        ; was reported to panic_exit as a bad huffman code. Drop the panic
        ; type into the stacked r0 slot so the reload delivers it in r0.
        STR     r4,[sp]
        LDMIA   sp!,{r0-r12,lr}                          ; restore entry registers, r0 = panic type
        B       panic_exit                               ; >>>> rationalise exit codes!

huff_bad_stuff_byte
        ; attempt to get details out there of what the situation was when we went wrong.
        STMDB   sp!,{r0-lr,pc}                           ; dump ALL the registers.
        MOV     r2,#124                                  ; r2 = panic type
        B       huff_panic_exit                          ; join common code

;        LDMIA   sp!,{r0-r12,lr}                          ; restore entry registers
;        MOV     r0,#124                                  ; >>>> rationalise exit codes!
;        B       panic_exit

        END