; Copyright 1996 Acorn Computers Ltd
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;     http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;
;
; s.UnSqueeze by RCC 25-Aug-87
; This is a bit of code to be included in self-decompressing images to
; expand the image in place.  See elsewhere for details of the compression
; algorithm.
;
; ***********************************
; ***    C h a n g e   L i s t    ***
; ***********************************

; Date       Name       Description
; ----       ----       -----------
; 13-Feb-90  TDobson    Minor optimisation which saves 1 instruction for
;                       every output word that isn't a "short" or a "long".
; 15-Feb-90  TDobson    Started conversion for inclusion in RISC OS kernel

;        GET     Hdr:Debug

; Constants defining partition of nibble value space: these must match
; corresponding values in mod.squeeze.
;
; As used by the decoder in this file, each nibble n of a control byte says
; how one output word is encoded:
;   n = 0                       the word is zero
;   0 < n < MinLong             a 4-byte literal follows in the data stream
;   MinLong  <= n < MinShort    "long":  (n-MinLong) is the top part of an index
;                               into the longs table
;   MinShort <= n <= 15         "short": (n-MinShort) is the top part of an index
;                               into the shorts table

NibsLong    * 7                 ; number of nibble values given to "long" encodings
NibsShort   * (14-NibsLong)     ; number of nibble values given to "short" encodings
                                ; (not referenced by the code visible in this file)
MinShort    * (2+NibsLong)      ; lowest nibble value that denotes a "short"
MinLong     * 2                 ; lowest nibble value that denotes a "long"

; **************************************************************************
;
;       CheckForSqueezedModule - Check whether a module is squeezed, and
;                                unsqueeze it if it is
;
; in:   R9 -> module node
;       R10 -> environment string
;       R12 -> incarnation node
;
; out:  R9 preserved, but module node pointed to may have code entry changed
;       to point to expanded copy of module
;       R10, R12 preserved
;       R0-R6 may be corrupted
;
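; If the offset to the init entry has its top bit set, the module is squeezed,
; and (offset BIC &80000000) is the size of the squeezed module, ie the offset
; from the module base to the byte after its end. The last 5 words of the
; squeezed module are as follows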

        ^       -5*4                                    ; map starts 5 words below offset 0
; Offsets below are negative, relative to the byte after the end of the
; squeezed module. CheckForSqueezedModule fetches all five words with one
; LDMDB into R8-R12 (lowest address into R8), so it relies on this order.
Squeeze_DecodedSize # 4                                 ; size of decoded image     (bytes)
Squeeze_EncodedSize # 4                                 ; size of encoded image     (--"--)
Squeeze_TablesSize  # 4                                 ; size of short+long tables (--"--)
Squeeze_NShorts     # 4                                 ; number of "short" entries
Squeeze_NLongs      # 4                                 ; number of "long" entries
        ASSERT  @=0                                     ; the five fields must end exactly at offset 0

CheckForSqueezedModule ROUT
; Outline:
;   1. Test the top bit of the init offset; return at once (V clear) if the
;      module is not squeezed.
;   2. Read the 5-word trailer at the end of the squeezed module.
;   3. Claim one block for the expanded module and one for the decoded
;      shorts+longs tables.
;   4. Decode the two tables (stored as byte-coded deltas, read upwards).
;   5. Decode the module body, reading the encoded data downwards and writing
;      the output downwards, two words per control byte.
;   6. Free the table block and the original module block, then point the
;      module node at the expanded copy.
; On a failed claim the error pointer from RMAClaim_Chunk is returned in R0
; with V set, and R6-R12 are restored.

        CLRV                                            ; prepare for normal exit (V clear)
        LDR     R6, [R9, #Module_code_pointer]          ; R6 -> module code
        LDR     R5, [R6, #Module_Init]                  ; R5 = offset to init entry
        EORS    R5, R5, #&80000000                      ; take off top bit (toggles it, so N is now
                                                        ; set iff the top bit was originally clear)
        MOVMI   PC, lr                                  ; if -ve now, then it's a normal module, so exit doing nothing

; it's a squeezed module, R5 = size of compressed module

        Push    "R6-R12,lr"     ; save other registers (and pointer to module base)
                                ; R6 (module base) is the lowest word of this frame;
                                ; the success exit pulls it back into R2 to free it

;        DLINE   "Unsqueezing module"

        ADD     R5, R6, R5      ; R5 -> byte after end of module
        LDMDB   R5!, {R8-R12}   ; load all the data - R8=decoded size, R9=encoded size
                                ; R10=tables size, R11=no. of shorts, R12=no. of longs
                                ; writeback leaves R5 -> start of the trailer,
                                ; which is also the end of the encoded tables

        SUB     R10, R5, R10    ; R10 -> start (lowest address) of encoded tables
                                ; = highest address +1 of encoded data
        SUB     R9, R10, R9     ; R9 -> lowest address of encoded data

;        DREG    R8, "Claiming block for module of size "

        MOV     R3, R8          ; now claim a block for the expanded code
        BL      RMAClaim_Chunk
        BVS     ExpandFailed1

;        DREG    R2, "Successfully claimed block for expanded code at "

        MOV     R7, R2          ; R7 -> start of expanded module

        ADD     R3, R11, R12    ; R3 = no. of shorts and longs
        MOV     R3, R3, LSL #2  ; convert to bytes (one word per table entry)

;        DREG    R3, "Claiming block for shorts+longs of size "

        BL      RMAClaim_Chunk
        BVS     ExpandFailed2   ; must free module block before exiting!

;        DREG    R2, "Successfully claimed block for shorts+longs at "

        MOV     R6, R2          ; R6 -> start of expanded table of shorts+longs

        ADD     R8, R7, R8      ; R8 -> highest address of decoded image +1

; We only need nLongs and nShorts while we are decoding the tables.
; Afterwards we will re-use the registers for pointers to start of tables.

        MOV   R5, R10       ; R5 is ptr into encoded tables
        MOV   R4, #0        ; R4 = 0: the first table to decode is the shorts table

;        DLINE   "Expanding shorts+longs table"

; Table encoding, as decoded below. Each entry is stored as a delta from the
; previous entry (the entry before the first counts as -1). The first byte b
; of each item selects the form:
;   b = 0        literal delta follows, low byte first: 4 bytes in the shorts
;                table, 3 bytes in the longs table
;   1 <= b <= 9  a run of b entries, each one more than its predecessor
;   10..91       delta = b-10
;   92..173      delta = (b-92)<<8  + next byte
;   174..255     delta = (b-174)<<16 + next byte + (byte after that)<<8

decodeTab
        ; Require:  R11 -- no of els left to decode
        ;           R6  -- ptr into decoded table
        ;           R5  -- ptr into encoding
        ;           R4  -- = 0 iff this is the shorts table (i.e. 4-byte vals)

; I believe this loop could be made good deal smaller and possibly
; faster, but it's only a couple of hundred bytes and it works.

        MOV   R2, R6        ; stash away base of the table about to be decoded
        MOV   R3, #-1       ; start as if previous entry was -1
decodeEntry
        SUBS  R11, R11, #1  ; while (--nEntries >= 0) {
        BLT   decodedTab    ; assert: previous word is in R3
        LDRB  R1, [R5], #1  ; byte = *p++
        SUBS  R0, R1, #10   ; R0 = byte-10, the delta if this is the 1-byte form
        BGE   greaterThan9
literalOrOnes
        CMPS  R1, #0
        BNE   ones
literal
        LDRB  R0, [R5], #1           ; assemble the literal delta, low byte first
        LDRB  R1, [R5], #1
        ORR   R0, R0, R1, LSL #8
        LDRB  R1, [R5], #1
        ORR   R0, R0, R1, LSL #16
        CMPS  R4, #0                 ; in the 4-byte (short encodings) table?
        LDREQB R1, [R5], #1          ; yes, so include the 4th byte
        ORREQ  R0, R0, R1, LSL #24   ; in the resultant word
        ADD   R3, R3, R0
        STR   R3, [R6], #4
        B     decodeEntry
ones
        SUB   R11, R11, R1  ; this item yields R1 entries, but the loop top has
        ADD   R11, R11, #1  ; already counted one of them, so take off R1-1
anotherOne        ; Have number of increment-by-ones in R1
        ADD   R3, R3, #1
        STR   R3, [R6], #4
        SUBS  R1, R1, #1
        BGT   anotherOne
        B     decodeEntry
greaterThan9
        CMPS  R1, #92       ; 10..91: single byte, delta already in R0
        ADDLT R3, R3, R0
        STRLT R3, [R6], #4
        BLT   decodeEntry
greaterThan91
        SUBS  R0, R1, #174
        BLT   oneMore
twoMore
        LDRB  R1, [R5], #1              ; next byte is bits 0-7 of the delta
        ORR   R0, R1, R0, LSL #16       ; (b-174) is bits 16 upwards
        LDRB  R1, [R5], #1              ; following byte is bits 8-15
        ORR   R0, R0, R1, LSL #8
        ADD   R3, R3, R0
        STR   R3, [R6], #4
        B     decodeEntry
oneMore
        SUBS  R0, R1, #92
        LDRB  R1, [R5], #1              ; next byte is bits 0-7 of the delta
        ORR   R0, R1, R0, LSL #8        ; (b-92) is bits 8 upwards
        ADD   R3, R3, R0
        STR   R3, [R6], #4
        B     decodeEntry   ; } /* end while (--nEntries >= 0) { */

decodedTab
        CMPS  R4, #0        ; if isShorts then
        BNE   finishLongs   ; else finishLongs
finishShorts
        MOV   R11, R12      ; no of els to decode = nLongs
        MOV   R12, R2       ; R12 = &shorts[0]
        MOV   R2, R6        ; stash away start of longs table
        MOV   R4, #1        ; next table is longs
        B     decodeTab
finishLongs
        MOV   R11, R2       ; R11 = &longs[0]

;        DLINE   "Finished expanding shorts+longs table"

decodedBothTabs
        ; Now have:  R12 = &shorts[0]
        ;            R11 = &longs[0]
        ;            R10 = highest address +1 of encoded data
        ;            R9  = lowest address of encoded data
        ;            R8  = highest address +1 of decoded data
        ;
        ; R0..R7 are free for workspace

;        DREG    R12, "Shorts table at "
;        DREG    R11, "Longs table at "
;        DREG    R9, "Encoded data start "
;        DREG    R10, "Encoded data end+1 "
;        DREG    R8, "Decoded data end+1 "

; Each pass consumes one control byte (plus any operand bytes its two nibbles
; call for), working down from R10, and pushes two output words below R8.
; The low nibble gives the word stored at the lower address (R4), the high
; nibble the word stored at the higher address (R5).

decodePair
        CMPS  R10, R9           ; Have we reached the base ?
        BLS   doneDecode        ; unsigned test: R10 and R9 are addresses, so a signed
                                ; condition would misfire if they straddled &80000000
        LDRB  R6, [R10, #-1]!   ; byte value
        ; The words will be put in R4 and R5, to be STMDB'd
        AND   R3, R6, #15       ; first nibble
        SUBS  R0, R3, #MinShort ; idx = (val - MinShort)
        BLT   notshort0
short0
        LDRB  R1, [R10, #-1]!
        ORR   R0, R1, R0, LSL #8
        LDR   R4, [R12, R0, LSL #2]    ; w = shorts[(nibble-MinShort)<<8 | *--p]
        B     gotFirst
notshort0
        SUBS  R0, R3, #MinLong         ; idx = (val - MinLong)
        BLT   notlong0
long0
        LDRB  R1, [R10, #-1]!
        ORR   R0, R1, R0, LSL #8
        LDR   R0, [R11, R0, LSL #2]    ; w = longs[(nibble-MinLong)<<8 | *--p]
        LDRB  R1, [R10, #-1]!          ; table supplies the top 24 bits,
        ORR   R4, R1, R0, LSL #8       ; a further data byte the bottom 8
        B     gotFirst
notlong0
        MOVS  R4, R3            ; TMD 13-Feb-90: combine 2 instructions here
                                ; used to be CMPS R3,#0; MOVEQ R4,R3
        BEQ   gotFirst          ; nibble 0 => word is zero (already in R4)
literal0
        LDRB  R0, [R10, #-1]!   ; otherwise a 4-byte literal: first byte read
        LDRB  R1, [R10, #-1]!   ; is the least significant
        ORR   R0, R0, R1, LSL #8
        LDRB  R1, [R10, #-1]!
        ORR   R0, R0, R1, LSL #16
        LDRB  R1, [R10, #-1]!
        ORR   R4, R0, R1, LSL #24

gotFirst
        ; Phew!  We have the first word of the pair (in R4), now we have
        ; to do (almost) the same again, result in R5, and STMDB.

        MOV   R3, R6, LSR #4     ; second nibble
        SUBS  R0, R3, #MinShort  ; idx = (val - MinShort)
        BLT   notshort1
short1
        LDRB  R1, [R10, #-1]!
        ORR   R0, R1, R0, LSL #8
        LDR   R5, [R12, R0, LSL #2]    ; w = shorts[(nibble-MinShort)<<8 | *--p]
        STMDB R8!, {R4,R5}
        B     decodePair
notshort1
        SUBS  R0, R3, #MinLong        ; idx = (val - MinLong)
        BLT   notlong1
long1
        LDRB  R1, [R10, #-1]!
        ORR   R0, R1, R0, LSL #8
        LDR   R0, [R11, R0, LSL #2]    ; w = longs[(nibble-MinLong)<<8 | *--p]
        LDRB  R1, [R10, #-1]!
        ORR   R5, R1, R0, LSL #8
        STMDB R8!, {R4,R5}
        B     decodePair
notlong1
        MOVS  R5, R3            ; TMD 13-Feb-90: combine 2 instructions here
                                ; used to be CMPS R3,#0; MOVEQ R5,R3

                                       ; This doesn't pay off much
        STMEQDB R8!, {R4,R5}           ; might be better to swap round
        BEQ   decodePair               ; literal and zero, to save 3S on
literal1                               ; the longer path ?
        LDRB  R0, [R10, #-1]!
        LDRB  R1, [R10, #-1]!          ; If I had the right byte-sex and
        ORR   R0, R0, R1, LSL #8       ; a couple of registers to spare,
        LDRB  R1, [R10, #-1]!          ; could do this in 15S instead of 22S
        ORR   R0, R0, R1, LSL #16      ; using the load non-aligned word code
        LDRB  R1, [R10, #-1]!          ; given in ARM CPU Manual.
        ORR   R5, R0, R1, LSL #24
        STMDB R8!, {R4,R5}
        B     decodePair

doneDecode

;        DREG    R8, "Finished decoding, module at "

; now R8 -> the completely unsqueezed module

; so first, free the shorts+longs table block
; R12 -> shorts, which is first of the two

        MOV     R2, R12

;        DREG    R2, "Freeing shorts+longs table at "

        BL      FreeRMABlock

; ignore any error from this
        MOV     R3, R8                  ; save pointer to expanded module
        Pull    "R2,R7-R12"             ; pull pointer to original module base into R2 and restore other registers

;        DREG    R2, "Freeing original module block at "

        BL      FreeRMABlock            ; may fail because original module is in ROM, so ignore error

;        DLINE   "Returning new module to OS"

        STR     R3, [R9, #Module_code_pointer]  ; point module node at expanded module
        CLRV
        Pull    PC                      ; exit (VC)

; come here if failed to claim block for tables

ExpandFailed2

;        DLINE   "Failed to claim table block, freeing module block"

        Push    R0                      ; save original error pointer
        MOV     R2, R7                  ; R2 -> block claimed for the expanded module
        BL      FreeRMABlock            ; corrupts R0,R1, hence the save around it
        Pull    R0                      ; restore error pointer, and drop thru to ...

; come here if failed to claim block for expanded module

ExpandFailed1
        SETV
        Pull    "R6-R12, pc"            ; restore registers, and exit V set

; FreeRMABlock - give a block back to the RMA heap
;
; in:   R2 -> block to be freed
;
; out:  R0, R1 corrupted
;       the callers in this file ignore any error from the SWI

FreeRMABlock Entry
;        LDR     R0, [R2, #-4]
;        DREG    R0, "FreeRMABlock called, block size purports to be "

        MOV     R1, #RMAAddress         ; R1 -> heap to operate on
        MOV     R0, #HeapReason_Free    ; R0 = reason code: free the block at R2
        SWI     XOS_Heap
        EXIT

;        InsertDebugRoutines

        END