; Copyright 1996 Acorn Computers Ltd
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;     http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;
;
; s.UnSqueeze by RCC 25-Aug-87
; This is a bit of code to be included in self-decompressing images to
; expand the image in place.  See elsewhere for details of the compression
; algorithm.
;
; ***********************************
; ***    C h a n g e   L i s t    ***
; ***********************************
; Date       Name         Description
; ----       ----         -----------
; 13-Feb-90  TDobson      Minor optimisation which saves 1 instruction for
;                         every output word that isn't a "short" or a "long".
; 15-Feb-90  TDobson      Started conversion for inclusion in RISC OS kernel

;       GET     Hdr:Debug

; Constants defining partition of nibble value space: these must match
; corresponding values in mod.squeeze.
;
; As used by the decoder below, a nibble value n selects:
;       n = 0                     the word zero
;       n = 1                     a 4-byte literal taken from the stream
;       MinLong  <= n < MinShort  an entry in the "longs" table
;       n >= MinShort             an entry in the "shorts" table

NibsLong        *       7
NibsShort       *       (14-NibsLong)
MinShort        *       (2+NibsLong)
MinLong         *       2

; **************************************************************************
;
;       CheckForSqueezedModule - Check whether a module is squeezed, and
;       unsqueeze it if it is
;
; in:   R9 -> module node
;       R10 -> environment string
;       R12 -> incarnation node
;
; out:  R9 preserved, but module node pointed to may have code entry changed
;       to point to expanded copy of module
;       R10, R12 preserved
;       R0-R6 may be corrupted
;       V clear if the module was not squeezed or was expanded successfully
;       V set (R0 as returned by the failing RMAClaim_Chunk) if a block
;       could not be claimed
;
; If the offset to the init entry has its top bit set, then
; (offset BIC &80000000) is the size of the encoded module, i.e. the offset
; from the module base to the byte after its end.  The last 5 words of the
; encoded module are as follows

                        ^       -5*4
Squeeze_DecodedSize     #       4       ; size of decoded image (bytes)
Squeeze_EncodedSize     #       4       ; size of encoded image (--"--)
Squeeze_TablesSize      #       4       ; size of short+long tables (--"--)
Squeeze_NShorts         #       4       ; number of "short" entries
Squeeze_NLongs          #       4       ; number of "long" entries
                        ASSERT  @=0

CheckForSqueezedModule ROUT
        CLRV                                    ; prepare for normal exit (V clear)
        LDR     R6, [R9, #Module_code_pointer]  ; R6 -> module code
        LDR     R5, [R6, #Module_Init]          ; R5 = offset to init entry
        EORS    R5, R5, #&80000000              ; take off top bit
        MOVMI   PC, lr                          ; if -ve now, then it's a normal module,
                                                ; so exit doing nothing

; it's a squeezed module, R5 = size of compressed module

        Push    "R6-R12,lr"                     ; save other registers (and pointer to module base)

;       DLINE   "Unsqueezing module"

        ADD     R5, R6, R5                      ; R5 -> byte after end of module
        LDMDB   R5!, {R8-R12}                   ; load all the data - R8=decoded size, R9=encoded size
                                                ; R10=tables size, R11=no. of shorts, R12=no. of longs

        SUB     R10, R5, R10                    ; R10 -> start (lowest address) of encoded tables
                                                ;      = highest address +1 of encoded data
        SUB     R9, R10, R9                     ; R9 -> lowest address of encoded data

;       DREG    R8, "Claiming block for module of size "

        MOV     R3, R8                          ; now claim a block for the expanded code
        BL      RMAClaim_Chunk
        BVS     ExpandFailed1

;       DREG    R2, "Successfully claimed block for expanded code at "

        MOV     R7, R2                          ; R7 -> start of expanded module

        ADD     R3, R11, R12                    ; R3 = no. of shorts and longs
        MOV     R3, R3, LSL #2                  ; convert to bytes

;       DREG    R3, "Claiming block for shorts+longs of size "

        BL      RMAClaim_Chunk
        BVS     ExpandFailed2                   ; must free module block before exiting!

;       DREG    R2, "Successfully claimed block for shorts+longs at "

        MOV     R6, R2                          ; R6 -> start of expanded table of shorts+longs

        ADD     R8, R7, R8                      ; R8 -> highest address of decoded image +1

; We only need nLongs and nShorts while we are decoding the tables.
; Afterwards we will re-use the registers for pointers to start of tables.

        MOV     R5, R10                         ; R5 is ptr into encoded tables
        MOV     R4, #0                          ; this is the first table el

;       DLINE   "Expanding shorts+longs table"

decodeTab
        ; Require:  R11 -- no of els left to decode
        ;           R6  -- ptr into decoded table
        ;           R5  -- ptr into encoding
        ;           R4  -- = 0 iff this is the shorts table (i.e. 4-byte vals)
        ;
        ; Each table entry is stored as a delta from the previous entry
        ; (the entry before the first counts as -1).  The leading byte b
        ; of each encoded item selects the form:
        ;       b = 0        literal delta follows, low byte first
        ;                    (4 bytes in the shorts table, 3 in the longs)
        ;       b = 1..9     b consecutive entries, each previous+1
        ;       b = 10..91   delta = b-10
        ;       b = 92..173  delta = (b-92) LSL 8  + next byte
        ;       b = 174..255 delta = (b-174) LSL 16 + next two bytes,
        ;                    low byte first

; I believe this loop could be made good deal smaller and possibly
; faster, but it's only a couple of hundred bytes and it works.

        MOV     R2, R6                          ; stash away base of first table
        MOV     R3, #-1                         ; start as if previous entry was -1
decodeEntry
        SUBS    R11, R11, #1                    ; while (--nEntries >= 0) {
        BLT     decodedTab                      ; assert: previous word is in R3
        LDRB    R1, [R5], #1                    ; byte = *p++
        SUBS    R0, R1, #10
        BGE     greaterThan9
literalOrOnes
        CMPS    R1, #0
        BNE     ones
literal
        LDRB    R0, [R5], #1
        LDRB    R1, [R5], #1
        ORR     R0, R0, R1, LSL #8
        LDRB    R1, [R5], #1
        ORR     R0, R0, R1, LSL #16
        CMPS    R4, #0                          ; in the 4-byte (short encodings) table?
        LDREQB  R1, [R5], #1                    ; yes, so include the 4th byte
        ORREQ   R0, R0, R1, LSL #24             ; in the resultant word
        ADD     R3, R3, R0
        STR     R3, [R6], #4
        B       decodeEntry
ones
        SUB     R11, R11, R1                    ; this item yields R1 entries, one of which
        ADD     R11, R11, #1                    ; has already been counted at decodeEntry
anotherOne                                      ; Have number of increment-by-ones in R1
        ADD     R3, R3, #1
        STR     R3, [R6], #4
        SUBS    R1, R1, #1
        BGT     anotherOne
        B       decodeEntry
greaterThan9
        CMPS    R1, #92
        ADDLT   R3, R3, R0                      ; R0 = byte-10 is the whole delta
        STRLT   R3, [R6], #4
        BLT     decodeEntry
greaterThan91
        SUBS    R0, R1, #174
        BLT     oneMore
twoMore
        LDRB    R1, [R5], #1
        ORR     R0, R1, R0, LSL #16
        LDRB    R1, [R5], #1
        ORR     R0, R0, R1, LSL #8
        ADD     R3, R3, R0
        STR     R3, [R6], #4
        B       decodeEntry
oneMore
        SUBS    R0, R1, #92
        LDRB    R1, [R5], #1
        ORR     R0, R1, R0, LSL #8
        ADD     R3, R3, R0
        STR     R3, [R6], #4
        B       decodeEntry                     ; } /* end while (--nEntries >= 0) { */

decodedTab
        CMPS    R4, #0                          ; if isShorts then
        BNE     finishLongs                     ; else finishLongs
finishShorts
        MOV     R11, R12                        ; no of els to decode = nLongs
        MOV     R12, R2                         ; R12 = &shorts[0]
        MOV     R2, R6                          ; stash away start of longs table
        MOV     R4, #1                          ; next table is longs
        B       decodeTab
finishLongs
        MOV     R11, R2                         ; R11 = &longs[0]

;       DLINE   "Finished expanding shorts+longs table"

decodedBothTabs
        ; Now have:  R12 = &shorts[0]
        ;            R11 = &longs[0]
        ;            R10 = highest address +1 of encoded data
        ;            R9  = lowest address of encoded data
        ;            R8  = highest address +1 of decoded data
        ;
        ; R0..R7 are free for workspace
        ; (this loop itself only touches R0, R1, R3-R6)

;       DREG    R12, "Shorts table at "
;       DREG    R11, "Longs table at "
;       DREG    R9, "Encoded data start "
;       DREG    R10, "Encoded data end+1 "
;       DREG    R8, "Decoded data end+1 "

; The encoded data is consumed from its highest address downwards, and the
; decoded image is written from its highest address downwards, two words
; per control byte.

decodePair
        CMPS    R10, R9                         ; Have we reached the base ?
        BLS     doneDecode                      ; R10 and R9 are addresses, so the test must be
                                                ; unsigned (was BLE, which goes wrong if the
                                                ; data straddles &80000000)
        LDRB    R6, [R10, #-1]!                 ; byte value
        ; The words will be put in R4 and R5, to be STMDB'd
        AND     R3, R6, #15                     ; first nibble
        SUBS    R0, R3, #MinShort               ; idx = (val - MinShort)
        BLT     notshort0
short0
        LDRB    R1, [R10, #-1]!
        ORR     R0, R1, R0, LSL #8
        LDR     R4, [R12, R0, LSL #2]           ; w = shorts[(nibble-MinShort) LSL 8 | *--p]
        B       gotFirst
notshort0
        SUBS    R0, R3, #MinLong                ; idx = (val - MinLong)
        BLT     notlong0
long0
        LDRB    R1, [R10, #-1]!
        ORR     R0, R1, R0, LSL #8
        LDR     R0, [R11, R0, LSL #2]           ; w = longs[(nibble-MinLong) LSL 8 | *--p]
        LDRB    R1, [R10, #-1]!                 ; table entry supplies the top 24 bits,
        ORR     R4, R1, R0, LSL #8              ; the stream supplies the bottom byte
        B       gotFirst
notlong0
        MOVS    R4, R3                          ; TMD 13-Feb-90: combine 2 instructions here
                                                ; used to be CMPS R3,#0; MOVEQ R4,R3
        BEQ     gotFirst                        ; nibble 0 => the word is zero
literal0
        LDRB    R0, [R10, #-1]!
        LDRB    R1, [R10, #-1]!
        ORR     R0, R0, R1, LSL #8
        LDRB    R1, [R10, #-1]!
        ORR     R0, R0, R1, LSL #16
        LDRB    R1, [R10, #-1]!
        ORR     R4, R0, R1, LSL #24
gotFirst
; Phew!  We have the first word of the pair (in R4), now we have
; to do (almost) the same again, result in R5, and STMDB.

        MOV     R3, R6, LSR #4                  ; second nibble
        SUBS    R0, R3, #MinShort               ; idx = (val - MinShort)
        BLT     notshort1
short1
        LDRB    R1, [R10, #-1]!
        ORR     R0, R1, R0, LSL #8
        LDR     R5, [R12, R0, LSL #2]           ; w = shorts[(nibble-MinShort) LSL 8 | *--p]
        STMDB   R8!, {R4,R5}
        B       decodePair
notshort1
        SUBS    R0, R3, #MinLong                ; idx = (val - MinLong)
        BLT     notlong1
long1
        LDRB    R1, [R10, #-1]!
        ORR     R0, R1, R0, LSL #8
        LDR     R0, [R11, R0, LSL #2]           ; w = longs[(nibble-MinLong) LSL 8 | *--p]
        LDRB    R1, [R10, #-1]!
        ORR     R5, R1, R0, LSL #8
        STMDB   R8!, {R4,R5}
        B       decodePair
notlong1
        MOVS    R5, R3                          ; TMD 13-Feb-90: combine 2 instructions here
                                                ; used to be CMPS R3,#0; MOVEQ R5,R3

                                                ; This doesn't pay off much
        STMEQDB R8!, {R4,R5}                    ; might be better to swap round
        BEQ     decodePair                      ; literal and zero, to save 3S on
literal1                                        ; the longer path ?
        LDRB    R0, [R10, #-1]!
        LDRB    R1, [R10, #-1]!                 ; If I had the right byte-sex and
        ORR     R0, R0, R1, LSL #8              ; a couple of registers to spare,
        LDRB    R1, [R10, #-1]!                 ; could do this in 15S instead of 22S
        ORR     R0, R0, R1, LSL #16             ; using the load non-aligned word code
        LDRB    R1, [R10, #-1]!                 ; given in ARM CPU Manual.
        ORR     R5, R0, R1, LSL #24
        STMDB   R8!, {R4,R5}
        B       decodePair

doneDecode
;       DREG    R8, "Finished decoding, module at "

; now R8 -> the completely unsqueezed module
; NOTE(review): the loop above only ever writes whole pairs of words, and R8
; (not R7) is what gets stored in the module node below, so this presumably
; relies on the squeezer making the decoded size a multiple of 8 - confirm.

; so first, free the shorts+longs table block
; R12 -> shorts, which is first of the two

        MOV     R2, R12
;       DREG    R2, "Freeing shorts+longs table at "
        BL      FreeRMABlock                    ; ignore any error from this

        MOV     R3, R8                          ; save pointer to expanded module

        Pull    "R2,R7-R12"                     ; pull pointer to original module base into R2
                                                ; and restore other registers

;       DREG    R2, "Freeing original module block at "
        BL      FreeRMABlock                    ; may fail because original module is in ROM,
                                                ; so ignore error

;       DLINE   "Returning new module to OS"
        STR     R3, [R9, #Module_code_pointer]  ; point module node at expanded module
        CLRV
        Pull    PC                              ; exit (VC)

; come here if failed to claim block for tables

ExpandFailed2
;       DLINE   "Failed to claim table block, freeing module block"
        Push    R0                              ; save original error pointer
        MOV     R2, R7
        BL      FreeRMABlock
        Pull    R0                              ; restore error pointer, and drop thru to ...

; come here if failed to claim block for expanded module

ExpandFailed1
        SETV
        Pull    "R6-R12, pc"                    ; restore registers, and exit V set

; subroutine to free a block in RMA
; in:   R2 -> block
; out:  R0,R1 corrupted

FreeRMABlock Entry
;       LDR     R0, [R2, #-4]
;       DREG    R0, "FreeRMABlock called, block size purports to be "
        MOV     R0, #HeapReason_Free
        MOV     R1, #RMAAddress
        SWI     XOS_Heap
        EXIT

;       InsertDebugRoutines

        END