; Copyright 1996 Acorn Computers Ltd ; ; Licensed under the Apache License, Version 2.0 (the "License"); ; you may not use this file except in compliance with the License. ; You may obtain a copy of the License at ; ; http://www.apache.org/licenses/LICENSE-2.0 ; ; Unless required by applicable law or agreed to in writing, software ; distributed under the License is distributed on an "AS IS" BASIS, ; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ; See the License for the specific language governing permissions and ; limitations under the License. ; SUBT > HeapSort ; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ; HeapSort routine. Borrowed from Knuth by Tutu. Labels h(i) correspond to ; steps in the algorithm. ;mjs June 2001, API adjusted to allow for full 32-bit addresses ; In r0 = n (maximum of 268,435,455), and flags in top 4 bits ; bit 31 *must* be set, flagging new API to allow full 32-bit addresses ; bit 30 set -> use r4,r5 on postpass ; bit 29 set -> build (r1) from r4,r5 in prepass ; bit 28 set -> use r6 as temp slot ; r1 = array(n) of word size objects (r2 determines type) ; r2 = address of comparison procedure ; Special cases: ; 0 -> treat r(n) as array of cardinal ; 1 -> treat r(n) as array of integer ; 2 -> treat r(n) as array of cardinal* ; 3 -> treat r(n) as array of integer* ; 4 -> treat r(n) as array of char* (case insensitive) ; 5 -> treat r(n) as array of char* (case sensitive) ; r3 = wsptr for comparison procedure (only needed if r2 > 5) ; r4 = array(n) of things (only needed if r0 & &60000000) ; r5 = sizeof(element) ( --------- ditto ---------- ) ; r6 = address of temp slot (only needed if r5 > 16K or r0 & &10000000) ;For backward compatibility, supports old API assuming addresses are safe ;(specifically, that address in r1 does not have any of top 3 bits set) ; ;The old API uses bits 31-29 of r1 to mean the equivalent of bits 30-28 of r0 ;as defined above. Just to make this more confusing, the old API always behaves ;as if bit 31 of r1 mirrors bit 30 (ie. bit 30 set forces bit 31 set, see PRM 5a-662), ;so we preserve this for old API, but do originally intended thing for new API. ; r10-r12 trashable hs_array RN r4 hs_procadr RN r5 hs_i RN r6 hs_j RN r7 hs_K RN r8 hs_R RN r9 hs_l RN r10 hs_r RN r11 ;wp RN r12 ; User sort procedure entered in SVC mode, interrupts enabled ; r0 = contents of array(1) ; r1 = contents of array(2) ; r0-r3 may be trashed ; wp = value requested (trash at your peril; you'll get the bad one next time) ; User sort procedure returns: ; LT: if f(r0) < f(r1) ; GE: if f(r0) => f(r1) ; (ie. N_bit and V_bit only considered) HeapSortRoutine ROUT BIC r10, r0, #&F0000000 ; n (zapping flag bits) CMP r10, #2 ; 0 or 1 elements? No data moved either ExitSWIHandler LO ; VClear in lr and psr Push "r0-r3, hs_array, hs_procadr, hs_i, hs_j, hs_K, hs_R, lr" CLRPSR I_bit, r14 ; Enable interrupts (may take ages) [ False STR r0, ndump ; For debugging porpoises ] TST r0, #&80000000 ; test old/new API MOVNE r10, r0 ; new, flags are in r0 BICNE r0, r0, #&F0000000 ; zap new flags MOVEQ r10, r1, LSR #1 ; old, flags are in r1, 1 bit to left BICEQ r1, r1, #&E0000000 ; zap old flags TST r10, #&20000000 ; Are we to build the pointer array? BEQ %FT01 ; Build array of pointers to data blocks for the punter if he desires this ; (lazy slobs abound ...) ; for (i=0; i<n; i++) r(i) = &block + i*sizeof(element); MOV r10, r0 ; n MOV r14, r1 ; r14 -> base of pointer array 00 STR r4, [r14], #4 ADD r4, r4, r5 ; r4 += sizeof(element) SUBS r10, r10, #1 BNE %BT00 01 SUB hs_array, r1, #4 ; HeapSort assumes r(1..n) not (0..n-1) MOV hs_procadr, r2 ; Put proc address where we need it CMP hs_procadr, #6 ; Special procedure ? ADRLO r14, hs_Procedures LDRLO hs_procadr, [r14, hs_procadr, LSL #2] ADDLO hs_procadr, hs_procadr, r14 MOV wp, r3 ; Can now use r3 temp. Keep set up ; for speed during execution MOV hs_l, r0, LSR #1 ; l = floor(n/2) + 1 ADD hs_l, hs_l, #1 MOV hs_r, r0 ; r = n h2 CMP hs_l, #1 BEQ %FT10 SUB hs_l, hs_l, #1 LDR hs_R, [hs_array, hs_l, LSL #2] ; R = R(l) MOV hs_K, hs_R B %FT20 10 LDR hs_R, [hs_array, hs_r, LSL #2] ; R = R(r) MOV hs_K, hs_R LDR r14, [hs_array, #4] ; R(r) = R(1) STR r14, [hs_array, hs_r, LSL #2] SUB hs_r, hs_r, #1 CMP hs_r, #1 ; IF r=1 THEN R(1) = R STREQ hs_R, [hs_array, #4] 20 [ False BL DoDebug ] CMP hs_r, #1 BEQ %FT90 ; [finished sorting the array] h3 MOV hs_j, hs_l h4 MOV hs_i, hs_j MOV hs_j, hs_j, LSL #1 [ False DREG hs_i," i ",cc DREG hs_j," j " ] CMP hs_j, hs_r BEQ h6 BHI h8 h5 LDR r0, [hs_array, hs_j, LSL #2] ; IF K(R(j)) < K(R(j+1)) THEN j +:= 1 ADD r14, hs_j, #1 LDR r1, [hs_array, r14, LSL #2] MOV lr, pc ; r0, r1 for comparison MOV pc, hs_procadr ADDLT hs_j, hs_j, #1 ; Assumes signed comparison done <<<<<< h6 MOV r0, hs_K ; IF K >= K(R(j)) THEN h8 LDR r1, [hs_array, hs_j, LSL #2] MOV lr, pc ; r0, r1 for comparison MOV pc, hs_procadr LDRLT r14, [hs_array, hs_j, LSL #2] ; R(i) = R(j) STRLT r14, [hs_array, hs_i, LSL #2] BLT h4 h8 STR hs_R, [hs_array, hs_i, LSL #2] ; R(i) = R B h2 ; Array now sorted into order 90 LDR r14, [sp, #4*0] ; r0in LDR r2, [sp, #4*1] ; r1in TST r14, #&80000000 ; check for old/new API ANDEQ r8, r2, #&E0000000 ORREQ r14, r14, r8, LSR #1 ; old, munge old flag bits into r14 BICEQ r2, r2, #&E0000000 ; clear from r2 ORREQ r14, r14, #&40000000 ; ho hum fudge to make old bit 31 mirror TSTEQ r14, #&20000000 ; old bit 30 if using old API (behave as errata BICEQ r14, r14, #&40000000 ; in PRM 5a-662) TST r14, #&40000000 ; check for postpass BEQ %FA99 ; [no shuffle required, exit] ; Reorder the blocks according to the sorted array of pointers ADD r1, sp, #4*4 LDMIA r1, {r1, r8, r9} ; r4,r5,r6in ; r1 -> list of blocks [ False DREG r2, "pointer array " DREG r1, "base of blocks " DREG r8, "sizeof(element) " ] MOV r3, r2 ; r3 -> first item of current cycle BIC r0, r14, #&F0000000 ; r0 = n ADD r6, r2, r0, LSL #2 ; r6 -> end of array of pointers TST r14, #&10000000 ; punter forcing use of his temp slot? BNE %FT94 ; fine by me! CMP r8, #ScratchSpaceSize LDRLS r9, =ScratchSpace ; r9 -> temp slot (normally ScratchSpc) 94 [ False DREG r9, "temp slot " ] 91 SUB r14, r3, r2 MOV r14, r14, LSR #2 ; r14 = index (0..n-1) of current item MLA r4, r14, r8, r1 ; r4 -> current block MOV r5, r3 ; r5 -> current item BL MoveToTempSlot ; save first block in temp slot 92 LDR r7, [r5] ; r7 -> next block MOV r14, #0 STR r14, [r5] ; mark item 'done' SUB r5, r7, r1 ; r14 := index of next item (r8 pres.) DivRem r14, r5, r8, r0 ; r5,r0 corrupt ADD r5, r2, r14, LSL #2 ; r5 -> next item [ False DREG r7, " next block " DREG r5, " next item " ] CMP r5, r3 ; reached start of cycle? MOVEQ r7, r9 ; get back from temp slot if last one BL MoveFromGivenSlot ; corrupts flags, but preserves r5, r3... CMP r5, r3 MOVNE r4, r7 ; update r4 (current block) BNE %BT92 93 LDR r14, [r3, #4]! ; skip already-copied items CMP r3, r6 BCS %FA99 ; [reached end] CMP r14, #0 BEQ %BT93 B %BT91 ; [found one that hasn't been copied] ; No error return from HeapSort 99 Pull "r0-r3, hs_array, hs_procadr, hs_i, hs_j, hs_K, hs_R, lr" ; SWIHandler exit takes flags + mode from lr, not psr !!! ExitSWIHandler ; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ; In r4 -> element to be copied ; r8 = sizeof(element) ; r9 -> temp slot ; Out all preserved MoveToTempSlot Entry "r4, r8, r9" TST r4, #3 ; If base and element size wordy TSTEQ r8, #3 ; then do faster copy. Also temp wordy BNE %FT01 00 SUBS r8, r8, #4 LDRPL r14, [r4], #4 STRPL r14, [r9], #4 BPL %BT00 EXIT 01 SUBS r8, r8, #1 LDRPLB r14, [r4], #1 STRPLB r14, [r9], #1 BPL %BT01 EXIT ; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ; In r4 -> where element is to be copied ; r7 -> element to be copied ; r8 = sizeof(element) ; Out all preserved MoveFromGivenSlot Entry "r4, r7, r8" TST r4, #3 ; If dest and element size wordy TSTEQ r8, #3 ; then do faster copy. Also src wordy BNE %FT01 00 SUBS r8, r8, #4 LDRPL r14, [r7], #4 STRPL r14, [r4], #4 BPL %BT00 EXIT 01 SUBS r8, r8, #1 LDRPLB r14, [r7], #1 STRPLB r14, [r4], #1 BPL %BT01 EXIT ; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ; Built-in sorting procedures hs_Procedures DCD hs_CardinalCMP - hs_Procedures DCD hs_IntegerCMP - hs_Procedures DCD hs_CardinalPtrCMP - hs_Procedures DCD hs_IntegerPtrCMP - hs_Procedures DCD hs_StringCMP - hs_Procedures DCD hs_StringSensCMP - hs_Procedures ; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ; In r0, r1 -> cardinals ; Out flags set on (*r0) - (*r1) hs_CardinalPtrCMP LDR r0, [r0] LDR r1, [r1] ; ............................................................................. ; In r0, r1 = cardinals ; Out flags set on r0 - r1 hs_CardinalCMP CMP r0, r1 MSRCS CPSR_f, #C_bit ; CS -> GE (nv) MSRCC CPSR_f, #N_bit ; CC -> LT (Nv) MOV pc, lr ; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ; In r0, r1 -> integers ; Out flags set on (*r0) - (*r1) hs_IntegerPtrCMP LDR r0, [r0] LDR r1, [r1] ; ............................................................................. ; In r0, r1 = integers ; Out flags set on r0 - r1 hs_IntegerCMP CMP r0, r1 MOV pc, lr ; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ; Case-insensitive string comparison ; In r0, r1 -> strings - CtrlChar terminated (NB. Must be same CtrlChar !) ; Out flags set on (char *)(r0) - (char *)(r1) compare hs_StringCMP ROUT 10 LDRB r2, [r0], #1 LowerCase r2, r12 LDRB r3, [r1], #1 LowerCase r3, r12 CMP r2, r3 ; Differ ? MOVNE pc, lr ; GE or LT CMP r2, #space-1 ; Finished ? BHI %BT10 CMP r2, r2 ; return EQ (also GE) MOV pc, lr ; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ ; Case-sensitive string comparison ; In r0, r1 -> strings - CtrlChar terminated (NB. Must be same CtrlChar !) ; Out flags set on (char *)(r0) - (char *)(r1) hs_StringSensCMP ROUT 10 LDRB r2, [r0], #1 LDRB r3, [r1], #1 CMP r2, r3 ; Differ ? MOVNE pc, lr ; GE or LT CMP r2, #space-1 ; Finished ? BHI %BT10 CMP r2, r2 ; return EQ (also GE) MOV pc, lr ; +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ LTORG END