; Copyright 1996 Acorn Computers Ltd
;
; Licensed under the Apache License, Version 2.0 (the "License");
; you may not use this file except in compliance with the License.
; You may obtain a copy of the License at
;
;     http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing, software
; distributed under the License is distributed on an "AS IS" BASIS,
; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; See the License for the specific language governing permissions and
; limitations under the License.
;
; sources.idcto - Ian Rickards' amazing new 5-multiply DCT.
; Thanks to Ian Rickards of ARM Ltd.
; Thanks also to Arai, Agui, Nakajima who actually invented the algorithm.

r0      RN      0
r1      RN      1
r2      RN      2
r3      RN      3
r4      RN      4
r5      RN      5
r6      RN      6
r7      RN      7
r8      RN      8
r9      RN      9
r10     RN      10
r11     RN      11
r12     RN      12
sp      RN      13
lr      RN      14
pc      RN      15

        AREA    |C$$code|, CODE, READONLY

        EXPORT  asm_idctsq_1_d

;-----------------------------------------------------------------------
; asm_idctsq_1_d
;
; One 8-point pass of the 5-multiply DCT over eight words, in place.
;
; In:    r0 = env pointer (unused)
;        r1 = pointer to 8 consecutive words of coefficients
; Out:   the 8 words at the address passed in r1 are overwritten with
;        the results; r0-r7 are stored to words 0-7 in that order.
; Scale: inputs are taken unscaled; results are left shifted up by 8
;        (LSL #12 on the way in, ASR #4 on the way out).
; Saves: r4-r10 (stacked on entry, restored on exit).
; Clobb: r0-r3.  r11, r12 and lr are not touched.
; Flags: no instruction here sets the condition flags.
; Stack: 7 words.
;
; The DCT core (between "starts" and "ends") expects coefficients
; 0, 4, 2, 6, 5, 1, 7, 3 in r0-r7 respectively, uses r8-r9 as scratch
; and is 43 data processing instructions long.  Constant multiplies are
; built from shifted adds/subtracts; two of them carry a pending ASR #1
; that is folded into a later instruction (see the FIX_ markers).
;-----------------------------------------------------------------------
asm_idctsq_1_d
        STMDB   sp!, {r4-r10}
        MOV     r10, r1                 ; keep the data pointer: r1 is reloaded below
        LDMIA   r10, {r0-r7}            ; r0-r7 = coefficients 0-7

; Rearrange the inputs into the order the core wants and scale them in
; the same move.  Register <- coefficient:
;       r0 <- 0    r1 <- 4    r2 <- 2    r3 <- 6
;       r4 <- 5    r5 <- 1    r6 <- 7    r7 <- 3
; Every value is shifted up by 12 here; the final ASR #4 brings the
; results back to "shifted up by 8" - exactly like the existing code.
; r8 is only a temporary for the two three-way rotations; it is dead
; again before the core first writes it.
        MOV     r0, r0, LSL #12         ; coeff 0 stays in r0
        MOV     r2, r2, LSL #12         ; coeff 2 stays in r2

        MOV     r8, r3, LSL #12         ; rotate 3 -> r7, 6 -> r3, 7 -> r6
        MOV     r3, r6, LSL #12
        MOV     r6, r7, LSL #12
        MOV     r7, r8

        MOV     r8, r4, LSL #12         ; rotate 4 -> r1, 5 -> r4, 1 -> r5
        MOV     r4, r5, LSL #12
        MOV     r5, r1, LSL #12
        MOV     r1, r8

; NOTE(review): the original source carried a disabled "ORR rN,rN,#128"
; on each of r0-r7 at this point (presumably a rounding bias - confirm
; before re-enabling).  It remains disabled.

; Now start the DCT proper.
; starts
        ADD     r5, r5, r6              ; r5 = r5 + r6
        SUB     r6, r5, r6, ASL #1      ; r6 = old r5 - old r6
        SUB     r4, r4, r7              ; r4 = r4 - r7
        ADD     r7, r4, r7, ASL #1      ; r7 = old r4 + old r7
        SUB     r5, r5, r7              ; r5 = r5 - r7
        ADD     r7, r5, r7, ASL #1      ; r7 = old r5 + old r7
        ADD     r2, r2, r3              ; r2 = r2 + r3
        SUB     r3, r2, r3, ASL #1      ; r3 = old r2 - old r3
        ADD     r8, r4, r6

; mul r8 by 0.383 (or 392.2/1024)
        SUB     r8, r8, r8, ASR #3      ; = 448/512
        SUB     r8, r8, r8, ASR #3      ; = 392/512
; leave pending ASR #1 until instruction marked FIX_a5

; mul r2 by 0.707 (or 724.0/1024)
        SUB     r9, r2, r2, ASR #4      ; = 960/1024
        ADD     r9, r9, r9, ASR #2      ; = 1200/1024
        SUB     r2, r2, r9, ASR #2      ; = 724/1024

        ADD     r0, r0, r1              ; r0 = r0 + r1
        SUB     r1, r0, r1, ASL #1      ; r1 = old r0 - old r1
        ADD     r3, r2, r3

; mul r4 by 0.541 (or 554.0/1024)
        ADD     r9, r4, r4, ASR #1      ; = 768/512
        SUB     r9, r9, r9, ASR #3      ; = 672/512
        ADD     r4, r4, r9, ASR #4      ; = 554/512
; leave ASR #1 until instruction marked FIX_a2

; mul r5 by 0.707 (or 724.0/1024)
        SUB     r9, r5, r5, ASR #4      ; = 960/1024
        ADD     r9, r9, r9, ASR #2      ; = 1200/1024
        SUB     r5, r5, r9, ASR #2      ; = 724/1024

; mul r6 by 1.307 (or 1338.4/1024)
        ADD     r9, r6, r6, ASR #1      ; = 1536/1024
        SUB     r6, r9, r9, ASR #3      ; = 1344/1024
        SUB     r6, r6, r9, ASR #8      ; = 1338/1024

        ADD     r0, r0, r3              ; r0 = r0 + r3
        SUB     r3, r0, r3, ASL #1      ; r3 = old r0 - old r3
        ADD     r1, r1, r2              ; r1 = r1 + r2
        SUB     r2, r1, r2, ASL #1      ; r2 = old r1 - old r2

        ADD     r4, r4, r8              ; FIX_a2 & FIX_a5 => FIX_r4 below
                                        ; (both operands still doubled, so r4 is too)
        SUB     r6, r6, r8, ASR #1      ; FIX_a5
        ADD     r7, r6, r7
        ADD     r6, r5, r6
        ADD     r5, r5, r4, ASR #1      ; FIX_r4

        ADD     r0, r0, r7              ; r0 = r0 + r7
        SUB     r7, r0, r7, ASL #1      ; r7 = old r0 - old r7
        ADD     r1, r1, r6              ; r1 = r1 + r6
        SUB     r6, r1, r6, ASL #1      ; r6 = old r1 - old r6
        ADD     r2, r2, r5              ; r2 = r2 + r5
        SUB     r5, r2, r5, ASL #1      ; r5 = old r2 - old r5
        ADD     r3, r3, r4, ASR #1      ; FIX_r4: r3 = r3 + r4/2
        SUB     r4, r3, r4              ; FIX_r4: r4 = old r3 - r4/2
; ends

; Drop 4 of the 12 scaling bits so the results are left shifted up by 8.
        MOV     r0, r0, ASR #4
        MOV     r1, r1, ASR #4
        MOV     r2, r2, ASR #4
        MOV     r3, r3, ASR #4
        MOV     r4, r4, ASR #4
        MOV     r5, r5, ASR #4
        MOV     r6, r6, ASR #4
        MOV     r7, r7, ASR #4

        STMIA   r10, {r0-r7}            ; write the results back over the input
        LDMIA   sp!, {r4-r10}

; Return with a plain MOV rather than MOVS.  Nothing above alters the
; flags, so on a 26-bit system the caller sees the same PSR either way,
; while MOVS pc,lr is not usable from a 32-bit user mode (it would try
; to restore the CPSR from an SPSR that does not exist there).
        MOV     pc, lr

; NOTE(review): asm_j_rev_dct is exported here but not defined in this
; file; presumably it comes from the included sources.idct - confirm.
        EXPORT  asm_j_rev_dct

        GET     sources.idct

        END