Review notes (free text ahead of the first "Index:" line):
  * The .l loop of this patch dropped `ldr r10, [r4, #64]` without re-adding
    it, so `smlal r8, r9, r11, r10` consumed the value loaded from
    [r3, #52].  The load is restored below, in the same slot the .l2 loop
    uses for its [r4, #68] load.  The second hunk's new-side length (89 -> 90)
    and the third hunk's new-side start (152 -> 153) are adjusted to match.
  * NOTE(review): the line structure of this patch had been lost.  Blank
    context lines and indentation were reconstructed to agree with the hunk
    line counts -- confirm against revision 28623 (or apply with `patch -l`)
    before committing.

Index: apps/codecs/libmad/synth_full_arm.S
===================================================================
--- apps/codecs/libmad/synth_full_arm.S (revision 28623)
+++ apps/codecs/libmad/synth_full_arm.S (working copy)
@@ -31,7 +31,12 @@
         ;; r1 = fo
         ;; r2 = fe
         ;; r3 = D0ptr
-        ;; r4 = D1ptr
+        ;; r4 = D1ptr
+
+        /*;; r5 = loop counter
+        ;; r6,r7 accumulator1
+        ;; r8,r9 accumulator2 */
+
 synth_full_odd_sbsample:
         stmdb sp!, {r4-r11, lr}
         ldr r4, [sp, #36]
@@ -40,88 +45,90 @@
         mov r5, #15
         add r2, r2, #32
 .l:
+        /* ;; PROD_O and odd half of SB_SAMPLE*/
         add r3, r3, #128
         add r4, r4, #128
 
+        ldr r7, [r3, #4]
         ldmia r1!, {r10, r11, r12, lr}
-        ldr r7, [r3, #4]
+        ldr r9, [r4, #120]
         smull r6, r7, r10, r7
-        ldr r9, [r4, #120]
+        ldr sp, [r3, #60]
         smull r8, r9, r10, r9
-
-        ldr r10, [r3, #60]
-        smlal r6, r7, r11, r10
         ldr r10, [r3, #52]
+        smlal r6, r7, r11, sp
+        ldr sp, [r3, #44]
         smlal r6, r7, r12, r10
-        ldr r10, [r3, #44]
-        smlal r6, r7, lr, r10
-        ldr r10, [r4, #64]
+        ldr r10, [r4, #64] /*;; r10 must hold [r4, #64] for the r8,r9 smlal with r11 below */
+        smlal r6, r7, lr, sp
+        ldr sp, [r4, #72]
         smlal r8, r9, r11, r10
-        ldr r10, [r4, #72]
-        smlal r8, r9, r12, r10
         ldr r10, [r4, #80]
+        smlal r8, r9, r12, sp
         smlal r8, r9, lr, r10
-
+        ldr r10, [r3, #36]
+
         ldmia r1!, {r11, r12, sp, lr}
-        ldr r10, [r3, #36]
         smlal r6, r7, r11, r10
+
+        ldr r10, [r4, #88] /*;;1 cycle stall on arm9, but we free up r11*/
+        smlal r8, r9, r11, r10
+
         ldr r10, [r3, #28]
+        ldr r11, [r3, #20]
         smlal r6, r7, r12, r10
-        ldr r10, [r3, #20]
-        smlal r6, r7, sp, r10
         ldr r10, [r3, #12]
+        smlal r6, r7, sp, r11
+        ldr r11, [r4, #96]
         smlal r6, r7, lr, r10
-
-        ldr r10, [r4, #88]
-        smlal r8, r9, r11, r10
-        ldr r10, [r4, #96]
-        smlal r8, r9, r12, r10
         ldr r10, [r4, #104]
+        smlal r8, r9, r12, r11
+        ldr r11, [r4, #112]
         smlal r8, r9, sp, r10
-        ldr r10, [r4, #112]
-        smlal r8, r9, lr, r10
+        smlal r8, r9, lr, r11
 
         rsbs r6, r6, #0
         rsc r7, r7, #0
-
+
+        /* ;; PROD_A and even half of SB_SAMPLE*/
+        ldr r10, [r3, #0]
         ldmia r2!, {r11, r12, sp, lr}
-
-        ldr r10, [r3, #0]
         smlal r6, r7, r11, r10
-        ldr r10, [r3, #56]
-        smlal r6, r7, r12, r10
+
+        ldr r10, [r4, #60] /*;;1 cycle stall on arm9, but we free up r11*/
+        smlal r8, r9, r11, r10
+        ldr r11, [r3, #56]
         ldr r10, [r3, #48]
+        smlal r6, r7, r12, r11
+        ldr r11, [r3, #40]
         smlal r6, r7, sp, r10
-        ldr r10, [r3, #40]
-        smlal r6, r7, lr, r10
-
-        ldr r10, [r4, #60]
-        smlal r8, r9, r11, r10
         ldr r10, [r4, #68]
+        smlal r6, r7, lr, r11
+        ldr r11, [r4, #76]
         smlal r8, r9, r12, r10
-        ldr r10, [r4, #76]
-        smlal r8, r9, sp, r10
-        ldr r10, [r4, #84]
+        ldr r10, [r4, #84]
+        smlal r8, r9, sp, r11
         smlal r8, r9, lr, r10
-
+
+        ldr r10, [r3, #32]
         ldmia r2!, {r11, r12, sp, lr}
-        ldr r10, [r3, #32]
         smlal r6, r7, r11, r10
+
+        ldr r10, [r4, #92] /*;;1 cycle stall on arm9, but we free up r11*/
+        smlal r8, r9, r11, r10
+
         ldr r10, [r3, #24]
+        ldr r11, [r3, #16]
         smlal r6, r7, r12, r10
-        ldr r10, [r3, #16]
-        smlal r6, r7, sp, r10
         ldr r10, [r3, #8]
+        smlal r6, r7, sp, r11
+        ldr r11, [r4, #100]
         smlal r6, r7, lr, r10
-
-        ldr r10, [r4, #92]
-        smlal r8, r9, r11, r10
-        ldr r10, [r4, #100]
-        smlal r8, r9, r12, r10
         ldr r10, [r4, #108]
+        smlal r8, r9, r12, r11
+        ldr r11, [r4, #116]
         smlal r8, r9, sp, r10
-        ldr r10, [r4, #116]
-        smlal r8, r9, lr, r10
+        smlal r8, r9, lr, r11
 
         movs r6, r6, lsr #16
         adc r6, r6, r7, lsl #16
@@ -146,88 +153,88 @@
         mov r5, #15
         add r2, r2, #32
 .l2:
+        /* ;; PROD_O and odd half of SB_SAMPLE*/
         add r3, r3, #128
         add r4, r4, #128
 
+        ldr r7, [r3, #0]
         ldmia r1!, {r10, r11, r12, lr}
-        ldr r7, [r3, #0]
+        ldr r9, [r4, #60]
         smull r6, r7, r10, r7
-        ldr r9, [r4, #60]
+        ldr sp, [r3, #56]
         smull r8, r9, r10, r9
-
-        ldr r10, [r3, #56]
-        smlal r6, r7, r11, r10
         ldr r10, [r3, #48]
+        smlal r6, r7, r11, sp
+        ldr sp, [r3, #40]
         smlal r6, r7, r12, r10
-        ldr r10, [r3, #40]
-        smlal r6, r7, lr, r10
+        ldr r10, [r4, #68]
+        smlal r6, r7, lr, sp
 
-        ldr r10, [r4, #68]
+        ldr sp, [r4, #76]
         smlal r8, r9, r11, r10
-        ldr r10, [r4, #76]
-        smlal r8, r9, r12, r10
         ldr r10, [r4, #84]
+        smlal r8, r9, r12, sp
         smlal r8, r9, lr, r10
+
+        ldr r10, [r3, #32]
+        ldmia r1!, {r11, r12, sp, lr}
 
-        ldmia r1!, {r11, r12, sp, lr}
-        ldr r10, [r3, #32]
         smlal r6, r7, r11, r10
+        ldr r10, [r4, #92]
+        smlal r8, r9, r11, r10
         ldr r10, [r3, #24]
+        ldr r11, [r3, #16]
         smlal r6, r7, r12, r10
-        ldr r10, [r3, #16]
-        smlal r6, r7, sp, r10
         ldr r10, [r3, #8]
+        smlal r6, r7, sp, r11
+        ldr r11, [r4, #100]
         smlal r6, r7, lr, r10
-
-        ldr r10, [r4, #92]
-        smlal r8, r9, r11, r10
-        ldr r10, [r4, #100]
-        smlal r8, r9, r12, r10
         ldr r10, [r4, #108]
+        smlal r8, r9, r12, r11
+        ldr r11, [r4, #116]
         smlal r8, r9, sp, r10
-        ldr r10, [r4, #116]
-        smlal r8, r9, lr, r10
+        smlal r8, r9, lr, r11
 
         rsbs r6, r6, #0
         rsc r7, r7, #0
 
+        ldr r10, [r3, #4]
         ldmia r2!, {r11, r12, sp, lr}
-
-        ldr r10, [r3, #4]
         smlal r6, r7, r11, r10
+        ldr r10, [r4, #120] /*;;1 cycle stall on arm9, but we free up r11*/
+        smlal r8, r9, r11, r10
         ldr r10, [r3, #60]
+        ldr r11, [r3, #52]
         smlal r6, r7, r12, r10
-        ldr r10, [r3, #52]
-        smlal r6, r7, sp, r10
-        ldr r10, [r3, #44]
+        ldr r10, [r3, #44]
+        smlal r6, r7, sp, r11
+        ldr r11, [r4, #64]
         smlal r6, r7, lr, r10
 
-        ldr r10, [r4, #120]
-        smlal r8, r9, r11, r10
-        ldr r10, [r4, #64]
-        smlal r8, r9, r12, r10
         ldr r10, [r4, #72]
+        smlal r8, r9, r12, r11
+        ldr r11, [r4, #80]
         smlal r8, r9, sp, r10
-        ldr r10, [r4, #80]
-        smlal r8, r9, lr, r10
+        smlal r8, r9, lr, r11
+
+        ldr r10, [r3, #36]
         ldmia r2!, {r11, r12, sp, lr}
-        ldr r10, [r3, #36]
         smlal r6, r7, r11, r10
+        ldr r10, [r4, #88] /*;;1 cycle stall on arm9, but we free up r11*/
+        smlal r8, r9, r11, r10
+
         ldr r10, [r3, #28]
+        ldr r11, [r3, #20]
         smlal r6, r7, r12, r10
-        ldr r10, [r3, #20]
-        smlal r6, r7, sp, r10
         ldr r10, [r3, #12]
+        smlal r6, r7, sp, r11
+        ldr r11, [r4, #96]
         smlal r6, r7, lr, r10
-
-        ldr r10, [r4, #88]
-        smlal r8, r9, r11, r10
-        ldr r10, [r4, #96]
-        smlal r8, r9, r12, r10
         ldr r10, [r4, #104]
+        smlal r8, r9, r12, r11
+        ldr r11, [r4, #112]
         smlal r8, r9, sp, r10
-        ldr r10, [r4, #112]
-        smlal r8, r9, lr, r10
+        smlal r8, r9, lr, r11
 
         movs r6, r6, lsr #16
         adc r6, r6, r7, lsl #16