Index: apps/codecs/libmad/imdct_mcf5249.S =================================================================== RCS file: /cvsroot/rockbox/apps/codecs/libmad/imdct_mcf5249.S,v retrieving revision 1.2 diff -u -r1.2 imdct_mcf5249.S --- apps/codecs/libmad/imdct_mcf5249.S 25 May 2005 13:32:53 -0000 1.2 +++ apps/codecs/libmad/imdct_mcf5249.S 25 May 2005 14:48:06 -0000 @@ -16,8 +16,618 @@ * KIND, either express or implied. * ****************************************************************************/ -/* this will also be the home to III_imdct_l in the future */ +/* The following imdct_l implementation is optimized for the MCF5249, and + * utilizes the EMAC unit heavily. It is derived from imdct_l_arm.S. + */ + +/* A higher precision version of this can be made by not throwing away + * the accumulator extension bytes each time the accumulators are fetched. + * This would get 8 bits more precision in the accumulators in most places, + * and 3 bits more precision in all resulting samples. You should also + * remove the transform and window coef prescales I've added. + * I do however believe the current implementation has enough precision as it + * is, so I'm not going to implement this on the grounds of it being extremely + * boring. + */ + +/* Every STALL comment marks an EMAC stall. This usually happens when trying + to fetch an accumulator before it's finished computing. 
A stall takes two + cycles and can be avoided neatly by doing something else before fetching + the accumulator */ + +#define X0 0x00 +#define X1 0x04 +#define X2 0x08 +#define X3 0x0C +#define X4 0x10 +#define X5 0x14 +#define X6 0x18 +#define X7 0x1c +#define X8 0x20 +#define X9 0x24 +#define X10 0x28 +#define X11 0x2c +#define X12 0x30 +#define X13 0x34 +#define X14 0x38 +#define X15 0x3c +#define X16 0x40 +#define X17 0x44 + +#define x0 0x00 +#define x1 0x04 +#define x2 0x08 +#define x3 0x0C +#define x4 0x10 +#define x5 0x14 +#define x6 0x18 +#define x7 0x1c +#define x8 0x20 +#define x9 0x24 +#define x10 0x28 +#define x11 0x2c +#define x12 0x30 +#define x13 0x34 +#define x14 0x38 +#define x15 0x3c +#define x16 0x40 +#define x17 0x44 +#define x18 0x48 +#define x19 0x4c +#define x20 0x50 +#define x21 0x54 +#define x22 0x58 +#define x23 0x5c +#define x24 0x60 +#define x25 0x64 +#define x26 0x68 +#define x27 0x6c +#define x28 0x70 +#define x29 0x74 +#define x30 0x78 +#define x31 0x7c +#define x32 0x80 +#define x33 0x84 +#define x34 0x88 +#define x35 0x8c + +/* prescale all coefs so we won't have to waste time doing shifts each time + * we're going to save a sample in the code. 
+ */ +/* TODO, calculate these properly, formulas are in layer3.c somewhere */ +#define K00 0x0ffc19fd*8 +#define K01 0x00b2aa3e*8 +#define K02 0x0fdcf549*8 +#define K03 0x0216a2a2*8 +#define K04 0x0f9ee890*8 +#define K05 0x03768962*8 +#define K06 0x0f426cb5*8 +#define K07 0x04cfb0e2*8 +#define K08 0x0ec835e8*8 +#define K09 0x061f78aa*8 +#define K10 0x0e313245*8 +#define K11 0x07635284*8 +#define K12 0x0d7e8807*8 +#define K13 0x0898c779*8 +#define K14 0x0cb19346*8 +#define K15 0x09bd7ca0*8 +#define K16 0x0bcbe352*8 +#define K17 0x0acf37ad*8 + +#define WL0 0x00b2aa3e*8 +#define WL1 0x0216a2a2*8 +#define WL2 0x03768962*8 +#define WL3 0x04cfb0e2*8 +#define WL4 0x061f78aa*8 +#define WL5 0x07635284*8 +#define WL6 0x0898c779*8 +#define WL7 0x09bd7ca0*8 +#define WL8 0x0acf37ad*8 +#define WL9 0x0bcbe352*8 +#define WL10 0x0cb19346*8 +#define WL11 0x0d7e8807*8 +#define WL12 0x0e313245*8 +#define WL13 0x0ec835e8*8 +#define WL14 0x0f426cb5*8 +#define WL15 0x0f9ee890*8 +#define WL16 0x0fdcf549*8 +#define WL17 0x0ffc19fd*8 + +.section .idata,"aw",@progbits +/* .data*/ + .align 4 +imdct36_long_karray: + .long K17, -K13, K10, -K06, -K05, K01, -K00, K04, -K07, K11, K12, -K16, 0x00000000 /* ct02 */ + .long K13, K07, K16, K01, K10, -K05, K04, -K11, K00, -K17, K06, -K12, 0x00100800 /* ct03 */ + .long K11, K17, K05, K12, -K01, K06, -K07, K00, -K13, K04, -K16, K10, 0x00100c00 /* ct03 */ + .long K07, K00, -K12, K05, -K16, -K10, K11, -K17, K04, K13, K01, K06, 0x00001400 /* ct02 */ + .long K05, K10, -K00, -K17, K07, -K13, K12, K06, -K16, K01, -K11, -K04, 0x000c1800 /* -ct05 */ + .long K01, K05, -K07, -K11, K13, K17, -K16, -K12, K10, K06, -K04, -K00, 0x00082000 /* -ct07 */ + .long -K16, K12, -K11, K07, K04, -K00, -K01, K05, -K06, K10, K13, -K17, 0x00144800 /* ct05 */ + .long -K12, K06, K17, -K00, -K11, K04, K05, -K10, K01, K16, -K07, -K13, 0x00045000 /* ct07 */ + .long -K10, K16, K04, -K13, -K00, K07, K06, -K01, -K12, -K05, K17, K11, 0x00085400 /* -ct07 */ + .long -K06, -K01, K13, K04, K17, 
-K11, -K10, -K16, -K05, K12, K00, K07, 0x000c5c00 /* -ct05 */ + .long -K04, -K11, -K01, K16, K06, K12, K13, -K07, -K17, -K00, -K10, -K05, 0x00006000 /* ct02 */ + .long -K00, -K04, -K06, -K10, -K12, -K16, -K17, -K13, -K11, -K07, -K05, -K01, 0x00106801 /* ct03 */ + +imdct36_short_karray: + .long K08, K09, K03, K15, K14, -K02 + +window_l_array: + .long WL0, WL1, WL2, WL3, WL4, WL5, WL6, WL7, WL8, WL9, WL10, WL11, WL12 + .long WL13, WL14, WL15, WL16, WL17 + + /*.text*/ + .section .cachetest,"ax",@progbits + .global III_imdct_l + +III_imdct_l: + lea.l (-21*4, %sp), %sp /* room for register saving and temp variables */ + movem.l %d2-%d7/%a2-%a6, (10*4, %sp) + movem.l (21*4 + 4, %sp), %a0-%a1 + /* a0 has mad_fixed_t const X[18], a1 has mad_fixed_t z[36] */ + + /* disable rounding if you plan to use the accumulator extension words */ + move.l #0xb0, %d0 /* frac. mode, saturate, round */ + move.l %d0, %macsr + + lea.l imdct36_short_karray, %a6 + movem.l (%a6), %d0-%d1 /* d0 = K08, d1 = K09 */ + addq.l #8, %a6 + move.l (X4, %a0), %d2 /* d2 = X4 */ + mac.l %d2, %d0, (X13, %a0), %d3, %acc0 /* acc0 = X4*K08, d3 = X13 */ + mac.l %d3, %d1, %acc0 /* acc0 += X13*K09 = ct01 */ + + mac.l %d2, %d1, (X16, %a0), %d5, %acc1 /* acc1 = X4*K09, d5 = X16 */ + msac.l %d3, %d0, (X7, %a0), %d2, %acc1 /* acc1 += X13*-K08 = ct00, d2 = X7 */ + /* STALL */ + move.l %acc0, %d6 + move.l %acc1, %d7 + movem.l %d6-%d7, (8*4, %sp) /* stack ct01, ct00 */ + + /***************************************************************/ + + add.l %d5, %d2 /* d2 = X7 + X16 */ + msac.l %d0, %d2, (X1, %a0), %d3, %acc0 /* acc0 += -K08*(X7 + X16), d3 = X1 */ + msac.l %d1, %d3, (X10, %a0), %d4, %acc0 /* acc0 += -K09*X1, d4 = X10 */ + + neg.l %d7 /* d7 has 32 bit part of acc1 */ + move.l %d7, %acc1 /* acc1 = -ct00 */ + + mac.l %d4, %d1, %acc0 /* acc0 += X10*K09 = ct06 */ + msac.l %d3, %d0, (X0, %a0), %d3, %acc1 /* acc1 += X1*-K08, d3 = X0 */ + mac.l %d2, %d1, (X8, %a0), %d2, %acc1 /* acc1 += (X7 + X16)*K09, d2 = X8 */ + mac.l 
%d4, %d0, (X5, %a0), %d6, %acc1 /* acc1 += X10*K08, d6 = X5 */ + + move.l %acc0, %acc2 /* acc0 = ct06 */ + move.l %acc1, %acc3 /* acc1 = ct04 */ + move.l %acc0, %a4 /* acc0 = ct06 */ + move.l %acc1, %a5 /* acc1 = ct04 */ + movem.l %a4-%a5, (6*4, %sp) /* stack ct06, ct04 */ + + /***************************************************************/ + + sub.l (X11, %a0), %d3 + sub.l (X12, %a0), %d3 /* d3 = X0 - X11 - X12 = ct14 */ + + sub.l (X3, %a0), %d2 + add.l (X15, %a0), %d2 /* d2 = X8 - X3 + X15 = ct16 */ + + move.l %d2, %d4 + add.l %d3, %d4 /* d4 = d2 + d3 = ct18 */ + + msac.l %d4, %d0, (X2, %a0), %d5, %acc0 /* acc0 += ct18*-K08, d5 = X2 */ + + sub.l (X9, %a0), %d5 + sub.l (X14, %a0), %d5 /* d5 = X2 - X9 - X14 = ct15 */ + + sub.l (X6, %a0), %d6 + sub.l (X17, %a0), %d6 /* d6 = X5 - X6 - X17 = ct17 */ + + move.l %d6, %d7 + add.l %d5, %d7 /* d7 = d6 + d5 = ct19 */ + + /* d2 = ct16 + * d3 = ct14 + * d4 = ct18 + * d5 = ct15 + * d6 = ct17 + * d7 = ct19 + */ + + mac.l %d7, %d1, %acc0 /* acc0 += ct19*K09 */ + + mac.l %d4, %d1, (%a6)+, %a2, %acc1 /* acc1 += ct18*K09, a2 = K03 */ + mac.l %d7, %d0, (%a6)+, %a3, %acc1 /* acc1 += ct19*K08, a3 = K15 */ + /* STALL */ + move.l %acc0, %d0 + move.l %d0, (x22, %a1) + move.l %acc1, %d1 + move.l %d1, (x4, %a1) + + /***************************************************************/ + + move.l %acc2, %acc0 /* acc0 = ct06 */ + move.l %acc3, %acc1 /* acc1 = ct04 */ + + /* acc0 = ct06 + * acc1 = ct04 + * d2 = ct16 + * d3 = ct14 + * d5 = ct15 + * d6 = ct17 + * a2 = K03 + * a3 = K15 + */ + mac.l %d3, %a2, (%a6)+, %a4, %acc0 /* acc0 += ct14*K03, a4 = K14 */ + mac.l %d5, %a3, (%a6)+, %d4, %acc0 /* acc0 += ct15*K15, d4 = -K02 */ + mac.l %d2, %a4, %acc0 /* acc0 += ct16*K14 */ + mac.l %d6, %d4, %acc0 /* acc0 += ct17*-K02 */ + + mac.l %d3, %a3, %acc1 /* acc1 += ct14*K15 */ + msac.l %d5, %a2, %acc1 /* acc1 += ct15*-K03 */ + msac.l %d6, %a4, %acc1 /* acc1 += ct17*-K14 */ + mac.l %d2, %d4, %acc1 /* acc1 += ct16*-K02 */ + + /* STALL */ + move.l %acc0, %d0 
+ move.l %d0, (x7, %a1) + + move.l %acc1, %d1 + move.l %d1, (x1, %a1) + + /***************************************************************/ + + move.l %acc3, %acc1 /* acc1 = ct04 */ + movclr.l %acc3, %d1 /* clear acc3 */ + + /* acc2 = ct06 + * acc1 = ct04 + * d2 = ct16 + * d3 = ct14 + * d4 = -K02 + * d5 = ct15 + * d6 = ct17 + * a2 = K03 + * a3 = K15 + * a4 = K14 + */ + + movclr.l %acc2, %d0 + neg.l %d0 + move.l %d0, %acc0 /* acc0 = -ct06 */ + + msac.l %d3, %a4, %acc0 /* acc0 += ct14*-K14 */ + msac.l %d2, %a2, %acc0 /* acc0 += ct16*-K03 */ + + msac.l %d5, %a4, %acc1 /* acc1 += ct15*-K14 */ + msac.l %d6, %a2, %acc1 /* acc1 += ct17*-K03 */ + mac.l %d2, %a3, (X1, %a0), %d2, %acc1 /* acc1 += ct16*K15, d2 = X1 */ + mac.l %d3, %d4, (X7, %a0), %d3, %acc1 /* acc1 += ct14*-K02, d3 = X7 */ + + msac.l %d6, %a3, (X16, %a0), %d6, %acc0 /* acc0 += ct17*-K15, d6 = X16 */ + msac.l %d5, %d4, (X10, %a0), %d5, %acc0 /* acc0 += ct15*K02, d5 = X10 */ + + move.l (8*4, %sp), %d7 /* fetch ct01, avoid emac stall */ + neg.l %d7 + + move.l %acc1, %d1 + move.l %d1, (x25, %a1) + + move.l %acc0, %d0 + move.l %d0, (x19, %a1) + + /***************************************************************/ + + move.l %d7, %acc0 /* acc0 = -ct01 */ + move.l %d7, %acc1 /* acc1 = -ct01 */ + + /* acc0 = -ct01 + * acc1 = -ct01 + * d2 = X1 + * d3 = X7 + * d4 = -K02 + * d5 = X10 + * d6 = X16 + * a2 = K03 + * a3 = K15 + * a4 = K14 + */ + + msac.l %d6, %a4, %acc1 /* acc1 += X16*-K14 */ + msac.l %d3, %a2, %acc1 /* acc1 += X7*-K03 */ + msac.l %d5, %d4, %acc1 /* acc1 += X10*K02 */ + + msac.l %d5, %a3, %acc0 /* acc0 += X10*-K15 */ + msac.l %d6, %a2, %acc0 /* acc0 += X16*-K03 */ + msac.l %d3, %a4, %acc0 /* acc0 += X7*-K14 */ + + mac.l %d2, %a3, %acc1 /* acc1 += X1*K15 = ct05 */ + mac.l %d2, %d4, %acc0 /* acc0 += X1*-K02 = ct03 */ + /* STALL */ + move.l %acc0, %d1 + move.l %acc1, %d7 + + move.l %d7, %d0 + neg.l %d0 + movem.l %d0-%d1/%d7, (3*4, %sp) /* stack -ct05, ct03, ct05 */ + + 
/***************************************************************/ + + move.l (9*4, %sp), %d0 + move.l %d0, %acc0 /* acc0 = ct00 */ + neg.l %d0 + move.l %d0, %acc1 /* acc1 = -ct00 */ + + /* acc0 = ct00 + * acc1 = -ct00 + * d2 = X1 + * d3 = X7 + * d4 = -K02 + * d5 = X10 + * d6 = X16 + * a2 = K03 + * a3 = K15 + * a4 = K14 + */ + + mac.l %d2, %a2, %acc1 /* acc1 += X1*K03 */ + msac.l %d5, %a4, %acc1 /* acc1 += X10*-K14 */ + mac.l %d6, %d4, %acc1 /* acc1 += X16*-K02 */ + mac.l %d3, %a3, %acc1 /* acc1 += X7*K15 = ct07 */ + + mac.l %d5, %a2, %acc0 /* acc0 += X10*K03 */ + msac.l %d2, %a4, %acc0 /* acc0 += X1*-K14 */ + msac.l %d3, %d4, %acc0 /* acc0 += X7*K02 */ + msac.l %d6, %a3, %acc0 /* acc0 += X16*-K15 = ct02 */ + + lea.l imdct36_long_karray, %a6 /* a6 points to transform coefs */ + movclr.l %acc0, %d0 + movclr.l %acc1, %d1 + move.l %d1, %d2 + neg.l %d2 + movem.l %d0-%d2, (%sp) /* stack ct02, ct07, -ct07 */ + + move.l (X0, %a0), %a2 /* load first sample pre-loop */ +.loop: + movem.l (%a6), %d0-%d7 /* load d0-d7 with transform coefs */ + mac.l %d0, %a2, (X2, %a0), %a2, %acc0 + mac.l %d1, %a2, (X3, %a0), %a2, %acc0 + mac.l %d2, %a2, (X5, %a0), %a2, %acc0 + mac.l %d3, %a2, (X6, %a0), %a2, %acc0 + mac.l %d4, %a2, (X8, %a0), %a2, %acc0 + mac.l %d5, %a2, (X9, %a0), %a2, %acc0 + mac.l %d6, %a2, (X11, %a0), %a2, %acc0 + mac.l %d7, %a2, (X12, %a0), %a2, %acc0 + movem.l (8*4, %a6), %d0-%d4 /* load remaining transforms coefs */ + mac.l %d0, %a2, (X14, %a0), %a2, %acc0 + mac.l %d1, %a2, (X15, %a0), %a2, %acc0 + mac.l %d2, %a2, (X17, %a0), %a2, %acc0 + mac.l %d3, %a2, (X0, %a0), %a2, %acc0 + lea.l (13*4, %a6), %a6 /* point to next coef row, avoid emac stall */ + move.l %d4, %d0 + movclr.l %acc0, %d3 /* get result */ + moveq.l #16, %d1 + lsr.l %d1, %d0 /* d0 = index into stack for ctxx */ + move.l (%sp, %d0.l*1), %d2 + add.l %d2, %d3 /* add accumulator result to ctxx */ + move.l %d4, %d0 + lsr.l #8, %d0 /* get index into x[i] */ + and.l #0x000000ff, %d0 + move.l %d3, (%a1, %d0.l*1) /* 
save x[i] */ + btst #0, %d4 /* check if finished bit is set */ + jeq .loop /* jump back if not set */ + /* if finished, continue to windowing */ + +/* excuse the general lack of comments here, i plan on getting around + to commenting it better */ +imdct_l_windowing: + move.l (21*4 + 12, %sp), %d0 /* parameter 3, window mode */ + movem.l (%a1), %d1-%d7/%a2-%a3 /* d1..d7/a2..a3 = x0..x8 */ + btst.l #1, %d0 /* BLOCK_MODE_STOP is the only one with bit 1 set */ + jne stop_block_x0_to_x17 + +/* start and normal blocks are treated the same for x0..x17 */ +normal_block_x0_to_x17: + lea.l window_l_array + 18*4, %a0 /* a0 = &window[18] */ + move.l -(%a0), %a6 /* a6 = window[17] */ + + /* window x9..x17 first, because our input x9..x17 is equal to -x8..-x0 + (in that order). first do x[i] = window[i]*x[i]. */ + msac.l %a6, %d1, -(%a0), %a6, %acc0 + msac.l %a6, %d2, -(%a0), %a6, %acc1 + msac.l %a6, %d3, -(%a0), %a6, %acc2 + msac.l %a6, %d4, -(%a0), %a6, %acc3 + /* we need to reverse the order of the input samples here, and need a2 and + a3 for this, so copy them somewhere else, we haven't used them yet. 
*/ + move.l %a2, %d1 + move.l %a3, %d2 + movclr.l %acc0, %a5 + movclr.l %acc1, %a4 + movclr.l %acc2, %a3 + movclr.l %acc3, %a2 + msac.l %a6, %d5, -(%a0), %a6, %acc0 + msac.l %a6, %d6, -(%a0), %a6, %acc1 + msac.l %a6, %d7, -(%a0), %a6, %acc2 + msac.l %a6, %d1, -(%a0), %a6, %acc3 + lea.l (-9*4, %a0), %a0 /* a0 = &window[0], avoid emac stall */ + movclr.l %acc0, %d7 + movclr.l %acc1, %d6 + movclr.l %acc2, %d5 + movclr.l %acc3, %d4 + msac.l %a6, %d2, (%a0)+, %a6, %acc0 + /* STALL */ + movclr.l %acc0, %d3 + movem.l %d3-%d7/%a2-%a5, (x9, %a1) /* save windowed data */ + movem.l (%a1), %d1-%d7/%a2-%a3 /* d1-d7,a2-a3 = x0..x8 */ + + /* now do x[i] = window[i]*x[i], for i = 0..8 */ + mac.l %a6, %d1, (%a0)+, %a6, %acc0 + mac.l %a6, %d2, (%a0)+, %a6, %acc1 + mac.l %a6, %d3, (%a0)+, %a6, %acc2 + mac.l %a6, %d4, (%a0)+, %a6, %acc3 + /* STALL */ + movclr.l %acc0, %d1 + movclr.l %acc1, %d2 + movclr.l %acc2, %d3 + movclr.l %acc3, %d4 + mac.l %a6, %d5, (%a0)+, %a6, %acc0 + mac.l %a6, %d6, (%a0)+, %a6, %acc1 + mac.l %a6, %d7, (%a0)+, %a6, %acc2 + mac.l %a6, %a2, (%a0)+, %a6, %acc3 + /* STALL */ + movclr.l %acc0, %d5 + movclr.l %acc1, %d6 + movclr.l %acc2, %d7 + movclr.l %acc3, %a2 + mac.l %a6, %a3, %acc0 + /* STALL */ + movclr.l %acc0, %a3 + + movem.l %d1-%d7/%a2-%a3, (%a1) /* save windowed data */ + btst.l #0, %d0 /* BLOCK_MODE_START has bit 0 set */ + jne start_block_x18_to_x35 + +normal_block_x18_to_x35: + lea.l window_l_array, %a0 /* a0 = &window[0] */ + movem.l (x18, %a1), %d1-%d7/%a2-%a3 /* d1..d7/a2..a3 = x18..x26 */ + move.l (%a0)+, %a6 + + /* do x[i] = window[i]*x[i] for i = 27..35. x27..x35 = x26..x18, in + that order. 
*/ + mac.l %a6, %d1, (%a0)+, %a6, %acc0 + mac.l %a6, %d2, (%a0)+, %a6, %acc1 + mac.l %a6, %d3, (%a0)+, %a6, %acc2 + mac.l %a6, %d4, (%a0)+, %a6, %acc3 + /* again, need to reverse order and haven't used these yet */ + move.l %a2, %d1 + move.l %a3, %d2 + movclr.l %acc0, %a5 + movclr.l %acc1, %a4 + movclr.l %acc2, %a3 + movclr.l %acc3, %a2 + mac.l %a6, %d5, (%a0)+, %a6, %acc0 + mac.l %a6, %d6, (%a0)+, %a6, %acc1 + mac.l %a6, %d7, (%a0)+, %a6, %acc2 + mac.l %a6, %d1, (%a0)+, %a6, %acc3 + lea.l (9*4, %a0), %a0 /* a0 = &window[18], avoid emac stall */ + movclr.l %acc0, %d7 + movclr.l %acc1, %d6 + movclr.l %acc2, %d5 + movclr.l %acc3, %d4 + mac.l %a6, %d2, -(%a0), %a6, %acc0 + /* STALL */ + movclr.l %acc0, %d3 + + movem.l %d3-%d7/%a2-%a5, (x27, %a1) /* save windowed output */ + movem.l (x18, %a1), %d1-%d7/%a2-%a3 /* d1..d7,a2..a3 = x18..x26 */ + + /* do x[i] = window[i]*x[i] for i = 18..26. */ + mac.l %a6, %d1, -(%a0), %a6, %acc0 + mac.l %a6, %d2, -(%a0), %a6, %acc1 + mac.l %a6, %d3, -(%a0), %a6, %acc2 + mac.l %a6, %d4, -(%a0), %a6, %acc3 + /* STALL */ + movclr.l %acc0, %d1 + movclr.l %acc1, %d2 + movclr.l %acc2, %d3 + movclr.l %acc3, %d4 + mac.l %a6, %d5, -(%a0), %a6, %acc0 + mac.l %a6, %d6, -(%a0), %a6, %acc1 + mac.l %a6, %d7, -(%a0), %a6, %acc2 + mac.l %a6, %a2, -(%a0), %a6, %acc3 + /* STALL */ + movclr.l %acc0, %d5 + movclr.l %acc1, %d6 + movclr.l %acc2, %d7 + movclr.l %acc3, %a2 + mac.l %a6, %a3, %acc0 + movclr.l %acc0, %a3 + + movem.l %d1-%d7/%a2-%a3, (x18, %a1) + movem.l (10*4, %sp), %d2-%d7/%a2-%a6 + lea.l (21*4, %sp), %sp + rts + +stop_block_x0_to_x17: + /* windowed x12..x17 = -x5..-x0 */ + move.l %d6, %d0 + neg.l %d0 /* d0 = -x5 */ + move.l %d2, %d6 + neg.l %d6 /* d6 = -x1 */ + move.l %d5, %d2 + neg.l %d2 /* d2 = -x4 */ + move.l %d3, %d5 + neg.l %d5 /* d5 = -x2 */ + move.l %d4, %d3 + neg.l %d3 /* d3 = -x3 */ + sub.l %a5, %a5 + sub.l %d1, %a5 /* a5 = -x0 */ + movem.l %d0/%d2-%d3/%d5-%d6/%a5, (x12, %a1) + + lea.l window_l_array, %a0 /* a0 = &window[0] */ + move.l 
(1*4, %a0), %a6 + + mac.l %a6, %d7, (4*4, %a0), %a6, %acc0 + mac.l %a6, %a2, (7*4, %a0), %a6, %acc1 + mac.l %a6, %a3, (10*4, %a0), %a6, %acc2 + msac.l %a6, %a3, (13*4, %a0), %a6, %acc3 + /* STALL */ + movclr.l %acc0, %d0 + movclr.l %acc1, %d1 + movclr.l %acc2, %d2 + movclr.l %acc3, %d3 + msac.l %a6, %a2, (16*4, %a0), %a6, %acc0 + msac.l %a6, %d7, %acc1 + /* STALL */ + movclr.l %acc0, %d4 + movclr.l %acc1, %d5 + + movem.l %d0-%d5, (6*4, %a1) + clr.l %d1 + clr.l %d2 + clr.l %d3 + clr.l %d4 + clr.l %d5 + clr.l %d6 + movem.l %d1-%d6, (%a1) + jra normal_block_x18_to_x35 + +start_block_x18_to_x35: + movem.l (x24, %a1), %d0-%d2 + lea.l window_l_array, %a0 + move.l (1*4, %a0), %a6 + /* TODO movem coefs */ + mac.l %a6, %d0, (4*4, %a0), %a6, %acc0 + mac.l %a6, %d1, (7*4, %a0), %a6, %acc1 + mac.l %a6, %d2, (10*4, %a0), %a6, %acc2 + mac.l %a6, %d2, (13*4, %a0), %a6, %acc3 + /* STALL */ + movclr.l %acc0, %a2 + movclr.l %acc1, %d7 + movclr.l %acc2, %d6 + movclr.l %acc3, %d5 + mac.l %a6, %d1, (16*4, %a0), %a6, %acc0 + mac.l %a6, %d0, %acc1 + /* STALL */ + movclr.l %acc0, %d4 + movclr.l %acc1, %d3 + + movem.l %d3-%d7/%a2, (x24, %a1) + clr.l %d1 + clr.l %d2 + clr.l %d3 + clr.l %d4 + clr.l %d5 + clr.l %d6 + movem.l %d1-%d6, (x30, %a1) + + movem.l (10*4, %sp), %d2-%d7/%a2-%a6 + lea.l (21*4, %sp), %sp + rts + + /* this routine uses imdct_s and window_s from layer3.c */ .global III_imdct_s III_imdct_s: /* we need to save 9 registers and 36 samples of temp buffer */ Index: apps/codecs/libmad/layer3.c =================================================================== RCS file: /cvsroot/rockbox/apps/codecs/libmad/layer3.c,v retrieving revision 1.8 diff -u -r1.8 layer3.c --- apps/codecs/libmad/layer3.c 13 Apr 2005 13:15:58 -0000 1.8 +++ apps/codecs/libmad/layer3.c 25 May 2005 14:48:06 -0000 @@ -1764,589 +1764,6 @@ * NAME: imdct36 * DESCRIPTION: perform X[18]->x[36] IMDCT */ - -# if CONFIG_CPU==MCF5249 && !defined(SIMULATOR) -/* emac optimized imdct36, it is very ugly and i hope to replace 
it soon. - * for now it is actually somewhat faster than the stock implementation. - */ -static inline -void imdct36(mad_fixed_t const X[18], mad_fixed_t x[36]) -{ - mad_fixed_t t[16]; - /* assumes FRACBITS = 28 */ - asm volatile ( - "move.l #0xb0, %%d0\n\t" /* frac. mode, saturate, round */ - "move.l %%d0, %%macsr\n\t" - "move.l (4*4, %[X]), %%d0\n\t" - "move.l #0x0ec835e8, %%d1\n\t" - "mac.l %%d0, %%d1, (13*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x061f78aa, %%d1\n\t" - "mac.l %%d0, %%d1, (1*4, %[X]), %%d0, %%acc0\n\t" - "move.l %%acc0, %%d7\n\t" - "asl.l #3, %%d7\n\t" - "move.l %%d7, (6*4, %[t])\n\t" - - "sub.l (10*4, %[X]), %%d0\n\t" - "move.l %%d0, (14*4, %[t])\n\t" - "move.l #0x061f78aa, %%d1\n\t" - "msac.l %%d0, %%d1, (7*4, %[X]), %%d0, %%acc0\n\t" - "add.l (16*4, %[X]), %%d0\n\t" - "move.l %%d0, (15*4, %[t])\n\t" - "move.l #0x0ec835e8, %%d1\n\t" - "msac.l %%d0, %%d1, (%[X]), %%d2, %%acc0\n\t" - "move.l %%acc0, %%d6\n\t" - "asl.l #3, %%d6\n\t" - "move.l %%d6, (%[t])\n\t" - - "sub.l (11*4, %[X]), %%d2\n\t" /* store t8-t11 in d2-d5, will need them soon */ - "sub.l (12*4, %[X]), %%d2\n\t" - "move.l %%d2, (8*4, %[t])\n\t" - "move.l #0x0216a2a2, %%d1\n\t" - "mac.l %%d2, %%d1, (2*4, %[X]), %%d3, %%acc0\n\t" - - "sub.l (9*4, %[X]), %%d3\n\t" - "sub.l (14*4, %[X]), %%d3\n\t" - "move.l %%d3, (9*4, %[t])\n\t" - "move.l #0x09bd7ca0, %%d1\n\t" - "mac.l %%d3, %%d1, (3*4, %[X]), %%d4, %%acc0\n\t" - - "sub.l (8*4, %[X]), %%d4\n\t" - "sub.l (15*4, %[X]), %%d4\n\t" - "move.l %%d4, (10*4, %[t])\n\t" - "move.l #0x0cb19346, %%d1\n\t" - "msac.l %%d4, %%d1, (5*4, %[X]), %%d5, %%acc0\n\t" - - "sub.l (6*4, %[X]), %%d5\n\t" - "sub.l (17*4, %[X]), %%d5\n\t" - "move.l %%d5, (11*4, %[t])\n\t" - "move.l #0x0fdcf549, %%d1\n\t" - "msac.l %%d5, %%d1, (%[X]), %%d0, %%acc0\n\t" - - "movclr.l %%acc0, %%d7\n\t" - "asl.l #3, %%d7\n\t" - "move.l %%d7, (7*4, %[x])\n\t" - "neg.l %%d7\n\t" - "move.l %%d7, (10*4, %[x])\n\t" - - "move.l #0x0cb19346, %%d1\n\t" - "msac.l %%d2, %%d1, (3*4, %[X]), %%d2, 
%%acc0\n\t" /* preload for t12 statement */ - "move.l #0x0fdcf549, %%d1\n\t" - "mac.l %%d3, %%d1, (8*4, %[X]), %%d3, %%acc0\n\t" - "move.l #0x0216a2a2, %%d1\n\t" - "mac.l %%d4, %%d1, (11*4, %[X]), %%d4, %%acc0\n\t" - "move.l #0x09bd7ca0, %%d1\n\t" - "msac.l %%d5, %%d1, (12*4, %[X]), %%d5, %%acc0\n\t" - "movclr.l %%acc0, %%d7\n\t" - "asl.l #3, %%d7\n\t" - "sub.l %%d6, %%d7\n\t" /* t0 is still in d6 */ - "move.l %%d7, (19*4, %[x])\n\t" - "move.l %%d7, (34*4, %[x])\n\t" - - "sub.l %%d2, %%d0\n\t" - "add.l %%d3, %%d0\n\t" - "sub.l %%d4, %%d0\n\t" - "sub.l %%d5, %%d0\n\t" - "add.l (15*4, %[X]), %%d0\n\t" - - "move.l (2*4, %[X]), %%d3\n\t" - "add.l (5*4, %[X]), %%d3\n\t" - "sub.l (6*4, %[X]), %%d3\n\t" - "sub.l (9*4, %[X]), %%d3\n\t" - "sub.l (14*4, %[X]), %%d3\n\t" - "sub.l (17*4, %[X]), %%d3\n\t" - - "move.l %%d0, (12*4, %[t])\n\t" - "move.l %%d3, (13*4, %[t])\n\t" - - "move.l #0x0ec835e8, %%d1\n\t" - "msac.l %%d0, %%d1, (1*4, %[X]), %%d2, %%acc0\n\t" - "move.l #0x061f78aa, %%d1\n\t" - "mac.l %%d3, %%d1, (7*4, %[X]), %%d3, %%acc0\n\t" - "movclr.l %%acc0, %%d7\n\t" - "asl.l #3, %%d7\n\t" - "add.l %%d6, %%d7\n\t" - "move.l %%d7, (22*4, %[x])\n\t" - "move.l %%d7, (31*4, %[x])\n\t" - - "move.l #0x09bd7ca0, %%d1\n\t" - "msac.l %%d1, %%d2, (10*4, %[X]), %%d2, %%acc0\n\t" - "move.l #0x0216a2a2, %%d1\n\t" - "mac.l %%d1, %%d3, (16*4, %[X]), %%d3, %%acc0\n\t" - "move.l #0x0fdcf549, %%d1\n\t" - "msac.l %%d1, %%d2, (6*4, %[t]), %%d2, %%acc0\n\t" - "move.l #0x0cb19346, %%d1\n\t" - "mac.l %%d1, %%d3, (%[X]), %%d0, %%acc0\n\t" - "movclr.l %%acc0, %%d7\n\t" - "asl.l #3, %%d7\n\t" - "add.l %%d2, %%d7\n\t" - "move.l %%d7, (1*4, %[t])\n\t" - - "move.l #0x03768962, %%d1\n\t" - "mac.l %%d1, %%d0, (2*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0e313245, %%d1\n\t" - "mac.l %%d1, %%d0, (3*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0ffc19fd, %%d1\n\t" - "msac.l %%d1, %%d0, (5*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0acf37ad, %%d1\n\t" - "msac.l %%d1, %%d0, (6*4, %[X]), %%d0, %%acc0\n\t" - "move.l 
#0x04cfb0e2, %%d1\n\t" - "mac.l %%d1, %%d0, (8*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0898c779, %%d1\n\t" - "msac.l %%d1, %%d0, (9*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0d7e8807, %%d1\n\t" - "mac.l %%d1, %%d0, (11*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0f426cb5, %%d1\n\t" - "mac.l %%d1, %%d0, (12*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0bcbe352, %%d1\n\t" - "msac.l %%d1, %%d0, (14*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x00b2aa3e, %%d1\n\t" - "mac.l %%d1, %%d0, (15*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x07635284, %%d1\n\t" - "msac.l %%d1, %%d0, (17*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0f9ee890, %%d1\n\t" - "msac.l %%d1, %%d0, (%[X]), %%d0, %%acc0\n\t" - "movclr.l %%acc0, %%d6\n\t" - "asl.l #3, %%d6\n\t" - "add.l %%d7, %%d6\n\t" - "move.l %%d6, (6*4, %[x])\n\t" - "neg.l %%d6\n\t" - "move.l %%d6, (11*4, %[x])\n\t" - - "move.l #0x0f426cb5, %%d1\n\t" - "msac.l %%d1, %%d0, (2*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x00b2aa3e, %%d1\n\t" - "msac.l %%d1, %%d0, (3*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0898c779, %%d1\n\t" - "mac.l %%d1, %%d0, (5*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0f9ee890, %%d1\n\t" - "mac.l %%d1, %%d0, (6*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0acf37ad, %%d1\n\t" - "mac.l %%d1, %%d0, (8*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x07635284, %%d1\n\t" - "msac.l %%d1, %%d0, (9*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0e313245, %%d1\n\t" - "msac.l %%d1, %%d0, (11*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0bcbe352, %%d1\n\t" - "msac.l %%d1, %%d0, (12*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x03768962, %%d1\n\t" - "msac.l %%d1, %%d0, (14*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0d7e8807, %%d1\n\t" - "mac.l %%d1, %%d0, (15*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0ffc19fd, %%d1\n\t" - "mac.l %%d1, %%d0, (17*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x04cfb0e2, %%d1\n\t" - "mac.l %%d1, %%d0, (%[X]), %%d0, %%acc0\n\t" - "movclr.l %%acc0, %%d6\n\t" - "asl.l #3, %%d6\n\t" - "add.l %%d7, %%d6\n\t" - "move.l %%d6, (23*4, %[x])\n\t" - "move.l %%d6, 
(30*4, %[x])\n\t" - - "move.l #0x0bcbe352, %%d1\n\t" - "msac.l %%d1, %%d0, (2*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0d7e8807, %%d1\n\t" - "mac.l %%d1, %%d0, (3*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x07635284, %%d1\n\t" - "msac.l %%d1, %%d0, (5*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x04cfb0e2, %%d1\n\t" - "mac.l %%d1, %%d0, (6*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0f9ee890, %%d1\n\t" - "mac.l %%d1, %%d0, (8*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0ffc19fd, %%d1\n\t" - "msac.l %%d1, %%d0, (9*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x00b2aa3e, %%d1\n\t" - "msac.l %%d1, %%d0, (11*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x03768962, %%d1\n\t" - "mac.l %%d1, %%d0, (12*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0f426cb5, %%d1\n\t" - "msac.l %%d1, %%d0, (14*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0e313245, %%d1\n\t" - "mac.l %%d1, %%d0, (15*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0898c779, %%d1\n\t" - "mac.l %%d1, %%d0, (17*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0acf37ad, %%d1\n\t" - "msac.l %%d1, %%d0, (4*4, %[X]), %%d0, %%acc0\n\t" - "movclr.l %%acc0, %%d6\n\t" - "asl.l #3, %%d6\n\t" - "sub.l %%d7, %%d6\n\t" - "move.l %%d6, (18*4, %[x])\n\t" - "move.l %%d6, (35*4, %[x])\n\t" - - "move.l #0x061f78aa, %%d1\n\t" - "mac.l %%d1, %%d0, (13*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0ec835e8, %%d1\n\t" - "msac.l %%d1, %%d0, (1*4, %[X]), %%d0, %%acc0\n\t" - "move.l %%acc0, %%d5\n\t" - "asl.l #3, %%d5\n\t" - "move.l %%d5, (7*4, %[t])\n\t" - - "move.l #0x0cb19346, %%d1\n\t" - "msac.l %%d1, %%d0, (7*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0fdcf549, %%d1\n\t" - "mac.l %%d1, %%d0, (10*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0216a2a2, %%d1\n\t" - "mac.l %%d1, %%d0, (16*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x09bd7ca0, %%d1\n\t" - "msac.l %%d1, %%d0, (%[X]), %%d0, %%acc0\n\t" - "move.l %%acc0, %%d7\n\t" - "asl.l #3, %%d7\n\t" - "move.l %%d7, (2*4, %[t])\n\t" - - "move.l #0x04cfb0e2, %%d1\n\t" - "mac.l %%d1, %%d0, (2*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0ffc19fd, %%d1\n\t" 
- "mac.l %%d1, %%d0, (3*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0d7e8807, %%d1\n\t" - "msac.l %%d1, %%d0, (5*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x03768962, %%d1\n\t" - "mac.l %%d1, %%d0, (6*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0bcbe352, %%d1\n\t" - "msac.l %%d1, %%d0, (8*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0e313245, %%d1\n\t" - "msac.l %%d1, %%d0, (9*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x07635284, %%d1\n\t" - "mac.l %%d1, %%d0, (11*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0acf37ad, %%d1\n\t" - "msac.l %%d1, %%d0, (12*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0f9ee890, %%d1\n\t" - "mac.l %%d1, %%d0, (14*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0898c779, %%d1\n\t" - "mac.l %%d1, %%d0, (15*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x00b2aa3e, %%d1\n\t" - "mac.l %%d1, %%d0, (17*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0f426cb5, %%d1\n\t" - "mac.l %%d1, %%d0, (%[X]), %%d0, %%acc0\n\t" - "movclr.l %%acc0, %%d6\n\t" - "asl.l #3, %%d6\n\t" - "move.l %%d6, (5*4, %[x])\n\t" - "neg.l %%d6\n\t" - "move.l %%d6, (12*4, %[x])\n\t" - - "move.l #0x0acf37ad, %%d1\n\t" - "mac.l %%d1, %%d0, (2*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0898c779, %%d1\n\t" - "msac.l %%d1, %%d0, (3*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0e313245, %%d1\n\t" - "mac.l %%d1, %%d0, (5*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0f426cb5, %%d1\n\t" - "msac.l %%d1, %%d0, (6*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x03768962, %%d1\n\t" - "msac.l %%d1, %%d0, (8*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x00b2aa3e, %%d1\n\t" - "mac.l %%d1, %%d0, (9*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0ffc19fd, %%d1\n\t" - "msac.l %%d1, %%d0, (11*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0f9ee890, %%d1\n\t" - "mac.l %%d1, %%d0, (12*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x04cfb0e2, %%d1\n\t" - "msac.l %%d1, %%d0, (14*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x07635284, %%d1\n\t" - "mac.l %%d1, %%d0, (15*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0d7e8807, %%d1\n\t" - "mac.l %%d1, %%d0, (17*4, %[X]), %%d0, %%acc0\n\t" - 
"move.l #0x0bcbe352, %%d1\n\t" - "msac.l %%d1, %%d0, (%[X]), %%d0, %%acc0\n\t" - "movclr.l %%acc0, %%d6\n\t" - "asl.l #3, %%d6\n\t" - "add.l %%d7, %%d6\n\t" - "move.l %%d6, (%[x])\n\t" - "neg.l %%d6\n\t" - "move.l %%d6, (17*4, %[x])\n\t" - - "move.l #0x0f9ee890, %%d1\n\t" - "msac.l %%d1, %%d0, (2*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x07635284, %%d1\n\t" - "msac.l %%d1, %%d0, (3*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x00b2aa3e, %%d1\n\t" - "msac.l %%d1, %%d0, (5*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0bcbe352, %%d1\n\t" - "mac.l %%d1, %%d0, (6*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0f426cb5, %%d1\n\t" - "mac.l %%d1, %%d0, (8*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0d7e8807, %%d1\n\t" - "mac.l %%d1, %%d0, (9*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0898c779, %%d1\n\t" - "mac.l %%d1, %%d0, (11*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x04cfb0e2, %%d1\n\t" - "msac.l %%d1, %%d0, (12*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0acf37ad, %%d1\n\t" - "msac.l %%d1, %%d0, (14*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0ffc19fd, %%d1\n\t" - "msac.l %%d1, %%d0, (15*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0e313245, %%d1\n\t" - "msac.l %%d1, %%d0, (17*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x03768962, %%d1\n\t" - "msac.l %%d1, %%d0, (1*4, %[X]), %%d0, %%acc0\n\t" - "movclr.l %%acc0, %%d6\n\t" - "asl.l #3, %%d6\n\t" - "add.l %%d7, %%d6\n\t" - "move.l %%d6, (24*4, %[x])\n\t" - "move.l %%d6, (29*4, %[x])\n\t" - - "move.l #0x0216a2a2, %%d1\n\t" - "msac.l %%d1, %%d0, (7*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x09bd7ca0, %%d1\n\t" - "msac.l %%d1, %%d0, (10*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0cb19346, %%d1\n\t" - "mac.l %%d1, %%d0, (16*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0fdcf549, %%d1\n\t" - "mac.l %%d1, %%d0, (%[X]), %%d0, %%acc0\n\t" - "movclr.l %%acc0, %%d7\n\t" - "asl.l #3, %%d7\n\t" - "add.l %%d5, %%d7\n\t" - "move.l %%d7, (3*4, %[t])\n\t" - - "move.l #0x00b2aa3e, %%d1\n\t" - "mac.l %%d1, %%d0, (2*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x03768962, %%d1\n\t" - "mac.l 
%%d1, %%d0, (3*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x04cfb0e2, %%d1\n\t" - "msac.l %%d1, %%d0, (5*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x07635284, %%d1\n\t" - "msac.l %%d1, %%d0, (6*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0898c779, %%d1\n\t" - "mac.l %%d1, %%d0, (8*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0acf37ad, %%d1\n\t" - "mac.l %%d1, %%d0, (9*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0bcbe352, %%d1\n\t" - "msac.l %%d1, %%d0, (11*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0d7e8807, %%d1\n\t" - "msac.l %%d1, %%d0, (12*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0e313245, %%d1\n\t" - "mac.l %%d1, %%d0, (14*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0f426cb5, %%d1\n\t" - "mac.l %%d1, %%d0, (15*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0f9ee890, %%d1\n\t" - "msac.l %%d1, %%d0, (17*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0ffc19fd, %%d1\n\t" - "msac.l %%d1, %%d0, (%[X]), %%d0, %%acc0\n\t" - "movclr.l %%acc0, %%d6\n\t" - "asl.l #3, %%d6\n\t" - "add.l %%d7, %%d6\n\t" - "move.l %%d6, (8*4, %[x])\n\t" - "neg.l %%d6\n\t" - "move.l %%d6, (9*4, %[x])\n\t" - - "move.l #0x0e313245, %%d1\n\t" - "msac.l %%d1, %%d0, (2*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0bcbe352, %%d1\n\t" - "mac.l %%d1, %%d0, (3*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0f9ee890, %%d1\n\t" - "mac.l %%d1, %%d0, (5*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0898c779, %%d1\n\t" - "msac.l %%d1, %%d0, (6*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0ffc19fd, %%d1\n\t" - "msac.l %%d1, %%d0, (8*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x04cfb0e2, %%d1\n\t" - "mac.l %%d1, %%d0, (9*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0f426cb5, %%d1\n\t" - "mac.l %%d1, %%d0, (11*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x00b2aa3e, %%d1\n\t" - "msac.l %%d1, %%d0, (12*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0d7e8807, %%d1\n\t" - "msac.l %%d1, %%d0, (14*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x03768962, %%d1\n\t" - "msac.l %%d1, %%d0, (15*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0acf37ad, %%d1\n\t" - "mac.l %%d1, %%d0, (17*4, %[X]), %%d0, 
%%acc0\n\t" - "move.l #0x07635284, %%d1\n\t" - "mac.l %%d1, %%d0, (%[X]), %%d0, %%acc0\n\t" - "movclr.l %%acc0, %%d6\n\t" - "asl.l #3, %%d6\n\t" - "add.l %%d7, %%d6\n\t" - "move.l %%d6, (21*4, %[x])\n\t" - "move.l %%d6, (32*4, %[x])\n\t" - - "move.l #0x0d7e8807, %%d1\n\t" - "msac.l %%d1, %%d0, (2*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0f426cb5, %%d1\n\t" - "mac.l %%d1, %%d0, (3*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0acf37ad, %%d1\n\t" - "mac.l %%d1, %%d0, (5*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0ffc19fd, %%d1\n\t" - "msac.l %%d1, %%d0, (6*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x07635284, %%d1\n\t" - "msac.l %%d1, %%d0, (8*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0f9ee890, %%d1\n\t" - "mac.l %%d1, %%d0, (9*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x03768962, %%d1\n\t" - "mac.l %%d1, %%d0, (11*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0e313245, %%d1\n\t" - "msac.l %%d1, %%d0, (12*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x00b2aa3e, %%d1\n\t" - "mac.l %%d1, %%d0, (14*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0bcbe352, %%d1\n\t" - "mac.l %%d1, %%d0, (15*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x04cfb0e2, %%d1\n\t" - "msac.l %%d1, %%d0, (17*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0898c779, %%d1\n\t" - "msac.l %%d1, %%d0, (14*4, %[t]), %%d0, %%acc0\n\t" - "movclr.l %%acc0, %%d6\n\t" - "asl.l #3, %%d6\n\t" - "sub.l %%d7, %%d6\n\t" - "move.l %%d6, (20*4, %[x])\n\t" - "move.l %%d6, (33*4, %[x])\n\t" - - "move.l #0x0ec835e8, %%d1\n\t" - "msac.l %%d1, %%d0, (15*4, %[t]), %%d0, %%acc0\n\t" - "move.l #0x061f78aa, %%d1\n\t" - "mac.l %%d1, %%d0, (12*4, %[t]), %%d0, %%acc0\n\t" - "movclr.l %%acc0, %%d6\n\t" - "asl.l #3, %%d6\n\t" - "sub.l %%d5, %%d6\n\t" - "move.l %%d6, (4*4, %[t])\n\t" - - "move.l #0x061f78aa, %%d1\n\t" - "mac.l %%d1, %%d0, (13*4, %[t]), %%d0, %%acc0\n\t" - "move.l #0x0ec835e8, %%d1\n\t" - "mac.l %%d1, %%d0, (8*4, %[t]), %%d0, %%acc0\n\t" - "movclr.l %%acc0, %%d7\n\t" /* don't need t7 anymore */ - "asl.l #3, %%d7\n\t" - "add.l %%d6, %%d7\n\t" - "move.l %%d7, 
(4*4, %[x])\n\t" - "neg.l %%d7\n\t" - "move.l %%d7, (13*4, %[x])\n\t" - - "move.l #0x09bd7ca0, %%d1\n\t" - "mac.l %%d1, %%d0, (9*4, %[t]), %%d0, %%acc0\n\t" - "move.l #0x0216a2a2, %%d1\n\t" - "msac.l %%d1, %%d0, (10*4, %[t]), %%d0, %%acc0\n\t" - "move.l #0x0fdcf549, %%d1\n\t" - "mac.l %%d1, %%d0, (11*4, %[t]), %%d0, %%acc0\n\t" - "move.l #0x0cb19346, %%d1\n\t" - "msac.l %%d1, %%d0, (8*4, %[t]), %%d0, %%acc0\n\t" - "movclr.l %%acc0, %%d7\n\t" - "asl.l #3, %%d7\n\t" - "add.l %%d6, %%d7\n\t" - "move.l %%d7, (1*4, %[x])\n\t" - "neg.l %%d7\n\t" - "move.l %%d7, (16*4, %[x])\n\t" - - "move.l #0x0fdcf549, %%d1\n\t" - "msac.l %%d1, %%d0, (9*4, %[t]), %%d0, %%acc0\n\t" - "move.l #0x0cb19346, %%d1\n\t" - "msac.l %%d1, %%d0, (10*4, %[t]), %%d0, %%acc0\n\t" - "move.l #0x09bd7ca0, %%d1\n\t" - "msac.l %%d1, %%d0, (11*4, %[t]), %%d0, %%acc0\n\t" - "move.l #0x0216a2a2, %%d1\n\t" - "msac.l %%d1, %%d0, (1*4, %[X]), %%d0, %%acc0\n\t" - "movclr.l %%acc0, %%d7\n\t" - "asl.l #3, %%d7\n\t" - "add.l %%d6, %%d7\n\t" - "move.l %%d7, (25*4, %[x])\n\t" - "move.l %%d7, (28*4, %[x])\n\t" - - "move.l #0x0fdcf549, %%d1\n\t" - "msac.l %%d1, %%d0, (7*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0cb19346, %%d1\n\t" - "msac.l %%d1, %%d0, (10*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x09bd7ca0, %%d1\n\t" - "msac.l %%d1, %%d0, (16*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0216a2a2, %%d1\n\t" - "msac.l %%d1, %%d0, (%[X]), %%d0, %%acc0\n\t" - "movclr.l %%acc0, %%d6\n\t" - "asl.l #3, %%d6\n\t" - "sub.l (6*4, %[t]), %%d6\n\t" - "move.l %%d6, (5*4, %[t])\n\t" - - "move.l #0x0898c779, %%d1\n\t" - "mac.l %%d1, %%d0, (2*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x04cfb0e2, %%d1\n\t" - "mac.l %%d1, %%d0, (3*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0bcbe352, %%d1\n\t" - "mac.l %%d1, %%d0, (5*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x00b2aa3e, %%d1\n\t" - "mac.l %%d1, %%d0, (6*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0e313245, %%d1\n\t" - "mac.l %%d1, %%d0, (8*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x03768962, %%d1\n\t" - 
"msac.l %%d1, %%d0, (9*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0f9ee890, %%d1\n\t" - "mac.l %%d1, %%d0, (11*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x07635284, %%d1\n\t" - "msac.l %%d1, %%d0, (12*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0ffc19fd, %%d1\n\t" - "mac.l %%d1, %%d0, (14*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0acf37ad, %%d1\n\t" - "msac.l %%d1, %%d0, (15*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0f426cb5, %%d1\n\t" - "mac.l %%d1, %%d0, (17*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0d7e8807, %%d1\n\t" - "msac.l %%d1, %%d0, (%[X]), %%d0, %%acc0\n\t" - "movclr.l %%acc0, %%d7\n\t" - "asl.l #3, %%d7\n\t" - "add.l %%d6, %%d7\n\t" - "move.l %%d7, (2*4, %[x])\n\t" - "neg.l %%d7\n\t" - "move.l %%d7, (15*4, %[x])\n\t" - - "move.l #0x07635284, %%d1\n\t" - "mac.l %%d1, %%d0, (2*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0acf37ad, %%d1\n\t" - "mac.l %%d1, %%d0, (3*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x03768962, %%d1\n\t" - "mac.l %%d1, %%d0, (5*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0d7e8807, %%d1\n\t" - "mac.l %%d1, %%d0, (6*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x00b2aa3e, %%d1\n\t" - "msac.l %%d1, %%d0, (8*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0f426cb5, %%d1\n\t" - "mac.l %%d1, %%d0, (9*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x04cfb0e2, %%d1\n\t" - "msac.l %%d1, %%d0, (11*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0ffc19fd, %%d1\n\t" - "mac.l %%d1, %%d0, (12*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0898c779, %%d1\n\t" - "msac.l %%d1, %%d0, (14*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0f9ee890, %%d1\n\t" - "mac.l %%d1, %%d0, (15*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0bcbe352, %%d1\n\t" - "msac.l %%d1, %%d0, (17*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0e313245, %%d1\n\t" - "mac.l %%d1, %%d0, (%[X]), %%d0, %%acc0\n\t" - "movclr.l %%acc0, %%d7\n\t" - "asl.l #3, %%d7\n\t" - "add.l %%d6, %%d7\n\t" - "move.l %%d7, (3*4, %[x])\n\t" - "neg.l %%d7\n\t" - "move.l %%d7, (14*4, %[x])\n\t" - - "move.l #0x0ffc19fd, %%d1\n\t" - "msac.l %%d1, %%d0, (2*4, %[X]), %%d0, 
%%acc0\n\t" - "move.l #0x0f9ee890, %%d1\n\t" - "msac.l %%d1, %%d0, (3*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0f426cb5, %%d1\n\t" - "msac.l %%d1, %%d0, (5*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0e313245, %%d1\n\t" - "msac.l %%d1, %%d0, (6*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0d7e8807, %%d1\n\t" - "msac.l %%d1, %%d0, (8*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0bcbe352, %%d1\n\t" - "msac.l %%d1, %%d0, (9*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0acf37ad, %%d1\n\t" - "msac.l %%d1, %%d0, (11*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x0898c779, %%d1\n\t" - "msac.l %%d1, %%d0, (12*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x07635284, %%d1\n\t" - "msac.l %%d1, %%d0, (14*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x04cfb0e2, %%d1\n\t" - "msac.l %%d1, %%d0, (15*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x03768962, %%d1\n\t" - "msac.l %%d1, %%d0, (17*4, %[X]), %%d0, %%acc0\n\t" - "move.l #0x00b2aa3e, %%d1\n\t" - "msac.l %%d1, %%d0, (%[X]), %%d0, %%acc0\n\t" - "movclr.l %%acc0, %%d7\n\t" - "asl.l #3, %%d7\n\t" - "add.l %%d6, %%d7\n\t" - "move.l %%d7, (26*4, %[x])\n\t" - "move.l %%d7, (27*4, %[x])\n\t" - : : [X] "a" (X), [x] "a" (x), [t] "a" (t) - : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"); - /* pfew */ -} - -#else - static inline void imdct36(mad_fixed_t const X[18], mad_fixed_t x[36]) { @@ -2633,8 +2050,6 @@ x[26] = x[27] = MAD_F_MLZ(hi, lo) + t5; } -#endif /* MCF5249 */ - # endif /* @@ -2938,6 +2353,9 @@ */ static enum mad_error III_decode(struct mad_bitptr *ptr, struct mad_frame *frame, + struct sideinfo *si, unsigned int nch) __attribute__ ((section (".cachetest"))); +static +enum mad_error III_decode(struct mad_bitptr *ptr, struct mad_frame *frame, struct sideinfo *si, unsigned int nch) { struct mad_header *header = &frame->header; Index: apps/plugins/plugin.lds =================================================================== RCS file: /cvsroot/rockbox/apps/plugins/plugin.lds,v retrieving revision 1.11 diff -u -r1.11 plugin.lds --- apps/plugins/plugin.lds 20 Apr 
2005 18:54:20 -0000 1.11 +++ apps/plugins/plugin.lds 25 May 2005 14:48:06 -0000 @@ -43,6 +43,7 @@ .text : { *(.entry) *(.text) + *(.cachetest) } > PLUGIN_RAM .data : { Index: firmware/export/config-h100.h =================================================================== RCS file: /cvsroot/rockbox/firmware/export/config-h100.h,v retrieving revision 1.26 diff -u -r1.26 config-h100.h --- firmware/export/config-h100.h 23 May 2005 16:23:25 -0000 1.26 +++ firmware/export/config-h100.h 25 May 2005 14:48:06 -0000 @@ -69,4 +69,7 @@ /* Define this if you can control the S/PDIF power */ #define HAVE_SPDIF_POWER +/* We have an ASM version of imdct_l for libmad */ +#define ASO_IMDCT + #endif