# Patch summary (apps/codecs/libatrac, against revision 28427):
#   Store the ATRAC3 QMF dewindowing window as int16_t instead of int32_t.
#   - fixp_math.h:  adds fixmul32x16(), an int16 x int32 multiply whose 64-bit
#                   product is shifted right by 15.
#   - atrac3_arm.S: the dewindowing loop loads eight 16-bit window entries per
#                   ldmia and uses smulw<y>/smlaw<y> instead of smull/smlal.
#   - atrac3.c:     qmf_window[] and the 'win' parameter become int16_t, the
#                   generic MULTIPLY_ADD_BLOCK uses fixmul32x16(), and the
#                   window is rounded to 16 bits when it is generated.
# Notes on the assembly:
#   - smlaw<y> takes the accumulator as its LAST operand, so the running sums
#     are written as "smlawb lr, <in>, <win pair>, lr".
#   - Only taps 0..1 start the sums with smulw<y>; every later tap accumulates.
#   - smulw<y>/smlaw<y> keep bits [47:16] of the product, one bit less than
#     fixmul32x16() (>> 15), so both sums are shifted left by one before the
#     store to match the C implementation.
#   - Six ldmia of four words advance the window pointer by 96 bytes, so it is
#     rolled back by 96 (48 x int16), not 192.
#   - NOTE(review): smulw<y>/smlaw<y> are ARMv5TE instructions - confirm that
#     atrac3_arm.S is only assembled for targets that have them.
#   - NOTE(review): the CPU_COLDFIRE atrac3_iqmf_dewindowing() also receives
#     int16_t *win now, but its MULTIPLY_ADD_BLOCK body is not part of this
#     patch - confirm it does not still read 32-bit window entries.
Index: apps/codecs/libatrac/fixp_math.h =================================================================== --- apps/codecs/libatrac/fixp_math.h (revision 28427) +++ apps/codecs/libatrac/fixp_math.h (working copy) @@ -108,4 +108,16 @@ return (int32_t)temp; } + + static inline int32_t fixmul32x16(int16_t x, int32_t y) + { + int64_t temp; + temp = y; + temp *= x; + + temp >>= 15; //15+31-15 = 31 bits + + return (int32_t)temp; + } + #endif Index: apps/codecs/libatrac/atrac3_arm.S =================================================================== --- apps/codecs/libatrac/atrac3_arm.S (revision 28427) +++ apps/codecs/libatrac/atrac3_arm.S (working copy) @@ -106,114 +106,114 @@ .iqmf_dewindow_outer_loop: /* outer loop 0...counter-1 */ /* 0.. 7 */ - ldmia r2!, {r4, r5} /* load win[0..1] */ + ldmia r2!, {r4, r5, r8, r9} /* load win[0..7] */ ldmia r1!, {r6, r7} /* load in[0..1] */ - smull lr , r9, r4, r6 /* s1 = win[0] * in[0] */ - smull r12, r8, r5, r7 /* s2 = win[1] * in[1] */ - ldmia r2!, {r4, r5} /* load win[i...i+1] */ + smulwb lr, r6, r4 /* s1 = win[0] * in[0] */ + smulwt r12, r7, r4 /* s2 = win[1] * in[1] */ + ldmia r1!, {r6, r7} /* load in[i...i+1] */ - smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ - smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ - ldmia r2!, {r4, r5} /* load win[i...i+1] */ + smlawb lr, r6, r5, lr /* s1 += win[i ] * in[i ] */ + smlawt r12, r7, r5, r12 /* s2 += win[i+1] * in[i+1] */ + //ldmia r2!, {r4, r5} /* load win[i...i+1] */ ldmia r1!, {r6, r7} /* load in[i...i+1] */ - smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ - smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ - ldmia r2!, {r4, r5} /* load win[i...i+1] */ + smlawb lr, r6, r8, lr /* s1 += win[i ] * in[i ] */ + smlawt r12, r7, r8, r12 /* s2 += win[i+1] * in[i+1] */ + // ldmia r2!, {r4, r5} /* load win[i...i+1] */ ldmia r1!, {r6, r7} /* load in[i...i+1] */ - smlal
lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ - smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ + smlawb lr, r6, r9, lr /* s1 += win[i ] * in[i ] */ + smlawt r12, r7, r9, r12 /* s2 += win[i+1] * in[i+1] */ /* 8..15 */ - ldmia r2!, {r4, r5} /* load win[i...i+1] */ + ldmia r2!, {r4, r5, r8, r9} /* load win[i...i+7] */ + ldmia r1!, {r6, r7} /* load in[i...i+1] */ + smlawb lr, r6, r4, lr /* s1 += win[i ] * in[i ] */ + smlawt r12, r7, r4, r12 /* s2 += win[i+1] * in[i+1] */ + ldmia r1!, {r6, r7} /* load in[i...i+1] */ - smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ - smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ - ldmia r2!, {r4, r5} /* load win[i...i+1] */ + smlawb lr, r6, r5, lr /* s1 += win[i ] * in[i ] */ + smlawt r12, r7, r5, r12 /* s2 += win[i+1] * in[i+1] */ + //ldmia r2!, {r4, r5} /* load win[i...i+1] */ ldmia r1!, {r6, r7} /* load in[i...i+1] */ - smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ - smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ - ldmia r2!, {r4, r5} /* load win[i...i+1] */ + smlawb lr, r6, r8, lr /* s1 += win[i ] * in[i ] */ + smlawt r12, r7, r8, r12 /* s2 += win[i+1] * in[i+1] */ + // ldmia r2!, {r4, r5} /* load win[i...i+1] */ ldmia r1!, {r6, r7} /* load in[i...i+1] */ - smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ - smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ - ldmia r2!, {r4, r5} /* load win[i...i+1] */ - ldmia r1!, {r6, r7} /* load in[i...i+1] */ - smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ - smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ + smlawb lr, r6, r9, lr /* s1 += win[i ] * in[i ] */ + smlawt r12, r7, r9, r12 /* s2 += win[i+1] * in[i+1] */ /* 16..23 */ - ldmia r2!, {r4, r5} /* load win[i...i+1] */ - ldmia r1!, {r6, r7} /* load in[i...i+1] */ - smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ - smlal r12, r8,
r5, r7 /* s2 = win[i+1] * in[i+1] */ - ldmia r2!, {r4, r5} /* load win[i...i+1] */ - ldmia r1!, {r6, r7} /* load in[i...i+1] */ - smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ - smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ - ldmia r2!, {r4, r5} /* load win[i...i+1] */ - ldmia r1!, {r6, r7} /* load in[i...i+1] */ - smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ - smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ - ldmia r2!, {r4, r5} /* load win[i...i+1] */ - ldmia r1!, {r6, r7} /* load in[i...i+1] */ - smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ - smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ + ldmia r2!, {r4, r5, r8, r9} /* load win[i...i+7] */ + ldmia r1!, {r6, r7} /* load in[i...i+1] */ + smlawb lr, r6, r4, lr /* s1 += win[i ] * in[i ] */ + smlawt r12, r7, r4, r12 /* s2 += win[i+1] * in[i+1] */ + + ldmia r1!, {r6, r7} /* load in[i...i+1] */ + smlawb lr, r6, r5, lr /* s1 += win[i ] * in[i ] */ + smlawt r12, r7, r5, r12 /* s2 += win[i+1] * in[i+1] */ + //ldmia r2!, {r4, r5} /* load win[i...i+1] */ + ldmia r1!, {r6, r7} /* load in[i...i+1] */ + smlawb lr, r6, r8, lr /* s1 += win[i ] * in[i ] */ + smlawt r12, r7, r8, r12 /* s2 += win[i+1] * in[i+1] */ + // ldmia r2!, {r4, r5} /* load win[i...i+1] */ + ldmia r1!, {r6, r7} /* load in[i...i+1] */ + smlawb lr, r6, r9, lr /* s1 += win[i ] * in[i ] */ + smlawt r12, r7, r9, r12 /* s2 += win[i+1] * in[i+1] */ /* 24..31 */ - ldmia r2!, {r4, r5} /* load win[i...i+1] */ + ldmia r2!, {r4, r5, r8, r9} /* load win[i...i+7] */ + ldmia r1!, {r6, r7} /* load in[i...i+1] */ + smlawb lr, r6, r4, lr /* s1 += win[i ] * in[i ] */ + smlawt r12, r7, r4, r12 /* s2 += win[i+1] * in[i+1] */ + ldmia r1!, {r6, r7} /* load in[i...i+1] */ - smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ - smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ - ldmia r2!,
{r4, r5} /* load win[i...i+1] */ + smlawb lr, r6, r5, lr /* s1 += win[i ] * in[i ] */ + smlawt r12, r7, r5, r12 /* s2 += win[i+1] * in[i+1] */ + //ldmia r2!, {r4, r5} /* load win[i...i+1] */ ldmia r1!, {r6, r7} /* load in[i...i+1] */ - smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ - smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ - ldmia r2!, {r4, r5} /* load win[i...i+1] */ + smlawb lr, r6, r8, lr /* s1 += win[i ] * in[i ] */ + smlawt r12, r7, r8, r12 /* s2 += win[i+1] * in[i+1] */ + // ldmia r2!, {r4, r5} /* load win[i...i+1] */ ldmia r1!, {r6, r7} /* load in[i...i+1] */ - smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ - smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ - ldmia r2!, {r4, r5} /* load win[i...i+1] */ - ldmia r1!, {r6, r7} /* load in[i...i+1] */ - smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ - smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ + smlawb lr, r6, r9, lr /* s1 += win[i ] * in[i ] */ + smlawt r12, r7, r9, r12 /* s2 += win[i+1] * in[i+1] */ /* 32..39 */ - ldmia r2!, {r4, r5} /* load win[i...i+1] */ + ldmia r2!, {r4, r5, r8, r9} /* load win[i...i+7] */ + ldmia r1!, {r6, r7} /* load in[i...i+1] */ + smlawb lr, r6, r4, lr /* s1 += win[i ] * in[i ] */ + smlawt r12, r7, r4, r12 /* s2 += win[i+1] * in[i+1] */ + ldmia r1!, {r6, r7} /* load in[i...i+1] */ - smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ - smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ - ldmia r2!, {r4, r5} /* load win[i...i+1] */ + smlawb lr, r6, r5, lr /* s1 += win[i ] * in[i ] */ + smlawt r12, r7, r5, r12 /* s2 += win[i+1] * in[i+1] */ + //ldmia r2!, {r4, r5} /* load win[i...i+1] */ ldmia r1!, {r6, r7} /* load in[i...i+1] */ - smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ - smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ - ldmia r2!, {r4, r5} /* load win[i...i+1] */ + smlawb lr, r6,
r8, lr /* s1 += win[i ] * in[i ] */ + smlawt r12, r7, r8, r12 /* s2 += win[i+1] * in[i+1] */ + // ldmia r2!, {r4, r5} /* load win[i...i+1] */ ldmia r1!, {r6, r7} /* load in[i...i+1] */ - smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ - smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ - ldmia r2!, {r4, r5} /* load win[i...i+1] */ - ldmia r1!, {r6, r7} /* load in[i...i+1] */ - smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ - smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ + smlawb lr, r6, r9, lr /* s1 += win[i ] * in[i ] */ + smlawt r12, r7, r9, r12 /* s2 += win[i+1] * in[i+1] */ /* 40..47 */ - ldmia r2!, {r4, r5} /* load win[i...i+1] */ + ldmia r2!, {r4, r5, r8, r9} /* load win[i...i+7] */ + ldmia r1!, {r6, r7} /* load in[i...i+1] */ + smlawb lr, r6, r4, lr /* s1 += win[i ] * in[i ] */ + smlawt r12, r7, r4, r12 /* s2 += win[i+1] * in[i+1] */ + ldmia r1!, {r6, r7} /* load in[i...i+1] */ - smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ - smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ - ldmia r2!, {r4, r5} /* load win[i...i+1] */ + smlawb lr, r6, r5, lr /* s1 += win[i ] * in[i ] */ + smlawt r12, r7, r5, r12 /* s2 += win[i+1] * in[i+1] */ + //ldmia r2!, {r4, r5} /* load win[i...i+1] */ ldmia r1!, {r6, r7} /* load in[i...i+1] */ - smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ - smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ - ldmia r2!, {r4, r5} /* load win[i...i+1] */ + smlawb lr, r6, r8, lr /* s1 += win[i ] * in[i ] */ + smlawt r12, r7, r8, r12 /* s2 += win[i+1] * in[i+1] */ + // ldmia r2!, {r4, r5} /* load win[i...i+1] */ ldmia r1!, {r6, r7} /* load in[i...i+1] */ - smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ - smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ - ldmia r2!, {r4, r5} /* load win[i...i+1] */ - ldmia r1!, {r6, r7} /* load in[i...i+1] */ -
smlal lr , r9, r4, r6 /* s1 = win[i ] * in[i ] */ - smlal r12, r8, r5, r7 /* s2 = win[i+1] * in[i+1] */ + smlawb lr, r6, r9, lr /* s1 += win[i ] * in[i ] */ + smlawt r12, r7, r9, r12 /* s2 += win[i+1] * in[i+1] */ - mov lr , lr , lsr #31 - orr r9, lr , r9, lsl #1 /* s1 = low>>31 || hi<<1 */ - mov r12, r12, lsr #31 - orr r8, r12, r8, lsl #1 /* s2 = low>>31 || hi<<1 */ + mov lr , lr , lsl #1 /* s1 <<= 1: smlaw drops 16 bits, fixmul32x16 drops 15 */ + // orr r9, lr , r9, lsl #1 /* s1 = low>>31 || hi<<1 */ + mov r12, r12, lsl #1 /* s2 <<= 1: smlaw drops 16 bits, fixmul32x16 drops 15 */ + //orr r8, r12, r8, lsl #1 /* s2 = low>>31 || hi<<1 */ - stmia r0!, {r8, r9} /* store result out[0]=s2, out[1]=s1 */ + stmia r0!, {r12, lr} /* store result out[0]=s2, out[1]=s1 */ sub r1, r1, #184 /* roll back 64 entries = 184 bytes */ - sub r2, r2, #192 /* roll back 48 entries = 192 bytes = win[0] */ + sub r2, r2, #96 /* roll back 48 entries = 96 bytes = win[0] */ Index: apps/codecs/libatrac/atrac3.c =================================================================== --- apps/codecs/libatrac/atrac3.c (revision 28427) +++ apps/codecs/libatrac/atrac3.c (working copy) @@ -44,18 +44,15 @@ #define JOINT_STEREO 0x12 #define STEREO 0x2 -#ifdef ROCKBOX -#undef DEBUGF -#define DEBUGF(...) -#endif /* ROCKBOX */ + /* FFMAX/MIN/SWAP and av_clip were taken from libavutil/common.h */ #define FFMAX(a,b) ((a) > (b) ? (a) : (b)) #define FFMIN(a,b) ((a) > (b) ?
(b) : (a)) #define FFSWAP(type,a,b) do{type SWAP_tmp= b; b= a; a= SWAP_tmp;}while(0) static VLC spectral_coeff_tab[7]; -static int32_t qmf_window[48] IBSS_ATTR; +static int16_t qmf_window[48] IBSS_ATTR; static int32_t atrac3_spectrum [2][1024] IBSS_ATTR __attribute__((aligned(16))); static int32_t atrac3_IMDCT_buf[2][ 512] IBSS_ATTR __attribute__((aligned(16))); static int32_t atrac3_prevFrame[2][1024] IBSS_ATTR; @@ -122,7 +119,7 @@ extern void atrac3_iqmf_dewindowing(int32_t *out, int32_t *in, - int32_t *win, + int16_t *win, unsigned int nIn); #elif defined (CPU_COLDFIRE) #define MULTIPLY_ADD_BLOCK \ @@ -141,7 +138,7 @@ static inline void atrac3_iqmf_dewindowing(int32_t *out, int32_t *in, - int32_t *win, + int16_t *win, unsigned int nIn) { int32_t j; @@ -177,19 +174,19 @@ } #else #define MULTIPLY_ADD_BLOCK(y1, y2, x, c, k) \ - y1 += fixmul31(c[k], x[k]); k++; \ - y2 += fixmul31(c[k], x[k]); k++; \ - y1 += fixmul31(c[k], x[k]); k++; \ - y2 += fixmul31(c[k], x[k]); k++; \ - y1 += fixmul31(c[k], x[k]); k++; \ - y2 += fixmul31(c[k], x[k]); k++; \ - y1 += fixmul31(c[k], x[k]); k++; \ - y2 += fixmul31(c[k], x[k]); k++; + y1 += fixmul32x16(c[k], x[k]); k++; \ + y2 += fixmul32x16(c[k], x[k]); k++; \ + y1 += fixmul32x16(c[k], x[k]); k++; \ + y2 += fixmul32x16(c[k], x[k]); k++; \ + y1 += fixmul32x16(c[k], x[k]); k++; \ + y2 += fixmul32x16(c[k], x[k]); k++; \ + y1 += fixmul32x16(c[k], x[k]); k++; \ + y2 += fixmul32x16(c[k], x[k]); k++; static inline void atrac3_iqmf_dewindowing(int32_t *out, int32_t *in, - int32_t *win, + int16_t *win, unsigned int nIn) { int32_t i, j, s1, s2; @@ -206,7 +203,9 @@ out[0] = s2; out[1] = s1; + } + } #endif @@ -320,9 +319,10 @@ /* Generate the QMF window.
*/ for (i=0 ; i<24; i++) { s = qmf_48tap_half_fix[i] << 1; - qmf_window[i] = s; - qmf_window[47 - i] = s; +// DEBUGF("%f %d %f\n",qmf_48tap_half_fix[i]/2147483648.0,s>>16, ((int16_t)(s>>16))/65536.0); + qmf_window[i] = qmf_window[47-i] = (int16_t)((s+(1<<15))>>16); } + } @@ -1229,7 +1229,7 @@ vlcs_initialized = 1; } - + init_atrac3_transforms(); /* init the joint-stereo decoding data */