Index: apps/codecs/libfaad/sbr_qmf.c =================================================================== --- apps/codecs/libfaad/sbr_qmf.c (revision 27293) +++ apps/codecs/libfaad/sbr_qmf.c (working copy) @@ -38,6 +38,12 @@ #include "sbr_qmf_c.h" #include "sbr_syntax.h" +#ifdef FIXED_POINT + #define FAAD_SCALE(X) ((X)>>1) +#else + #define FAAD_SCALE(X) ((X)*scale) +#endif + qmfa_info *qmfa_init(uint8_t channels) { qmfa_info *qmfa = (qmfa_info*)faad_malloc(sizeof(qmfa_info)); @@ -66,42 +72,53 @@ void sbr_qmf_analysis_32(sbr_info *sbr, qmfa_info *qmfa, const real_t *input, qmf_t X[MAX_NTSRHFG][64], uint8_t offset, uint8_t kx) { - ALIGN real_t u[64]; + static ALIGN real_t u[64] IBSS_ATTR_FAAD_LARGE_IRAM; #ifndef SBR_LOW_POWER - static ALIGN real_t in_real[32], in_imag[32], out_real[32], out_imag[32]; + static ALIGN real_t in_real[32] IBSS_ATTR_FAAD_LARGE_IRAM; + static ALIGN real_t in_imag[32] IBSS_ATTR_FAAD_LARGE_IRAM; + static ALIGN real_t out_real[32] IBSS_ATTR_FAAD_LARGE_IRAM; + static ALIGN real_t out_imag[32] IBSS_ATTR_FAAD_LARGE_IRAM; #else ALIGN real_t y[32]; #endif - uint16_t in = 0; - uint8_t l; + uint32_t in = 0; + uint32_t l, idx0, idx1; /* qmf subsample l */ for (l = 0; l < sbr->numTimeSlotsRate; l++) { - int16_t n; + int32_t n; /* shift input buffer x */ /* input buffer is not shifted anymore, x is implemented as double ringbuffer */ //memmove(qmfa->x + 32, qmfa->x, (320-32)*sizeof(real_t)); /* add new samples to input buffer x */ - for (n = 32 - 1; n >= 0; n--) + idx0 = qmfa->x_index + 31; idx1 = idx0 + 320; + for (n = 32 - 1; n >= 0; n-=4) { #ifdef FIXED_POINT - qmfa->x[qmfa->x_index + n] = qmfa->x[qmfa->x_index + n + 320] = (input[in++]) >> 4; + qmfa->x[idx0--] = qmfa->x[idx1--] = (input[in++]) >> 4; + qmfa->x[idx0--] = qmfa->x[idx1--] = (input[in++]) >> 4; + qmfa->x[idx0--] = qmfa->x[idx1--] = (input[in++]) >> 4; + qmfa->x[idx0--] = qmfa->x[idx1--] = (input[in++]) >> 4; #else - qmfa->x[qmfa->x_index + n] = qmfa->x[qmfa->x_index + n + 320] = 
input[in++]; + qmfa->x[idx0--] = qmfa->x[idx1--] = (input[in++]); + qmfa->x[idx0--] = qmfa->x[idx1--] = (input[in++]); + qmfa->x[idx0--] = qmfa->x[idx1--] = (input[in++]); + qmfa->x[idx0--] = qmfa->x[idx1--] = (input[in++]); #endif } /* window and summation to create array u */ for (n = 0; n < 64; n++) { - u[n] = MUL_F(qmfa->x[qmfa->x_index + n], qmf_c[2*n]) + - MUL_F(qmfa->x[qmfa->x_index + n + 64], qmf_c[2*(n + 64)]) + - MUL_F(qmfa->x[qmfa->x_index + n + 128], qmf_c[2*(n + 128)]) + - MUL_F(qmfa->x[qmfa->x_index + n + 192], qmf_c[2*(n + 192)]) + - MUL_F(qmfa->x[qmfa->x_index + n + 256], qmf_c[2*(n + 256)]); + idx0 = qmfa->x_index + n; idx1 = n * 2; + u[n] = MUL_F(qmfa->x[idx0 ], qmf_c[idx1]) + + MUL_F(qmfa->x[idx0 + 64], qmf_c[idx1 + 2 * 64]) + + MUL_F(qmfa->x[idx0 + 128], qmf_c[idx1 + 2 * 128]) + + MUL_F(qmfa->x[idx0 + 192], qmf_c[idx1 + 2 * 192]) + + MUL_F(qmfa->x[idx0 + 256], qmf_c[idx1 + 2 * 256]); } /* update ringbuffer index */ @@ -132,18 +149,18 @@ QMF_RE(X[l + offset][n]) = 0; } } -#else +#else /* #ifdef SBR_LOW_POWER */ // Reordering of data moved from DCT_IV to here - in_imag[31] = u[1]; - in_real[0] = u[0]; - for (n = 1; n < 31; n++) + idx0 = 30; idx1 = 63; + in_imag[31] = u[ 1]; in_real[ 0] = u[ 0]; + for (n = 1; n < 31; n+=3) { - in_imag[31 - n] = u[n+1]; - in_real[n] = -u[64-n]; + in_imag[idx0--] = u[n+1]; in_real[n ] = -u[idx1--]; + in_imag[idx0--] = u[n+2]; in_real[n+1] = -u[idx1--]; + in_imag[idx0--] = u[n+3]; in_real[n+2] = -u[idx1--]; } - in_imag[0] = u[32]; - in_real[31] = -u[33]; + in_imag[ 0] = u[32]; in_real[31] = -u[33]; // dct4_kernel is DCT_IV without reordering which is done before and after FFT dct4_kernel(in_real, in_imag, out_real, out_imag); @@ -180,7 +197,7 @@ QMF_IM(X[l + offset][2*n+1]) = 0; } } -#endif +#endif /* #ifdef SBR_LOW_POWER */ } } @@ -249,8 +266,8 @@ void sbr_qmf_synthesis_32(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64], real_t *output) { - ALIGN real_t 
x[16]; - ALIGN real_t y[16]; + static ALIGN real_t x[16]; + static ALIGN real_t y[16]; int16_t n, k, out = 0; uint8_t l; @@ -384,18 +402,27 @@ qmfs->v_index = (1280-128); } } -#else +#else /* #ifdef SBR_LOW_POWER */ + +#define FAAD_CMPLX_PRETWIDDLE_SUB(k) \ + FAAD_SCALE((MUL_F(QMF_RE(X[l][k]), RE(qmf32_pre_twiddle[k])) - \ + MUL_F(QMF_IM(X[l][k]), IM(qmf32_pre_twiddle[k])))) \ + +#define FAAD_CMPLX_PRETWIDDLE_ADD(k) \ + FAAD_SCALE((MUL_F(QMF_IM(X[l][k]), RE(qmf32_pre_twiddle[k])) + \ + MUL_F(QMF_RE(X[l][k]), IM(qmf32_pre_twiddle[k])))) \ + void sbr_qmf_synthesis_32(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64], real_t *output) { - ALIGN real_t x1[32], x2[32]; + static ALIGN real_t x1[32] IBSS_ATTR_FAAD_LARGE_IRAM; + static ALIGN real_t x2[32] IBSS_ATTR_FAAD_LARGE_IRAM; #ifndef FIXED_POINT real_t scale = 1.f/64.f; #endif - int16_t n, k, out = 0; - uint8_t l; + int32_t n, k, idx0, idx1, out = 0; + uint32_t l; - /* qmf subsample l */ for (l = 0; l < sbr->numTimeSlotsRate; l++) { @@ -405,43 +432,42 @@ /* calculate 64 samples */ /* complex pre-twiddle */ - for (k = 0; k < 32; k++) + for (k = 0; k < 32;) { - x1[k] = MUL_F(QMF_RE(X[l][k]), RE(qmf32_pre_twiddle[k])) - MUL_F(QMF_IM(X[l][k]), IM(qmf32_pre_twiddle[k])); - x2[k] = MUL_F(QMF_IM(X[l][k]), RE(qmf32_pre_twiddle[k])) + MUL_F(QMF_RE(X[l][k]), IM(qmf32_pre_twiddle[k])); - -#ifndef FIXED_POINT - x1[k] *= scale; - x2[k] *= scale; -#else - x1[k] >>= 1; - x2[k] >>= 1; -#endif + x1[k] = FAAD_CMPLX_PRETWIDDLE_SUB(k); x2[k] = FAAD_CMPLX_PRETWIDDLE_ADD(k); k++; + x1[k] = FAAD_CMPLX_PRETWIDDLE_SUB(k); x2[k] = FAAD_CMPLX_PRETWIDDLE_ADD(k); k++; + x1[k] = FAAD_CMPLX_PRETWIDDLE_SUB(k); x2[k] = FAAD_CMPLX_PRETWIDDLE_ADD(k); k++; + x1[k] = FAAD_CMPLX_PRETWIDDLE_SUB(k); x2[k] = FAAD_CMPLX_PRETWIDDLE_ADD(k); k++; } /* transform */ DCT4_32(x1, x1); DST4_32(x2, x2); - for (n = 0; n < 32; n++) + idx0 = qmfs->v_index; + idx1 = qmfs->v_index + 63; + for (n = 0; n < 32; n+=2) { - qmfs->v[qmfs->v_index + n] = 
qmfs->v[qmfs->v_index + 640 + n] = -x1[n] + x2[n]; - qmfs->v[qmfs->v_index + 63 - n] = qmfs->v[qmfs->v_index + 640 + 63 - n] = x1[n] + x2[n]; + qmfs->v[idx0] = qmfs->v[idx0 + 640] = -x1[n ] + x2[n ]; idx0++; + qmfs->v[idx1] = qmfs->v[idx1 + 640] = x1[n ] + x2[n ]; idx1--; + qmfs->v[idx0] = qmfs->v[idx0 + 640] = -x1[n+1] + x2[n+1]; idx0++; + qmfs->v[idx1] = qmfs->v[idx1 + 640] = x1[n+1] + x2[n+1]; idx1--; } /* calculate 32 output samples and window */ for (k = 0; k < 32; k++) { - output[out++] = MUL_F(qmfs->v[qmfs->v_index + k], qmf_c[2*k]) + - MUL_F(qmfs->v[qmfs->v_index + 96 + k], qmf_c[64 + 2*k]) + - MUL_F(qmfs->v[qmfs->v_index + 128 + k], qmf_c[128 + 2*k]) + - MUL_F(qmfs->v[qmfs->v_index + 224 + k], qmf_c[192 + 2*k]) + - MUL_F(qmfs->v[qmfs->v_index + 256 + k], qmf_c[256 + 2*k]) + - MUL_F(qmfs->v[qmfs->v_index + 352 + k], qmf_c[320 + 2*k]) + - MUL_F(qmfs->v[qmfs->v_index + 384 + k], qmf_c[384 + 2*k]) + - MUL_F(qmfs->v[qmfs->v_index + 480 + k], qmf_c[448 + 2*k]) + - MUL_F(qmfs->v[qmfs->v_index + 512 + k], qmf_c[512 + 2*k]) + - MUL_F(qmfs->v[qmfs->v_index + 608 + k], qmf_c[576 + 2*k]); + idx0 = qmfs->v_index + k; idx1 = 2*k; + output[out++] = MUL_F(qmfs->v[idx0 ], qmf_c[idx1 ]) + + MUL_F(qmfs->v[idx0 + 96], qmf_c[idx1 + 64]) + + MUL_F(qmfs->v[idx0 + 128], qmf_c[idx1 + 128]) + + MUL_F(qmfs->v[idx0 + 224], qmf_c[idx1 + 192]) + + MUL_F(qmfs->v[idx0 + 256], qmf_c[idx1 + 256]) + + MUL_F(qmfs->v[idx0 + 352], qmf_c[idx1 + 320]) + + MUL_F(qmfs->v[idx0 + 384], qmf_c[idx1 + 384]) + + MUL_F(qmfs->v[idx0 + 480], qmf_c[idx1 + 448]) + + MUL_F(qmfs->v[idx0 + 512], qmf_c[idx1 + 512]) + + MUL_F(qmfs->v[idx0 + 608], qmf_c[idx1 + 576]); } /* update ringbuffer index */ @@ -454,31 +480,24 @@ void sbr_qmf_synthesis_64(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64], real_t *output) { -// ALIGN real_t x1[64], x2[64]; #ifndef SBR_LOW_POWER - static ALIGN real_t in_real1[32], in_imag1[32], out_real1[32], out_imag1[32]; - static ALIGN real_t in_real2[32], in_imag2[32], 
out_real2[32], out_imag2[32]; + static ALIGN real_t in_real1[32] IBSS_ATTR_FAAD_LARGE_IRAM; + static ALIGN real_t in_imag1[32] IBSS_ATTR_FAAD_LARGE_IRAM; + static ALIGN real_t out_real1[32] IBSS_ATTR_FAAD_LARGE_IRAM; + static ALIGN real_t out_imag1[32] IBSS_ATTR_FAAD_LARGE_IRAM; + static ALIGN real_t in_real2[32] IBSS_ATTR_FAAD_LARGE_IRAM; + static ALIGN real_t in_imag2[32] IBSS_ATTR_FAAD_LARGE_IRAM; + static ALIGN real_t out_real2[32] IBSS_ATTR_FAAD_LARGE_IRAM; + static ALIGN real_t out_imag2[32] IBSS_ATTR_FAAD_LARGE_IRAM; #endif qmf_t * pX; - real_t * pring_buffer_1, * pring_buffer_3; -// real_t * ptemp_1, * ptemp_2; -#ifdef PREFER_POINTERS - // These pointers are used if target platform has autoinc address generators - real_t * pring_buffer_2, * pring_buffer_4; - real_t * pring_buffer_5, * pring_buffer_6; - real_t * pring_buffer_7, * pring_buffer_8; - real_t * pring_buffer_9, * pring_buffer_10; - const real_t * pqmf_c_1, * pqmf_c_2, * pqmf_c_3, * pqmf_c_4; - const real_t * pqmf_c_5, * pqmf_c_6, * pqmf_c_7, * pqmf_c_8; - const real_t * pqmf_c_9, * pqmf_c_10; -#endif // #ifdef PREFER_POINTERS + real_t * p_buf_1, * p_buf_3; #ifndef FIXED_POINT real_t scale = 1.f/64.f; #endif - int16_t n, k, out = 0; - uint8_t l; - - + int32_t n, k, idx0, idx1, out = 0; + uint32_t l; + /* qmf subsample l */ for (l = 0; l < sbr->numTimeSlotsRate; l++) { @@ -487,139 +506,56 @@ //memmove(qmfs->v + 128, qmfs->v, (1280-128)*sizeof(real_t)); /* calculate 128 samples */ -#ifndef FIXED_POINT - pX = X[l]; - in_imag1[31] = scale*QMF_RE(pX[1]); - in_real1[0] = scale*QMF_RE(pX[0]); - in_imag2[31] = scale*QMF_IM(pX[63-1]); - in_real2[0] = scale*QMF_IM(pX[63-0]); + pX = X[l]; + in_imag1[31] = FAAD_SCALE(QMF_RE(pX[ 1])); + in_real1[ 0] = FAAD_SCALE(QMF_RE(pX[ 0])); + in_imag2[31] = FAAD_SCALE(QMF_IM(pX[62])); + in_real2[ 0] = FAAD_SCALE(QMF_IM(pX[63])); for (k = 1; k < 31; k++) { - in_imag1[31 - k] = scale*QMF_RE(pX[2*k + 1]); - in_real1[ k] = scale*QMF_RE(pX[2*k ]); - in_imag2[31 - k] = scale*QMF_IM(pX[63 - 
(2*k + 1)]); - in_real2[ k] = scale*QMF_IM(pX[63 - (2*k )]); + in_imag1[31 - k] = FAAD_SCALE(QMF_RE(pX[ (2*k + 1)])); + in_real1[ k] = FAAD_SCALE(QMF_RE(pX[ (2*k )])); + in_imag2[31 - k] = FAAD_SCALE(QMF_IM(pX[63 - (2*k + 1)])); + in_real2[ k] = FAAD_SCALE(QMF_IM(pX[63 - (2*k )])); } - in_imag1[0] = scale*QMF_RE(pX[63]); - in_real1[31] = scale*QMF_RE(pX[62]); - in_imag2[0] = scale*QMF_IM(pX[63-63]); - in_real2[31] = scale*QMF_IM(pX[63-62]); + in_imag1[ 0] = FAAD_SCALE(QMF_RE(pX[63])); + in_real1[31] = FAAD_SCALE(QMF_RE(pX[62])); + in_imag2[ 0] = FAAD_SCALE(QMF_IM(pX[ 0])); + in_real2[31] = FAAD_SCALE(QMF_IM(pX[ 1])); -#else - - pX = X[l]; - - in_imag1[31] = QMF_RE(pX[1]) >> 1; - in_real1[0] = QMF_RE(pX[0]) >> 1; - in_imag2[31] = QMF_IM(pX[62]) >> 1; - in_real2[0] = QMF_IM(pX[63]) >> 1; - for (k = 1; k < 31; k++) - { - in_imag1[31 - k] = QMF_RE(pX[2*k + 1]) >> 1; - in_real1[ k] = QMF_RE(pX[2*k ]) >> 1; - in_imag2[31 - k] = QMF_IM(pX[63 - (2*k + 1)]) >> 1; - in_real2[ k] = QMF_IM(pX[63 - (2*k )]) >> 1; - } - in_imag1[0] = QMF_RE(pX[63]) >> 1; - in_real1[31] = QMF_RE(pX[62]) >> 1; - in_imag2[0] = QMF_IM(pX[0]) >> 1; - in_real2[31] = QMF_IM(pX[1]) >> 1; - -#endif - - // dct4_kernel is DCT_IV without reordering which is done before and after FFT dct4_kernel(in_real1, in_imag1, out_real1, out_imag1); dct4_kernel(in_real2, in_imag2, out_real2, out_imag2); + p_buf_1 = qmfs->v + qmfs->v_index; + p_buf_3 = p_buf_1 + 1280; - pring_buffer_1 = qmfs->v + qmfs->v_index; - pring_buffer_3 = pring_buffer_1 + 1280; -#ifdef PREFER_POINTERS - pring_buffer_2 = pring_buffer_1 + 127; - pring_buffer_4 = pring_buffer_1 + (1280 + 127); -#endif // #ifdef PREFER_POINTERS -// ptemp_1 = x1; -// ptemp_2 = x2; -#ifdef PREFER_POINTERS - for (n = 0; n < 32; n ++) - { - //real_t x1 = *ptemp_1++; - //real_t x2 = *ptemp_2++; - // pring_buffer_3 and pring_buffer_4 are needed only for double ring buffer - *pring_buffer_1++ = *pring_buffer_3++ = out_real2[n] - out_real1[n]; - *pring_buffer_2-- = 
*pring_buffer_4-- = out_real2[n] + out_real1[n]; - //x1 = *ptemp_1++; - //x2 = *ptemp_2++; - *pring_buffer_1++ = *pring_buffer_3++ = out_imag2[31-n] + out_imag1[31-n]; - *pring_buffer_2-- = *pring_buffer_4-- = out_imag2[31-n] - out_imag1[31-n]; - } -#else // #ifdef PREFER_POINTERS - + idx0 = 0; idx1 = 127; for (n = 0; n < 32; n++) { - // pring_buffer_3 and pring_buffer_4 are needed only for double ring buffer - pring_buffer_1[2*n] = pring_buffer_3[2*n] = out_real2[n] - out_real1[n]; - pring_buffer_1[127-2*n] = pring_buffer_3[127-2*n] = out_real2[n] + out_real1[n]; - pring_buffer_1[2*n+1] = pring_buffer_3[2*n+1] = out_imag2[31-n] + out_imag1[31-n]; - pring_buffer_1[127-(2*n+1)] = pring_buffer_3[127-(2*n+1)] = out_imag2[31-n] - out_imag1[31-n]; + p_buf_1[idx0] = p_buf_3[idx0] = out_real2[ n] - out_real1[ n]; idx0++; + p_buf_1[idx1] = p_buf_3[idx1] = out_real2[ n] + out_real1[ n]; idx1--; + p_buf_1[idx0] = p_buf_3[idx0] = out_imag2[31-n] + out_imag1[31-n]; idx0++; + p_buf_1[idx1] = p_buf_3[idx1] = out_imag2[31-n] - out_imag1[31-n]; idx1--; } -#endif // #ifdef PREFER_POINTERS + p_buf_1 = qmfs->v + qmfs->v_index; - pring_buffer_1 = qmfs->v + qmfs->v_index; -#ifdef PREFER_POINTERS - pring_buffer_2 = pring_buffer_1 + 192; - pring_buffer_3 = pring_buffer_1 + 256; - pring_buffer_4 = pring_buffer_1 + (256 + 192); - pring_buffer_5 = pring_buffer_1 + 512; - pring_buffer_6 = pring_buffer_1 + (512 + 192); - pring_buffer_7 = pring_buffer_1 + 768; - pring_buffer_8 = pring_buffer_1 + (768 + 192); - pring_buffer_9 = pring_buffer_1 + 1024; - pring_buffer_10 = pring_buffer_1 + (1024 + 192); - pqmf_c_1 = qmf_c; - pqmf_c_2 = qmf_c + 64; - pqmf_c_3 = qmf_c + 128; - pqmf_c_4 = qmf_c + 192; - pqmf_c_5 = qmf_c + 256; - pqmf_c_6 = qmf_c + 320; - pqmf_c_7 = qmf_c + 384; - pqmf_c_8 = qmf_c + 448; - pqmf_c_9 = qmf_c + 512; - pqmf_c_10 = qmf_c + 576; -#endif // #ifdef PREFER_POINTERS - /* calculate 64 output samples and window */ for (k = 0; k < 64; k++) { -#ifdef PREFER_POINTERS - output[out++] 
= - MUL_F(*pring_buffer_1++, *pqmf_c_1++) + - MUL_F(*pring_buffer_2++, *pqmf_c_2++) + - MUL_F(*pring_buffer_3++, *pqmf_c_3++) + - MUL_F(*pring_buffer_4++, *pqmf_c_4++) + - MUL_F(*pring_buffer_5++, *pqmf_c_5++) + - MUL_F(*pring_buffer_6++, *pqmf_c_6++) + - MUL_F(*pring_buffer_7++, *pqmf_c_7++) + - MUL_F(*pring_buffer_8++, *pqmf_c_8++) + - MUL_F(*pring_buffer_9++, *pqmf_c_9++) + - MUL_F(*pring_buffer_10++, *pqmf_c_10++); -#else // #ifdef PREFER_POINTERS - output[out++] = - MUL_F(pring_buffer_1[k+0], qmf_c[k+0]) + - MUL_F(pring_buffer_1[k+192], qmf_c[k+64]) + - MUL_F(pring_buffer_1[k+256], qmf_c[k+128]) + - MUL_F(pring_buffer_1[k+(256+192)], qmf_c[k+192]) + - MUL_F(pring_buffer_1[k+512], qmf_c[k+256]) + - MUL_F(pring_buffer_1[k+(512+192)], qmf_c[k+320]) + - MUL_F(pring_buffer_1[k+768], qmf_c[k+384]) + - MUL_F(pring_buffer_1[k+(768+192)], qmf_c[k+448]) + - MUL_F(pring_buffer_1[k+1024], qmf_c[k+512]) + - MUL_F(pring_buffer_1[k+(1024+192)], qmf_c[k+576]); -#endif // #ifdef PREFER_POINTERS + output[out++] = MUL_F(p_buf_1[k ], qmf_c[k ]) + + MUL_F(p_buf_1[k+ 192 ], qmf_c[k+ 64]) + + MUL_F(p_buf_1[k+ 256 ], qmf_c[k+128]) + + MUL_F(p_buf_1[k+ 256+192], qmf_c[k+192]) + + MUL_F(p_buf_1[k+ 512 ], qmf_c[k+256]) + + MUL_F(p_buf_1[k+ 512+192], qmf_c[k+320]) + + MUL_F(p_buf_1[k+ 768 ], qmf_c[k+384]) + + MUL_F(p_buf_1[k+ 768+192], qmf_c[k+448]) + + MUL_F(p_buf_1[k+1024 ], qmf_c[k+512]) + + MUL_F(p_buf_1[k+1024+192], qmf_c[k+576]); } /* update ringbuffer index */ Index: apps/codecs/libfaad/sbr_qmf_c.h =================================================================== --- apps/codecs/libfaad/sbr_qmf_c.h (revision 27293) +++ apps/codecs/libfaad/sbr_qmf_c.h (working copy) @@ -38,7 +38,7 @@ #pragma warning(disable:4244) #endif -ALIGN static const real_t qmf_c[640] = { +ALIGN static const real_t qmf_c[640] ICONST_ATTR_FAAD_LARGE_IRAM = { FRAC_CONST(0), FRAC_CONST(-0.00055252865047), FRAC_CONST(-0.00056176925738), FRAC_CONST(-0.00049475180896), FRAC_CONST(-0.00048752279712), 
FRAC_CONST(-0.00048937912498), Index: apps/codecs/libfaad/sbr_dct.c =================================================================== --- apps/codecs/libfaad/sbr_dct.c (revision 27293) +++ apps/codecs/libfaad/sbr_dct.c (working copy) @@ -1449,12 +1449,11 @@ #else - #define n 32 #define log2n 5 // w_array_real[i] = cos(2*M_PI*i/32) -static const real_t w_array_real[] = { +static const real_t w_array_real[] ICONST_ATTR_FAAD_LARGE_IRAM = { FRAC_CONST(1.000000000000000), FRAC_CONST(0.980785279337272), FRAC_CONST(0.923879528329380), FRAC_CONST(0.831469603195765), FRAC_CONST(0.707106765732237), FRAC_CONST(0.555570210304169), @@ -1466,7 +1465,7 @@ }; // w_array_imag[i] = sin(-2*M_PI*i/32) -static const real_t w_array_imag[] = { +static const real_t w_array_imag[] ICONST_ATTR_FAAD_LARGE_IRAM = { FRAC_CONST(0.000000000000000), FRAC_CONST(-0.195090327375064), FRAC_CONST(-0.382683442461104), FRAC_CONST(-0.555570246648862), FRAC_CONST(-0.707106796640858), FRAC_CONST(-0.831469627480512), @@ -1707,7 +1706,7 @@ #undef n #undef log2n -static const real_t dct4_64_tab[] = { +static const real_t dct4_64_tab[] ICONST_ATTR_FAAD_LARGE_IRAM = { COEF_CONST(0.999924719333649), COEF_CONST(0.998118102550507), COEF_CONST(0.993906974792480), COEF_CONST(0.987301409244537), COEF_CONST(0.978317379951477), COEF_CONST(0.966976463794708), @@ -1806,55 +1805,58 @@ COEF_CONST(0.897167563438416), COEF_CONST(0.949727773666382) }; +// Tables with bit reverse values for 5 bits, bit reverse of i at i-th position +static const uint32_t bit_rev_tab[32] ICONST_ATTR_FAAD_LARGE_IRAM = { + 0,16,8,24,4,20,12,28,2,18,10,26,6,22,14,30, + 1,17,9,25,5,21,13,29,3,19,11,27,7,23,15,31 +}; + /* size 64 only! 
*/ void dct4_kernel(real_t * in_real, real_t * in_imag, real_t * out_real, real_t * out_imag) { - // Tables with bit reverse values for 5 bits, bit reverse of i at i-th position - const uint8_t bit_rev_tab[32] = { 0,16,8,24,4,20,12,28,2,18,10,26,6,22,14,30,1,17,9,25,5,21,13,29,3,19,11,27,7,23,15,31 }; - uint16_t i, i_rev; + uint32_t i, i_rev; + real_t x_re, x_im, tmp; /* Step 2: modulate */ // 3*32=96 multiplications // 3*32=96 additions for (i = 0; i < 32; i++) { - real_t x_re, x_im, tmp; x_re = in_real[i]; x_im = in_imag[i]; - tmp = MUL_C(x_re + x_im, dct4_64_tab[i]); - in_real[i] = MUL_C(x_im, dct4_64_tab[i + 64]) + tmp; - in_imag[i] = MUL_C(x_re, dct4_64_tab[i + 32]) + tmp; + tmp = MUL_C(x_re + x_im, dct4_64_tab[i ]); + in_real[i] = MUL_C(x_im , dct4_64_tab[i + 64]) + tmp; + in_imag[i] = MUL_C(x_re , dct4_64_tab[i + 32]) + tmp; } /* Step 3: FFT, but with output in bit reverse order */ - fft_dif(in_real, in_imag); + fft_dif(in_real, in_imag); /* Step 4: modulate + bitreverse reordering */ // 3*31+2=95 multiplications // 3*31+2=95 additions for (i = 0; i < 16; i++) { - real_t x_re, x_im, tmp; i_rev = bit_rev_tab[i]; - x_re = in_real[i_rev]; - x_im = in_imag[i_rev]; - - tmp = MUL_C(x_re + x_im, dct4_64_tab[i + 3*32]); - out_real[i] = MUL_C(x_im, dct4_64_tab[i + 5*32]) + tmp; - out_imag[i] = MUL_C(x_re, dct4_64_tab[i + 4*32]) + tmp; + x_re = in_real[i_rev]; + x_im = in_imag[i_rev]; + tmp = MUL_C(x_re + x_im, dct4_64_tab[i + 3*32]); + out_real[i] = MUL_C(x_im , dct4_64_tab[i + 5*32]) + tmp; + out_imag[i] = MUL_C(x_re , dct4_64_tab[i + 4*32]) + tmp; } // i = 16, i_rev = 1 = rev(16); - out_imag[16] = MUL_C(in_imag[1] - in_real[1], dct4_64_tab[16 + 3*32]); - out_real[16] = MUL_C(in_real[1] + in_imag[1], dct4_64_tab[16 + 3*32]); + x_re = in_real[1]; + x_im = in_imag[1]; + out_imag[16] = MUL_C(x_im - x_re, dct4_64_tab[16 + 3*32]); + out_real[16] = MUL_C(x_re + x_im, dct4_64_tab[16 + 3*32]); for (i = 17; i < 32; i++) { - real_t x_re, x_im, tmp; i_rev = bit_rev_tab[i]; - 
x_re = in_real[i_rev]; - x_im = in_imag[i_rev]; - tmp = MUL_C(x_re + x_im, dct4_64_tab[i + 3*32]); - out_real[i] = MUL_C(x_im, dct4_64_tab[i + 5*32]) + tmp; - out_imag[i] = MUL_C(x_re, dct4_64_tab[i + 4*32]) + tmp; + x_re = in_real[i_rev]; + x_im = in_imag[i_rev]; + tmp = MUL_C(x_re + x_im, dct4_64_tab[i + 3*32]); + out_real[i] = MUL_C(x_im , dct4_64_tab[i + 5*32]) + tmp; + out_imag[i] = MUL_C(x_re , dct4_64_tab[i + 4*32]) + tmp; } }