Index: apps/codecs/libmusepack/mpc_decoder.c =================================================================== --- apps/codecs/libmusepack/mpc_decoder.c (revision 28551) +++ apps/codecs/libmusepack/mpc_decoder.c (working copy) @@ -62,8 +62,10 @@ //Decoder globals (g_Y_L and g_Y_R do not fit into iram for all targets) static mpc_decoder g_mpc_decoder IBSS_ATTR; -static MPC_SAMPLE_FORMAT g_Y_L[MPC_FRAME_LENGTH] IBSS_ATTR_MPC_LARGE_IRAM; -static MPC_SAMPLE_FORMAT g_Y_R[MPC_FRAME_LENGTH] IBSS_ATTR_MPC_LARGE_IRAM; +static MPC_SAMPLE_FORMAT g_V_L[MPC_V_MEM + 960 ] IBSS_ATTR __attribute__((aligned(16))); +static MPC_SAMPLE_FORMAT g_Y_L[MPC_FRAME_LENGTH] IBSS_ATTR_MPC_LARGE_IRAM __attribute__((aligned(16))); +static MPC_SAMPLE_FORMAT g_V_R[MPC_V_MEM + 960 ] IBSS_ATTR __attribute__((aligned(16))); +static MPC_SAMPLE_FORMAT g_Y_R[MPC_FRAME_LENGTH] IBSS_ATTR_MPC_LARGE_IRAM __attribute__((aligned(16))); //SV7 globals (decoding results for bundled quantizers (3- and 5-step)) static const mpc_int32_t g_sv7_idx30[] ICONST_ATTR = @@ -236,9 +238,13 @@ d->__r1 = 1; d->__r2 = 1; + d->V_L = g_V_L; + d->V_R = g_V_R; d->Y_L = g_Y_L; d->Y_R = g_Y_R; - + + memset(d->V_L, 0, sizeof(g_V_L)); + memset(d->V_R, 0, sizeof(g_V_R)); memset(d->Y_L, 0, sizeof(g_Y_L)); memset(d->Y_R, 0, sizeof(g_Y_R)); Index: apps/codecs/libmusepack/synth_filter.c =================================================================== --- apps/codecs/libmusepack/synth_filter.c (revision 28551) +++ apps/codecs/libmusepack/synth_filter.c (working copy) @@ -66,9 +66,9 @@ // by the correct amount to have proper scaled output #define D(value) MAKE_MPC_SAMPLE((double)value*(double)(0x1000)) #endif - + // Di_opt coefficients are +/- 2^17 (pre-shifted by <<16) -static const MPC_SAMPLE_FORMAT Di_opt [512] ICONST_ATTR = { +static const MPC_SAMPLE_FORMAT Di_opt [512] ICONST_ATTR __attribute__((aligned(16))) = { /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */ /* 0 */ D( 0), -D( 29), D(213), -D( 459), D(2037), -D(5153), D( 6574), -D(37489), D(75038), D(37489), D(6574), D(5153), D(2037), D(459), D(213), D(29), /* 1 */ -D( 1), -D( 31), D(218), -D( 519), D(2000), -D(5517), D( 5959), -D(39336), D(74992), D(35640), D(7134), D(4788), D(2063), D(401), D(208), D(26), Index: apps/codecs/libmusepack/decoder.h =================================================================== --- apps/codecs/libmusepack/decoder.h (revision 28551) +++ apps/codecs/libmusepack/decoder.h (working copy) @@ -87,8 +87,8 @@ mpc_uint8_t SCF_shift[256]; #endif - MPC_SAMPLE_FORMAT V_L[MPC_V_MEM + 960]; - MPC_SAMPLE_FORMAT V_R[MPC_V_MEM + 960]; + MPC_SAMPLE_FORMAT *V_L; + MPC_SAMPLE_FORMAT *V_R; MPC_SAMPLE_FORMAT *Y_L; MPC_SAMPLE_FORMAT *Y_R; MPC_SAMPLE_FORMAT SCF[256]; ///< holds adapted scalefactors (for clipping prevention) Index: apps/codecs/libmusepack/synth_filter_arm.S =================================================================== --- apps/codecs/libmusepack/synth_filter_arm.S (revision 28551) +++ apps/codecs/libmusepack/synth_filter_arm.S (working copy) @@ -164,7 +164,7 @@ * r10 = lo, r11 = hi of 31..17 * r12 = V[31..16] *****************************************/ - mov lr, #15 + mov lr, #15*8 add r12, r1, #30*4 /* r12 = V[31] */ .loop15: ldmia r2!, { r3-r6 } /* load D[00..03] */ @@ -238,21 +238,19 @@ /* store Data[01..15] */ mov r8, r8, lsr #16 orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */ - str r8, [r0] /* store Data */ /* store Data[31..17] */ - add r0, r0, lr, asl #3 /* r0 = r0 + 2*lr [words] */ mov r10, r10, lsr #16 orr r10, r10, r11, lsl #16 /* (lo>>16) || (hi<<16) */ rsb r10, r10, #0 /* r10 = -r10 */ - str r10, [r0], #4 /* store Data */ - sub r0, r0, lr, asl #3 /* r0 = r0 - 2*lr [words] */ + str r10, [r0, lr] /* store Data */ + str r8, [r0], #4 /* store Data */ /* correct adresses for next loop */ sub r12, r12, #4 /* r12 = V-- */ add r1, r1, #4 /* r1 = V++ */ /* next loop */ - subs lr, lr, #1 + subs lr, lr, #8 bgt .loop15 - + /****************************************** * V[16] with internal symmetry *****************************************/ @@ -293,7 +291,6 @@ mov r8, r8, lsr #16 orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */ str r8, [r0], #4 /* store Data */ - add r1, r1, #4 /* V++ */ ldmpc regs=r4-r11 #elif ARM_ARCH < 6 /* arm9 and above */ @@ -365,7 +362,7 @@ * r10 = lo, r11 = hi of 31..17 * r12 = V[31..16] *****************************************/ - mov lr, #15 + mov lr, #15*8 add r12, r1, #30*4 /* r12 = V[31] */ .loop15: ldmia r2!, { r3-r4 } /* load D[00..01] */ @@ -443,19 +440,17 @@ /* store Data[01..15] */ mov r8, r8, lsr #16 orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */ - str r8, [r0] /* store Data */ /* store Data[31..17] */ - add r0, r0, lr, asl #3 /* r0 = r0 + 2*lr [words] */ mov r10, r10, lsr #16 orr r10, r10, r11, lsl #16 /* (lo>>16) || (hi<<16) */ rsb r10, r10, #0 /* r10 = -r10 */ - str r10, [r0], #4 /* store Data */ - sub r0, r0, lr, asl #3 /* r0 = r0 - 2*lr [words] */ + str r10, [r0, lr] /* store Data */ + str r8, [r0], #4 /* store Data */ /* correct adresses for next loop */ sub r12, r12, #4 /* r12 = V-- */ add r1, r1, #4 /* r1 = V++ */ /* next loop */ - subs lr, lr, #1 + subs lr, lr, #8 bgt .loop15 /****************************************** @@ -498,7 +493,6 @@ mov r8, r8, lsr #16 orr r8, r8, r9, lsl #16 /* (lo>>16) || (hi<<16) */ str r8, [r0], #4 /* store Data */ - add r1, r1, #4 /* V++ */ ldmpc regs=r4-r11 #else @@ -645,11 +639,10 @@ rsb r11, r11, #0 /* r11 = -r11 */ /* store Data[01..15] */ mov r9, r9, lsl #2 - str r9, [r0] /* store Data */ /* store Data[31..17] */ mov r11, r11, lsl #2 str r11, [r0, lr] /* store Data */ - add r0, r0, #4 /* r0++ */ + str r9, [r0], #4 /* store Data */ /* next loop */ subs lr, lr, #8 bgt .loop15