libtremor: keep PCM and window data in IRAM and fold the vect_mult_bw
shift into the overlap-add (svn diff against revision 20572).

Summary of the changes, per file:

  asm_mcf5249.h    vect_add is renamed vect_add_right_left and takes a const
                   source; vect_add_left_right is added and simply calls
                   vect_add_right_left.
  misc.h           C fallbacks: vect_add_right_left stores (x << 1) + y,
                   vect_add_left_right stores x + (y << 1), vect_mult_bw uses
                   MULT32 instead of MULT31, vect_copy becomes a memcpy.
  asm_arm.h        Same split in ARM asm. vect_mult_bw no longer shifts its
                   result left; the vect_add_* routines shift one operand
                   instead. vect_add_* handle 8 words per iteration,
                   vect_mult_* handle 4. The scalar tail loops are removed.
  os_types.h       Adds LOOKUP_TNC (LOOKUP_T without const) and the
                   iram_malloc_init / iram_malloc prototypes.
  oggmalloc.c      Adds iram_malloc_init / iram_malloc, which hand out
                   consecutive slices of a static IRAM_IBSS_SIZE-byte buffer
                   and return NULL once a request no longer fits.
  config-tremor.h  Defines IRAM_IBSS_SIZE per CONFIG_CPU (37376, 41984 or
                   20992 bytes), defines CHANNELS once, and drops
                   ICONST_ATTR_TREMOR_WINDOW.
  mapping0.c,
  res012.c         Drop their private CHANNELS definitions.
  window_lookup.h  Drops ICONST_ATTR_TREMOR_WINDOW from vwin256 and vwin2048.
  window.c         Comments out the two memset calls in _vorbis_apply_window.
  window.h         Marks _vorbis_apply_window ICODE_ATTR.
  ivorbiscodec.h   Adds iram_pcm, iram_pcm_storage and reset_pcmb to
                   vorbis_dsp_state.
  synthesis.c      vb->pcm now points at a static pointer table; the channel
                   buffers are taken from vd->iram_pcm when iram_pcm_storage
                   covers the block, otherwise from _vorbis_block_alloc, and
                   only when reset_pcmb is set or pcm[0] is NULL.
  block.c          _vds_init allocates the PCM buffers and copies of the
                   window tables with iram_malloc, falling back to
                   _ogg_calloc for v->pcm. vorbis_synthesis_blockin swaps the
                   v->pcm and vb->pcm pointers when the internal buffer is in
                   use and copies the right half-block otherwise.

Changes made during review, relative to the patch as submitted:

  1. vect_copy (misc.h and asm_arm.h) was declared without a return type.
     It is now declared void.
  2. In vorbis_synthesis_blockin the lW == W branch passed n1 for both
     large/large and small/small. It now passes ci->blocksizes[v->W]/2, so a
     small/small overlap adds n0 samples as the code being replaced did.
  3. The copy under review had lost its line breaks and some text between
     '<' and '>'. Line breaks were restored and checked against the hunk
     line counts. The loop headers (i<vi->channels, j<vi->channels,
     i<CHANNELS) and the <setjmp.h> include name are presumed.
     NOTE(review): confirm these, and the hunk indentation, against r20572;
     apply with whitespace ignored if indentation differs.

Open points for the author:

  NOTE(review): every do/while loop added here runs its body before testing
  n. The C versions need n >= 1; the ARM versions need n to be a positive
  multiple of 8 (vect_add_*) or 4 (vect_mult_*). Confirm all callers
  guarantee this.
  NOTE(review): in the synthesis.c fallback path vb->pcm[i] is allocated
  once, sized by that block's pcmend. A later, larger block would not fit.
  Confirm the buffer size and its lifetime across blocks.
  NOTE(review): iram_malloc does not round sizes up, so alignment of later
  allocations depends on earlier request sizes.
  NOTE(review): _vds_init and the pointer swap in vorbis_synthesis_blockin
  index channels 0 and 1 directly, which assumes CHANNELS == 2.

Index: apps/codecs/libtremor/asm_mcf5249.h
===================================================================
--- apps/codecs/libtremor/asm_mcf5249.h	(revision 20572)
+++ apps/codecs/libtremor/asm_mcf5249.h	(working copy)
@@ -140,7 +140,7 @@
 /* asm versions of vector operations for block.c, window.c */
 /* assumes MAC is initialized & accumulators cleared */
 static inline
-void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n)
+void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
 {
     /* align to 16 bytes */
     while(n>0 && (int)x&16) {
@@ -172,6 +172,14 @@
         n--;
     }
 }
+static inline
+void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
+{
+    /* coldfire asm has symmetrical versions of vect_add_right_left
+       and vect_add_left_right (since symmetrical versions of
+       vect_mult_fw and vect_mult_bw i.e. both use MULT31) */
+    vect_add_right_left(x, y, n );
+}
 
 static inline
 void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n)
@@ -199,7 +207,6 @@
     }
 }
 
-
 static inline
 void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
 {
Index: apps/codecs/libtremor/misc.h
===================================================================
--- apps/codecs/libtremor/misc.h	(revision 20572)
+++ apps/codecs/libtremor/misc.h	(working copy)
@@ -156,21 +156,21 @@
 #define _V_VECT_OPS
 
 static inline
-void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n)
+void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
 {
-  while (n>0) {
-    *x++ += *y++;
-    n--;
-  }
+  do {
+    *x = ((*x)<<1) + (*y++);
+    ++x;
+  } while (--n);
 }
 
-static inline
-void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n)
+static inline
+void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
 {
-  while (n>0) {
-    *x++ = *y++;
-    n--;
-  }
+  do {
+    *x += ((*y++)<<1);
+    ++x;
+  } while (--n);
 }
 
 static inline
@@ -188,12 +188,17 @@
 void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
 {
   while(n>0) {
-    *data = MULT31(*data, *window);
+    *data = MULT32(*data, *window); // note don't do the shift on BW
     data++;
     window--;
     n--;
   }
 }
+
+static inline void vect_copy(ogg_int32_t *x, const ogg_int32_t *y, int n)
+{
+  memcpy(x,y,n*sizeof(ogg_int32_t));
+}
 #endif
 
 #endif
Index: apps/codecs/libtremor/os_types.h
===================================================================
--- apps/codecs/libtremor/os_types.h	(revision 20572)
+++ apps/codecs/libtremor/os_types.h	(working copy)
@@ -25,9 +25,11 @@
 #ifdef _LOW_ACCURACY_
 #  define X(n) (((((n)>>22)+1)>>1) - ((((n)>>22)+1)>>9))
 #  define LOOKUP_T const unsigned char
+#  define LOOKUP_TNC unsigned char
 #else
 #  define X(n) (n)
 #  define LOOKUP_T const ogg_int32_t
+#  define LOOKUP_TNC ogg_int32_t
 #endif
 
 /* make it easy on the folks that want to compile the libs with a
@@ -46,6 +48,8 @@
 void *ogg_realloc(void *ptr, size_t size);
 long ogg_tmpmalloc_pos(void);
 void ogg_tmpmalloc_free(long pos);
+void iram_malloc_init(void);
+void *iram_malloc(size_t size);
 
 typedef short ogg_int16_t;
 typedef int ogg_int32_t;
Index: apps/codecs/libtremor/mapping0.c
===================================================================
--- apps/codecs/libtremor/mapping0.c	(revision 20572)
+++ apps/codecs/libtremor/mapping0.c	(working copy)
@@ -182,8 +182,6 @@
 
 static int seq = 0;
 
-#define CHANNELS 2  /* max 2 channels on the ihp-1xx (stereo) */
-
 static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
   vorbis_dsp_state     *vd=vb->vd;
   vorbis_info          *vi=vd->vi;
Index: apps/codecs/libtremor/config-tremor.h
===================================================================
--- apps/codecs/libtremor/config-tremor.h	(revision 20572)
+++ apps/codecs/libtremor/config-tremor.h	(working copy)
@@ -13,10 +13,6 @@
 #define BIG_ENDIAN 0
 #endif
 
-#ifndef ICONST_ATTR_TREMOR_WINDOW
-#define ICONST_ATTR_TREMOR_WINDOW ICONST_ATTR
-#endif
-
 #ifndef ICODE_ATTR_TREMOR_MDCT
 #define ICODE_ATTR_TREMOR_MDCT ICODE_ATTR
 #endif
@@ -25,4 +21,29 @@
 #define ICODE_ATTR_TREMOR_NOT_MDCT ICODE_ATTR
 #endif
 
+/* Define CPU of large IRAM (MCF5250) */
+#if (CONFIG_CPU == MCF5250)
+// PCM_BUFFER    : 32768 Byte (4096*2*4)
+// WINDOW_LOOKUP :  4608 Byte (128*4 + 1024*4)
+// TOTAL         : 37376
+#define IRAM_IBSS_SIZE 37376
+
+/* Define CPU of large IRAM (PP5022/5024) */
+#elif (CONFIG_CPU == PP5022) || (CONFIG_CPU == PP5024)
+// PCM_BUFFER    : 32768 byte (4096*2*4 or 2048*4*4)
+// WINDOW_LOOKUP :  9216 Byte (256*4 + 2048*4)
+// TOTAL         : 41984
+#define IRAM_IBSS_SIZE 41984
+
+/* Define CPU of Normal IRAM (96KB)*/
+#else
+// PCM_BUFFER    : 16384 Byte (2048*2*4)
+// WINDOW_LOOKUP :  4608 Byte (128*4 + 1024*4)
+// TOTAL         : 20992
+#define IRAM_IBSS_SIZE 20992
+#endif
+
+/* max 2 channels */
+#define CHANNELS 2
+
 // #define _LOW_ACCURACY_
Index: apps/codecs/libtremor/synthesis.c
===================================================================
--- apps/codecs/libtremor/synthesis.c	(revision 20572)
+++ apps/codecs/libtremor/synthesis.c	(working copy)
@@ -25,16 +25,9 @@
 
 #include "os.h"
 
-/* IRAM buffer keep the block pcm data; only for windows size upto 2048
-   for space restrictions.
-   libVorbis 1.1 Oggenc doesn't use larger windows anyway. */
-/* max 2 channels on the ihp-1xx (stereo), 2048 samples (2*2048*4=16Kb) */
-#define IRAM_PCM_END      2048
-#define CHANNELS          2
+static ogg_int32_t *_pcm_vect[CHANNELS] IBSS_ATTR;
+static int ipcm_buf_in_use IBSS_ATTR;
 
-static ogg_int32_t *ipcm_vect[CHANNELS] IBSS_ATTR;
-static ogg_int32_t ipcm_buff[CHANNELS*IRAM_PCM_END] IBSS_ATTR LINE_ATTR;
-
 int vorbis_synthesis(vorbis_block *vb,ogg_packet *op,int decodep)
     ICODE_ATTR_TREMOR_NOT_MDCT;
 int vorbis_synthesis(vorbis_block *vb,ogg_packet *op,int decodep){
@@ -76,23 +69,32 @@
   vb->eofflag=op->e_o_s;
 
   if(decodep && vi->channels<=CHANNELS){
+    vb->pcm = _pcm_vect;
+
     /* alloc pcm passback storage */
     vb->pcmend=ci->blocksizes[vb->W];
-    if (vb->pcmend<=IRAM_PCM_END) {
+    if (vd->iram_pcm_storage >= vb->pcmend) {
       /* use statically allocated iram buffer */
-      vb->pcm = ipcm_vect;
-      for(i=0; i<CHANNELS; i++)
-        vb->pcm[i] = &ipcm_buff[i*IRAM_PCM_END];
+      if(vd->reset_pcmb || vb->pcm[0]==NULL)
+      {
+        // one-time initialisation at codec start NOT for every block synthesis start
+        for(i=0; i<vi->channels; i++)
+          vb->pcm[i] = &vd->iram_pcm[i*vd->iram_pcm_storage];
+        ipcm_buf_in_use = 1;
+      }
     } else {
-      /* dynamic allocation (slower) */
-      vb->pcm=(ogg_int32_t **)_vorbis_block_alloc(vb,sizeof(*vb->pcm)*vi->channels);
-      for(i=0;i<vi->channels;i++)
-        vb->pcm[i]=(ogg_int32_t *)_vorbis_block_alloc(vb,vb->pcmend*sizeof(*vb->pcm[i]));
+      if(vd->reset_pcmb || vb->pcm[0]==NULL)
+      {
+        /* dynamic allocation (slower) */
+        for(i=0;i<vi->channels;i++)
+          vb->pcm[i]=(ogg_int32_t *)_vorbis_block_alloc(vb,vb->pcmend*sizeof(*vb->pcm[i]));
+        ipcm_buf_in_use = 0;
+      }
     }
+    vd->reset_pcmb = false;
 
     /* unpack_header enforces range checking */
     type=ci->map_type[ci->mode_param[mode]->mapping];
-
     return(_mapping_P[type]->inverse(vb,b->mode[mode]));
   }else{
     /* no pcm */
Index: apps/codecs/libtremor/oggmalloc.c
===================================================================
--- apps/codecs/libtremor/oggmalloc.c	(revision 20572)
+++ apps/codecs/libtremor/oggmalloc.c	(working copy)
@@ -1,4 +1,5 @@
 #include "os_types.h"
+#include "misc.h" // for LINE_ATTR
 
 #if defined(CPU_ARM) || defined(CPU_COLDFIRE)
 #include <setjmp.h>
@@ -81,3 +82,22 @@
 {
     tmp_ptr = pos;
 }
+
+/* Allocate IRAM buffer */
+static unsigned char iram_buff[IRAM_IBSS_SIZE] IBSS_ATTR __attribute__ ((aligned (16)));
+static size_t iram_remain;
+
+void iram_malloc_init(void){
+    iram_remain=IRAM_IBSS_SIZE;
+}
+
+void *iram_malloc(size_t size){
+    void* x;
+
+    if(size>iram_remain)return NULL;
+
+    x = &iram_buff[IRAM_IBSS_SIZE-iram_remain];
+    iram_remain-=size;
+
+    return x;
+}
Index: apps/codecs/libtremor/window_lookup.h
===================================================================
--- apps/codecs/libtremor/window_lookup.h	(revision 20572)
+++ apps/codecs/libtremor/window_lookup.h	(working copy)
@@ -51,7 +51,7 @@
   X(0x7ffdcf39), X(0x7fff6dac), X(0x7fffed01), X(0x7fffffc4),
 };
 
-static LOOKUP_T vwin256[128] ICONST_ATTR_TREMOR_WINDOW = {
+static LOOKUP_T vwin256[128] = {
   X(0x0001f018), X(0x00117066), X(0x00306e9e), X(0x005ee5f1),
   X(0x009ccf26), X(0x00ea208b), X(0x0146cdea), X(0x01b2c87f),
   X(0x022dfedf), X(0x02b85ced), X(0x0351cbbd), X(0x03fa317f),
@@ -284,7 +284,7 @@
   X(0x7fffffdd), X(0x7ffffff7), X(0x7fffffff), X(0x7fffffff),
 };
 
-static LOOKUP_T vwin2048[1024] ICONST_ATTR_TREMOR_WINDOW = {
+static LOOKUP_T vwin2048[1024] = {
   X(0x000007c0), X(0x000045c4), X(0x0000c1ca), X(0x00017bd3),
   X(0x000273de), X(0x0003a9eb), X(0x00051df9), X(0x0006d007),
   X(0x0008c014), X(0x000aee1e), X(0x000d5a25), X(0x00100428),
Index: apps/codecs/libtremor/window.c
===================================================================
--- apps/codecs/libtremor/window.c	(revision 20572)
+++ apps/codecs/libtremor/window.c	(working copy)
@@ -68,11 +68,17 @@
   long rightbegin=n/2+n/4-rn/4;
   long rightend=rightbegin+rn/2;
 
-  memset((void *)&d[0], 0, sizeof(ogg_int32_t)*leftbegin);
+  // we can get away with not memsetting to zero, because we know the
+  // overlap-add step doesn't try to add anything outside of the overlapping
+  // window ranges
+//  memset((void *)&d[0], 0, sizeof(ogg_int32_t)*leftbegin);
+
   /* mcf5249_vect_zero(&d[0], leftbegin); */
   vect_mult_fw(&d[leftbegin], &window[lW][0], leftend-leftbegin);
   vect_mult_bw(&d[rightbegin], &window[nW][rn/2-1], rightend-rightbegin);
-  memset((void *)&d[rightend], 0, sizeof(ogg_int32_t)*(n-rightend));
+
+  // as above
+//  memset((void *)&d[rightend], 0, sizeof(ogg_int32_t)*(n-rightend));
   /* mcf5249_vect_zero(&d[rightend], n-rightend); */
 }
 
Index: apps/codecs/libtremor/window.h
===================================================================
--- apps/codecs/libtremor/window.h	(revision 20572)
+++ apps/codecs/libtremor/window.h	(working copy)
@@ -21,7 +21,7 @@
 extern const void *_vorbis_window(int type,int left);
 extern void _vorbis_apply_window(ogg_int32_t *d,const void *window[2],
                                  long *blocksizes,
-                                 int lW,int W,int nW);
+                                 int lW,int W,int nW) ICODE_ATTR;
 
 
 #endif
Index: apps/codecs/libtremor/res012.c
===================================================================
--- apps/codecs/libtremor/res012.c	(revision 20572)
+++ apps/codecs/libtremor/res012.c	(working copy)
@@ -172,8 +172,6 @@
   return(look);
 }
 
-#define CHANNELS 2
-
 /* a truncated packet here just means 'stop working'; it's not an error */
 static int _01inverse(vorbis_block *vb,vorbis_look_residue *vl,
                       ogg_int32_t **in,int ch,
Index: apps/codecs/libtremor/ivorbiscodec.h
===================================================================
--- apps/codecs/libtremor/ivorbiscodec.h	(revision 20572)
+++ apps/codecs/libtremor/ivorbiscodec.h	(working copy)
@@ -76,6 +76,10 @@
   ogg_int64_t sequence;
 
   void       *backend_state;
+
+  ogg_int32_t *iram_pcm;         // IRAM PCM buffer
+  int          iram_pcm_storage;
+  bool         reset_pcmb;
 } vorbis_dsp_state;
 
 typedef struct vorbis_block{
Index: apps/codecs/libtremor/asm_arm.h
===================================================================
--- apps/codecs/libtremor/asm_arm.h	(revision 20572)
+++ apps/codecs/libtremor/asm_arm.h	(working copy)
@@ -99,106 +99,121 @@
 #define _V_VECT_OPS
 
 /* asm versions of vector operations for block.c, window.c */
+/* SOME IMPORTANT NOTES: vect_mult_bw does NOT do a final shift,
+   meaning that result of vect_mult_bw is only 31 bits not 32. We do the
+   shift in-place in vect_add_xxxx instead to save an instruction */
 static inline
-void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n)
+void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
 {
-  while (n>=4) {
-    asm volatile ("ldmia %[x], {r0, r1, r2, r3};"
+  /* first arg is right subframe of previous frame and second arg
+     is left subframe of current frame. overlap left onto right overwriting
+     the right subframe */
+
+  do{
+    asm volatile (
+                  "ldmia %[x], {r0, r1, r2, r3};"
                   "ldmia %[y]!, {r4, r5, r6, r7};"
-                  "add r0, r0, r4;"
-                  "add r1, r1, r5;"
-                  "add r2, r2, r6;"
-                  "add r3, r3, r7;"
+                  "add r0, r4, r0, lsl #1;"
+                  "add r1, r5, r1, lsl #1;"
+                  "add r2, r6, r2, lsl #1;"
+                  "add r3, r7, r3, lsl #1;"
                   "stmia %[x]!, {r0, r1, r2, r3};"
+                  "ldmia %[x], {r0, r1, r2, r3};"
+                  "ldmia %[y]!, {r4, r5, r6, r7};"
+                  "add r0, r4, r0, lsl #1;"
+                  "add r1, r5, r1, lsl #1;"
+                  "add r2, r6, r2, lsl #1;"
+                  "add r3, r7, r3, lsl #1;"
+                  "stmia %[x]!, {r0, r1, r2, r3};"
                   : [x] "+r" (x), [y] "+r" (y)
                   : : "r0", "r1", "r2", "r3",
                   "r4", "r5", "r6", "r7",
                   "memory");
-    n -= 4;
-  }
-  /* add final elements */
-  while (n>0) {
-    *x++ += *y++;
-    n--;
-  }
+    n -= 8;
+  } while (n);
 }
 
 static inline
-void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n)
+void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
 {
-  while (n>=4) {
-    asm volatile ("ldmia %[y]!, {r0, r1, r2, r3};"
+  /* first arg is left subframe of current frame and second arg
+     is right subframe of previous frame. overlap right onto left overwriting
+     the LEFT subframe */
+  do{
+    asm volatile (
+                  "ldmia %[x], {r0, r1, r2, r3};"
+                  "ldmia %[y]!, {r4, r5, r6, r7};"
+                  "add r0, r0, r4, lsl #1;"
+                  "add r1, r1, r5, lsl #1;"
+                  "add r2, r2, r6, lsl #1;"
+                  "add r3, r3, r7, lsl #1;"
                   "stmia %[x]!, {r0, r1, r2, r3};"
+                  "ldmia %[x], {r0, r1, r2, r3};"
+                  "ldmia %[y]!, {r4, r5, r6, r7};"
+                  "add r0, r0, r4, lsl #1;"
+                  "add r1, r1, r5, lsl #1;"
+                  "add r2, r2, r6, lsl #1;"
+                  "add r3, r3, r7, lsl #1;"
+                  "stmia %[x]!, {r0, r1, r2, r3};"
                   : [x] "+r" (x), [y] "+r" (y)
                   : : "r0", "r1", "r2", "r3",
+                  "r4", "r5", "r6", "r7",
                   "memory");
-    n -= 4;
-  }
-  /* copy final elements */
-  while (n>0) {
-    *x++ = *y++;
-    n--;
-  }
+    n -= 8;
+  } while (n);
 }
 
 static inline
 void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
 {
-  while (n>=4) {
-    asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
+  /* Note, mult_fw uses MULT31 */
+  do{
+    asm volatile (
+                  "ldmia %[d], {r0, r1, r2, r3};"
                   "ldmia %[w]!, {r4, r5, r6, r7};"
-                  "smull r8, r9, r0, r4;"
-                  "mov   r0, r9, lsl #1;"
-                  "smull r8, r9, r1, r5;"
-                  "mov   r1, r9, lsl #1;"
-                  "smull r8, r9, r2, r6;"
-                  "mov   r2, r9, lsl #1;"
-                  "smull r8, r9, r3, r7;"
-                  "mov   r3, r9, lsl #1;"
+                  "smull r8, r0, r4, r0;"
+                  "mov   r0, r0, lsl #1;"
+                  "smull r8, r1, r5, r1;"
+                  "mov   r1, r1, lsl #1;"
+                  "smull r8, r2, r6, r2;"
+                  "mov   r2, r2, lsl #1;"
+                  "smull r8, r3, r7, r3;"
+                  "mov   r3, r3, lsl #1;"
                   "stmia %[d]!, {r0, r1, r2, r3};"
                   : [d] "+r" (data), [w] "+r" (window)
                   : : "r0", "r1", "r2", "r3",
-                  "r4", "r5", "r6", "r7", "r8", "r9",
+                  "r4", "r5", "r6", "r7", "r8",
                   "memory", "cc");
     n -= 4;
-  }
-  while(n>0) {
-    *data = MULT31(*data, *window);
-    data++;
-    window++;
-    n--;
-  }
+  } while (n);
 }
 
 static inline
 void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
 {
-  while (n>=4) {
+  /* NOTE mult_bw uses MULT_32 i.e. doesn't shift result left at end */
+  /* On ARM, we can do the shift at the same time as the overlap-add */
+  do{
     asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
                   "ldmda %[w]!, {r4, r5, r6, r7};"
-                  "smull r8, r9, r0, r7;"
-                  "mov   r0, r9, lsl #1;"
-                  "smull r8, r9, r1, r6;"
-                  "mov   r1, r9, lsl #1;"
-                  "smull r8, r9, r2, r5;"
-                  "mov   r2, r9, lsl #1;"
-                  "smull r8, r9, r3, r4;"
-                  "mov   r3, r9, lsl #1;"
+                  "smull r8, r0, r7, r0;"
+                  "smull r7, r1, r6, r1;"
+                  "smull r6, r2, r5, r2;"
+                  "smull r5, r3, r4, r3;"
                   "stmia %[d]!, {r0, r1, r2, r3};"
                   : [d] "+r" (data), [w] "+r" (window)
                   : : "r0", "r1", "r2", "r3",
-                  "r4", "r5", "r6", "r7", "r8", "r9",
+                  "r4", "r5", "r6", "r7", "r8",
                   "memory", "cc");
     n -= 4;
-  }
-  while(n>0) {
-    *data = MULT31(*data, *window);
-    data++;
-    window--;
-    n--;
-  }
+  } while (n);
 }
+static inline void vect_copy(ogg_int32_t *x, const ogg_int32_t *y, int n)
+{
+  memcpy(x,y,n*sizeof(ogg_int32_t));
+}
+
 
 #endif
 
 #endif
Index: apps/codecs/libtremor/block.c
===================================================================
--- apps/codecs/libtremor/block.c	(revision 20572)
+++ apps/codecs/libtremor/block.c	(working copy)
@@ -11,7 +11,7 @@
  *                                                                  *
  ********************************************************************
 
- function: PCM data vector blocking, windowing and dis/reassembly
+ function: data vector blocking, windowing and dis/reassembly
 
  ********************************************************************/
 
@@ -36,6 +36,11 @@
   return(ret);
 }
 
+static ogg_int32_t* _pcmp  [CHANNELS] IBSS_ATTR;
+static ogg_int32_t* _pcmbp[CHANNELS] IBSS_ATTR;
+static ogg_int32_t* _pcmret[CHANNELS] IBSS_ATTR;
+static int internal_pcm_in_use IBSS_ATTR;
+
 /* pcm accumulator examples (not exhaustive):
 
  <-------------- lW ---------------->
@@ -145,19 +150,45 @@
 
 static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){
   int i;
+  long b_size[2];
+  LOOKUP_TNC *iramposw;
+  ogg_int32_t *internal_pcm=NULL;
+
   codec_setup_info *ci=(codec_setup_info *)vi->codec_setup;
   private_state *b=NULL;
 
   memset(v,0,sizeof(*v));
+  v->reset_pcmb=true;
   b=(private_state *)(v->backend_state=_ogg_calloc(1,sizeof(*b)));
 
   v->vi=vi;
   b->modebits=ilog(ci->modes);
+
+  /* allocate IRAM buffer for the PCM data */
+  iram_malloc_init();
+  v->iram_pcm=(ogg_int32_t *)iram_malloc(vi->channels*ci->blocksizes[1]*sizeof(ogg_int32_t));
+  if(v->iram_pcm != NULL) v->iram_pcm_storage=ci->blocksizes[1];
+  else v->iram_pcm_storage=0;
+
+  v->centerW=0;
+
+  /* Vorbis I uses only window type 0 */
+  b_size[0]=ci->blocksizes[0]/2;
+  b_size[1]=ci->blocksizes[1]/2;
+  b->window[0]=_vorbis_window(0,b_size[0]);
+  b->window[1]=_vorbis_window(0,b_size[1]);
+
+  /* If fast internal ram sizes are enough, copy data there. */
+  /* give preference to the larger window over the smaller window
+     (on the assumption that both windows are equally likely used) */
+  for(i=1; i>=0; i--){
+    iramposw=(LOOKUP_TNC *)iram_malloc(b_size[i]*sizeof(LOOKUP_TNC));
+    if(iramposw!=NULL) {
+      memcpy(iramposw, b->window[i], b_size[i]*sizeof(LOOKUP_TNC));
+      b->window[i]=iramposw;
+    }
+  }
 
-  /* Vorbis I uses only window type 0 */
-  b->window[0]=_vorbis_window(0,ci->blocksizes[0]/2);
-  b->window[1]=_vorbis_window(0,ci->blocksizes[1]/2);
-
   /* finish the codebooks */
   if(!ci->fullbooks){
     ci->fullbooks=(codebook *)_ogg_calloc(ci->books,sizeof(*ci->fullbooks));
@@ -169,14 +200,34 @@
     }
   }
 
+  // if we can get away with it, put a double buffer into IRAM too, so that
+  // overlap-add runs iram-to-iram
   v->pcm_storage=ci->blocksizes[1];
-  v->pcm=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcm));
-  v->pcmb=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcmb));
-  v->pcmret=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcmret));
+  v->pcm=_pcmp;
+  v->pcmret=_pcmret;
+  v->pcmb=_pcmbp;
 
-  for(i=0;i<vi->channels;i++)
-    v->pcm[i]=(ogg_int32_t *)_ogg_calloc(v->pcm_storage,sizeof(*v->pcm[i]));
-
+  _pcmp[0]=NULL;
+  _pcmp[1]=NULL;
+  _pcmbp[0]=NULL;
+  _pcmbp[1]=NULL;
+
+  if(NULL != (internal_pcm = iram_malloc(vi->channels*v->pcm_storage*sizeof(ogg_int32_t))))
+  {
+    // one-time initialisation at codec start or on switch from
+    // blocksizes greater than IRAM_PCM_END to sizes that fit
+    for(i=0;i<vi->channels;i++)
+      v->pcm[i]=&internal_pcm[i*v->pcm_storage];
+    internal_pcm_in_use = true;
+  }
+  else
+  {
+    // one-time initialisation at codec start or on switch from
+    // blocksizes that fit in IRAM_PCM_END to those that don't
+    for(i=0;i<vi->channels;i++)
+      v->pcm[i]=(ogg_int32_t *)_ogg_calloc(v->pcm_storage,sizeof(*v->pcm[i]));
+    internal_pcm_in_use = false;
+  }
 
   /* all 1 (large block) or 0 (small block) */
   /* explicitly set for the sake of clarity */
@@ -203,13 +254,17 @@
   ci=vi->codec_setup;
   if(!ci)return -1;
 
-  v->centerW=ci->blocksizes[1]/2;
-  v->pcm_current=v->centerW;
+  v->centerW=0;
+  v->pcm_current=0;
 
   v->pcm_returned=-1;
   v->granulepos=-1;
   v->sequence=-1;
   ((private_state *)(v->backend_state))->sample_count=-1;
+
+  // indicate to synthesis code that buffers no longer valid and
+  // will need to get fresh ones
+  v->reset_pcmb = true;
 
   return(0);
 }
@@ -228,11 +283,12 @@
     codec_setup_info *ci=(codec_setup_info *)(vi?vi->codec_setup:NULL);
     private_state *b=(private_state *)v->backend_state;
 
-    if(v->pcm){
-      for(i=0;i<vi->channels;i++)
-        if(v->pcm[i])_ogg_free(v->pcm[i]);
-      _ogg_free(v->pcm);
-      if(v->pcmret)_ogg_free(v->pcmret);
+    if(!internal_pcm_in_use)
+    {
+      if(v->pcm){
+        for(i=0;i<vi->channels;i++)
+          if(v->pcm[i])_ogg_free(v->pcm[i]);
+      }
     }
 
     /* free mode lookups; these are actually vorbis_look_mapping structs */
@@ -258,7 +314,7 @@
    calling (as it relies on the previous block). */
 
 int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb)
-    ICODE_ATTR_TREMOR_NOT_MDCT;
+    ICODE_ATTR;
 int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
   vorbis_info *vi=v->vi;
   codec_setup_info *ci=(codec_setup_info *)vi->codec_setup;
@@ -281,79 +337,86 @@
 
   if(vb->pcm){  /* no pcm to process if vorbis_synthesis_trackonly
                    was called on block */
-    int n=ci->blocksizes[v->W]/2;
+    int pn=0;
+
+    if(internal_pcm_in_use)
+    {
+      pn = ci->blocksizes[v->lW]/2;
+    }
+    else
+    {
+      pn = v->centerW;
+      v->centerW = (ci->blocksizes[1]/2) - v->centerW;
+    }
+
     int n0=ci->blocksizes[0]/2;
     int n1=ci->blocksizes[1]/2;
-
-    int thisCenter;
-    int prevCenter;
-
-    if(v->centerW){
-      thisCenter=n1;
-      prevCenter=0;
-    }else{
-      thisCenter=0;
-      prevCenter=n1;
-    }
-
-    /* v->pcm is now used like a two-stage double buffer.  We don't want
-       to have to constantly shift *or* adjust memory usage.  Don't
-       accept a new block until the old is shifted out */
-
+
     /* overlap/add PCM */
-
-    for(j=0;j<vi->channels;j++){
-      /* the overlap/add section */
-      if(v->lW){
-        if(v->W){
-          /* large/large */
-          ogg_int32_t *pcm=v->pcm[j]+prevCenter;
-          ogg_int32_t *p=vb->pcm[j];
-          vect_add(p, pcm, n1);
-          v->pcmb[j]=p;
-        }else{
-          /* large/small */
-          ogg_int32_t *pcm=v->pcm[j]+prevCenter+n1/2-n0/2;
-          ogg_int32_t *p=vb->pcm[j];
-          vect_add(pcm, p, n0);
-          v->pcmb[j]=v->pcm[j]+prevCenter;
+    // nb nothing to overlap with on first block so don't bother
+    if(v->pcm_returned!=-1)
+    {
+      for(j=0;j<vi->channels;j++)
+      {
+        ogg_int32_t *pcm=v->pcm[j]+pn;
+        ogg_int32_t *p=vb->pcm[j];
+
+        /* the overlap/add section */
+        if(v->lW == v->W)
+        {
+          /* large/large or small/small */
+          vect_add_right_left(pcm,p,ci->blocksizes[v->W]/2);
+          v->pcmb[j]=pcm;
         }
-      }else{
-        if(v->W){
-          /* small/large */
-          ogg_int32_t *pcm=v->pcm[j]+prevCenter;
-          ogg_int32_t *p=vb->pcm[j]+n1/2-n0/2;
-          vect_add(p, pcm, n0);
-          v->pcmb[j]=p;
-        }else{
-          /* small/small */
-          ogg_int32_t *pcm=v->pcm[j]+prevCenter;
-          ogg_int32_t *p=vb->pcm[j];
-          vect_add(p, pcm, n0);
-          v->pcmb[j]=p;
+        else if (!v->W)
+        {
+          /* large/small */
+          vect_add_right_left(pcm + n1/2 - n0/2, p, n0);
+          v->pcmb[j]=pcm;
         }
+        else
+        {
+          /* small/large */
+          p += n1/2 - n0/2;
+          vect_add_left_right(p,pcm,n0);
+          v->pcmb[j]=p;
+        }
       }
-
-      /* the copy section */
+    }
+
+    /* the copy section */
+    if(internal_pcm_in_use)
+    {
+      // just flip the pointers over as we have a double buffer in iram
+      ogg_int32_t *p;
+      p=v->pcm[0];
+      v->pcm[0]=vb->pcm[0];
+      vb->pcm[0] = p;
+      p=v->pcm[1];
+      v->pcm[1]=vb->pcm[1];
+      vb->pcm[1] = p;
+    }
+    else
+    {
+      for(j=0;j<vi->channels;j++)
       {
-        ogg_int32_t *pcm=v->pcm[j]+thisCenter;
-        ogg_int32_t *p=vb->pcm[j]+n;
-        vect_copy(pcm, p, n);
+        // at best only vb->pcm is in iram, and that's where we do the
+        // synthesis, so we copy out the right-hand subframe of last
+        // synthesis into (noniram) local buffer so we can still do
+        // synth in iram
+        vect_copy(v->pcm[j]+v->centerW,
+                  vb->pcm[j]+ci->blocksizes[v->W]/2,
+                  ci->blocksizes[v->W]/2);
       }
     }
 
-    if(v->centerW)
-      v->centerW=0;
-    else
-      v->centerW=n1;
-
     /* deal with initial packet state; we do this using the explicit
        pcm_returned==-1 flag otherwise we're sensitive to first block
        being short or long */
 
     if(v->pcm_returned==-1){
-      v->pcm_returned=thisCenter;
-      v->pcm_current=thisCenter;
+      v->pcm_returned=0;
+      v->pcm_current=0;
     }else{
       v->pcm_returned=0;
       v->pcm_current=ci->blocksizes[v->lW]/4+ci->blocksizes[v->W]/4;