summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Hooper <dave@beermex.com>2009-04-25 11:25:13 +0000
committerDave Hooper <dave@beermex.com>2009-04-25 11:25:13 +0000
commit67fb5415f78a3198030a6285d1ccc641044f149b (patch)
tree1af65f6512f42361a5e83207d4b76b00265776cd
parent738824ccdd327da7d9d13fe9d2a48e74c40ad62f (diff)
downloadrockbox-67fb5415f78a3198030a6285d1ccc641044f149b.tar.gz
rockbox-67fb5415f78a3198030a6285d1ccc641044f149b.zip
Commit FS#9882 - make better use of iram at different quality encodings, remove redundant memsets, implement doublebuffer if it will fit in iram to save a memcpy each frame, and some alignment fixes for coldfire
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@20783 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/codecs/lib/asm_mcf5249.h8
-rw-r--r--apps/codecs/libtremor/asm_arm.h134
-rw-r--r--apps/codecs/libtremor/asm_mcf5249.h21
-rw-r--r--apps/codecs/libtremor/block.c222
-rw-r--r--apps/codecs/libtremor/config-tremor.h29
-rw-r--r--apps/codecs/libtremor/ivorbiscodec.h4
-rw-r--r--apps/codecs/libtremor/mapping0.c2
-rw-r--r--apps/codecs/libtremor/misc.h20
-rw-r--r--apps/codecs/libtremor/oggmalloc.c24
-rw-r--r--apps/codecs/libtremor/os_types.h4
-rw-r--r--apps/codecs/libtremor/res012.c2
-rw-r--r--apps/codecs/libtremor/synthesis.c36
-rw-r--r--apps/codecs/libtremor/window.c12
-rw-r--r--apps/codecs/libtremor/window_lookup.h4
14 files changed, 335 insertions, 187 deletions
diff --git a/apps/codecs/lib/asm_mcf5249.h b/apps/codecs/lib/asm_mcf5249.h
index 20899f0a5b..e3dc8dd684 100644
--- a/apps/codecs/lib/asm_mcf5249.h
+++ b/apps/codecs/lib/asm_mcf5249.h
@@ -143,7 +143,7 @@ static inline
void vect_add(int32_t *x, int32_t *y, int n)
{
/* align to 16 bytes */
- while(n>0 && (int)x&16) {
+ while(n>0 && (int)x&15) {
*x++ += *y++;
n--;
}
@@ -177,7 +177,7 @@ static inline
void vect_copy(int32_t *x, int32_t *y, int n)
{
/* align to 16 bytes */
- while(n>0 && (int)x&16) {
+ while(n>0 && (int)x&15) {
*x++ = *y++;
n--;
}
@@ -204,7 +204,7 @@ static inline
void vect_mult_fw(int32_t *data, int32_t *window, int n)
{
/* ensure data is aligned to 16-bytes */
- while(n>0 && (int)data%16) {
+ while(n>0 && (int)data&15) {
*data = MULT31(*data, *window);
data++;
window++;
@@ -258,7 +258,7 @@ static inline
void vect_mult_bw(int32_t *data, int32_t *window, int n)
{
/* ensure at least data is aligned to 16-bytes */
- while(n>0 && (int)data%16) {
+ while(n>0 && (int)data&15) {
*data = MULT31(*data, *window);
data++;
window--;
diff --git a/apps/codecs/libtremor/asm_arm.h b/apps/codecs/libtremor/asm_arm.h
index bc09ac5170..5a8109841f 100644
--- a/apps/codecs/libtremor/asm_arm.h
+++ b/apps/codecs/libtremor/asm_arm.h
@@ -99,104 +99,120 @@ static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b,
#define _V_VECT_OPS
/* asm versions of vector operations for block.c, window.c */
+/* SOME IMPORTANT NOTES: this implementation of vect_mult_bw does
+ NOT do a final shift, meaning that the result of vect_mult_bw is
+ only 31 bits not 32. This is so that we can do the shift in-place
+ in vect_add_xxxx instead to save one instruction for each mult on arm */
static inline
-void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n)
+void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
{
- while (n>=4) {
- asm volatile ("ldmia %[x], {r0, r1, r2, r3};"
+ /* first arg is right subframe of previous frame and second arg
+ is left subframe of current frame. overlap left onto right overwriting
+ the right subframe */
+
+ do{
+ asm volatile (
+ "ldmia %[x], {r0, r1, r2, r3};"
"ldmia %[y]!, {r4, r5, r6, r7};"
- "add r0, r0, r4;"
- "add r1, r1, r5;"
- "add r2, r2, r6;"
- "add r3, r3, r7;"
+ "add r0, r4, r0, lsl #1;"
+ "add r1, r5, r1, lsl #1;"
+ "add r2, r6, r2, lsl #1;"
+ "add r3, r7, r3, lsl #1;"
+ "stmia %[x]!, {r0, r1, r2, r3};"
+ "ldmia %[x], {r0, r1, r2, r3};"
+ "ldmia %[y]!, {r4, r5, r6, r7};"
+ "add r0, r4, r0, lsl #1;"
+ "add r1, r5, r1, lsl #1;"
+ "add r2, r6, r2, lsl #1;"
+ "add r3, r7, r3, lsl #1;"
"stmia %[x]!, {r0, r1, r2, r3};"
: [x] "+r" (x), [y] "+r" (y)
: : "r0", "r1", "r2", "r3",
"r4", "r5", "r6", "r7",
"memory");
- n -= 4;
- }
- /* add final elements */
- while (n>0) {
- *x++ += *y++;
- n--;
- }
+ n -= 8;
+ } while (n);
}
static inline
-void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n)
+void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
{
- while (n>=4) {
- asm volatile ("ldmia %[y]!, {r0, r1, r2, r3};"
+ /* first arg is left subframe of current frame and second arg
+ is right subframe of previous frame. overlap right onto left overwriting
+ the LEFT subframe */
+ do{
+ asm volatile (
+ "ldmia %[x], {r0, r1, r2, r3};"
+ "ldmia %[y]!, {r4, r5, r6, r7};"
+ "add r0, r0, r4, lsl #1;"
+ "add r1, r1, r5, lsl #1;"
+ "add r2, r2, r6, lsl #1;"
+ "add r3, r3, r7, lsl #1;"
+ "stmia %[x]!, {r0, r1, r2, r3};"
+ "ldmia %[x], {r0, r1, r2, r3};"
+ "ldmia %[y]!, {r4, r5, r6, r7};"
+ "add r0, r0, r4, lsl #1;"
+ "add r1, r1, r5, lsl #1;"
+ "add r2, r2, r6, lsl #1;"
+ "add r3, r3, r7, lsl #1;"
"stmia %[x]!, {r0, r1, r2, r3};"
: [x] "+r" (x), [y] "+r" (y)
: : "r0", "r1", "r2", "r3",
+ "r4", "r5", "r6", "r7",
"memory");
- n -= 4;
- }
- /* copy final elements */
- while (n>0) {
- *x++ = *y++;
- n--;
- }
+ n -= 8;
+ } while (n);
}
static inline
void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
- while (n>=4) {
- asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
+ /* Note, mult_fw uses MULT31 */
+ do{
+ asm volatile (
+ "ldmia %[d], {r0, r1, r2, r3};"
"ldmia %[w]!, {r4, r5, r6, r7};"
- "smull r8, r9, r0, r4;"
- "mov r0, r9, lsl #1;"
- "smull r8, r9, r1, r5;"
- "mov r1, r9, lsl #1;"
- "smull r8, r9, r2, r6;"
- "mov r2, r9, lsl #1;"
- "smull r8, r9, r3, r7;"
- "mov r3, r9, lsl #1;"
+ "smull r8, r0, r4, r0;"
+ "mov r0, r0, lsl #1;"
+ "smull r8, r1, r5, r1;"
+ "mov r1, r1, lsl #1;"
+ "smull r8, r2, r6, r2;"
+ "mov r2, r2, lsl #1;"
+ "smull r8, r3, r7, r3;"
+ "mov r3, r3, lsl #1;"
"stmia %[d]!, {r0, r1, r2, r3};"
: [d] "+r" (data), [w] "+r" (window)
: : "r0", "r1", "r2", "r3",
- "r4", "r5", "r6", "r7", "r8", "r9",
+ "r4", "r5", "r6", "r7", "r8",
"memory", "cc");
n -= 4;
- }
- while(n>0) {
- *data = MULT31(*data, *window);
- data++;
- window++;
- n--;
- }
+ } while (n);
}
static inline
void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
- while (n>=4) {
+ /* NOTE mult_bw uses MULT_32 i.e. doesn't shift result left at end */
+ /* On ARM, we can do the shift at the same time as the overlap-add */
+ do{
asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
"ldmda %[w]!, {r4, r5, r6, r7};"
- "smull r8, r9, r0, r7;"
- "mov r0, r9, lsl #1;"
- "smull r8, r9, r1, r6;"
- "mov r1, r9, lsl #1;"
- "smull r8, r9, r2, r5;"
- "mov r2, r9, lsl #1;"
- "smull r8, r9, r3, r4;"
- "mov r3, r9, lsl #1;"
+ "smull r8, r0, r7, r0;"
+ "smull r7, r1, r6, r1;"
+ "smull r6, r2, r5, r2;"
+ "smull r5, r3, r4, r3;"
"stmia %[d]!, {r0, r1, r2, r3};"
: [d] "+r" (data), [w] "+r" (window)
: : "r0", "r1", "r2", "r3",
- "r4", "r5", "r6", "r7", "r8", "r9",
+ "r4", "r5", "r6", "r7", "r8",
"memory", "cc");
n -= 4;
- }
- while(n>0) {
- *data = MULT31(*data, *window);
- data++;
- window--;
- n--;
- }
+ } while (n);
+}
+
+static inline void vect_copy(ogg_int32_t *x, const ogg_int32_t *y, int n)
+{
+ memcpy(x,y,n*sizeof(ogg_int32_t));
}
#endif
diff --git a/apps/codecs/libtremor/asm_mcf5249.h b/apps/codecs/libtremor/asm_mcf5249.h
index 64dfb1b785..224a861afd 100644
--- a/apps/codecs/libtremor/asm_mcf5249.h
+++ b/apps/codecs/libtremor/asm_mcf5249.h
@@ -140,10 +140,10 @@ void XNPROD31(ogg_int32_t a, ogg_int32_t b,
/* asm versions of vector operations for block.c, window.c */
/* assumes MAC is initialized & accumulators cleared */
static inline
-void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n)
+void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
{
/* align to 16 bytes */
- while(n>0 && (int)x&16) {
+ while(n>0 && (int)x&15) {
*x++ += *y++;
n--;
}
@@ -172,12 +172,20 @@ void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n)
n--;
}
}
+static inline
+void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
+{
+ /* coldfire asm has symmetrical versions of vect_add_right_left
+ and vect_add_left_right (since its vect_mult_fw and vect_mult_bw
+ are themselves symmetrical, i.e. both use MULT31) */
+ vect_add_right_left(x, y, n );
+}
static inline
-void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n)
+void vect_copy(ogg_int32_t *x, const ogg_int32_t *y, int n)
{
/* align to 16 bytes */
- while(n>0 && (int)x&16) {
+ while(n>0 && (int)x&15) {
*x++ = *y++;
n--;
}
@@ -199,12 +207,11 @@ void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n)
}
}
-
static inline
void vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
/* ensure data is aligned to 16-bytes */
- while(n>0 && (int)data%16) {
+ while(n>0 && (int)data&15) {
*data = MULT31(*data, *window);
data++;
window++;
@@ -258,7 +265,7 @@ static inline
void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
{
/* ensure at least data is aligned to 16-bytes */
- while(n>0 && (int)data%16) {
+ while(n>0 && (int)data&15) {
*data = MULT31(*data, *window);
data++;
window--;
diff --git a/apps/codecs/libtremor/block.c b/apps/codecs/libtremor/block.c
index e609fc44f7..eb087e12a9 100644
--- a/apps/codecs/libtremor/block.c
+++ b/apps/codecs/libtremor/block.c
@@ -36,6 +36,13 @@ static int ilog(unsigned int v){
return(ret);
}
+static ogg_int32_t* _pcmp [CHANNELS] IBSS_ATTR;
+static ogg_int32_t* _pcmbp[CHANNELS] IBSS_ATTR;
+static ogg_int32_t* _pcmret[CHANNELS] IBSS_ATTR;
+/* if true, we have both pcm buffers in iram and we use a bufferflip.
+ if false, we have one in iram and one in mem, and we use a memcpy */
+static bool iram_pcm_doublebuffer IBSS_ATTR;
+
/* pcm accumulator examples (not exhaustive):
<-------------- lW ---------------->
@@ -145,18 +152,44 @@ int vorbis_block_clear(vorbis_block *vb){
static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){
int i;
+ long b_size[2];
+ LOOKUP_TNC *iramposw;
+ ogg_int32_t *internal_pcm=NULL;
+
codec_setup_info *ci=(codec_setup_info *)vi->codec_setup;
private_state *b=NULL;
memset(v,0,sizeof(*v));
+ v->reset_pcmb=true;
b=(private_state *)(v->backend_state=_ogg_calloc(1,sizeof(*b)));
v->vi=vi;
b->modebits=ilog(ci->modes);
-
+
+ /* allocate IRAM buffer for the PCM data generated by synthesis */
+ iram_malloc_init();
+ v->iram_pcm=(ogg_int32_t *)iram_malloc(vi->channels*ci->blocksizes[1]*sizeof(ogg_int32_t));
+ if(v->iram_pcm != NULL) v->iram_pcm_storage=ci->blocksizes[1];
+ else v->iram_pcm_storage=0;
+
+ v->centerW=0;
+
/* Vorbis I uses only window type 0 */
- b->window[0]=_vorbis_window(0,ci->blocksizes[0]/2);
- b->window[1]=_vorbis_window(0,ci->blocksizes[1]/2);
+ b_size[0]=ci->blocksizes[0]/2;
+ b_size[1]=ci->blocksizes[1]/2;
+ b->window[0]=_vorbis_window(0,b_size[0]);
+ b->window[1]=_vorbis_window(0,b_size[1]);
+
+ /* allocate IRAM buffer for window tables too, if sufficient iram available */
+ /* give preference to the larger window over the smaller window
+ (on the assumption that both windows are equally likely used) */
+ for(i=1; i>=0; i--){
+ iramposw=(LOOKUP_TNC *)iram_malloc(b_size[i]*sizeof(LOOKUP_TNC));
+ if(iramposw!=NULL) {
+ memcpy(iramposw, b->window[i], b_size[i]*sizeof(LOOKUP_TNC));
+ b->window[i]=iramposw;
+ }
+ }
/* finish the codebooks */
if(!ci->fullbooks){
@@ -169,14 +202,34 @@ static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){
}
}
+ /* if we can get away with it, put a double buffer into IRAM too, so that
+ overlap-add runs iram-to-iram and we avoid needing to memcpy */
v->pcm_storage=ci->blocksizes[1];
- v->pcm=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcm));
- v->pcmb=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcmb));
- v->pcmret=(ogg_int32_t **)_ogg_malloc(vi->channels*sizeof(*v->pcmret));
-
- for(i=0;i<vi->channels;i++)
- v->pcm[i]=(ogg_int32_t *)_ogg_calloc(v->pcm_storage,sizeof(*v->pcm[i]));
-
+ v->pcm=_pcmp;
+ v->pcmret=_pcmret;
+ v->pcmb=_pcmbp;
+
+ _pcmp[0]=NULL;
+ _pcmp[1]=NULL;
+ _pcmbp[0]=NULL;
+ _pcmbp[1]=NULL;
+
+ if(NULL != (internal_pcm = iram_malloc(vi->channels*v->pcm_storage*sizeof(ogg_int32_t))))
+ {
+ /* one-time initialisation at codec start or on switch from
+ blocksizes greater than IRAM_PCM_END to sizes that fit */
+ for(i=0;i<vi->channels;i++)
+ v->pcm[i]=&internal_pcm[i*v->pcm_storage];
+ iram_pcm_doublebuffer = true;
+ }
+ else
+ {
+ /* one-time initialisation at codec start or on switch from
+ blocksizes that fit in IRAM_PCM_END to those that don't */
+ for(i=0;i<vi->channels;i++)
+ v->pcm[i]=(ogg_int32_t *)_ogg_calloc(v->pcm_storage,sizeof(*v->pcm[i]));
+ iram_pcm_doublebuffer = false;
+ }
/* all 1 (large block) or 0 (small block) */
/* explicitly set for the sake of clarity */
@@ -203,13 +256,17 @@ int vorbis_synthesis_restart(vorbis_dsp_state *v){
ci=vi->codec_setup;
if(!ci)return -1;
- v->centerW=ci->blocksizes[1]/2;
- v->pcm_current=v->centerW;
+ v->centerW=0;
+ v->pcm_current=0;
v->pcm_returned=-1;
v->granulepos=-1;
v->sequence=-1;
((private_state *)(v->backend_state))->sample_count=-1;
+
+ /* indicate to synthesis code that buffer pointers no longer valid
+ (if we're using double pcm buffer) and will need to reset them */
+ v->reset_pcmb = true;
return(0);
}
@@ -228,11 +285,12 @@ void vorbis_dsp_clear(vorbis_dsp_state *v){
codec_setup_info *ci=(codec_setup_info *)(vi?vi->codec_setup:NULL);
private_state *b=(private_state *)v->backend_state;
- if(v->pcm){
- for(i=0;i<vi->channels;i++)
- if(v->pcm[i])_ogg_free(v->pcm[i]);
- _ogg_free(v->pcm);
- if(v->pcmret)_ogg_free(v->pcmret);
+ if(!iram_pcm_doublebuffer)
+ {
+ if(v->pcm){
+ for(i=0;i<vi->channels;i++)
+ if(v->pcm[i])_ogg_free(v->pcm[i]);
+ }
}
/* free mode lookups; these are actually vorbis_look_mapping structs */
@@ -258,7 +316,7 @@ void vorbis_dsp_clear(vorbis_dsp_state *v){
calling (as it relies on the previous block). */
int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb)
- ICODE_ATTR_TREMOR_NOT_MDCT;
+ ICODE_ATTR;
int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
vorbis_info *vi=v->vi;
codec_setup_info *ci=(codec_setup_info *)vi->codec_setup;
@@ -278,85 +336,91 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
}
v->sequence=vb->sequence;
+ int n=ci->blocksizes[v->W]/2;
+ int ln=ci->blocksizes[v->lW]/2;
- if(vb->pcm){ /* no pcm to process if vorbis_synthesis_trackonly
- was called on block */
- int n=ci->blocksizes[v->W]/2;
+ if(LIKELY(vb->pcm)){ /* no pcm to process if vorbis_synthesis_trackonly
+ was called on block */
+ int prevCenter;
int n0=ci->blocksizes[0]/2;
int n1=ci->blocksizes[1]/2;
-
- int thisCenter;
- int prevCenter;
-
- if(v->centerW){
- thisCenter=n1;
- prevCenter=0;
- }else{
- thisCenter=0;
- prevCenter=n1;
+
+ if(iram_pcm_doublebuffer)
+ {
+ prevCenter = ln;
+ }
+ else
+ {
+ prevCenter = v->centerW;
+ v->centerW = n1 - v->centerW;
}
-
- /* v->pcm is now used like a two-stage double buffer. We don't want
- to have to constantly shift *or* adjust memory usage. Don't
- accept a new block until the old is shifted out */
/* overlap/add PCM */
-
- for(j=0;j<vi->channels;j++){
- /* the overlap/add section */
- if(v->lW){
- if(v->W){
- /* large/large */
- ogg_int32_t *pcm=v->pcm[j]+prevCenter;
- ogg_int32_t *p=vb->pcm[j];
- vect_add(p, pcm, n1);
- v->pcmb[j]=p;
- }else{
- /* large/small */
- ogg_int32_t *pcm=v->pcm[j]+prevCenter+n1/2-n0/2;
- ogg_int32_t *p=vb->pcm[j];
- vect_add(pcm, p, n0);
- v->pcmb[j]=v->pcm[j]+prevCenter;
+ /* nb nothing to overlap with on first block so don't bother */
+ if(LIKELY(v->pcm_returned!=-1))
+ {
+ for(j=0;j<vi->channels;j++)
+ {
+ ogg_int32_t *pcm=v->pcm[j]+prevCenter;
+ ogg_int32_t *p=vb->pcm[j];
+
+ /* the overlap/add section */
+ if(v->lW == v->W)
+ {
+ /* large/large or small/small */
+ vect_add_right_left(pcm,p,n);
+ v->pcmb[j]=pcm;
}
- }else{
- if(v->W){
- /* small/large */
- ogg_int32_t *pcm=v->pcm[j]+prevCenter;
- ogg_int32_t *p=vb->pcm[j]+n1/2-n0/2;
- vect_add(p, pcm, n0);
- v->pcmb[j]=p;
- }else{
- /* small/small */
- ogg_int32_t *pcm=v->pcm[j]+prevCenter;
- ogg_int32_t *p=vb->pcm[j];
- vect_add(p, pcm, n0);
- v->pcmb[j]=p;
+ else if (!v->W)
+ {
+ /* large/small */
+ vect_add_right_left(pcm + (n1-n0)/2, p, n0);
+ v->pcmb[j]=pcm;
+ }
+ else
+ {
+ /* small/large */
+ p += (n1-n0)/2;
+ vect_add_left_right(p,pcm,n0);
+ v->pcmb[j]=p;
}
}
-
- /* the copy section */
+ }
+
+ /* the copy section */
+ if(iram_pcm_doublebuffer)
+ {
+ /* just flip the pointers over as we have a double buffer in iram */
+ ogg_int32_t *p;
+ p=v->pcm[0];
+ v->pcm[0]=vb->pcm[0];
+ vb->pcm[0] = p;
+ p=v->pcm[1];
+ v->pcm[1]=vb->pcm[1];
+ vb->pcm[1] = p;
+ }
+ else
+ {
+ for(j=0;j<vi->channels;j++)
{
- ogg_int32_t *pcm=v->pcm[j]+thisCenter;
- ogg_int32_t *p=vb->pcm[j]+n;
- vect_copy(pcm, p, n);
+ /* at best only vb->pcm is in iram, and that's where we do the
+ synthesis, so we copy out the right-hand subframe of last
+ synthesis into (noniram) local buffer so we can still do
+ synth in iram */
+ vect_copy(v->pcm[j]+v->centerW, vb->pcm[j]+n, n);
}
}
- if(v->centerW)
- v->centerW=0;
- else
- v->centerW=n1;
-
/* deal with initial packet state; we do this using the explicit
pcm_returned==-1 flag otherwise we're sensitive to first block
being short or long */
if(v->pcm_returned==-1){
- v->pcm_returned=thisCenter;
- v->pcm_current=thisCenter;
+ v->pcm_returned=0;
+ v->pcm_current=0;
}else{
v->pcm_returned=0;
- v->pcm_current=ci->blocksizes[v->lW]/4+ci->blocksizes[v->W]/4;
+ v->pcm_current=(n+ln)/2;
}
}
@@ -375,7 +439,7 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
if(b->sample_count==-1){
b->sample_count=0;
}else{
- b->sample_count+=ci->blocksizes[v->lW]/4+ci->blocksizes[v->W]/4;
+ b->sample_count+=(n+ln)/2;
}
if(v->granulepos==-1){
@@ -406,7 +470,7 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
}
}else{
- v->granulepos+=ci->blocksizes[v->lW]/4+ci->blocksizes[v->W]/4;
+ v->granulepos+=(n+ln)/2;
if(vb->granulepos!=-1 && v->granulepos!=vb->granulepos){
if(v->granulepos>vb->granulepos){
diff --git a/apps/codecs/libtremor/config-tremor.h b/apps/codecs/libtremor/config-tremor.h
index 7cfcb7e35d..d360b5fd73 100644
--- a/apps/codecs/libtremor/config-tremor.h
+++ b/apps/codecs/libtremor/config-tremor.h
@@ -13,10 +13,6 @@
#define BIG_ENDIAN 0
#endif
-#ifndef ICONST_ATTR_TREMOR_WINDOW
-#define ICONST_ATTR_TREMOR_WINDOW ICONST_ATTR
-#endif
-
#ifndef ICODE_ATTR_TREMOR_MDCT
#define ICODE_ATTR_TREMOR_MDCT ICODE_ATTR
#endif
@@ -25,4 +21,29 @@
#define ICODE_ATTR_TREMOR_NOT_MDCT ICODE_ATTR
#endif
+/* Define CPU of large IRAM (MCF5250) */
+#if (CONFIG_CPU == MCF5250)
+/* PCM_BUFFER : 32768 Byte (4096*2*4) *
+ * WINDOW_LOOKUP : 4608 Byte (128*4 + 1024*4) *
+ * TOTAL : 37376 */
+#define IRAM_IBSS_SIZE 37376
+
+/* Define CPU of large IRAM (PP5022/5024) */
+#elif (CONFIG_CPU == PP5022) || (CONFIG_CPU == PP5024)
+/* PCM_BUFFER : 32768 byte (4096*2*4 or 2048*4*4) *
+ * WINDOW_LOOKUP : 9216 Byte (256*4 + 2048*4) *
+ * TOTAL : 41984 */
+#define IRAM_IBSS_SIZE 41984
+
+/* Define CPU of Normal IRAM (96KB) (and SIM also) */
+#else
+/* PCM_BUFFER : 16384 Byte (2048*2*4) *
+ * WINDOW_LOOKUP : 4608 Byte (128*4 + 1024*4) *
+ * TOTAL : 20992 */
+#define IRAM_IBSS_SIZE 20992
+#endif
+
+/* max 2 channels */
+#define CHANNELS 2
+
// #define _LOW_ACCURACY_
diff --git a/apps/codecs/libtremor/ivorbiscodec.h b/apps/codecs/libtremor/ivorbiscodec.h
index 2574a11f2a..a9526d56a6 100644
--- a/apps/codecs/libtremor/ivorbiscodec.h
+++ b/apps/codecs/libtremor/ivorbiscodec.h
@@ -76,6 +76,10 @@ typedef struct vorbis_dsp_state{
ogg_int64_t sequence;
void *backend_state;
+
+ ogg_int32_t *iram_pcm; /* IRAM PCM buffer */
+ int iram_pcm_storage; /* size of IRAM PCM buffer */
+ bool reset_pcmb;
} vorbis_dsp_state;
typedef struct vorbis_block{
diff --git a/apps/codecs/libtremor/mapping0.c b/apps/codecs/libtremor/mapping0.c
index 2bb7a46d79..8b2343c56f 100644
--- a/apps/codecs/libtremor/mapping0.c
+++ b/apps/codecs/libtremor/mapping0.c
@@ -182,8 +182,6 @@ static vorbis_info_mapping *mapping0_unpack(vorbis_info *vi,oggpack_buffer *opb)
static int seq = 0;
-#define CHANNELS 2 /* max 2 channels on the ihp-1xx (stereo) */
-
static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
vorbis_dsp_state *vd=vb->vd;
vorbis_info *vi=vd->vi;
diff --git a/apps/codecs/libtremor/misc.h b/apps/codecs/libtremor/misc.h
index e94236c2a8..59ce6dbb74 100644
--- a/apps/codecs/libtremor/misc.h
+++ b/apps/codecs/libtremor/misc.h
@@ -155,8 +155,11 @@ static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b,
#ifndef _V_VECT_OPS
#define _V_VECT_OPS
+/* generic misc.h has symmetrical versions of vect_add_right_left
+ and vect_add_left_right (since its vect_mult_fw and vect_mult_bw
+ are themselves symmetrical, i.e. both use MULT31) */
static inline
-void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n)
+void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
{
while (n>0) {
*x++ += *y++;
@@ -164,13 +167,10 @@ void vect_add(ogg_int32_t *x, ogg_int32_t *y, int n)
}
}
-static inline
-void vect_copy(ogg_int32_t *x, ogg_int32_t *y, int n)
+static inline
+void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
{
- while (n>0) {
- *x++ = *y++;
- n--;
- }
+ vect_add_right_left(x,y,n);
}
static inline
@@ -194,6 +194,12 @@ void vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
n--;
}
}
+
+/* generic memcpy is probably optimal */
+static inline void vect_copy(ogg_int32_t *x, const ogg_int32_t *y, int n)
+{
+ memcpy(x,y,n*sizeof(ogg_int32_t));
+}
#endif
#endif
diff --git a/apps/codecs/libtremor/oggmalloc.c b/apps/codecs/libtremor/oggmalloc.c
index 4aa2760629..6da7cfcedc 100644
--- a/apps/codecs/libtremor/oggmalloc.c
+++ b/apps/codecs/libtremor/oggmalloc.c
@@ -81,3 +81,27 @@ void ogg_tmpmalloc_free(long pos)
{
tmp_ptr = pos;
}
+
+/* Allocate IRAM buffer */
+static unsigned char iram_buff[IRAM_IBSS_SIZE] IBSS_ATTR __attribute__ ((aligned (16)));
+static size_t iram_remain;
+
+void iram_malloc_init(void){
+ iram_remain=IRAM_IBSS_SIZE;
+}
+
+void *iram_malloc(size_t size){
+ void* x;
+
+ /* always ensure 16-byte aligned */
+ if(size&0x0f)
+ size=(size-(size&0x0f))+16;
+
+ if(size>iram_remain)
+ return NULL;
+
+ x = &iram_buff[IRAM_IBSS_SIZE-iram_remain];
+ iram_remain-=size;
+
+ return x;
+}
diff --git a/apps/codecs/libtremor/os_types.h b/apps/codecs/libtremor/os_types.h
index 5738ef4911..4c7d17ef3a 100644
--- a/apps/codecs/libtremor/os_types.h
+++ b/apps/codecs/libtremor/os_types.h
@@ -25,9 +25,11 @@
#ifdef _LOW_ACCURACY_
# define X(n) (((((n)>>22)+1)>>1) - ((((n)>>22)+1)>>9))
# define LOOKUP_T const unsigned char
+# define LOOKUP_TNC unsigned char
#else
# define X(n) (n)
# define LOOKUP_T const ogg_int32_t
+# define LOOKUP_TNC ogg_int32_t
#endif
/* make it easy on the folks that want to compile the libs with a
@@ -46,6 +48,8 @@ void *ogg_tmpcalloc(size_t nmemb, size_t size);
void *ogg_realloc(void *ptr, size_t size);
long ogg_tmpmalloc_pos(void);
void ogg_tmpmalloc_free(long pos);
+void iram_malloc_init(void);
+void *iram_malloc(size_t size);
typedef short ogg_int16_t;
typedef int ogg_int32_t;
diff --git a/apps/codecs/libtremor/res012.c b/apps/codecs/libtremor/res012.c
index 46b782def1..a42660a065 100644
--- a/apps/codecs/libtremor/res012.c
+++ b/apps/codecs/libtremor/res012.c
@@ -172,8 +172,6 @@ static vorbis_look_residue *res0_look(vorbis_dsp_state *vd,vorbis_info_mode *vm,
return(look);
}
-#define CHANNELS 2
-
/* a truncated packet here just means 'stop working'; it's not an error */
static int _01inverse(vorbis_block *vb,vorbis_look_residue *vl,
ogg_int32_t **in,int ch,
diff --git a/apps/codecs/libtremor/synthesis.c b/apps/codecs/libtremor/synthesis.c
index cef240e796..b1c5eeccef 100644
--- a/apps/codecs/libtremor/synthesis.c
+++ b/apps/codecs/libtremor/synthesis.c
@@ -25,15 +25,7 @@
#include "os.h"
-/* IRAM buffer keep the block pcm data; only for windows size upto 2048
- for space restrictions.
- libVorbis 1.1 Oggenc doesn't use larger windows anyway. */
-/* max 2 channels on the ihp-1xx (stereo), 2048 samples (2*2048*4=16Kb) */
-#define IRAM_PCM_END 2048
-#define CHANNELS 2
-
static ogg_int32_t *ipcm_vect[CHANNELS] IBSS_ATTR;
-static ogg_int32_t ipcm_buff[CHANNELS*IRAM_PCM_END] IBSS_ATTR LINE_ATTR;
int vorbis_synthesis(vorbis_block *vb,ogg_packet *op,int decodep)
ICODE_ATTR_TREMOR_NOT_MDCT;
@@ -76,23 +68,33 @@ int vorbis_synthesis(vorbis_block *vb,ogg_packet *op,int decodep){
vb->eofflag=op->e_o_s;
if(decodep && vi->channels<=CHANNELS){
+ vb->pcm = ipcm_vect;
+
/* alloc pcm passback storage */
vb->pcmend=ci->blocksizes[vb->W];
- if (vb->pcmend<=IRAM_PCM_END) {
+ if (vd->iram_pcm_storage >= vb->pcmend) {
/* use statically allocated iram buffer */
- vb->pcm = ipcm_vect;
- for(i=0; i<CHANNELS; i++)
- vb->pcm[i] = &ipcm_buff[i*IRAM_PCM_END];
+ if(vd->reset_pcmb || vb->pcm[0]==NULL)
+ {
+ /* one-time initialisation at codec start
+ NOT for every block synthesis start
+ allows us to flip between buffers once initialised
+ by simply flipping pointers */
+ for(i=0; i<vi->channels; i++)
+ vb->pcm[i] = &vd->iram_pcm[i*vd->iram_pcm_storage];
+ }
} else {
- /* dynamic allocation (slower) */
- vb->pcm=(ogg_int32_t **)_vorbis_block_alloc(vb,sizeof(*vb->pcm)*vi->channels);
- for(i=0;i<vi->channels;i++)
- vb->pcm[i]=(ogg_int32_t *)_vorbis_block_alloc(vb,vb->pcmend*sizeof(*vb->pcm[i]));
+ if(vd->reset_pcmb || vb->pcm[0]==NULL)
+ {
+ /* dynamic allocation (slower) */
+ for(i=0;i<vi->channels;i++)
+ vb->pcm[i]=(ogg_int32_t *)_vorbis_block_alloc(vb,vb->pcmend*sizeof(*vb->pcm[i]));
+ }
}
+ vd->reset_pcmb = false;
/* unpack_header enforces range checking */
type=ci->map_type[ci->mode_param[mode]->mapping];
-
return(_mapping_P[type]->inverse(vb,b->mode[mode]));
}else{
/* no pcm */
diff --git a/apps/codecs/libtremor/window.c b/apps/codecs/libtremor/window.c
index 14d97cf6ac..7b48886939 100644
--- a/apps/codecs/libtremor/window.c
+++ b/apps/codecs/libtremor/window.c
@@ -68,11 +68,15 @@ void _vorbis_apply_window(ogg_int32_t *d,const void *window_p[2],
long rightbegin=n/2+n/4-rn/4;
long rightend=rightbegin+rn/2;
- memset((void *)&d[0], 0, sizeof(ogg_int32_t)*leftbegin);
- /* mcf5249_vect_zero(&d[0], leftbegin); */
+ /* Following memset is not required - we are careful to only overlap/add the
+ regions that genuinely overlap in the window region, and the portions
+ outside that region are not added (so don't need to be zeroed). see block.c
+ memset((void *)&d[0], 0, sizeof(ogg_int32_t)*leftbegin); */
+
vect_mult_fw(&d[leftbegin], &window[lW][0], leftend-leftbegin);
vect_mult_bw(&d[rightbegin], &window[nW][rn/2-1], rightend-rightbegin);
- memset((void *)&d[rightend], 0, sizeof(ogg_int32_t)*(n-rightend));
- /* mcf5249_vect_zero(&d[rightend], n-rightend); */
+
+ /* Again - memset not needed
+ memset((void *)&d[rightend], 0, sizeof(ogg_int32_t)*(n-rightend)); */
}
diff --git a/apps/codecs/libtremor/window_lookup.h b/apps/codecs/libtremor/window_lookup.h
index ccf316e227..5363b81042 100644
--- a/apps/codecs/libtremor/window_lookup.h
+++ b/apps/codecs/libtremor/window_lookup.h
@@ -51,7 +51,7 @@ static LOOKUP_T vwin128[64] = {
X(0x7ffdcf39), X(0x7fff6dac), X(0x7fffed01), X(0x7fffffc4),
};
-static LOOKUP_T vwin256[128] ICONST_ATTR_TREMOR_WINDOW = {
+static LOOKUP_T vwin256[128] = {
X(0x0001f018), X(0x00117066), X(0x00306e9e), X(0x005ee5f1),
X(0x009ccf26), X(0x00ea208b), X(0x0146cdea), X(0x01b2c87f),
X(0x022dfedf), X(0x02b85ced), X(0x0351cbbd), X(0x03fa317f),
@@ -284,7 +284,7 @@ static LOOKUP_T vwin1024[512] = {
X(0x7fffffdd), X(0x7ffffff7), X(0x7fffffff), X(0x7fffffff),
};
-static LOOKUP_T vwin2048[1024] ICONST_ATTR_TREMOR_WINDOW = {
+static LOOKUP_T vwin2048[1024] = {
X(0x000007c0), X(0x000045c4), X(0x0000c1ca), X(0x00017bd3),
X(0x000273de), X(0x0003a9eb), X(0x00051df9), X(0x0006d007),
X(0x0008c014), X(0x000aee1e), X(0x000d5a25), X(0x00100428),