summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Nils Wallménius <nils@rockbox.org> 2011-09-22 11:47:51 +0000
committer Nils Wallménius <nils@rockbox.org> 2011-09-22 11:47:51 +0000
commit 412cdd6cf7d6c9025532fe226ed2500403890203 (patch)
tree 7fe6b8f48a2401027cac16403b040b153e190d61
parent b1298c2c442ebe1db5d58cbea5ffab6d2e302d74 (diff)
download rockbox-412cdd6cf7d6c9025532fe226ed2500403890203.tar.gz
rockbox-412cdd6cf7d6c9025532fe226ed2500403890203.zip
libtremor: port over ffmpeg's windowing code
Use the windowing approach from ffmpeg in tremor, which does the mdct doubling, windowing and overlap-add in one go. It also uses less memory, so all the processing buffers now fit in iram on targets with small iram for the common blocksizes (256/2048). Speeds up decoding of vorbis files by 3MHz for 256/2048 and 20MHz for 512/4096 files on h300. Speeds up decoding of vorbis files by 3MHz for 256/2048 and 4.5MHz for 512/4096 on the beast. Speeds up decoding of vorbis files by 0.3MHz for 256/2048 and 1MHz for 512/4096 on c200v1. git-svn-id: svn://svn.rockbox.org/rockbox/trunk@30580 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/codecs/libtremor/asm_arm.h168
-rw-r--r--apps/codecs/libtremor/asm_mcf5249.h32
-rw-r--r--apps/codecs/libtremor/block.c181
-rw-r--r--apps/codecs/libtremor/config-tremor.h8
-rw-r--r--apps/codecs/libtremor/ffmpeg_render_line.h71
-rw-r--r--apps/codecs/libtremor/ivorbiscodec.h16
-rw-r--r--apps/codecs/libtremor/mapping0.c28
-rw-r--r--apps/codecs/libtremor/misc.h27
-rw-r--r--apps/codecs/libtremor/synthesis.c20
-rw-r--r--apps/codecs/libtremor/window.c4
-rw-r--r--apps/codecs/libtremor/window.h4
11 files changed, 143 insertions, 416 deletions
diff --git a/apps/codecs/libtremor/asm_arm.h b/apps/codecs/libtremor/asm_arm.h
index eb0d3ca789..42f82ec088 100644
--- a/apps/codecs/libtremor/asm_arm.h
+++ b/apps/codecs/libtremor/asm_arm.h
@@ -17,174 +17,6 @@
#ifdef _ARM_ASSEM_
-#if !defined(_V_WIDE_MATH) && !defined(_LOW_ACCURACY_)
-#define _V_WIDE_MATH
-#ifndef _TREMOR_VECT_OPS
-#define _TREMOR_VECT_OPS
-/* asm versions of vector operations for block.c, window.c */
-/* SOME IMPORTANT NOTES: this implementation of vect_mult_bw does
- NOT do a final shift, meaning that the result of vect_mult_bw is
- only 31 bits not 32. This is so that we can do the shift in-place
- in vect_add_xxxx instead to save one instruction for each mult on arm */
-static inline
-void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
-{
- /* first arg is right subframe of previous frame and second arg
- is left subframe of current frame. overlap left onto right overwriting
- the right subframe */
-
- do{
- asm volatile (
- "ldmia %[x], {r0, r1, r2, r3};"
- "ldmia %[y]!, {r4, r5, r6, r7};"
- "add r0, r4, r0, lsl #1;"
- "add r1, r5, r1, lsl #1;"
- "add r2, r6, r2, lsl #1;"
- "add r3, r7, r3, lsl #1;"
- "stmia %[x]!, {r0, r1, r2, r3};"
- "ldmia %[x], {r0, r1, r2, r3};"
- "ldmia %[y]!, {r4, r5, r6, r7};"
- "add r0, r4, r0, lsl #1;"
- "add r1, r5, r1, lsl #1;"
- "add r2, r6, r2, lsl #1;"
- "add r3, r7, r3, lsl #1;"
- "stmia %[x]!, {r0, r1, r2, r3};"
- : [x] "+r" (x), [y] "+r" (y)
- : : "r0", "r1", "r2", "r3",
- "r4", "r5", "r6", "r7",
- "memory");
- n -= 8;
- } while (n);
-}
-
-static inline
-void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
-{
- /* first arg is left subframe of current frame and second arg
- is right subframe of previous frame. overlap right onto left overwriting
- the LEFT subframe */
- do{
- asm volatile (
- "ldmia %[x], {r0, r1, r2, r3};"
- "ldmia %[y]!, {r4, r5, r6, r7};"
- "add r0, r0, r4, lsl #1;"
- "add r1, r1, r5, lsl #1;"
- "add r2, r2, r6, lsl #1;"
- "add r3, r3, r7, lsl #1;"
- "stmia %[x]!, {r0, r1, r2, r3};"
- "ldmia %[x], {r0, r1, r2, r3};"
- "ldmia %[y]!, {r4, r5, r6, r7};"
- "add r0, r0, r4, lsl #1;"
- "add r1, r1, r5, lsl #1;"
- "add r2, r2, r6, lsl #1;"
- "add r3, r3, r7, lsl #1;"
- "stmia %[x]!, {r0, r1, r2, r3};"
- : [x] "+r" (x), [y] "+r" (y)
- : : "r0", "r1", "r2", "r3",
- "r4", "r5", "r6", "r7",
- "memory");
- n -= 8;
- } while (n);
-}
-
-#if ARM_ARCH >= 6
-static inline
-void ogg_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
-{
- /* Note, mult_fw uses MULT31 */
- do{
- asm volatile (
- "ldmia %[d], {r0, r1, r2, r3};"
- "ldmia %[w]!, {r4, r5, r6, r7};"
- "smmul r0, r4, r0;"
- "smmul r1, r5, r1;"
- "smmul r2, r6, r2;"
- "smmul r3, r7, r3;"
- "mov r0, r0, lsl #1;"
- "mov r1, r1, lsl #1;"
- "mov r2, r2, lsl #1;"
- "mov r3, r3, lsl #1;"
- "stmia %[d]!, {r0, r1, r2, r3};"
- : [d] "+r" (data), [w] "+r" (window)
- : : "r0", "r1", "r2", "r3",
- "r4", "r5", "r6", "r7",
- "memory" );
- n -= 4;
- } while (n);
-}
-#else
-static inline
-void ogg_vect_mult_fw(ogg_int32_t *data, LOOKUP_T *window, int n)
-{
- /* Note, mult_fw uses MULT31 */
- do{
- asm volatile (
- "ldmia %[d], {r0, r1, r2, r3};"
- "ldmia %[w]!, {r4, r5, r6, r7};"
- "smull r8, r0, r4, r0;"
- "mov r0, r0, lsl #1;"
- "smull r8, r1, r5, r1;"
- "mov r1, r1, lsl #1;"
- "smull r8, r2, r6, r2;"
- "mov r2, r2, lsl #1;"
- "smull r8, r3, r7, r3;"
- "mov r3, r3, lsl #1;"
- "stmia %[d]!, {r0, r1, r2, r3};"
- : [d] "+r" (data), [w] "+r" (window)
- : : "r0", "r1", "r2", "r3",
- "r4", "r5", "r6", "r7", "r8",
- "memory" );
- n -= 4;
- } while (n);
-}
-#endif
-
-#if ARM_ARCH >= 6
-static inline
-void ogg_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
-{
- /* NOTE mult_bw uses MULT_32 i.e. doesn't shift result left at end */
- /* On ARM, we can do the shift at the same time as the overlap-add */
- do{
- asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
- "ldmda %[w]!, {r4, r5, r6, r7};"
- "smmul r0, r7, r0;"
- "smmul r1, r6, r1;"
- "smmul r2, r5, r2;"
- "smmul r3, r4, r3;"
- "stmia %[d]!, {r0, r1, r2, r3};"
- : [d] "+r" (data), [w] "+r" (window)
- : : "r0", "r1", "r2", "r3",
- "r4", "r5", "r6", "r7",
- "memory" );
- n -= 4;
- } while (n);
-}
-#else
-static inline
-void ogg_vect_mult_bw(ogg_int32_t *data, LOOKUP_T *window, int n)
-{
- /* NOTE mult_bw uses MULT_32 i.e. doesn't shift result left at end */
- /* On ARM, we can do the shift at the same time as the overlap-add */
- do{
- asm volatile ("ldmia %[d], {r0, r1, r2, r3};"
- "ldmda %[w]!, {r4, r5, r6, r7};"
- "smull r8, r0, r7, r0;"
- "smull r7, r1, r6, r1;"
- "smull r6, r2, r5, r2;"
- "smull r5, r3, r4, r3;"
- "stmia %[d]!, {r0, r1, r2, r3};"
- : [d] "+r" (data), [w] "+r" (window)
- : : "r0", "r1", "r2", "r3",
- "r4", "r5", "r6", "r7", "r8",
- "memory" );
- n -= 4;
- } while (n);
-}
-#endif
-#endif
-#endif
-
#ifndef _V_LSP_MATH_ASM
#define _V_LSP_MATH_ASM
diff --git a/apps/codecs/libtremor/asm_mcf5249.h b/apps/codecs/libtremor/asm_mcf5249.h
index 3e7d46983e..66de07615f 100644
--- a/apps/codecs/libtremor/asm_mcf5249.h
+++ b/apps/codecs/libtremor/asm_mcf5249.h
@@ -28,37 +28,5 @@
#define MB()
-#ifndef _TREMOR_VECT_OPS
-#define _TREMOR_VECT_OPS
-static inline
-void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
-{
- /* coldfire asm has symmetrical versions of vect_add_right_left
- and vect_add_left_right (since symmetrical versions of
- vect_mult_fw and vect_mult_bw i.e. both use MULT31) */
- vect_add(x, y, n );
-}
-
-static inline
-void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
-{
- /* coldfire asm has symmetrical versions of vect_add_right_left
- and vect_add_left_right (since symmetrical versions of
- vect_mult_fw and vect_mult_bw i.e. both use MULT31) */
- vect_add(x, y, n );
-}
-
-static inline
-void ogg_vect_mult_fw(int32_t *data, const int32_t *window, int n)
-{
- vect_mult_fw(data, window, n);
-}
-
-static inline
-void ogg_vect_mult_bw(int32_t *data, const int32_t *window, int n)
-{
- vect_mult_bw(data, window, n);
-}
-#endif
#endif
#endif
diff --git a/apps/codecs/libtremor/block.c b/apps/codecs/libtremor/block.c
index d678719cbe..b7ce5adc4e 100644
--- a/apps/codecs/libtremor/block.c
+++ b/apps/codecs/libtremor/block.c
@@ -25,6 +25,7 @@
#include "window.h"
#include "registry.h"
#include "misc.h"
+#include "ffmpeg_render_line.h"
//#include <codecs/lib/codeclib.h>
static int ilog(unsigned int v){
@@ -37,11 +38,10 @@ static int ilog(unsigned int v){
return(ret);
}
-static ogg_int32_t* _pcmp [CHANNELS] IBSS_ATTR;
static ogg_int32_t* _pcmbp[CHANNELS] IBSS_ATTR;
static ogg_int32_t* _pcmret[CHANNELS] IBSS_ATTR;
/* save original pointers returned by malloc so we can free it easily */
-static ogg_int32_t* pcm_copy[CHANNELS] = {NULL};
+static ogg_int32_t* malloc_pointers[3] = {NULL};
/* pcm accumulator examples (not exhaustive):
@@ -153,14 +153,13 @@ int vorbis_block_clear(vorbis_block *vb){
static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){
int i;
long b_size[2];
-
+
codec_setup_info *ci=(codec_setup_info *)vi->codec_setup;
private_state *b=NULL;
if(ci==NULL) return 1;
memset(v,0,sizeof(*v));
- v->reset_pcmb=true;
b=(private_state *)(v->backend_state=_ogg_calloc(1,sizeof(*b)));
v->vi=vi;
@@ -169,16 +168,42 @@ static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){
#ifdef TREMOR_USE_IRAM
/* allocate IRAM buffer for the PCM data generated by synthesis */
iram_malloc_init();
- v->first_pcm = iram_malloc(vi->channels*ci->blocksizes[1]*sizeof(ogg_int32_t));
- /* when can't allocate IRAM buffer, allocate normal RAM buffer */
- if(v->first_pcm == NULL)
+
+ v->floors = iram_malloc(vi->channels*ci->blocksizes[1]/2*sizeof(ogg_int32_t));
+ v->residues[0] = iram_malloc(vi->channels*ci->blocksizes[1]/2*sizeof(ogg_int32_t));
+ /* if we can get away with it, put a double buffer into IRAM too, so that
+ overlap-add runs iram-to-iram and we avoid needing to memcpy */
+ v->residues[1] = iram_malloc(vi->channels*ci->blocksizes[1]/2*sizeof(ogg_int32_t));
+ if (v->residues[1] == NULL)
+ v->saved = iram_malloc(vi->channels*ci->blocksizes[1]/4*sizeof(ogg_int32_t));
+
#endif
- {
- pcm_copy[0] = _ogg_malloc(vi->channels*ci->blocksizes[1]*sizeof(ogg_int32_t));
- v->first_pcm = pcm_copy[0];
+
+ if (v->residues[0] == NULL) {
+ malloc_pointers[0] = _ogg_malloc(vi->channels*ci->blocksizes[1]/2*sizeof(ogg_int32_t));
+ v->residues[0] = malloc_pointers[0];
+ }
+
+ if (v->residues[1] == NULL && v->saved == NULL) {
+ malloc_pointers[1] = _ogg_malloc(vi->channels*ci->blocksizes[1]/2*sizeof(ogg_int32_t));
+ v->residues[1] = malloc_pointers[1];
}
- v->centerW=0;
+ if (v->floors == NULL) {
+ malloc_pointers[2] = _ogg_malloc(vi->channels*ci->blocksizes[1]/2*sizeof(ogg_int32_t));
+ v->floors = malloc_pointers[2];
+ }
+
+ /* needed for the first overlap/add */
+ if (v->saved) {
+ memset(v->saved, 0, vi->channels*ci->blocksizes[1]/4*sizeof(ogg_int32_t));
+ for (i = 0; i < vi->channels; i++)
+ v->saved_ptr[i] = v->saved + i*ci->blocksizes[1]/4;
+ } else {
+ memset(v->residues[1], 0, vi->channels*ci->blocksizes[1]/2*sizeof(ogg_int32_t));
+ for (i = 0; i < vi->channels; i++)
+ v->saved_ptr[i] = v->residues[1] + i*ci->blocksizes[1]/2;
+ }
/* Vorbis I uses only window type 0 */
b_size[0]=ci->blocksizes[0]/2;
@@ -214,37 +239,13 @@ static int _vds_init(vorbis_dsp_state *v,vorbis_info *vi){
}
}
- /* if we can get away with it, put a double buffer into IRAM too, so that
- overlap-add runs iram-to-iram and we avoid needing to memcpy */
v->pcm_storage=ci->blocksizes[1];
- v->pcm=_pcmp;
v->pcmret=_pcmret;
v->pcmb=_pcmbp;
- _pcmp[0]=NULL;
- _pcmp[1]=NULL;
_pcmbp[0]=NULL;
_pcmbp[1]=NULL;
-#ifdef TREMOR_USE_IRAM
- if(NULL != (v->iram_double_pcm = iram_malloc(vi->channels*v->pcm_storage*sizeof(ogg_int32_t))))
- {
- /* one-time initialisation at codec start or on switch from
- blocksizes greater than IRAM_PCM_END to sizes that fit */
- for(i=0;i<vi->channels;i++)
- v->pcm[i]=&v->iram_double_pcm[i*v->pcm_storage];
- }
- else
-#endif
- {
- /* one-time initialisation at codec start or on switch from
- blocksizes that fit in IRAM_PCM_END to those that don't */
- /* save copy of the pointer so we can free it easily later */
- pcm_copy[1] = _ogg_calloc(vi->channels*v->pcm_storage,sizeof(*v->pcm[i]));
- for(i=0;i<vi->channels;i++)
- v->pcm[i] = pcm_copy[1]+i*v->pcm_storage;
- }
-
/* all 1 (large block) or 0 (small block) */
/* explicitly set for the sake of clarity */
v->lW=0; /* previous window size */
@@ -274,35 +275,19 @@ abort_books:
int vorbis_synthesis_restart(vorbis_dsp_state *v){
vorbis_info *vi=v->vi;
codec_setup_info *ci;
- int i;
if(!v->backend_state)return -1;
if(!vi)return -1;
ci=vi->codec_setup;
if(!ci)return -1;
- v->centerW=0;
v->pcm_current=0;
v->pcm_returned=-1;
v->granulepos=-1;
v->sequence=-1;
((private_state *)(v->backend_state))->sample_count=-1;
-
- /* indicate to synthesis code that buffer pointers no longer valid
- (if we're using double pcm buffer) and will need to reset them */
- v->reset_pcmb = true;
- /* also reset our copy of the double buffer pointers if we have one */
-#ifdef TREMOR_USE_IRAM
- if(v->iram_double_pcm)
- {
- for(i=0;i<vi->channels;i++)
- v->pcm[i]=&v->iram_double_pcm[i*v->pcm_storage];
- }
-#else
- for(i=0;i<vi->channels;i++)
- v->pcm[i] = pcm_copy[1]+i*v->pcm_storage;
-#endif
+
return(0);
}
@@ -323,11 +308,10 @@ void vorbis_dsp_clear(vorbis_dsp_state *v){
if(vi != NULL)
{
/* pcm buffer came from oggmalloc rather than iram */
- for(i=0;i<2;i++)
- if(pcm_copy[i])
- {
- _ogg_free(pcm_copy[i]);
- pcm_copy[i] = NULL;
+ for(i=0;i<3;i++)
+ if(malloc_pointers[i]) {
+ _ogg_free(malloc_pointers[i]);
+ malloc_pointers[i] = NULL;
}
}
@@ -359,10 +343,6 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
vorbis_info *vi=v->vi;
codec_setup_info *ci=(codec_setup_info *)vi->codec_setup;
private_state *b=v->backend_state;
- int j;
-#ifdef TREMOR_USE_IRAM
- bool iram_pcm_doublebuffer = (NULL != v->iram_double_pcm);
-#endif
if(v->pcm_current>v->pcm_returned && v->pcm_returned!=-1)return(OV_EINVAL);
@@ -380,79 +360,11 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
int n=ci->blocksizes[v->W]/2;
int ln=ci->blocksizes[v->lW]/2;
- if(LIKELY(vb->pcm)){ /* no pcm to process if vorbis_synthesis_trackonly
- was called on block */
- int prevCenter;
- int n0=ci->blocksizes[0]/2;
- int n1=ci->blocksizes[1]/2;
-
-#ifdef TREMOR_USE_IRAM
- if(!iram_pcm_doublebuffer)
- {
- prevCenter = v->centerW;
- v->centerW = n1 - v->centerW;
- }
- else
-#endif
- prevCenter = ln;
-
- /* overlap/add PCM */
- /* nb nothing to overlap with on first block so don't bother */
- if(LIKELY(v->pcm_returned!=-1))
- {
- for(j=0;j<vi->channels;j++)
- {
- ogg_int32_t *pcm=v->pcm[j]+prevCenter;
- ogg_int32_t *p=vb->pcm[j];
-
- /* the overlap/add section */
- if(v->lW == v->W)
- {
- /* large/large or small/small */
- vect_add_right_left(pcm,p,n);
- v->pcmb[j]=pcm;
- }
- else if (!v->W)
- {
- /* large/small */
- vect_add_right_left(pcm + (n1-n0)/2, p, n0);
- v->pcmb[j]=pcm;
- }
- else
- {
- /* small/large */
- p += (n1-n0)/2;
- vect_add_left_right(p,pcm,n0);
- v->pcmb[j]=p;
- }
- }
- }
-#ifdef TREMOR_USE_IRAM
- /* the copy section */
- if(!iram_pcm_doublebuffer)
- {
- for(j=0;j<vi->channels;j++)
- {
- /* at best only vb->pcm is in iram, and that's where we do the
- synthesis, so we copy out the right-hand subframe of last
- synthesis into (noniram) local buffer so we can still do
- synth in iram */
- vect_copy(v->pcm[j]+v->centerW, vb->pcm[j]+n, n);
- }
- }
- else
-#endif
- {
- /* just flip the pointers over as we have a double buffer in iram */
- ogg_int32_t *p;
- p=v->pcm[0];
- v->pcm[0]=vb->pcm[0];
- vb->pcm[0] = p;
- p=v->pcm[1];
- v->pcm[1]=vb->pcm[1];
- vb->pcm[1] = p;
- }
-
+ if(LIKELY(vb->pcmend != 0)){ /* no pcm to process if vorbis_synthesis_trackonly
+ was called on block */
+ window_overlap_add(ci->blocksizes[v->W], ci->blocksizes[v->lW],
+ ci->blocksizes[0], ci->blocksizes[1], vi->channels,
+ b->window[v->W & v->lW], v);
/* deal with initial packet state; we do this using the explicit
pcm_returned==-1 flag otherwise we're sensitive to first block
being short or long */
@@ -464,7 +376,6 @@ int vorbis_synthesis_blockin(vorbis_dsp_state *v,vorbis_block *vb){
v->pcm_returned=0;
v->pcm_current=(n+ln)/2;
}
-
}
/* track the frame number... This is for convenience, but also
diff --git a/apps/codecs/libtremor/config-tremor.h b/apps/codecs/libtremor/config-tremor.h
index eba0fe0912..cf2dbe977d 100644
--- a/apps/codecs/libtremor/config-tremor.h
+++ b/apps/codecs/libtremor/config-tremor.h
@@ -40,10 +40,10 @@
/* Define CPU of Normal IRAM (96KB) */
#else
-/* PCM_BUFFER : 16384 Byte (2048*2*4) *
- * WINDOW_LOOKUP : 4608 Byte (128*4 + 1024*4) *
- * TOTAL : 20992 */
-#define IRAM_IBSS_SIZE 20992
+/* floor and double residue buffer : 24576 Byte (2048/2*4*2*3) *
+ * WINDOW_LOOKUP : 4608 Byte (128*4 + 1024*4) *
+ * TOTAL : 29184 */
+#define IRAM_IBSS_SIZE 29184
#endif
#endif
diff --git a/apps/codecs/libtremor/ffmpeg_render_line.h b/apps/codecs/libtremor/ffmpeg_render_line.h
index 1b760ae20e..a08952d95d 100644
--- a/apps/codecs/libtremor/ffmpeg_render_line.h
+++ b/apps/codecs/libtremor/ffmpeg_render_line.h
@@ -21,6 +21,9 @@
*/
/* render_line and friend taken from ffmpeg (libavcodec/vorbis.c) */
+
+#include "misc.h"
+
static inline void render_line_unrolled(int x, int y, int x1,
int sy, int ady, int adx,
const ogg_int32_t *lookup, ogg_int32_t *buf)
@@ -45,8 +48,8 @@ static inline void render_line_unrolled(int x, int y, int x1,
}
}
-static void render_line(int x0, int y0, int x1, int y1,
- const ogg_int32_t *lookup, ogg_int32_t *buf)
+static inline void render_line(int x0, int y0, int x1, int y1,
+ const ogg_int32_t *lookup, ogg_int32_t *buf)
{
int dy = y1 - y0;
int adx = x1 - x0;
@@ -72,3 +75,67 @@ static void render_line(int x0, int y0, int x1, int y1,
}
}
}
+
+#ifndef INCL_OPTIMIZED_VECTOR_FMUL_WINDOW
+#define INCL_OPTIMIZED_VECTOR_FMUL_WINDOW
+static inline void ff_vector_fmul_window_c(ogg_int32_t *dst, const ogg_int32_t *src0,
+ const ogg_int32_t *src1, const ogg_int32_t *win, int len){
+ int i,j;
+ dst += len;
+ win += len;
+ src0+= len;
+ for(i=-len, j=len-1; i<0; i++, j--) {
+ ogg_int32_t s0 = src0[i];
+ ogg_int32_t s1 = src1[j];
+ ogg_int32_t wi = win[i];
+ ogg_int32_t wj = win[j];
+ XNPROD31(s0, s1, wj, wi, &dst[i], &dst[j]);
+ /*
+ dst[i] = MULT31(s0,wj) - MULT31(s1,wi);
+ dst[j] = MULT31(s0,wi) + MULT31(s1,wj);
+ */
+ }
+}
+#endif
+
+static inline void copy_normalize(ogg_int32_t *dst, ogg_int32_t *src, int len)
+{
+ memcpy(dst, src, len * sizeof(ogg_int32_t));
+}
+
+static inline void window_overlap_add(unsigned int blocksize, unsigned int lastblock,
+ unsigned int bs0, unsigned int bs1, int ch,
+ const ogg_int32_t *win, vorbis_dsp_state *v)
+{
+ unsigned retlen = (blocksize + lastblock) / 4;
+ int j;
+ for (j = 0; j < ch; j++) {
+ ogg_int32_t *residue = v->residues[v->ri] + j * blocksize / 2;
+ ogg_int32_t *saved;
+ saved = v->saved_ptr[j];
+ ogg_int32_t *ret = v->floors + j * retlen;
+ ogg_int32_t *buf = residue;
+
+ if (v->W == v->lW) {
+ ff_vector_fmul_window_c(ret, saved, buf, win, blocksize / 4);
+ } else if (v->W > v->lW) {
+ ff_vector_fmul_window_c(ret, saved, buf, win, bs0 / 4);
+ copy_normalize(ret+bs0/2, buf+bs0/4, (bs1-bs0)/4);
+ } else {
+ copy_normalize(ret, saved, (bs1 - bs0) / 4);
+ ff_vector_fmul_window_c(ret + (bs1 - bs0) / 4, saved + (bs1 - bs0) / 4, buf, win, bs0 / 4);
+ }
+ if (v->residues[1] == NULL) {
+ memcpy(saved, buf + blocksize / 4, blocksize / 4 * sizeof(ogg_int32_t));
+ v->saved_ptr[j] = v->saved + j * bs1 / 4;
+ } else {
+ v->saved_ptr[j] = buf + blocksize / 4;
+ }
+
+ v->pcmb[j] = ret;
+ }
+
+ if (v->residues[1] != NULL) {
+ v->ri ^= 1;
+ }
+}
diff --git a/apps/codecs/libtremor/ivorbiscodec.h b/apps/codecs/libtremor/ivorbiscodec.h
index 23b62c48bd..73ba4aeb2a 100644
--- a/apps/codecs/libtremor/ivorbiscodec.h
+++ b/apps/codecs/libtremor/ivorbiscodec.h
@@ -59,7 +59,13 @@ typedef struct vorbis_info{
typedef struct vorbis_dsp_state{
vorbis_info *vi;
- ogg_int32_t **pcm;
+ ogg_int32_t *residues[2];
+ ogg_int32_t *floors;
+ ogg_int32_t *saved;
+ ogg_int32_t *saved_ptr[CHANNELS];
+
+ int ri;
+
ogg_int32_t **pcmb;
ogg_int32_t **pcmret;
int pcm_storage;
@@ -71,23 +77,15 @@ typedef struct vorbis_dsp_state{
long lW;
long W;
long nW;
- long centerW;
ogg_int64_t granulepos;
ogg_int64_t sequence;
void *backend_state;
-
- ogg_int32_t *first_pcm; /* PCM buffer (for normal RAM or IRAM)*/
-#ifdef TREMOR_USE_IRAM
- ogg_int32_t *iram_double_pcm; /* PCM 2nd buffer for IRAM */
-#endif
- bool reset_pcmb;
} vorbis_dsp_state;
typedef struct vorbis_block{
/* necessary stream state for linking to the framing abstraction */
- ogg_int32_t **pcm; /* this is a pointer into local storage */
oggpack_buffer opb;
long lW;
diff --git a/apps/codecs/libtremor/mapping0.c b/apps/codecs/libtremor/mapping0.c
index 9042b9c174..084d5e076d 100644
--- a/apps/codecs/libtremor/mapping0.c
+++ b/apps/codecs/libtremor/mapping0.c
@@ -302,7 +302,6 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
vorbis_dsp_state *vd=vb->vd;
vorbis_info *vi=vd->vi;
codec_setup_info *ci=(codec_setup_info *)vi->codec_setup;
- private_state *b=(private_state *)vd->backend_state;
vorbis_look_mapping0 *look=(vorbis_look_mapping0 *)l;
vorbis_info_mapping0 *info=look->map;
@@ -329,8 +328,8 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
if(floormemo[i])
nonzero[i]=1;
else
- nonzero[i]=0;
- memset(vb->pcm[i],0,sizeof(*vb->pcm[i])*n/2);
+ nonzero[i]=0;
+ memset(vd->floors + i * ci->blocksizes[vb->W]/2,0,sizeof(ogg_int32_t)*n/2);
}
/* channel coupling can 'dirty' the nonzero listing */
@@ -351,7 +350,7 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
zerobundle[ch_in_bundle]=1;
else
zerobundle[ch_in_bundle]=0;
- pcmbundle[ch_in_bundle++]=vb->pcm[j];
+ pcmbundle[ch_in_bundle++] = vd->floors + j * ci->blocksizes[vb->W]/2;
}
}
@@ -365,8 +364,8 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
/* channel coupling */
for(i=info->coupling_steps-1;i>=0;i--){
- ogg_int32_t *pcmM=vb->pcm[info->coupling_mag[i]];
- ogg_int32_t *pcmA=vb->pcm[info->coupling_ang[i]];
+ ogg_int32_t *pcmM = vd->floors + info->coupling_mag[i] * ci->blocksizes[vb->W]/2;
+ ogg_int32_t *pcmA = vd->floors + info->coupling_ang[i] * ci->blocksizes[vb->W]/2;
channel_couple(pcmM,pcmA,n);
}
@@ -378,24 +377,21 @@ static int mapping0_inverse(vorbis_block *vb,vorbis_look_mapping *l){
/* transform the PCM data; takes PCM vector, vb; modifies PCM vector */
/* only MDCT right now.... */
-
for(i=0;i<vi->channels;i++){
- ogg_int32_t *pcm=vb->pcm[i];
+ ogg_int32_t *pcm = vd->floors + i*ci->blocksizes[vb->W]/2;
int submap=info->chmuxlist[i];
if(nonzero[i]) {
/* compute and apply spectral envelope */
look->floor_func[submap]->
inverse2(vb,look->floor_look[submap],floormemo[i],pcm);
-
- ff_imdct_calc(ci->blocksizes_nbits[vb->W],
- (int32_t*)pcm,
- (int32_t*)pcm);
- /* window the data */
- _vorbis_apply_window(pcm,b->window,ci->blocksizes,vb->lW,vb->W,vb->nW);
+
+ ff_imdct_half(ci->blocksizes_nbits[vb->W],
+ (int32_t*)vd->residues[vd->ri] + i*ci->blocksizes[vb->W]/2,
+ (int32_t*)&vd->floors[i*ci->blocksizes[vb->W]/2]);
}
- else
- memset(pcm, 0, sizeof(ogg_int32_t)*n);
+ else
+ memset(vd->residues[vd->ri] + i*ci->blocksizes[vb->W]/2, 0, sizeof(ogg_int32_t)*n/2);
}
//for(j=0;j<vi->channels;j++)
diff --git a/apps/codecs/libtremor/misc.h b/apps/codecs/libtremor/misc.h
index 0b0ff4d3a7..592a60ffd8 100644
--- a/apps/codecs/libtremor/misc.h
+++ b/apps/codecs/libtremor/misc.h
@@ -26,7 +26,6 @@
#include "asm_arm.h"
#include "asm_mcf5249.h"
-
/* Some prototypes that were not defined elsewhere */
void *_vorbis_block_alloc(vorbis_block *vb,long bytes);
@@ -155,32 +154,6 @@ static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b,
}
*/
#endif
-#ifndef _TREMOR_VECT_OPS
-#define _TREMOR_VECT_OPS
-static inline
-void vect_add_left_right(ogg_int32_t *x, const ogg_int32_t *y, int n)
-{
- vect_add(x, y, n );
-}
-
-static inline
-void vect_add_right_left(ogg_int32_t *x, const ogg_int32_t *y, int n)
-{
- vect_add(x, y, n );
-}
-
-static inline
-void ogg_vect_mult_fw(int32_t *data, const int32_t *window, int n)
-{
- vect_mult_fw(data, window, n);
-}
-
-static inline
-void ogg_vect_mult_bw(int32_t *data, const int32_t *window, int n)
-{
- vect_mult_bw(data, window, n);
-}
-#endif
#if 0
#ifndef _V_VECT_OPS
diff --git a/apps/codecs/libtremor/synthesis.c b/apps/codecs/libtremor/synthesis.c
index 657aa74e11..d1ef99ae37 100644
--- a/apps/codecs/libtremor/synthesis.c
+++ b/apps/codecs/libtremor/synthesis.c
@@ -25,15 +25,13 @@
#include "os.h"
-static ogg_int32_t *ipcm_vect[CHANNELS] IBSS_ATTR;
-
static inline int _vorbis_synthesis1(vorbis_block *vb,ogg_packet *op,int decodep){
vorbis_dsp_state *vd= vb ? vb->vd : 0;
private_state *b= vd ? (private_state *)vd->backend_state: 0;
vorbis_info *vi= vd ? vd->vi : 0;
codec_setup_info *ci= vi ? (codec_setup_info *)vi->codec_setup : 0;
oggpack_buffer *opb=vb ? &vb->opb : 0;
- int type,mode,i;
+ int type,mode;
if (!vd || !b || !vi || !ci || !opb) {
return OV_EBADPACKET;
@@ -75,22 +73,8 @@ static inline int _vorbis_synthesis1(vorbis_block *vb,ogg_packet *op,int decodep
if(decodep && vi->channels<=CHANNELS)
{
- vb->pcm = ipcm_vect;
-
/* set pcm end point */
vb->pcmend=ci->blocksizes[vb->W];
- /* use statically allocated buffer */
- if(vd->reset_pcmb || vb->pcm[0]==NULL)
- {
- /* one-time initialisation at codec start
- NOT for every block synthesis start
- allows us to flip between buffers once initialised
- by simply flipping pointers */
- for(i=0; i<vi->channels; i++)
- vb->pcm[i] = &vd->first_pcm[i*ci->blocksizes[1]];
-
- }
- vd->reset_pcmb = false;
/* unpack_header enforces range checking */
type=ci->map_type[ci->mode_param[mode]->mapping];
@@ -98,8 +82,6 @@ static inline int _vorbis_synthesis1(vorbis_block *vb,ogg_packet *op,int decodep
}else{
/* no pcm */
vb->pcmend=0;
- vb->pcm=NULL;
-
return(0);
}
}
diff --git a/apps/codecs/libtremor/window.c b/apps/codecs/libtremor/window.c
index e46008aef0..3bc947f0e5 100644
--- a/apps/codecs/libtremor/window.c
+++ b/apps/codecs/libtremor/window.c
@@ -53,7 +53,7 @@ const void *_vorbis_window(int type, int left){
return(0);
}
}
-
+#if 0
void _vorbis_apply_window(ogg_int32_t *d,const void *window_p[2],
long *blocksizes,
int lW,int W,int nW){
@@ -79,4 +79,4 @@ void _vorbis_apply_window(ogg_int32_t *d,const void *window_p[2],
/* Again - memset not needed
memset((void *)&d[rightend], 0, sizeof(ogg_int32_t)*(n-rightend)); */
}
-
+#endif
diff --git a/apps/codecs/libtremor/window.h b/apps/codecs/libtremor/window.h
index b242ec2354..166d0b8e9c 100644
--- a/apps/codecs/libtremor/window.h
+++ b/apps/codecs/libtremor/window.h
@@ -19,9 +19,9 @@
#define _V_WINDOW_
extern const void *_vorbis_window(int type,int left);
-extern void _vorbis_apply_window(ogg_int32_t *d,const void *window[2],
+/*extern void _vorbis_apply_window(ogg_int32_t *d,const void *window[2],
long *blocksizes,
- int lW,int W,int nW);
+ int lW,int W,int nW);*/
#endif