diff options
Diffstat (limited to 'apps')
-rw-r--r-- | apps/codecs/libspeex/SOURCES | 3 | ||||
-rw-r--r-- | apps/codecs/libspeex/config-speex.h | 5 | ||||
-rw-r--r-- | apps/codecs/libspeex/filters.c | 2 | ||||
-rw-r--r-- | apps/codecs/libspeex/filters_cf.S | 165 | ||||
-rw-r--r-- | apps/codecs/libspeex/nb_celp.c | 3 |
5 files changed, 178 insertions, 0 deletions
diff --git a/apps/codecs/libspeex/SOURCES b/apps/codecs/libspeex/SOURCES index df73565734..a5d4318282 100644 --- a/apps/codecs/libspeex/SOURCES +++ b/apps/codecs/libspeex/SOURCES @@ -42,3 +42,6 @@ vbr.c vorbis_psy.c vq.c window.c +#ifdef CPU_COLDFIRE +filters_cf.S +#endif diff --git a/apps/codecs/libspeex/config-speex.h b/apps/codecs/libspeex/config-speex.h index b7385eee02..31ebe80741 100644 --- a/apps/codecs/libspeex/config-speex.h +++ b/apps/codecs/libspeex/config-speex.h @@ -11,6 +11,11 @@ #define ARM4_ASM #endif +/* Make use of Coldfire assembly optimizations */ +#if defined(CPU_COLDFIRE) +#define COLDFIRE_ASM +#endif + /* Make use of Blackfin assembly optimizations */ /* #undef BFIN_ASM */ diff --git a/apps/codecs/libspeex/filters.c b/apps/codecs/libspeex/filters.c index a6a5f62d26..02f93a27b1 100644 --- a/apps/codecs/libspeex/filters.c +++ b/apps/codecs/libspeex/filters.c @@ -45,6 +45,8 @@ #include "filters_sse.h" #elif defined (ARM4_ASM) || defined(ARM5E_ASM) #include "filters_arm4.h" +#elif defined (COLDFIRE_ASM) +#define OVERRIDE_IIR_MEM16 #elif defined (BFIN_ASM) #include "filters_bfin.h" #endif diff --git a/apps/codecs/libspeex/filters_cf.S b/apps/codecs/libspeex/filters_cf.S new file mode 100644 index 0000000000..579af11581 --- /dev/null +++ b/apps/codecs/libspeex/filters_cf.S @@ -0,0 +1,165 @@ +/* Copyright (C) 2007 Thom Johansen */ +/** + @file filters_cf.S + @brief Various analysis/synthesis filters (Coldfire version) +*/ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+
+   - Neither the name of the Xiph.org Foundation nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+/* IIR filter on 16-bit samples using the Coldfire MAC unit. Only ord == 8 and ord == 10 are implemented; any other ord returns without writing y or mem. N must be >= 1 (the sample count is tested after the loop body). */
+    .text
+/* void iir_mem16(const spx_word16_t *x, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack) */
+    .global iir_mem16
+iir_mem16:
+    lea.l (-44, %sp), %sp                   | Room for the 11 registers saved below
+    movem.l %d2-%d7/%a2-%a6, (%sp)
+    movem.l (44+4, %sp), %a3-%a5            | a3 = x, a4 = den, a5 = y
+    movem.l (44+20, %sp), %d0/%a6           | d0 = ord, a6 = mem
+    moveq.l #8, %d1                         | Jump to correct routine based on 'ord'
+    cmp.l %d1, %d0
+    jeq .order_8
+    moveq.l #10, %d1
+    cmp.l %d1, %d0
+    jeq .order_10
+    jra .exit                               | Unsupported order: nothing is filtered
+
+    | d0 = y[i], d1-d7, a0 = mem[0] .. mem[7]
+    | a3 = x, a4 = den, a5 = y, a6 = temp (holds two 16-bit den[] values at a time)
+.order_8:
+    movem.l (%a6), %d1-%d7/%a0              | Fetch mem[] array
+0:
+    moveq.l #13, %d0
+    add.l #4096, %d1
+    asr.l %d0, %d1                          | mem[0] >> 13 with rounding
+    move.w (%a3)+, %d0
+    ext.l %d0
+    add.l %d1, %d0                          | Add with x[i]
+    cmp.l #32767, %d0                       | Saturate to [-32767..32767] using signed compares
+    jle 1f
+    move.l #32767, %d0                      | Clip high
+1:
+    cmp.l #-32767, %d0
+    jge 2f
+    move.l #-32767, %d0                     | Clip low; -32768 is excluded, its 16-bit negation is itself
+2:
+    move.w %d0, (%a5)+                      | Write result to y[i] before it is negated
+    neg.l %d0                               | d0 = -y[i]; msac.w is bugged in gas, do this for now
+    move.l (%a4)+, %a6                      | Fetch den[0] and den[1]
+    mac.w %a6u, %d0l, %acc0
+    mac.w %a6l, %d0l, (%a4)+, %a6, %acc1    | ...and fetch den[2] and den[3]
+    mac.w %a6u, %d0l, %acc2
+    mac.w %a6l, %d0l, (%a4)+, %a6, %acc3    | ...and fetch den[4] and den[5]
+    movclr.l %acc0, %d1
+    add.l %d2, %d1                          | mem[0] = mem[1] - den[0]*y[i]
+    movclr.l %acc1, %d2
+    add.l %d3, %d2                          | mem[1] = mem[2] - den[1]*y[i]
+    movclr.l %acc2, %d3
+    add.l %d4, %d3                          | mem[2] = mem[3] - den[2]*y[i]
+    movclr.l %acc3, %d4
+    add.l %d5, %d4                          | mem[3] = mem[4] - den[3]*y[i]
+    mac.w %a6u, %d0l, %acc0
+    mac.w %a6l, %d0l, (%a4)+, %a6, %acc1    | ...and fetch den[6] and den[7]
+    mac.w %a6u, %d0l, %acc2
+    mac.w %a6l, %d0l, %acc3
+    lea.l (-16, %a4), %a4                   | wrap den pointer back to den[0]
+    movclr.l %acc0, %d5
+    add.l %d6, %d5                          | mem[4] = mem[5] - den[4]*y[i]
+    movclr.l %acc1, %d6
+    add.l %d7, %d6                          | mem[5] = mem[6] - den[5]*y[i]
+    movclr.l %acc2, %d7
+    add.l %a0, %d7                          | mem[6] = mem[7] - den[6]*y[i]
+    movclr.l %acc3, %a0                     | mem[7] = -den[7]*y[i]
+    subq.l #1, (44+16, %sp)                 | Have we done all samples? (counts down the N argument slot)
+    jne 0b
+    move.l (44+24, %sp), %a6                | Fetch mem pointer
+    movem.l %d1-%d7/%a0, (%a6)              | Save back mem[]
+    jra .exit
+
+    | d0 = y[i], d1-d7, a0-a2 = mem[0] .. mem[9]
+    | a3 = x, a4 = den, a5 = y, a6 = temp (holds two 16-bit den[] values at a time)
+.order_10:
+    movem.l (%a6), %d1-%d7/%a0-%a2          | Fetch mem[] array
+0:
+    moveq.l #13, %d0
+    add.l #4096, %d1
+    asr.l %d0, %d1                          | mem[0] >> 13 with rounding
+    move.w (%a3)+, %d0
+    ext.l %d0
+    add.l %d1, %d0                          | Add with x[i]
+    cmp.l #32767, %d0                       | Saturate to [-32767..32767] using signed compares
+    jle 1f
+    move.l #32767, %d0                      | Clip high
+1:
+    cmp.l #-32767, %d0
+    jge 2f
+    move.l #-32767, %d0                     | Clip low; -32768 is excluded, its 16-bit negation is itself
+2:
+    move.w %d0, (%a5)+                      | Write result to y[i] before it is negated
+    neg.l %d0                               | d0 = -y[i]; msac.w is bugged in gas, do this for now
+    move.l (%a4)+, %a6                      | Fetch den[0] and den[1]
+    mac.w %a6u, %d0l, %acc0
+    mac.w %a6l, %d0l, (%a4)+, %a6, %acc1    | ...and fetch den[2] and den[3]
+    mac.w %a6u, %d0l, %acc2
+    mac.w %a6l, %d0l, (%a4)+, %a6, %acc3    | ...and fetch den[4] and den[5]
+    movclr.l %acc0, %d1
+    add.l %d2, %d1                          | mem[0] = mem[1] - den[0]*y[i]
+    movclr.l %acc1, %d2
+    add.l %d3, %d2                          | mem[1] = mem[2] - den[1]*y[i]
+    movclr.l %acc2, %d3
+    add.l %d4, %d3                          | mem[2] = mem[3] - den[2]*y[i]
+    movclr.l %acc3, %d4
+    add.l %d5, %d4                          | mem[3] = mem[4] - den[3]*y[i]
+    mac.w %a6u, %d0l, %acc0
+    mac.w %a6l, %d0l, (%a4)+, %a6, %acc1    | ...and fetch den[6] and den[7]
+    mac.w %a6u, %d0l, %acc2
+    mac.w %a6l, %d0l, (%a4)+, %a6, %acc3    | ...and fetch den[8] and den[9]
+    lea.l (-20, %a4), %a4                   | wrap den pointer back to den[0]
+    movclr.l %acc0, %d5
+    add.l %d6, %d5                          | mem[4] = mem[5] - den[4]*y[i]
+    movclr.l %acc1, %d6
+    add.l %d7, %d6                          | mem[5] = mem[6] - den[5]*y[i]
+    movclr.l %acc2, %d7
+    add.l %a0, %d7                          | mem[6] = mem[7] - den[6]*y[i]
+    movclr.l %acc3, %a0
+    add.l %a1, %a0                          | mem[7] = mem[8] - den[7]*y[i]
+    mac.w %a6u, %d0l, %acc0
+    mac.w %a6l, %d0l, %acc1
+    movclr.l %acc0, %a1
+    add.l %a2, %a1                          | mem[8] = mem[9] - den[8]*y[i]
+    movclr.l %acc1, %a2                     | mem[9] = -den[9]*y[i]
+
+    subq.l #1, (44+16, %sp)                 | Have we done all samples? (counts down the N argument slot)
+    jne 0b
+    move.l (44+24, %sp), %a6                | Fetch mem pointer
+    movem.l %d1-%d7/%a0-%a2, (%a6)          | Save back mem[]
+
+.exit:
+    movem.l (%sp), %d2-%d7/%a2-%a6          | Restore callee-saved registers
+    lea.l (44, %sp), %sp
+    rts
+
diff --git a/apps/codecs/libspeex/nb_celp.c b/apps/codecs/libspeex/nb_celp.c
index 0798ffbcf4..5d167a2aa3 100644
--- a/apps/codecs/libspeex/nb_celp.c
+++ b/apps/codecs/libspeex/nb_celp.c
@@ -1108,6 +1108,9 @@ void *nb_decoder_init(const SpeexMode *m)
    st->isWideband = 0;
    st->highpass_enabled = 1;
 
+#ifdef CPU_COLDFIRE
+   coldfire_set_macsr(0); // Integer mode
+#endif
 #ifdef ENABLE_VALGRIND
    VALGRIND_MAKE_READABLE(st, NB_DEC_STACK);
 #endif