diff options
author | Andree Buschmann <AndreeBuschmann@t-online.de> | 2012-10-06 14:17:30 +0200 |
---|---|---|
committer | Nils Wallménius <nils@rockbox.org> | 2012-10-06 14:25:20 +0200 |
commit | dceec0909295b56c140b83cd6f8d019fddb2b689 (patch) | |
tree | bdd49c4367ec988caecd0fe17e5a54284dbb8cef /lib/rbcodec/codecs/libopus | |
parent | 78ca74a56f1b7535468e77e1af99ca7ea3097b6e (diff) | |
download | rockbox-dceec0909295b56c140b83cd6f8d019fddb2b689.tar.gz rockbox-dceec0909295b56c140b83cd6f8d019fddb2b689.zip |
opus: speed up comb_filter
Skip the expensive multiply-accumulate loop when the gains are 0 and
just copy using memcpy if source and destination are not the same.
Speeds up decoding of a 64kbps test file by 6MHz on h300 (cf),
7MHz on c200 (pp) and 6MHz on fuzev1 (amsv1).
Change-Id: Ibbc9ddfd45a9ac661467b1327b8c67761924fb8b
Signed-off-by: Nils Wallménius <nils@rockbox.org>
Diffstat (limited to 'lib/rbcodec/codecs/libopus')
-rw-r--r-- | lib/rbcodec/codecs/libopus/celt/celt.c | 90 |
1 files changed, 58 insertions, 32 deletions
diff --git a/lib/rbcodec/codecs/libopus/celt/celt.c b/lib/rbcodec/codecs/libopus/celt/celt.c index 74ebee91b4..a4e5131a04 100644 --- a/lib/rbcodec/codecs/libopus/celt/celt.c +++ b/lib/rbcodec/codecs/libopus/celt/celt.c @@ -497,43 +497,69 @@ static void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N, opus_val16 g0, opus_val16 g1, int tapset0, int tapset1, const opus_val16 *window, int overlap) { - int i; - /* printf ("%d %d %f %f\n", T0, T1, g0, g1); */ - opus_val16 g00, g01, g02, g10, g11, g12; - static const opus_val16 gains[3][3] = { + /* Multiply-adds are only needed if g0 or g1 are non-zero. In all other cases a simple + * copy of vector x to y is possible. */ + if (g0!=0 || g1!=0) + { + int i; + opus_val16 g00, g01, g02, g10, g11, g12, idx0, idx1; + static const opus_val16 gains[3][3] = { {QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)}, {QCONST16(0.4638671875f, 15), QCONST16(0.2680664062f, 15), QCONST16(0.f, 15)}, {QCONST16(0.7998046875f, 15), QCONST16(0.1000976562f, 15), QCONST16(0.f, 15)}}; - g00 = MULT16_16_Q15(g0, gains[tapset0][0]); - g01 = MULT16_16_Q15(g0, gains[tapset0][1]); - g02 = MULT16_16_Q15(g0, gains[tapset0][2]); - g10 = MULT16_16_Q15(g1, gains[tapset1][0]); - g11 = MULT16_16_Q15(g1, gains[tapset1][1]); - g12 = MULT16_16_Q15(g1, gains[tapset1][2]); - for (i=0;i<overlap;i++) + g00 = MULT16_16_Q15(g0, gains[tapset0][0]); + g01 = MULT16_16_Q15(g0, gains[tapset0][1]); + g02 = MULT16_16_Q15(g0, gains[tapset0][2]); + g10 = MULT16_16_Q15(g1, gains[tapset1][0]); + g11 = MULT16_16_Q15(g1, gains[tapset1][1]); + g12 = MULT16_16_Q15(g1, gains[tapset1][2]); + /* printf("g0 %d g1 %d\n", g0,g1); */ + idx0 = -T0; + idx1 = -T1; + for (i=0;i<overlap;i++,idx0++,idx1++) + { + opus_val16 f0, f1; + f1 = MULT16_16_Q15(window[i],window[i]); + f0 = Q15ONE - f1; + y[i] = x[i] + + MULT16_32_Q15(MULT16_16_Q15(f0,g02), x[idx0-2]) + + MULT16_32_Q15(MULT16_16_Q15(f0,g01), x[idx0-1]) + + 
MULT16_32_Q15(MULT16_16_Q15(f0,g00), x[idx0 ]) + + MULT16_32_Q15(MULT16_16_Q15(f0,g01), x[idx0+1]) + + MULT16_32_Q15(MULT16_16_Q15(f0,g02), x[idx0+2]) + + MULT16_32_Q15(MULT16_16_Q15(f1,g12), x[idx1-2]) + + MULT16_32_Q15(MULT16_16_Q15(f1,g11), x[idx1-1]) + + MULT16_32_Q15(MULT16_16_Q15(f1,g10), x[idx1 ]) + + MULT16_32_Q15(MULT16_16_Q15(f1,g11), x[idx1+1]) + + MULT16_32_Q15(MULT16_16_Q15(f1,g12), x[idx1+2]); + } + /* No multiply-add required if g1=0 as all multiplicants are =0. */ + if (g1!=0) + { + idx1 = overlap-T1; + for (i=overlap;i<N;i++,idx1++) + { + y[i] = x[i] + + MULT16_32_Q15(g12, x[idx1-2]) + + MULT16_32_Q15(g11, x[idx1-1]) + + MULT16_32_Q15(g10, x[idx1 ]) + + MULT16_32_Q15(g11, x[idx1+1]) + + MULT16_32_Q15(g12, x[idx1+2]); + } + } + /* Only perform vector copy if source and destination are not same. */ + else if (x != y) + { + /* Copy part of vector from x[overlap..N] to y[overlap..N] */ + OPUS_COPY(y+overlap, x+overlap, N-overlap); + } + } + /* Only perform vector copy if source and destination are not same. 
*/ + else if (x != y) { - opus_val16 f; - f = MULT16_16_Q15(window[i],window[i]); - y[i] = x[i] - + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g00),x[i-T0]) - + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g01),x[i-T0-1]) - + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g01),x[i-T0+1]) - + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g02),x[i-T0-2]) - + MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g02),x[i-T0+2]) - + MULT16_32_Q15(MULT16_16_Q15(f,g10),x[i-T1]) - + MULT16_32_Q15(MULT16_16_Q15(f,g11),x[i-T1-1]) - + MULT16_32_Q15(MULT16_16_Q15(f,g11),x[i-T1+1]) - + MULT16_32_Q15(MULT16_16_Q15(f,g12),x[i-T1-2]) - + MULT16_32_Q15(MULT16_16_Q15(f,g12),x[i-T1+2]); - + /* Copy full vector from x[0..N] to y[0..N] */ + OPUS_COPY(y, x, N); } - for (i=overlap;i<N;i++) - y[i] = x[i] - + MULT16_32_Q15(g10,x[i-T1]) - + MULT16_32_Q15(g11,x[i-T1-1]) - + MULT16_32_Q15(g11,x[i-T1+1]) - + MULT16_32_Q15(g12,x[i-T1-2]) - + MULT16_32_Q15(g12,x[i-T1+2]); } static const signed char tf_select_table[4][8] = { |