summaryrefslogtreecommitdiffstats
path: root/apps
diff options
context:
space:
mode:
Diffstat (limited to 'apps')
-rw-r--r--apps/codecs/atrac3_rm.c2
-rw-r--r--apps/codecs/libatrac/SOURCES3
-rw-r--r--apps/codecs/libatrac/atrac3.c42
-rw-r--r--apps/codecs/libatrac/atrac3.h4
-rw-r--r--apps/codecs/libatrac/atrac3_armv5e.S163
5 files changed, 205 insertions, 9 deletions
diff --git a/apps/codecs/atrac3_rm.c b/apps/codecs/atrac3_rm.c
index 6c559ec868..bad9831a25 100644
--- a/apps/codecs/atrac3_rm.c
+++ b/apps/codecs/atrac3_rm.c
@@ -30,7 +30,7 @@ CODEC_HEADER
static RMContext rmctx;
static RMPacket pkt;
-static ATRAC3Context q IBSS_ATTR;
+static ATRAC3Context q IBSS_ATTR __attribute__ ((aligned (32)));
static void init_rm(RMContext *rmctx)
{
diff --git a/apps/codecs/libatrac/SOURCES b/apps/codecs/libatrac/SOURCES
index 3eaf4c9c3b..85f011cb87 100644
--- a/apps/codecs/libatrac/SOURCES
+++ b/apps/codecs/libatrac/SOURCES
@@ -1,5 +1,8 @@
atrac3.c
#if defined(CPU_ARM)
atrac3_arm.S
+#if (ARM_ARCH >= 5)
+atrac3_armv5e.S
+#endif
#endif
diff --git a/apps/codecs/libatrac/atrac3.c b/apps/codecs/libatrac/atrac3.c
index b93b058bb2..f6085fa2fa 100644
--- a/apps/codecs/libatrac/atrac3.c
+++ b/apps/codecs/libatrac/atrac3.c
@@ -55,7 +55,11 @@
#define FFSWAP(type,a,b) do{type SWAP_tmp= b; b= a; a= SWAP_tmp;}while(0)
static VLC spectral_coeff_tab[7];
+#if defined(CPU_ARM) && (ARM_ARCH >= 5) /*ARMv5e+ uses 32x16 multiplication*/
+static int16_t qmf_window[48] IBSS_ATTR __attribute__ ((aligned (32)));
+#else
static int32_t qmf_window[48] IBSS_ATTR;
+#endif
static int32_t atrac3_spectrum [2][1024] IBSS_ATTR __attribute__((aligned(16)));
static int32_t atrac3_IMDCT_buf[2][ 512] IBSS_ATTR __attribute__((aligned(16)));
static int32_t atrac3_prevFrame[2][1024] IBSS_ATTR;
@@ -118,12 +122,30 @@ static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM;
* }
*/
-#if defined(CPU_ARM)
+#if defined(CPU_ARM) && (ARM_ARCH >= 5)
extern void
- atrac3_iqmf_dewindowing(int32_t *out,
+ atrac3_iqmf_dewindowing_armv5e(int32_t *out,
int32_t *in,
- int32_t *win,
+ int16_t *win,
unsigned int nIn);
+ /* ARMv5E+ entry point: forwards to the assembly routine
+  * atrac3_iqmf_dewindowing_armv5e, which expects 16-bit window coefficients
+  * in win. out, in and nIn are passed through unchanged. */
+ static inline void
+ atrac3_iqmf_dewindowing(int32_t *out, int32_t *in,
+ int16_t *win, unsigned int nIn)
+ {
+ atrac3_iqmf_dewindowing_armv5e(out, in, win, nIn);
+ }
+
+
+#elif defined(CPU_ARM)
+ /* Pre-ARMv5 ARM builds: dewindowing is implemented outside this file
+  * (presumably atrac3_arm.S -- confirm) and takes the 32-bit qmf_window. */
+ extern void
+ atrac3_iqmf_dewindowing(int32_t *out, int32_t *in,
+ int32_t *win, unsigned int nIn);
+
#elif defined (CPU_COLDFIRE)
#define MULTIPLY_ADD_BLOCK \
"movem.l (%[win]), %%d0-%%d7 \n\t" \
@@ -206,7 +228,9 @@ static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM;
out[0] = s2;
out[1] = s1;
+
}
+
}
#endif
@@ -244,6 +268,7 @@ atrac3_imdct_windowing(int32_t *buffer,
static void iqmf (int32_t *inlo, int32_t *inhi, unsigned int nIn, int32_t *pOut, int32_t *delayBuf, int32_t *temp)
{
+
/* Restore the delay buffer */
memcpy(temp, delayBuf, 46*sizeof(int32_t));
@@ -274,6 +299,7 @@ static void IMLT(int32_t *pInput, int32_t *pOutput)
/* Windowing. */
atrac3_imdct_windowing(pOutput, window_lookup);
+
}
@@ -320,9 +346,13 @@ static void init_atrac3_transforms(void)
/* Generate the QMF window. */
for (i=0 ; i<24; i++) {
s = qmf_48tap_half_fix[i] << 1;
- qmf_window[i] = s;
- qmf_window[47 - i] = s;
+ #if defined(CPU_ARM) && (ARM_ARCH >= 5)
+ qmf_window[i] = qmf_window[47-i] = (int16_t)((s+(1<<15))>>16);
+ #else
+ qmf_window[i] = qmf_window[47-i] = s;
+ #endif
}
+
}
@@ -1229,7 +1259,7 @@ int atrac3_decode_init(ATRAC3Context *q, struct mp3entry *id3)
vlcs_initialized = 1;
}
-
+
init_atrac3_transforms();
/* init the joint-stereo decoding data */
diff --git a/apps/codecs/libatrac/atrac3.h b/apps/codecs/libatrac/atrac3.h
index 74dd992e1b..afe582ab72 100644
--- a/apps/codecs/libatrac/atrac3.h
+++ b/apps/codecs/libatrac/atrac3.h
@@ -67,6 +67,7 @@ typedef struct {
} channel_unit;
typedef struct {
+ int32_t outSamples[2048];
GetBitContext gb;
//@{
/** stream data */
@@ -90,8 +91,7 @@ typedef struct {
int weighting_delay[6];
//@}
//@{
- /** data buffers */
- int32_t outSamples[2048];
+ /** data buffers */
uint8_t decoded_bytes_buffer[1024];
int32_t tempBuf[1070];
//@}
diff --git a/apps/codecs/libatrac/atrac3_armv5e.S b/apps/codecs/libatrac/atrac3_armv5e.S
new file mode 100644
index 0000000000..1add5faef5
--- /dev/null
+++ b/apps/codecs/libatrac/atrac3_armv5e.S
@@ -0,0 +1,163 @@
+/***************************************************************************
+ * __________ __ ___.
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id:
+ *
+ * Copyright (C) 2010 by Michael Giacomelli
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "config.h"
+
+ .section .text, "ax", %progbits
+
+
+/****************************************************************************
+ * atrac3_iqmf_dewindowing_armv5e(int32_t *out,
+ * int32_t *in,
+ * int16_t *win,
+ * unsigned int nIn);
+ *
+ * Dewindowing step within iqmf of atrac3 synthesis using 16 bit filter
+ * coefficients and armv5e packed multiply instructions. Uses 2.5 cycles
+ * per filter coefficient (ideal). Benchmarked 3.54 per coefficient (Clip+).
+ *
+ * Reference implementation:
+ *
+ * for (j = nIn; j != 0; j--) {
+ * s1 = fixmul32(in[0], win[0]);
+ * s2 = fixmul32(in[1], win[1]);
+ * for (i = 2; i < 48; i += 2) {
+ * s1 += fixmul32(in[i ], win[i ]);
+ * s2 += fixmul32(in[i+1], win[i+1]);
+ * }
+ * out[0] = s2 << 1;
+ * out[1] = s1 << 1;
+ * in += 2;
+ * out += 2;
+ * }
+ * Note: r12 is a scratch register and can be used without saving and restoring it.
+ ****************************************************************************/
+ .align 2
+ .global atrac3_iqmf_dewindowing_armv5e
+ .type atrac3_iqmf_dewindowing_armv5e, %function
+
+atrac3_iqmf_dewindowing_armv5e:
+ /* r0 = out: destination, two words are stored and r0 advances by 8 bytes per pass */
+ /* r1 = in: 32 bit input samples, 48 are read per pass, net advance of 2 samples per pass */
+ /* r2 = win: 48 window coefficients of 16 bit each, two packed per word, rewound every pass */
+ /* r3 = nIn: pass counter; it is tested after the body, so the body runs at least once */
+ stmfd sp!, {r4-r11, lr} /* save non-scratch registers */
+
+.iqmf_dewindow_outer_loop: /* outer loop 0...counter-1 */
+ /* 0.. 7 */
+ ldmia r2!, {r4, r5, r8, r9} /* load win[0..7] */
+ ldmia r1!, {r6, r7, r10, r11} /* load in[0..3] to avoid stall on arm11 */
+ smulwb lr, r6, r4 /* s1 = in[0] * win[0] >> 16, win[0] is the bottom half of r4 */
+ smulwt r12, r7, r4 /* s2 = in[1] * win[1] >> 16, win[1] is the top half of r4 */
+ smlawb lr, r10, r5, lr /* s1 += in[i ] * win[i ] >> 16 */
+ smlawt r12, r11,r5, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
+
+ ldmia r1!, {r6, r7, r10, r11} /* load in[4..7] */
+ smlawb lr, r6, r8, lr /* s1 += in[i ] * win[i ] >> 16 */
+ smlawt r12, r7, r8, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
+ smlawb lr, r10, r9, lr /* s1 += in[i ] * win[i ] >> 16 */
+ smlawt r12, r11, r9, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
+
+ /* 8..15 */
+ ldmia r2!, {r4, r5, r8, r9} /* load win[8..15] */
+ ldmia r1!, {r6, r7, r10, r11} /* load in[8..11] */
+ smlawb lr, r6, r4, lr /* s1 += in[i ] * win[i ] >> 16 */
+ smlawt r12, r7, r4, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
+ smlawb lr, r10, r5, lr /* s1 += in[i ] * win[i ] >> 16 */
+ smlawt r12, r11,r5, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
+
+ ldmia r1!, {r6, r7, r10, r11} /* load in[12..15] */
+ smlawb lr, r6, r8, lr /* s1 += in[i ] * win[i ] >> 16 */
+ smlawt r12, r7, r8, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
+ smlawb lr, r10, r9, lr /* s1 += in[i ] * win[i ] >> 16 */
+ smlawt r12, r11,r9, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
+
+ /* 16..23 */
+ ldmia r2!, {r4, r5, r8, r9} /* load win[16..23] */
+ ldmia r1!, {r6, r7, r10, r11} /* load in[16..19] */
+ smlawb lr, r6, r4, lr /* s1 += in[i ] * win[i ] >> 16 */
+ smlawt r12, r7, r4, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
+ smlawb lr, r10, r5, lr /* s1 += in[i ] * win[i ] >> 16 */
+ smlawt r12, r11,r5, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
+
+ ldmia r1!, {r6, r7, r10, r11} /* load in[20..23] */
+ smlawb lr, r6, r8, lr /* s1 += in[i ] * win[i ] >> 16 */
+ smlawt r12, r7, r8, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
+ smlawb lr, r10, r9, lr /* s1 += in[i ] * win[i ] >> 16 */
+ smlawt r12, r11,r9, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
+
+ /* 24..31 */
+ ldmia r2!, {r4, r5, r8, r9} /* load win[24..31] */
+ ldmia r1!, {r6, r7, r10, r11} /* load in[24..27] */
+ smlawb lr, r6, r4, lr /* s1 += in[i ] * win[i ] >> 16 */
+ smlawt r12, r7, r4, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
+ smlawb lr, r10, r5, lr /* s1 += in[i ] * win[i ] >> 16 */
+ smlawt r12, r11,r5, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
+
+ ldmia r1!, {r6, r7, r10, r11} /* load in[28..31] */
+ smlawb lr, r6, r8, lr /* s1 += in[i ] * win[i ] >> 16 */
+ smlawt r12, r7, r8, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
+ smlawb lr, r10, r9, lr /* s1 += in[i ] * win[i ] >> 16 */
+ smlawt r12, r11,r9, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
+
+ /* 32..39 */
+ ldmia r2!, {r4, r5, r8, r9} /* load win[32..39] */
+ ldmia r1!, {r6, r7, r10, r11} /* load in[32..35] */
+ smlawb lr, r6, r4, lr /* s1 += in[i ] * win[i ] >> 16 */
+ smlawt r12, r7, r4, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
+ smlawb lr, r10, r5, lr /* s1 += in[i ] * win[i ] >> 16 */
+ smlawt r12, r11,r5, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
+
+ ldmia r1!, {r6, r7, r10, r11} /* load in[36..39] */
+ smlawb lr, r6, r8, lr /* s1 += in[i ] * win[i ] >> 16 */
+ smlawt r12, r7, r8, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
+ smlawb lr, r10, r9, lr /* s1 += in[i ] * win[i ] >> 16 */
+ smlawt r12, r11,r9, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
+
+ /* 40..47 */
+ ldmia r2!, {r4, r5, r8, r9} /* load win[40..47] */
+ ldmia r1!, {r6, r7, r10, r11} /* load in[40..43] */
+ smlawb lr, r6, r4, lr /* s1 += in[i ] * win[i ] >> 16 */
+ smlawt r12, r7, r4, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
+ smlawb lr, r10, r5, lr /* s1 += in[i ] * win[i ] >> 16 */
+ smlawt r12, r11,r5, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
+
+ ldmia r1!, {r6, r7, r10, r11} /* load in[44..47] */
+ smlawb lr, r6, r8, lr /* s1 += in[i ] * win[i ] >> 16 */
+ smlawt r12, r7, r8, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
+ smlawb lr, r10, r9, lr /* s1 += in[i ] * win[i ] >> 16 */
+ smlawt r12, r11,r9, r12 /* s2 += in[i+1] * win[i+1] >> 16 */
+
+
+ mov lr , lr , lsl #1 /* s1 <<= 1, as in the reference implementation */
+ mov r12, r12, lsl #1 /* s2 <<= 1, as in the reference implementation */
+
+ stmia r0!, {r12, lr} /* store result out[0]=s2, out[1]=s1 */
+ sub r1, r1, #184 /* 48 words (192 bytes) were read; roll back 46 entries = 184 bytes so in advances by 2 */
+ sub r2, r2, #96 /* roll back 48 entries * 2 bytes = 96 bytes = win[0] */
+
+ subs r3, r3, #1 /* outer loop -= 1 */
+ bgt .iqmf_dewindow_outer_loop /* repeat while the counter is still > 0 */
+
+ ldmpc regs=r4-r11 /* restore registers; NOTE(review): ldmpc is not defined in this file, presumably a macro that also loads pc to return -- confirm */
+
+.atrac3_iqmf_dewindowing_armv5e_end:
+ .size atrac3_iqmf_dewindowing_armv5e,.atrac3_iqmf_dewindowing_armv5e_end-atrac3_iqmf_dewindowing_armv5e