diff options
Diffstat (limited to 'apps')
-rw-r--r-- | apps/codecs/atrac3_rm.c | 2 | ||||
-rw-r--r-- | apps/codecs/libatrac/SOURCES | 3 | ||||
-rw-r--r-- | apps/codecs/libatrac/atrac3.c | 42 | ||||
-rw-r--r-- | apps/codecs/libatrac/atrac3.h | 4 | ||||
-rw-r--r-- | apps/codecs/libatrac/atrac3_armv5e.S | 163 |
5 files changed, 205 insertions, 9 deletions
diff --git a/apps/codecs/atrac3_rm.c b/apps/codecs/atrac3_rm.c index 6c559ec868..bad9831a25 100644 --- a/apps/codecs/atrac3_rm.c +++ b/apps/codecs/atrac3_rm.c @@ -30,7 +30,7 @@ CODEC_HEADER static RMContext rmctx; static RMPacket pkt; -static ATRAC3Context q IBSS_ATTR; +static ATRAC3Context q IBSS_ATTR __attribute__ ((aligned (32))); static void init_rm(RMContext *rmctx) { diff --git a/apps/codecs/libatrac/SOURCES b/apps/codecs/libatrac/SOURCES index 3eaf4c9c3b..85f011cb87 100644 --- a/apps/codecs/libatrac/SOURCES +++ b/apps/codecs/libatrac/SOURCES @@ -1,5 +1,8 @@ atrac3.c #if defined(CPU_ARM) atrac3_arm.S +#if (ARM_ARCH >= 5) +atrac3_armv5e.S +#endif #endif diff --git a/apps/codecs/libatrac/atrac3.c b/apps/codecs/libatrac/atrac3.c index b93b058bb2..f6085fa2fa 100644 --- a/apps/codecs/libatrac/atrac3.c +++ b/apps/codecs/libatrac/atrac3.c @@ -55,7 +55,11 @@ #define FFSWAP(type,a,b) do{type SWAP_tmp= b; b= a; a= SWAP_tmp;}while(0) static VLC spectral_coeff_tab[7]; +#if defined(CPU_ARM) && (ARM_ARCH >= 5) /*ARMv5e+ uses 32x16 multiplication*/ +static int16_t qmf_window[48] IBSS_ATTR __attribute__ ((aligned (32))); +#else static int32_t qmf_window[48] IBSS_ATTR; +#endif static int32_t atrac3_spectrum [2][1024] IBSS_ATTR __attribute__((aligned(16))); static int32_t atrac3_IMDCT_buf[2][ 512] IBSS_ATTR __attribute__((aligned(16))); static int32_t atrac3_prevFrame[2][1024] IBSS_ATTR; @@ -118,12 +122,30 @@ static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM; * } */ -#if defined(CPU_ARM) +#if defined(CPU_ARM) && (ARM_ARCH >= 5) extern void - atrac3_iqmf_dewindowing(int32_t *out, + atrac3_iqmf_dewindowing_armv5e(int32_t *out, int32_t *in, - int32_t *win, + int16_t *win, unsigned int nIn); + static inline void + atrac3_iqmf_dewindowing(int32_t *out, + int32_t *in, + int16_t *win, + unsigned int nIn) + { + atrac3_iqmf_dewindowing_armv5e(out, in, win, nIn); /* forward to the ARMv5E assembly routine (16-bit window) */ + + } + + +#elif defined(CPU_ARM) + extern void + atrac3_iqmf_dewindowing(int32_t *out, + int32_t *in, + int32_t 
*win, /* qmf_window stays int32_t on pre-ARMv5 builds */ + unsigned int nIn); + #elif defined (CPU_COLDFIRE) #define MULTIPLY_ADD_BLOCK \ "movem.l (%[win]), %%d0-%%d7 \n\t" \ @@ -206,7 +228,9 @@ static channel_unit channel_units[2] IBSS_ATTR_LARGE_IRAM; out[0] = s2; out[1] = s1; + } + } #endif @@ -244,6 +268,7 @@ atrac3_imdct_windowing(int32_t *buffer, static void iqmf (int32_t *inlo, int32_t *inhi, unsigned int nIn, int32_t *pOut, int32_t *delayBuf, int32_t *temp) { + /* Restore the delay buffer */ memcpy(temp, delayBuf, 46*sizeof(int32_t)); @@ -274,6 +299,7 @@ static void IMLT(int32_t *pInput, int32_t *pOutput) /* Windowing. */ atrac3_imdct_windowing(pOutput, window_lookup); + } @@ -320,9 +346,13 @@ static void init_atrac3_transforms(void) /* Generate the QMF window. */ for (i=0 ; i<24; i++) { s = qmf_48tap_half_fix[i] << 1; - qmf_window[i] = s; - qmf_window[47 - i] = s; + #if defined(CPU_ARM) && (ARM_ARCH >= 5) + qmf_window[i] = qmf_window[47-i] = (int16_t)((s+(1<<15))>>16); + #else + qmf_window[i] = qmf_window[47-i] = s; + #endif } + } @@ -1229,7 +1259,7 @@ int atrac3_decode_init(ATRAC3Context *q, struct mp3entry *id3) vlcs_initialized = 1; } - + init_atrac3_transforms(); /* init the joint-stereo decoding data */ diff --git a/apps/codecs/libatrac/atrac3.h b/apps/codecs/libatrac/atrac3.h index 74dd992e1b..afe582ab72 100644 --- a/apps/codecs/libatrac/atrac3.h +++ b/apps/codecs/libatrac/atrac3.h @@ -67,6 +67,7 @@ typedef struct { } channel_unit; typedef struct { + int32_t outSamples[2048]; GetBitContext gb; //@{ /** stream data */ @@ -90,8 +91,7 @@ typedef struct { int weighting_delay[6]; //@} //@{ - /** data buffers */ - int32_t outSamples[2048]; + /** data buffers */ uint8_t decoded_bytes_buffer[1024]; int32_t tempBuf[1070]; //@} diff --git a/apps/codecs/libatrac/atrac3_armv5e.S b/apps/codecs/libatrac/atrac3_armv5e.S new file mode 100644 index 0000000000..1add5faef5 --- /dev/null +++ b/apps/codecs/libatrac/atrac3_armv5e.S @@ -0,0 +1,163 @@ 
+/*************************************************************************** + * __________ __ ___. + * Open \______ \ ____ ____ | | _\_ |__ _______ ___ + * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ / + * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < < + * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \ + * \/ \/ \/ \/ \/ + * $Id: + * + * Copyright (C) 2010 by Michael Giacomelli + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ****************************************************************************/ + +#include "config.h" + + .section .text, "ax", %progbits + + +/**************************************************************************** + * atrac3_iqmf_dewindowing_armv5e(int32_t *out, + * int32_t *in, + * int16_t *win, + * unsigned int nIn); + * + * Dewindowing step within iqmf of atrac3 synthesis using 16 bit filter + * coefficients and armv5e packed multiply instructions. Uses 2.5 cycles + * per filter coefficient (ideal). Benchmarked 3.54 per coefficient (Clip+). + * + * Reference implementation: + * + * for (j = nIn; j != 0; j--) { + * s1 = fixmul32(in[0], win[0]); + * s2 = fixmul32(in[1], win[1]); + * for (i = 2; i < 48; i += 2) { + * s1 += fixmul32(in[i ], win[i ]); + * s2 += fixmul32(in[i+1], win[i+1]); + * } + * out[0] = s2 << 1; + * out[1] = s1 << 1; + * in += 2; + * out += 2; + * } + * Note: r12 is a scratch register and can be used without being restored. 
+ ****************************************************************************/ + .align 2 + .global atrac3_iqmf_dewindowing_armv5e + .type atrac3_iqmf_dewindowing_armv5e, %function + +atrac3_iqmf_dewindowing_armv5e: + /* r0 = dest */ + /* r1 = input samples */ + /* r2 = window coefficients (16 bit, two per word) */ + /* r3 = counter */ + stmfd sp!, {r4-r11, lr} /* save non-scratch registers */ + +.iqmf_dewindow_outer_loop: /* outer loop 0...counter-1 */ + /* 0.. 7 */ + ldmia r2!, {r4, r5, r8, r9} /* load win[0..7] */ + ldmia r1!, {r6, r7, r10, r11} /* load in[0..3] to avoid stall on arm11 */ + smulwb lr, r6, r4 /* s1 = in[0] * win[0] >> 16 */ + smulwt r12, r7, r4 /* s2 = in[1] * win[1] >> 16 */ + smlawb lr, r10, r5, lr /* s1 += in[i ] * win[i ] >> 16 */ + smlawt r12, r11,r5, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ + + ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */ + smlawb lr, r6, r8, lr /* s1 += in[i ] * win[i ] >> 16 */ + smlawt r12, r7, r8, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ + smlawb lr, r10, r9, lr /* s1 += in[i ] * win[i ] >> 16 */ + smlawt r12, r11, r9, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ + + /* 8..15 */ + ldmia r2!, {r4, r5, r8, r9} /* load win[8..15] */ + ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */ + smlawb lr, r6, r4, lr /* s1 += in[i ] * win[i ] >> 16 */ + smlawt r12, r7, r4, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ + smlawb lr, r10, r5, lr /* s1 += in[i ] * win[i ] >> 16 */ + smlawt r12, r11,r5, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ + + ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */ + smlawb lr, r6, r8, lr /* s1 += in[i ] * win[i ] >> 16 */ + smlawt r12, r7, r8, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ + smlawb lr, r10, r9, lr /* s1 += in[i ] * win[i ] >> 16 */ + smlawt r12, r11,r9, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ + + /* 16..23 */ + ldmia r2!, {r4, r5, r8, r9} /* load win[16..23] */ + ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */ + smlawb lr, r6, r4, lr /* s1 += in[i ] * win[i ] >> 16 */ + smlawt r12, r7, r4, r12 /* s2 += in[i+1] * 
win[i+1] >> 16 */ + smlawb lr, r10, r5, lr /* s1 += in[i ] * win[i ] >> 16 */ + smlawt r12, r11,r5, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ + + ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */ + smlawb lr, r6, r8, lr /* s1 += in[i ] * win[i ] >> 16 */ + smlawt r12, r7, r8, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ + smlawb lr, r10, r9, lr /* s1 += in[i ] * win[i ] >> 16 */ + smlawt r12, r11,r9, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ + + /* 24..31 */ + ldmia r2!, {r4, r5, r8, r9} /* load win[24..31] */ + ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */ + smlawb lr, r6, r4, lr /* s1 += in[i ] * win[i ] >> 16 */ + smlawt r12, r7, r4, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ + smlawb lr, r10, r5, lr /* s1 += in[i ] * win[i ] >> 16 */ + smlawt r12, r11,r5, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ + + ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */ + smlawb lr, r6, r8, lr /* s1 += in[i ] * win[i ] >> 16 */ + smlawt r12, r7, r8, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ + smlawb lr, r10, r9, lr /* s1 += in[i ] * win[i ] >> 16 */ + smlawt r12, r11,r9, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ + + /* 32..39 */ + ldmia r2!, {r4, r5, r8, r9} /* load win[32..39] */ + ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */ + smlawb lr, r6, r4, lr /* s1 += in[i ] * win[i ] >> 16 */ + smlawt r12, r7, r4, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ + smlawb lr, r10, r5, lr /* s1 += in[i ] * win[i ] >> 16 */ + smlawt r12, r11,r5, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ + + ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */ + smlawb lr, r6, r8, lr /* s1 += in[i ] * win[i ] >> 16 */ + smlawt r12, r7, r8, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ + smlawb lr, r10, r9, lr /* s1 += in[i ] * win[i ] >> 16 */ + smlawt r12, r11,r9, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ + + /* 40..47 */ + ldmia r2!, {r4, r5, r8, r9} /* load win[40..47] */ + ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */ + smlawb lr, r6, r4, lr /* s1 += in[i ] * win[i ] >> 16 */ + smlawt r12, r7, r4, r12 /* 
s2 += in[i+1] * win[i+1] >> 16 */ + smlawb lr, r10, r5, lr /* s1 += in[i ] * win[i ] >> 16 */ + smlawt r12, r11,r5, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ + + ldmia r1!, {r6, r7, r10, r11} /* load in[i...i+3] */ + smlawb lr, r6, r8, lr /* s1 += in[i ] * win[i ] >> 16 */ + smlawt r12, r7, r8, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ + smlawb lr, r10, r9, lr /* s1 += in[i ] * win[i ] >> 16 */ + smlawt r12, r11,r9, r12 /* s2 += in[i+1] * win[i+1] >> 16 */ + + + mov lr , lr , lsl #1 /* s1 <<= 1, as in the reference code */ + mov r12, r12, lsl #1 /* s2 <<= 1, as in the reference code */ + + stmia r0!, {r12, lr} /* store result out[0]=s2, out[1]=s1 */ + sub r1, r1, #184 /* 192 bytes were read; roll back 46 entries = 184 bytes, i.e. in += 2 */ + sub r2, r2, #96 /* roll back 48 entries * 2 bytes = 96 bytes = win[0] */ + + subs r3, r3, #1 /* outer loop -= 1 */ + bgt .iqmf_dewindow_outer_loop + + ldmpc regs=r4-r11 /* restore registers */ + +.atrac3_iqmf_dewindowing_armv5e_end: + .size atrac3_iqmf_dewindowing_armv5e,.atrac3_iqmf_dewindowing_armv5e_end-atrac3_iqmf_dewindowing_armv5e |