summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Sevakis <jethead71@rockbox.org>2010-04-25 20:04:47 +0000
committerMichael Sevakis <jethead71@rockbox.org>2010-04-25 20:04:47 +0000
commit91bdc3ea90035b3dae19e2f6484ea886eef94433 (patch)
treee6113841a0e3bbd00e1ad46068f5e1237d4181e7
parentb9fa116703227ccbaca02d65d8f726f6e31ceebe (diff)
downloadrockbox-91bdc3ea90035b3dae19e2f6484ea886eef94433.tar.gz
rockbox-91bdc3ea90035b3dae19e2f6484ea886eef94433.zip
Optimized DSP sample out functions for armv6. (For stereo output) ~9% faster than SVN asm and about 4% faster than SVN asm rearranged to observe pipeline hazards.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@25717 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/SOURCES3
-rw-r--r--apps/dsp_arm.S5
-rw-r--r--apps/dsp_arm_v6.S127
3 files changed, 134 insertions, 1 deletions
diff --git a/apps/SOURCES b/apps/SOURCES
index 7bc263a153..66f2a7da51 100644
--- a/apps/SOURCES
+++ b/apps/SOURCES
@@ -158,6 +158,9 @@ dsp_cf.S
eq_cf.S
#elif defined(CPU_ARM)
dsp_arm.S
+#if ARM_ARCH >= 6
+dsp_arm_v6.S
+#endif
eq_arm.S
#endif
#endif
diff --git a/apps/dsp_arm.S b/apps/dsp_arm.S
index b90e632782..f924569bc5 100644
--- a/apps/dsp_arm.S
+++ b/apps/dsp_arm.S
@@ -18,6 +18,7 @@
* KIND, either express or implied.
*
****************************************************************************/
+ #include "config.h"
/****************************************************************************
* void channels_process_sound_chan_mono(int count, int32_t *buf[])
@@ -83,7 +84,8 @@ channels_process_sound_chan_karaoke:
ldmfd sp!, {r4-r5, pc}
.karaokeend:
.size channels_process_sound_chan_karaoke,.karaokeend-channels_process_sound_chan_karaoke
-
+
+#if ARM_ARCH < 6
/****************************************************************************
* void sample_output_mono(int count, struct dsp_data *data,
* const int32_t *src[], int16_t *dst)
@@ -195,6 +197,7 @@ sample_output_stereo:
ldmfd sp!, {r4-r10, pc}
.sosend:
.size sample_output_stereo,.sosend-sample_output_stereo
+#endif /* ARM_ARCH < 6 */
/****************************************************************************
* void apply_crossfeed(int count, int32_t* src[])
diff --git a/apps/dsp_arm_v6.S b/apps/dsp_arm_v6.S
new file mode 100644
index 0000000000..39949498ea
--- /dev/null
+++ b/apps/dsp_arm_v6.S
@@ -0,0 +1,127 @@
+/***************************************************************************
+ * __________ __ ___.
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id$
+ *
+ * Copyright (C) 2010 Michael Sevakis
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+/****************************************************************************
+ * void sample_output_mono(int count, struct dsp_data *data,
+ * const int32_t *src[], int16_t *dst)
+ */
+ .section .text, "ax", %progbits
+ .align 2
+ .global sample_output_mono
+ .type sample_output_mono, %function
+sample_output_mono:
+ @ input: r0 = count, r1 = data, r2 = src, r3 = dst
+ stmfd sp!, { r4, lr } @
+ @
+ ldr r1, [r1] @ r1 = data->output_scale
+ ldr r2, [r2] @ r2 = src[0]
+ @
+ mov r4, #1 @ r4 = 1 << (scale - 1)
+ mov r4, r4, lsl r1 @
+ subs r0, r0, #1 @ odd: end at 0; even: end at -1
+ mov r4, r4, lsr #1 @
+ beq 2f @ Zero? Only one sample!
+ @
+1: @
+ ldmia r2!, { r12, r14 } @ load Mi0, Mi1
+ qadd r12, r12, r4 @ round, scale, saturate and
+ qadd r14, r14, r4 @ pack Mi0 to So0, Mi1 to So1
+ mov r12, r12, asr r1 @
+ mov r14, r14, asr r1 @
+ ssat r12, #16, r12 @
+ ssat r14, #16, r14 @
+ pkhbt r12, r12, r12, asl #16 @
+ pkhbt r14, r14, r14, asl #16 @
+ subs r0, r0, #2 @
+ stmia r3!, { r12, r14 } @ store So0, So1
+ bgt 1b @
+ @
+ ldmltfd sp!, { r4, pc } @ if count was even, we're done
+ @
+2: @
+ ldr r12, [r2] @ round, scale, saturate
+ qadd r12, r12, r4 @ and pack Mi to So
+ mov r12, r12, asr r1 @
+ ssat r12, #16, r12 @
+ pkhbt r12, r12, r12, asl #16 @
+ str r12, [r3] @ store So
+ @
+ ldmfd sp!, { r4, pc } @
+ .size sample_output_mono, .-sample_output_mono
+
+/****************************************************************************
+ * void sample_output_stereo(int count, struct dsp_data *data,
+ * const int32_t *src[], int16_t *dst)
+ */
+ .section .text, "ax", %progbits
+ .align 2
+ .global sample_output_stereo
+ .type sample_output_stereo, %function
+sample_output_stereo:
+ @ input: r0 = count, r1 = data, r2 = src, r3 = dst
+ stmfd sp!, { r4-r7, lr } @
+ @
+ ldr r1, [r1] @ r1 = data->output_scale
+ ldmia r2, { r2, r4 } @ r2 = src[0], r4 = src[1]
+ @
+ mov r5, #1 @ r5 = 1 << (scale - 1)
+ mov r5, r5, lsl r1 @
+ subs r0, r0, #1 @ odd: end at 0; even: end at -1
+ mov r5, r5, lsr #1 @
+ beq 2f @ Zero? Only one sample!
+ @
+1: @
+ ldmia r2!, { r6, r7 } @ r6, r7 = Li0, Li1
+ ldmia r4!, { r12, r14 } @ r12, r14 = Ri0, Ri1
+ qadd r6, r6, r5 @ round, scale, saturate and pack
+ qadd r7, r7, r5 @ Li0+Ri0 to So0, Li1+Ri1 to So1
+ qadd r12, r12, r5 @
+ qadd r14, r14, r5 @
+ mov r6, r6, asr r1 @
+ mov r7, r7, asr r1 @
+ mov r12, r12, asr r1 @
+ mov r14, r14, asr r1 @
+ ssat r6, #16, r6 @
+ ssat r12, #16, r12 @
+ ssat r7, #16, r7 @
+ ssat r14, #16, r14 @
+ pkhbt r6, r6, r12, asl #16 @
+ pkhbt r7, r7, r14, asl #16 @
+ subs r0, r0, #2 @
+ stmia r3!, { r6, r7 } @ store So0, So1
+ bgt 1b @
+ @
+ ldmltfd sp!, { r4-r7, pc } @ if count was even, we're done
+ @
+2: @
+ ldr r6, [r2] @ r6 = Li
+ ldr r12, [r4] @ r12 = Ri
+ qadd r6, r6, r5 @ round, scale, saturate
+ qadd r12, r12, r5 @ and pack Li+Ri to So
+ mov r6, r6, asr r1 @
+ mov r12, r12, asr r1 @
+ ssat r6, #16, r6 @
+ ssat r12, #16, r12 @
+ pkhbt r6, r6, r12, asl #16 @
+ str r6, [r3] @ store So
+ @
+ ldmfd sp!, { r4-r7, pc } @
+ .size sample_output_stereo, .-sample_output_stereo