summaryrefslogtreecommitdiffstats
path: root/apps
diff options
context:
space:
mode:
author Michael Giacomelli <giac2000@hotmail.com> 2010-11-29 22:34:51 +0000
committer Michael Giacomelli <giac2000@hotmail.com> 2010-11-29 22:34:51 +0000
commit 9929512682a999c440606cc9e4d4074a294ca616 (patch)
tree 8cf3c79a1757f3c0173bcf2c140a7c4de3177493 /apps
parent 90d77fb77ac3ad1f8ec24837fe2e0d340b4b5ba9 (diff)
download rockbox-9929512682a999c440606cc9e4d4074a294ca616.tar.gz
rockbox-9929512682a999c440606cc9e4d4074a294ca616.tar.bz2
rockbox-9929512682a999c440606cc9e4d4074a294ca616.zip
ARM9-optimized synth_full for libmad. Speeds up MP3 decoding by an even 2 MHz on all ARM9 and later devices. Note that this is only optimized for ARM9 (non-E), although it is also faster on later devices; an ARM9E/ARM11 version will be needed for optimal performance on newer devices.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@28710 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps')
-rw-r--r-- apps/codecs/libmad/synth_full_arm.S 182
1 files changed, 94 insertions, 88 deletions
diff --git a/apps/codecs/libmad/synth_full_arm.S b/apps/codecs/libmad/synth_full_arm.S
index dec437f666..8d312de645 100644
--- a/apps/codecs/libmad/synth_full_arm.S
+++ b/apps/codecs/libmad/synth_full_arm.S
@@ -31,7 +31,12 @@
;; r1 = fo
;; r2 = fe
;; r3 = D0ptr
- ;; r4 = D1ptr
+ ;; r4 = D1ptr
+
+ /*;; r5 = loop counter
+ ;; r6,r7 accumulator1
+ ;; r8,r9 accumulator2 */
+
synth_full_odd_sbsample:
stmdb sp!, {r4-r11, lr}
ldr r4, [sp, #36]
@@ -40,88 +45,89 @@ synth_full_odd_sbsample:
mov r5, #15
add r2, r2, #32
.l:
+ /* ;; PROD_O and odd half of SB_SAMPLE*/
add r3, r3, #128
add r4, r4, #128
- ldmia r1!, {r10, r11, r12, lr}
ldr r7, [r3, #4]
- smull r6, r7, r10, r7
+ ldmia r1!, {r10, r11, r12, lr}
ldr r9, [r4, #120]
+ smull r6, r7, r10, r7
+ ldr sp, [r3, #60]
smull r8, r9, r10, r9
-
- ldr r10, [r3, #60]
- smlal r6, r7, r11, r10
ldr r10, [r3, #52]
+ smlal r6, r7, r11, sp
+ ldr sp, [r3, #44]
smlal r6, r7, r12, r10
- ldr r10, [r3, #44]
- smlal r6, r7, lr, r10
-
ldr r10, [r4, #64]
+ smlal r6, r7, lr, sp
+ ldr sp, [r4, #72]
smlal r8, r9, r11, r10
- ldr r10, [r4, #72]
- smlal r8, r9, r12, r10
ldr r10, [r4, #80]
+ smlal r8, r9, r12, sp
smlal r8, r9, lr, r10
-
+ ldr r10, [r3, #36]
+
ldmia r1!, {r11, r12, sp, lr}
- ldr r10, [r3, #36]
smlal r6, r7, r11, r10
+
+ ldr r10, [r4, #88] /*;;1 cycle stall on arm9, but we free up r11*/
+ smlal r8, r9, r11, r10
+
ldr r10, [r3, #28]
+ ldr r11, [r3, #20]
smlal r6, r7, r12, r10
- ldr r10, [r3, #20]
- smlal r6, r7, sp, r10
ldr r10, [r3, #12]
+ smlal r6, r7, sp, r11
+ ldr r11, [r4, #96]
smlal r6, r7, lr, r10
-
- ldr r10, [r4, #88]
- smlal r8, r9, r11, r10
- ldr r10, [r4, #96]
- smlal r8, r9, r12, r10
ldr r10, [r4, #104]
+ smlal r8, r9, r12, r11
+ ldr r11, [r4, #112]
smlal r8, r9, sp, r10
- ldr r10, [r4, #112]
- smlal r8, r9, lr, r10
+ smlal r8, r9, lr, r11
rsbs r6, r6, #0
rsc r7, r7, #0
-
- ldmia r2!, {r11, r12, sp, lr}
-
+
+ /* ;; PROD_A and even half of SB_SAMPLE*/
ldr r10, [r3, #0]
+ ldmia r2!, {r11, r12, sp, lr}
smlal r6, r7, r11, r10
- ldr r10, [r3, #56]
- smlal r6, r7, r12, r10
+
+ ldr r10, [r4, #60] /*;;1 cycle stall on arm9, but we free up r11*/
+ smlal r8, r9, r11, r10
+ ldr r11, [r3, #56]
ldr r10, [r3, #48]
+ smlal r6, r7, r12, r11
+ ldr r11, [r3, #40]
smlal r6, r7, sp, r10
- ldr r10, [r3, #40]
- smlal r6, r7, lr, r10
-
- ldr r10, [r4, #60]
- smlal r8, r9, r11, r10
ldr r10, [r4, #68]
+ smlal r6, r7, lr, r11
+ ldr r11, [r4, #76]
smlal r8, r9, r12, r10
- ldr r10, [r4, #76]
- smlal r8, r9, sp, r10
- ldr r10, [r4, #84]
+ ldr r10, [r4, #84]
+ smlal r8, r9, sp, r11
smlal r8, r9, lr, r10
-
- ldmia r2!, {r11, r12, sp, lr}
+
ldr r10, [r3, #32]
+ ldmia r2!, {r11, r12, sp, lr}
smlal r6, r7, r11, r10
+
+ ldr r10, [r4, #92] /*;;1 cycle stall on arm9, but we free up r11*/
+ smlal r8, r9, r11, r10
+
ldr r10, [r3, #24]
+ ldr r11, [r3, #16]
smlal r6, r7, r12, r10
- ldr r10, [r3, #16]
- smlal r6, r7, sp, r10
ldr r10, [r3, #8]
+ smlal r6, r7, sp, r11
+ ldr r11, [r4, #100]
smlal r6, r7, lr, r10
-
- ldr r10, [r4, #92]
- smlal r8, r9, r11, r10
- ldr r10, [r4, #100]
- smlal r8, r9, r12, r10
ldr r10, [r4, #108]
+ smlal r8, r9, r12, r11
+ ldr r11, [r4, #116]
smlal r8, r9, sp, r10
- ldr r10, [r4, #116]
- smlal r8, r9, lr, r10
+ smlal r8, r9, lr, r11
movs r6, r6, lsr #16
adc r6, r6, r7, lsl #16
@@ -146,88 +152,88 @@ synth_full_even_sbsample:
mov r5, #15
add r2, r2, #32
.l2:
+ /* ;; PROD_O and odd half of SB_SAMPLE*/
add r3, r3, #128
add r4, r4, #128
- ldmia r1!, {r10, r11, r12, lr}
ldr r7, [r3, #0]
- smull r6, r7, r10, r7
+ ldmia r1!, {r10, r11, r12, lr}
ldr r9, [r4, #60]
+ smull r6, r7, r10, r7
+ ldr sp, [r3, #56]
smull r8, r9, r10, r9
-
- ldr r10, [r3, #56]
- smlal r6, r7, r11, r10
ldr r10, [r3, #48]
+ smlal r6, r7, r11, sp
+ ldr sp, [r3, #40]
smlal r6, r7, r12, r10
- ldr r10, [r3, #40]
- smlal r6, r7, lr, r10
-
ldr r10, [r4, #68]
+ smlal r6, r7, lr, sp
+
+ ldr sp, [r4, #76]
smlal r8, r9, r11, r10
- ldr r10, [r4, #76]
- smlal r8, r9, r12, r10
ldr r10, [r4, #84]
+ smlal r8, r9, r12, sp
smlal r8, r9, lr, r10
-
- ldmia r1!, {r11, r12, sp, lr}
+
ldr r10, [r3, #32]
+ ldmia r1!, {r11, r12, sp, lr}
+
smlal r6, r7, r11, r10
+ ldr r10, [r4, #92]
+ smlal r8, r9, r11, r10
ldr r10, [r3, #24]
+ ldr r11, [r3, #16]
smlal r6, r7, r12, r10
- ldr r10, [r3, #16]
- smlal r6, r7, sp, r10
ldr r10, [r3, #8]
+ smlal r6, r7, sp, r11
+ ldr r11, [r4, #100]
smlal r6, r7, lr, r10
-
- ldr r10, [r4, #92]
- smlal r8, r9, r11, r10
- ldr r10, [r4, #100]
- smlal r8, r9, r12, r10
ldr r10, [r4, #108]
+ smlal r8, r9, r12, r11
+ ldr r11, [r4, #116]
smlal r8, r9, sp, r10
- ldr r10, [r4, #116]
- smlal r8, r9, lr, r10
+ smlal r8, r9, lr, r11
rsbs r6, r6, #0
rsc r7, r7, #0
- ldmia r2!, {r11, r12, sp, lr}
-
ldr r10, [r3, #4]
+ ldmia r2!, {r11, r12, sp, lr}
smlal r6, r7, r11, r10
+ ldr r10, [r4, #120] /*;;1 cycle stall on arm9, but we free up r11*/
+ smlal r8, r9, r11, r10
ldr r10, [r3, #60]
+ ldr r11, [r3, #52]
smlal r6, r7, r12, r10
- ldr r10, [r3, #52]
- smlal r6, r7, sp, r10
- ldr r10, [r3, #44]
+ ldr r10, [r3, #44]
+ smlal r6, r7, sp, r11
+ ldr r11, [r4, #64]
smlal r6, r7, lr, r10
- ldr r10, [r4, #120]
- smlal r8, r9, r11, r10
- ldr r10, [r4, #64]
- smlal r8, r9, r12, r10
ldr r10, [r4, #72]
+ smlal r8, r9, r12, r11
+ ldr r11, [r4, #80]
smlal r8, r9, sp, r10
- ldr r10, [r4, #80]
- smlal r8, r9, lr, r10
- ldmia r2!, {r11, r12, sp, lr}
+ smlal r8, r9, lr, r11
+
ldr r10, [r3, #36]
+ ldmia r2!, {r11, r12, sp, lr}
smlal r6, r7, r11, r10
+ ldr r10, [r4, #88] /*;;1 cycle stall on arm9, but we free up r11*/
+ smlal r8, r9, r11, r10
+
ldr r10, [r3, #28]
+ ldr r11, [r3, #20]
smlal r6, r7, r12, r10
- ldr r10, [r3, #20]
- smlal r6, r7, sp, r10
ldr r10, [r3, #12]
+ smlal r6, r7, sp, r11
+ ldr r11, [r4, #96]
smlal r6, r7, lr, r10
-
- ldr r10, [r4, #88]
- smlal r8, r9, r11, r10
- ldr r10, [r4, #96]
- smlal r8, r9, r12, r10
ldr r10, [r4, #104]
+ smlal r8, r9, r12, r11
+ ldr r11, [r4, #112]
smlal r8, r9, sp, r10
- ldr r10, [r4, #112]
- smlal r8, r9, lr, r10
+ smlal r8, r9, lr, r11
movs r6, r6, lsr #16
adc r6, r6, r7, lsl #16