summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Dave Hooper <dave@beermex.com> 2009-08-29 11:50:15 +0000
committer Dave Hooper <dave@beermex.com> 2009-08-29 11:50:15 +0000
commit 59cdbf5efca64962fe6a6c85eb03b64552eae6d2 (patch)
tree 494580f9ee8ec0531abd71ead95fc1f2365d9d1e
parent eeb1594494729596bd083c298ebfd65ed29411c7 (diff)
download rockbox-59cdbf5efca64962fe6a6c85eb03b64552eae6d2.tar.gz
rockbox-59cdbf5efca64962fe6a6c85eb03b64552eae6d2.tar.bz2
rockbox-59cdbf5efca64962fe6a6c85eb03b64552eae6d2.zip
Rearrange some registers in butterfly_generic to combine some 2-word stores into 4-word stores and remove some redundant mov instructions. Shave off some additional instructions (stacking and additions) in butterfly_32 by getting butterfly_8 and butterfly_16 to do the address incrementing for us. Add a few comments.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@22525 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r-- apps/codecs/lib/mdct_arm.S 130
1 file changed, 69 insertions, 61 deletions
diff --git a/apps/codecs/lib/mdct_arm.S b/apps/codecs/lib/mdct_arm.S
index bacc049f6b..76139838a6 100644
--- a/apps/codecs/lib/mdct_arm.S
+++ b/apps/codecs/lib/mdct_arm.S
@@ -38,6 +38,9 @@
.global mdct_butterfly_generic_loop
mdct_butterfly_8:
+@ inputs: r0,r1,r2,r3,r4,r5,r6,r10,r11 &lr
+@ uses: r7,r8,r9,r12(scratch)
+@ modifies: r0,r1,r2,r3,r4,r5,r6,r10,r11. increments r0 by #8*4
add r9, r5, r1 @ x4 + x0
sub r5, r5, r1 @ x4 - x0
add r7, r6, r2 @ x5 + x1
@@ -55,11 +58,15 @@ mdct_butterfly_8:
sub r6, r12, r7 @ y5 = (x7 + x3) - (x5 + x1)
add r10, r8, r9 @ y6 = (x6 + x2) + (x4 + x0)
add r11, r12, r7 @ y7 = (x7 + x3) + (x5 + x1)
- stmia r0, {r1, r2, r3, r4, r5, r6, r10, r11}
+ stmia r0!, {r1, r2, r3, r4, r5, r6, r10, r11}
mov pc, lr
mdct_butterfly_16:
+@ inputs: r0,r1 &lr
+@ uses: r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12
+@ modifies: r0. increments r0 by #16*4
+@ calls mdct_butterfly_8 via bl so need to stack lr for return address
str lr, [sp, #-4]!
add r1, r0, #8*4
@@ -112,9 +119,13 @@ mdct_butterfly_16:
sub r0, r0, #4*4
ldmia r0, {r1, r2, r3, r4}
bl mdct_butterfly_8
- add r0, r0, #8*4
+
+ @ mdct_butterfly_8 will have incremented r0 by #8*4 already
ldmia r0, {r1, r2, r3, r4, r5, r6, r10, r11}
+
bl mdct_butterfly_8
+ @ mdct_butterfly_8 increments r0 by another #8*4 here
+ @ at end, r0 has been incremented by #16*4
ldr pc, [sp], #4
@@ -164,23 +175,23 @@ mdct_butterfly_32:
add r7, r7, r3 @ y21 = x21 + x5
rsb r3, r7, r3, asl #1 @ x5 - x21
add r8, r8, r4 @ y22 = x22 + x6
- sub r4, r8, r4, asl #1 @ x22 - x6
+ sub r11, r8, r4, asl #1 @ x22 - x6
add r9, r9, r5 @ y23 = x23 + x7
- rsb r5, r9, r5, asl #1 @ x7 - x23
-
+ rsb r10, r9, r5, asl #1 @ x7 - x23
stmia r1!, {r6, r7, r8, r9}
- smull r10, r6, lr, r2
+ @r4,r5,r6,r7,r8,r9 now free
+ @ we don't use r5, r8, r9 below
+
+ smull r4, r6, lr, r2
rsb r2, r2, #0
- smlal r10, r6, r12, r3
- smull r10, r7, lr, r3
- smlal r10, r7, r12, r2
+ smlal r4, r6, r12, r3
+ smull r4, r7, lr, r3
+ smlal r4, r7, r12, r2
mov r6, r6, asl #1
mov r7, r7, asl #1
- mov r8, r5
- mov r9, r4
- stmia r0!, {r6, r7, r8, r9}
+ stmia r0!, {r6, r7, r10, r11}
ldmia r0, {r2, r3, r4, r5}
ldmia r1, {r6, r7, r8, r9}
@@ -221,30 +232,29 @@ mdct_butterfly_32:
add r7, r7, r3 @ y29 = x29 + x13
sub r3, r7, r3, asl #1 @ x29 - x13
add r8, r8, r4 @ y30 = x30 + x14
- sub r4, r8, r4, asl #1 @ x30 - x14
+ sub r10, r8, r4, asl #1 @ x30 - x14
add r9, r9, r5 @ y31 = x31 + x15
- sub r5, r9, r5, asl #1 @ x31 - x15
-
+ sub r11, r9, r5, asl #1 @ x31 - x15
stmia r1, {r6, r7, r8, r9}
- smull r10, r7, r12, r3
+ @ r4,r5,r6,r7,r8,r9 now free
+ @ we don't use r5,r8,r9 below
+
+ smull r4, r7, r12, r3
rsb r3, r3, #0
- smlal r10, r7, lr, r2
- smull r10, r6, lr, r3
- smlal r10, r6, r12, r2
+ smlal r4, r7, lr, r2
+ smull r4, r6, lr, r3
+ smlal r4, r6, r12, r2
mov r6, r6, asl #1
mov r7, r7, asl #1
- mov r8, r4
- mov r9, r5
- stmia r0, {r6, r7, r8, r9}
+ stmia r0, {r6, r7, r10, r11}
sub r0, r0, #12*4
- str r0, [sp, #-4]!
bl mdct_butterfly_16
- ldr r0, [sp], #4
- add r0, r0, #16*4
+ @ we know mdct_butterfly_16 increments r0 by #16*4
+ @ and we wanted to advance by #16*4 anyway, so just call again
bl mdct_butterfly_16
ldmia sp!, {r4-r11, pc}
@@ -278,19 +288,18 @@ mdct_butterfly_generic_loop:
mov r8, r8, asl #1
mov r9, r9, asl #1
- stmdb r1!, {r8, r9}
add r2, r2, r3, asl #2
- ldmia r2, {r6, r7}
- smull r5, r8, r6, r11
+ ldmia r2, {r12, r14}
+ smull r5, r6, r12, r11
rsb r11, r11, #0
- smlal r5, r8, r7, r10
- smull r5, r9, r6, r10
- smlal r5, r9, r7, r11
+ smlal r5, r6, r14, r10
+ smull r5, r7, r12, r10
+ smlal r5, r7, r14, r11
- mov r8, r8, asl #1
- mov r9, r9, asl #1
- stmdb r1!, {r8, r9}
+ mov r6, r6, asl #1
+ mov r7, r7, asl #1
+ stmdb r1!, {r6, r7, r8, r9}
add r2, r2, r3, asl #2
cmp r2, r4
@@ -321,19 +330,19 @@ mdct_butterfly_generic_loop:
mov r8, r8, asl #1
mov r9, r9, asl #1
- stmdb r1!, {r8, r9}
+
sub r2, r2, r3, asl #2
- ldmia r2, {r6, r7}
- smull r5, r9, r6, r11
+ ldmia r2, {r12, r14}
+ smull r5, r7, r12, r11
rsb r11, r11, #0
- smlal r5, r9, r7, r10
- smull r5, r8, r6, r10
- smlal r5, r8, r7, r11
+ smlal r5, r7, r14, r10
+ smull r5, r6, r12, r10
+ smlal r5, r6, r14, r11
- mov r8, r8, asl #1
- mov r9, r9, asl #1
- stmdb r1!, {r8, r9}
+ mov r6, r6, asl #1
+ mov r7, r7, asl #1
+ stmdb r1!, {r6, r7, r8, r9}
sub r2, r2, r3, asl #2
cmp r2, r4
@@ -364,19 +373,19 @@ mdct_butterfly_generic_loop:
mov r8, r8, asl #1
mov r9, r9, asl #1
- stmdb r1!, {r8, r9}
+
add r2, r2, r3, asl #2
- ldmia r2, {r6, r7}
- smull r5, r8, r6, r10
+ ldmia r2, {r12, r14}
+ smull r5, r6, r12, r10
rsb r10, r10, #0
- smlal r5, r8, r7, r11
- smull r5, r9, r6, r11
- smlal r5, r9, r7, r10
+ smlal r5, r6, r14, r11
+ smull r5, r7, r12, r11
+ smlal r5, r7, r14, r10
- mov r8, r8, asl #1
- mov r9, r9, asl #1
- stmdb r1!, {r8, r9}
+ mov r6, r6, asl #1
+ mov r7, r7, asl #1
+ stmdb r1!, {r6, r7, r8, r9}
add r2, r2, r3, asl #2
cmp r2, r4
@@ -407,19 +416,18 @@ mdct_butterfly_generic_loop:
mov r8, r8, asl #1
mov r9, r9, asl #1
- stmdb r1!, {r8, r9}
sub r2, r2, r3, asl #2
- ldmia r2, {r6, r7}
- smull r5, r9, r6, r10
+ ldmia r2, {r12, r14}
+ smull r5, r7, r12, r10
rsb r10, r10, #0
- smlal r5, r9, r7, r11
- smull r5, r8, r6, r11
- smlal r5, r8, r7, r10
+ smlal r5, r7, r14, r11
+ smull r5, r6, r12, r11
+ smlal r5, r6, r14, r10
- mov r8, r8, asl #1
- mov r9, r9, asl #1
- stmdb r1!, {r8, r9}
+ mov r6, r6, asl #1
+ mov r7, r7, asl #1
+ stmdb r1!, {r6, r7, r8, r9}
sub r2, r2, r3, asl #2
cmp r2, r4