summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndree Buschmann <AndreeBuschmann@t-online.de>2010-07-28 19:36:15 +0000
committerAndree Buschmann <AndreeBuschmann@t-online.de>2010-07-28 19:36:15 +0000
commit3bb8020f787514cd853d17e6d5ee9df29b156e28 (patch)
tree86b3068c75f59d207eb46eff330bd44678af3d4f
parent2fefcdf31ccd4d43f578f4954be7821b99a7958a (diff)
downloadrockbox-3bb8020f787514cd853d17e6d5ee9df29b156e28.tar.gz
rockbox-3bb8020f787514cd853d17e6d5ee9df29b156e28.zip
Add ARM assembler to libwmapro vector_fixmul_scalar(). Speeds up decoding by 1% on PP5022.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@27603 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/codecs/libwmapro/wmapro_math.h66
1 files changed, 48 insertions, 18 deletions
diff --git a/apps/codecs/libwmapro/wmapro_math.h b/apps/codecs/libwmapro/wmapro_math.h
index 6f8d6dbe0f..c94fa80271 100644
--- a/apps/codecs/libwmapro/wmapro_math.h
+++ b/apps/codecs/libwmapro/wmapro_math.h
@@ -180,7 +180,7 @@
}
#endif /* CPU_COLDFIRE, CPU_ARM */
-#ifdef CPU_COLDFIRE
+#if defined(CPU_COLDFIRE)
static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0,
const int32_t *src1, const int32_t *win,
int len)
@@ -190,23 +190,23 @@ static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0,
win += len;
src0+= len;
for(i=-len, j=len-1; i<0; i++, j--) {
- int32_t s0 = src0[i];
- int32_t s1 = src1[j];
- int32_t wi = -win[i];
- int32_t wj = -win[j];
-
- asm volatile ("mac.l %[s0], %[wj], %%acc0\n\t"
- "msac.l %[s1], %[wi], %%acc0\n\t"
- "mac.l %[s0], %[wi], %%acc1\n\t"
- "mac.l %[s1], %[wj], %%acc1\n\t"
- "movclr.l %%acc0, %[s0]\n\t"
- "move.l %[s0], (%[dst_i])\n\t"
- "movclr.l %%acc1, %[s0]\n\t"
- "move.l %[s0], (%[dst_j])\n\t"
- : [s0] "+r" (s0) /* this register is clobbered so specify it as an input */
- : [dst_i] "a" (&dst[i]), [dst_j] "a" (&dst[j]),
- [s1] "r" (s1), [wi] "r" (wi), [wj] "r" (wj)
- : "cc", "memory");
+ int32_t s0 = src0[i];
+ int32_t s1 = src1[j];
+ int32_t wi = -win[i];
+ int32_t wj = -win[j];
+ asm volatile (
+ "mac.l %[s0], %[wj], %%acc0\n\t"
+ "msac.l %[s1], %[wi], %%acc0\n\t"
+ "mac.l %[s0], %[wi], %%acc1\n\t"
+ "mac.l %[s1], %[wj], %%acc1\n\t"
+ "movclr.l %%acc0, %[s0]\n\t"
+ "move.l %[s0], (%[dst_i])\n\t"
+ "movclr.l %%acc1, %[s0]\n\t"
+ "move.l %[s0], (%[dst_j])\n\t"
+ : [s0] "+r" (s0) /* this register is clobbered so specify it as an input */
+ : [dst_i] "a" (&dst[i]), [dst_j] "a" (&dst[j]),
+ [s1] "r" (s1), [wi] "r" (wi), [wj] "r" (wj)
+ : "cc", "memory");
}
}
#else
@@ -229,6 +229,35 @@ static inline void vector_fixmul_window(int32_t *dst, const int32_t *src0,
}
#endif
+#if defined(CPU_ARM)
+static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src,
+ int32_t mul, int len)
+{
+ /* ARM assembler variant: multiplies each element of src by the scalar
+ * mul and stores bits 24..55 of the signed 64-bit product to dst, four
+ * elements per loop iteration.
+ *
+ * dst - output vector; advanced by the assembler as results are stored
+ * src - input vector; advanced by the assembler as samples are loaded
+ * mul - scalar factor applied to every element
+ * len - number of elements to process
+ *
+ * len is _always_ a multiple of 4, because len is the difference of sfb's
+ * which themselves are always a multiple of 4.
+ * Every iteration loads and stores a full group of four words, so a len
+ * that is not a multiple of 4 would read and write beyond len elements.
+ *
+ * NOTE(review): presumably bit-identical to the fixmul24() based variant
+ * in the #else branch below -- confirm against the fixmul24() definition. */
+ int i;
+ for (i=0; i<len; i+=4) {
+ asm volatile (
+ "ldmia %[src]!, {r1-r4} \n\t" /* load 4 words into r1-r4, src += 4 words */
+ "smull r0, r5, r1, %[mul] \n\t" /* r5:r0 = 64-bit signed r1 * mul */
+ "mov r0, r0, lsr #24 \n\t" /* keep top 8 bits of the low word */
+ "orr r0, r0, r5, lsl #8 \n\t" /* r0 = low 32 bits of (product >> 24) */
+ "smull r1, r5, r2, %[mul] \n\t" /* r1 was consumed above, reuse it for result 2 */
+ "mov r1, r1, lsr #24 \n\t"
+ "orr r1, r1, r5, lsl #8 \n\t"
+ "smull r2, r5, r3, %[mul] \n\t" /* same pattern for element 3 */
+ "mov r2, r2, lsr #24 \n\t"
+ "orr r2, r2, r5, lsl #8 \n\t"
+ "smull r3, r5, r4, %[mul] \n\t" /* same pattern for element 4 */
+ "mov r3, r3, lsr #24 \n\t"
+ "orr r3, r3, r5, lsl #8 \n\t"
+ "stmia %[dst]!, {r0-r3} \n" /* store 4 results, dst += 4 words */
+ : [dst]"+r"(dst), [src]"+r"(src) /* read-write: both pointers are advanced by the writeback (!) */
+ : [mul]"r"(mul)
+ : "r0", "r1", "r2", "r3", "r4", "r5", "memory"); /* scratch registers used above; memory is written through dst */
+ }
+}
+#else
static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src,
int32_t mul, int len)
{
@@ -242,6 +271,7 @@ static inline void vector_fixmul_scalar(int32_t *dst, const int32_t *src,
dst[i+3] = fixmul24(src[i+3], mul);
}
}
+#endif /* CPU_ARM */
static inline int av_clip(int a, int amin, int amax)
{