summary | refs | log | tree | commit | diff | stats
path: root/apps
diff options
context:
space:
mode:
author: Jens Arnold <amiconn@rockbox.org> 2009-12-23 23:44:56 +0000
committer: Jens Arnold <amiconn@rockbox.org> 2009-12-23 23:44:56 +0000
commit: 9e45b27e08ccd554e1e7080605e505ca066b4ae9 (patch)
tree: 2fa40423d9e5eed3bcf193a616fabfe7610ba340 /apps
parent: 5509372f03e01235d4663d80bb769c52711688a9 (diff)
download: rockbox-9e45b27e08ccd554e1e7080605e505ca066b4ae9.tar.gz
rockbox-9e45b27e08ccd554e1e7080605e505ca066b4ae9.tar.bz2
rockbox-9e45b27e08ccd554e1e7080605e505ca066b4ae9.zip
Assembler-optimised bitreverse for ARM, again a little bit faster and smaller. * Separating 'x' and 'ret' gives gcc a bit more freedom regarding register allocation (except on ColdFire, where it doesn't matter).
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@24107 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps')
-rw-r--r-- apps/codecs/libtremor/codebook.c 88
1 files changed, 68 insertions, 20 deletions
diff --git a/apps/codecs/libtremor/codebook.c b/apps/codecs/libtremor/codebook.c
index a4fc9ee6c0..c6027480a7 100644
--- a/apps/codecs/libtremor/codebook.c
+++ b/apps/codecs/libtremor/codebook.c
@@ -142,29 +142,77 @@ int vorbis_staticbook_unpack(oggpack_buffer *opb,static_codebook *s){
static inline ogg_uint32_t bitreverse(register ogg_uint32_t x)
{
- unsigned int mask;
-#if defined(CPU_ARM) && ARM_ARCH >= 6
- asm ("rev %[x], %[x]" : [x] "+r" (x)); /* swap bytes */
+ unsigned tmp, ret;
+#ifdef CPU_ARM
+#if ARM_ARCH >= 6
+ unsigned mask = 0x0f0f0f0f;
#else
-#if defined(CPU_COLDFIRE)
- asm ("swap %[x]" : [x] "+r" (x)); /* swap halfwords */
-#else
- x = (x>>16) | (x<<16);
+ unsigned mask = 0x00ff00ff;
#endif
- mask = x&0x00ff00ff;
- x ^= mask;
- x = (x >> 8) | (mask << 8); /* bytes swapped */
+ asm (
+#if ARM_ARCH >= 6
+ "rev %[r], %[x] \n" /* swap halfwords and bytes */
+ "and %[t], %[m], %[r] \n" /* Sequence is one instruction */
+ "eor %[r], %[t], %[r] \n" /* longer than on <= ARMv5, but */
+ "mov %[t], %[t], lsl #4 \n" /* interlock free */
+ "orr %[r], %[t], %[r], lsr #4\n" /* nibbles swapped */
+ "eor %[m], %[m], %[m], lsl #2\n" /* mask = 0x33333333 */
+ "and %[t], %[m], %[r] \n"
+ "eor %[r], %[t], %[r] \n"
+ "mov %[t], %[t], lsl #2 \n"
+ "orr %[r], %[t], %[r], lsr #2\n" /* dibits swapped */
+ "eor %[m], %[m], %[m], lsl #1\n" /* mask = 0x55555555 */
+ "and %[t], %[m], %[r] \n"
+ "eor %[r], %[t], %[r] \n"
+ "mov %[t], %[t], lsl #1 \n"
+ "orr %[r], %[t], %[r], lsr #1\n" /* bits swapped */
+#else /* ARM_ARCH <= 5 */
+ "mov %[r], %[x], ror #16 \n" /* swap halfwords */
+ "and %[t], %[m], %[r], lsr #8\n"
+ "eor %[r], %[r], %[t], lsl #8\n"
+ "orr %[r], %[t], %[r], lsl #8\n" /* bytes swapped */
+ "eor %[m], %[m], %[m], lsl #4\n" /* mask = 0x0f0f0f0f */
+ "and %[t], %[m], %[r], lsr #4\n"
+ "eor %[r], %[r], %[t], lsl #4\n"
+ "orr %[r], %[t], %[r], lsl #4\n" /* nibbles swapped */
+ "eor %[m], %[m], %[m], lsl #2\n" /* mask = 0x33333333 */
+ "and %[t], %[m], %[r], lsr #2\n"
+ "eor %[r], %[r], %[t], lsl #2\n"
+ "orr %[r], %[t], %[r], lsl #2\n" /* dibits swapped */
+ "eor %[m], %[m], %[m], lsl #1\n" /* mask = 0x55555555 */
+ "and %[t], %[m], %[r], lsr #1\n"
+ "eor %[r], %[r], %[t], lsl #1\n"
+ "orr %[r], %[t], %[r], lsl #1\n" /* bits swapped */
+#endif /* ARM_ARCH */
+ : /* outputs */
+ [m]"+r"(mask),
+ [r]"=r"(ret),
+ [t]"=r"(tmp)
+ : /* inputs */
+ [x]"r"(x)
+ );
+#else /* !CPU_ARM */
+
+#ifdef CPU_COLDFIRE
+ ret = x;
+ asm ("swap %[r]" : [r] "+r" (ret)); /* swap halfwords */
+#else
+ ret = (x>>16) | (x<<16);
#endif
- mask = x&0x0f0f0f0f;
- x ^= mask;
- x = (x >> 4) | (mask << 4); /* 4-bit units swapped */
- mask = x&0x33333333;
- x ^= mask;
- x = (x >> 2) | (mask << 2); /* 2-bit units swapped */
- mask = x&0x55555555;
- x ^= mask;
- x = (x >> 1) | (mask << 1); /* done */
- return x;
+ tmp = ret & 0x00ff00ff;
+ ret ^= tmp;
+ ret = (ret >> 8) | (tmp << 8); /* bytes swapped */
+ tmp = ret & 0x0f0f0f0f;
+ ret ^= tmp;
+ ret = (ret >> 4) | (tmp << 4); /* 4-bit units swapped */
+ tmp = ret & 0x33333333;
+ ret ^= tmp;
+ ret = (ret >> 2) | (tmp << 2); /* 2-bit units swapped */
+ tmp = ret & 0x55555555;
+ ret ^= tmp;
+ ret = (ret >> 1) | (tmp << 1); /* done */
+#endif /* !CPU_ARM */
+ return ret;
}
STIN long decode_packed_entry_number(codebook *book,