summary refs log tree commit diff stats
path: root/firmware
diff options
context:
space:
mode:
author Jörg Hohensohn <hohensoh@rockbox.org> 2004-03-03 07:18:26 +0000
committer Jörg Hohensohn <hohensoh@rockbox.org> 2004-03-03 07:18:26 +0000
commit 239a91c28cce4a120af21f7ea598217f54e17d0c (patch)
tree e89ef8a4beb9b6c7c214b9ee004fc98af3432f62 /firmware
parent 860586d992a1a434b3d40e594a755e5fb450f394 (diff)
download rockbox-239a91c28cce4a120af21f7ea598217f54e17d0c.tar.gz
rockbox-239a91c28cce4a120af21f7ea598217f54e17d0c.tar.bz2
rockbox-239a91c28cce4a120af21f7ea598217f54e17d0c.zip
14% faster bitswap, thanks Jens
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@4337 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'firmware')
-rw-r--r-- firmware/bitswap.S 81
1 files changed, 41 insertions, 40 deletions
diff --git a/firmware/bitswap.S b/firmware/bitswap.S
index da628a3b7f..990ecb4d00 100644
--- a/firmware/bitswap.S
+++ b/firmware/bitswap.S
@@ -18,7 +18,7 @@
****************************************************************************/
.section .icode,"ax",@progbits
- .align 4
+ .align 2
.global _bitswap
.type _bitswap,@function
@@ -26,68 +26,69 @@
*
* r0 Temporary (required by some instructions)
* r1 Low byte
- * r2 High byte
- * r3 Result after flip
- * r4 Data
+ * r2 High byte / final result
+ * r4 &Data
* r5 Length
- * r6 1
* r7 Flip table
*/
+/* The instruction order below is a bit strange, because:
+ * 1) Keeping load/stores on longword boundaries means the instruction fetch
+ * won't compete with the memory access (because instructions are fetched
+ * in pairs).
+ * 2) Using the result of a fetch in the next instruction causes a stall
+ * (except in certain circumstances).
+ * See the SH-1 programming manual for details.
+ */
+
_bitswap:
mov.l .fliptable,r7
- mov #1,r6
+ add #-2,r4 /* ptr is used shifted by 2 */
+ add r4,r5 /* r5 = end_address - 2 */
+ add #-1,r5 /* r5 = &last_byte - 2 */
mov r4,r0
- tst #1,r0 /* odd address? */
- bt .init /* no, address is even */
+ tst #1,r0 /* even address? */
+ bt .init /* yes */
- mov.b @r4,r0 /* swap first byte */
+ add #1,r4 /* r4 now even */
+ mov.b @(1,r4),r0 /* no, swap first byte */
extu.b r0,r0
mov.b @(r0,r7),r0
- mov.b r0,@r4
- add #1,r4
- add #-1,r5
- bra .init
+ mov.b r0,@(1,r4)
- /* The instruction order below is a bit strange, because:
- * 1) Keeping load/stores on longword boundaries means the instruction
- * fetch won't compete with the memory access (because instructions
- * are fetched in pairs).
- * 2) Using the result of a fetch in the next instruction causes a
- * stall (except in certain circumstances).
- * See the SH-1 programming manual for details.
- */
+.init:
+ cmp/hi r4,r5 /* at least 2 bytes to swap? */
+ bf .last /* no, skip main loop */
.loop:
- mov.w @r4,r1 /* data to flip */
- add #-2,r5
- swap.b r1,r2 /* get high byte */
+ mov.w @(2,r4),r0 /* data to flip */
+ add #2,r4 /* early increment */
+ swap.b r0,r2 /* get high byte */
+ extu.b r0,r0 /* prepare low byte */
+ mov.b @(r0,r7),r1 /* swap low byte */
extu.b r2,r0 /* prepare high byte */
mov.b @(r0,r7),r2 /* swap high byte */
- extu.b r1,r0 /* perpare low byte */
- mov.b @(r0,r7),r1 /* swap low byte */
- extu.b r2,r2 /* zero extend high byte */
- swap.b r2,r3 /* put high byte in result */
- extu.b r1,r0 /* zero extend low byte */
- or r0,r3 /* put low byte in result */
- mov.w r3,@r4 /* store result */
- add #2,r4
-.init:
- cmp/gt r6,r5 /* while [bytes remaining] > 1 */
- bt .loop /* (at least 2 bytes left) */
+ extu.b r1,r1 /* zero extend low byte */
+ shll8 r2 /* shift high byte, low byte zeroed */
+ or r1,r2 /* put low byte in result */
+ mov.w r2,@r4 /* store result, ptr already incr'd */
+ cmp/hi r4,r5 /* while &last_byte > data */
+ bt .loop
- cmp/eq r6,r5
- bf .exit /* if not 1 byte left, exit */
+.last:
+ cmp/eq r4,r5 /* if behind (&last_byte - 2), exit */
+ bf .exit
- mov.b @r4,r0 /* swap last byte */
+ mov.b @(2,r4),r0 /* swap last byte */
extu.b r0,r0
mov.b @(r0,r7),r0
- mov.b r0,@r4
+ mov.b r0,@(2,r4)
+
.exit:
rts
nop
- .align 4
+ .align 2
.fliptable:
.long _fliptable