summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWilliam Wilgus <wilgus.william@gmail.com>2022-03-10 15:48:51 -0500
committerWilliam Wilgus <me.theuser@yahoo.com>2022-03-12 00:00:01 -0500
commit376ffbcf9aabae6f47d62ba4734ae1bb230ebce3 (patch)
treeb47cf383e7d5db2d0fd099ae008cb8402a7a8fb8
parenteecf8409896f90e9c8d49b7d0eea6b5799c90f07 (diff)
downloadrockbox-376ffbcf9a.tar.gz
rockbox-376ffbcf9a.zip
ARM support, optimize popcount fn
Change-Id: Iec02d0b5973721a3943b9c23ced3afc721cd3753
-rw-r--r--lib/arm_support/support-arm.S43
1 files changed, 24 insertions, 19 deletions
diff --git a/lib/arm_support/support-arm.S b/lib/arm_support/support-arm.S
index 442a629fca..f99d086b0b 100644
--- a/lib/arm_support/support-arm.S
+++ b/lib/arm_support/support-arm.S
@@ -705,6 +705,9 @@ __aeabi_idivmod:
/*
* int __popcountsi2(unsigned int x)
* int __popcountdi2(unsigned long x)
+ * x = x - ((x >> 1) & 0x55555555);
+ * x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
+ * c = (((x + (x >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
*/
.section .text.__popcountsi2, "ax", %progbits
.global __popcountsi2
@@ -712,23 +715,25 @@ __aeabi_idivmod:
.global __popcountdi2
.type __popcountdi2, %function
.set __popcountdi2, __popcountsi2
+
__popcountsi2:
- mov r1, #0x33 @ r1 = 0x33333333
- orr r1, r1, r1, lsl #8 @ ...
- orr r1, r1, r1, lsl #16 @ ...
- eor r2, r1, r1, lsl #1 @ r2 = 0x55555555
- and r2, r2, r0, lsr #1 @ r2 = (x >> 1) & 0x55555555
- sub r0, r0, r2 @ x = x - ((x >> 1) & 0x55555555)
- and r2, r1, r0 @ r2 = x & 0x33333333
- and r1, r1, r0, lsr #2 @ r1 = (x >> 2) & 0x33333333
- add r0, r2, r1 @ x = (x & 0x33333333) + ((x >> 2) & 0x33333333)
- mov r1, #0x0f @ r1 = 0x0f0f0f0f
- orr r1, r1, r1, lsl #8 @ ...
- orr r1, r1, r1, lsl #16 @ ...
- add r0, r0, lsr #4 @ x = x + (x >> 4)
- and r0, r0, r1 @ x = (x + (x >> 4)) & 0x0f0f0f0f
- add r0, r0, lsr #16 @ x = x + (x >> 16)
- add r0, r0, lsr #8 @ x = x + (x >> 8)
- and r0, r0, #0x3f @ x &= 0x3f
- bx lr @ return x
- .size __popcountsi2, .-__popcountsi2
+ ldr r2, .L2 @ r2 = 0x55555555 (mask: low bit of every 2-bit pair)
+ ldr r3, .L2+4 @ r3 = 0x33333333 (mask: low 2 bits of every nibble)
+ and r2, r2, r0, lsr #1 @ r2 = (x >> 1) & 0x55555555
+ rsb r2, r2, r0 @ r2 = x - ((x >> 1) & 0x55555555): each 2-bit pair now holds its own bit count
+ and r0, r2, r3 @ r0 = r2 & 0x33333333
+ and r3, r3, r2, lsr #2 @ r3 = (r2 >> 2) & 0x33333333
+ add r0, r0, r3 @ r0 = sum of adjacent pairs: each nibble now holds its own bit count
+ ldr r3, .L2+8 @ r3 = 0x0F0F0F0F (mask: low nibble of every byte)
+ add r0, r0, r0, lsr #4 @ r0 = r0 + (r0 >> 4)
+ and r3, r0, r3 @ r3 = (r0 + (r0 >> 4)) & 0x0F0F0F0F: each byte now holds its own bit count
+ add r3, r3, r3, asl #8 @ r3 = r3 + (r3 << 8), i.e. r3 * 0x101
+ add r3, r3, r3, asl #16 @ r3 = r3 + (r3 << 16); combined with the line above this is * 0x01010101
+ mov r0, r3, lsr #24 @ r0 = r3 >> 24: top byte is the sum of all four byte counts
+ bx lr @ return the bit count in r0
+.L2: @ mask constants read by the ldr instructions above
+ .word 0x55555555 @ .L2+0
+ .word 0x33333333 @ .L2+4
+ .word 0xF0F0F0F @ .L2+8
+ .size __popcountsi2, .-__popcountsi2
+