summaryrefslogtreecommitdiffstats
path: root/firmware/asm/arm
diff options
context:
space:
mode:
authorThomas Martitz <kugel@rockbox.org>2012-01-07 19:56:09 +0100
committerThomas Martitz <kugel@rockbox.org>2012-01-22 18:46:45 +0100
commita035261089403de259e74ce4dd196e2715138ed2 (patch)
tree440d75109a7ebfefef2a58076b38b1bfe9db8f63 /firmware/asm/arm
parent8e8e978de6b6283b66a6829fa8c5e3b94674ce7d (diff)
downloadrockbox-a035261089403de259e74ce4dd196e2715138ed2.tar.gz
rockbox-a035261089403de259e74ce4dd196e2715138ed2.tar.bz2
rockbox-a035261089403de259e74ce4dd196e2715138ed2.zip
Move optimized memcpy and friends and strlen to firmware/asm,
using the new automatic-asm-picking infrastructure.
Diffstat (limited to 'firmware/asm/arm')
-rw-r--r--firmware/asm/arm/memcpy.S176
-rw-r--r--firmware/asm/arm/memmove.S190
-rw-r--r--firmware/asm/arm/memset.S98
-rw-r--r--firmware/asm/arm/memset16.S82
4 files changed, 546 insertions, 0 deletions
diff --git a/firmware/asm/arm/memcpy.S b/firmware/asm/arm/memcpy.S
new file mode 100644
index 0000000000..2a55fb5656
--- /dev/null
+++ b/firmware/asm/arm/memcpy.S
@@ -0,0 +1,176 @@
+/***************************************************************************
+ * __________ __ ___.
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id$
+ *
+ * Copyright (C) 2006 Free Software Foundation, Inc.
+ * This file was originally part of the GNU C Library
+ * Contributed to glibc by MontaVista Software, Inc. (written by Nicolas Pitre)
+ * Adapted for Rockbox by Daniel Ankers
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "config.h"
+
+/*
+ * Endian independent macros for shifting bytes within registers.
+ */
+#ifndef __ARMEB__
+#define pull lsr
+#define push lsl
+#else
+#define pull lsl
+#define push lsr
+#endif
+
+/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
+
+ .section .icode,"ax",%progbits
+
+ .align 2
+ .global memcpy
+ .type memcpy,%function
+
+memcpy:
+ stmfd sp!, {r0, r4, lr}
+
+ subs r2, r2, #4
+ blt 8f
+ ands ip, r0, #3
+ bne 9f
+ ands ip, r1, #3
+ bne 10f
+
+1: subs r2, r2, #(28)
+ stmfd sp!, {r5 - r8}
+ blt 5f
+
+2:
+3:
+4: ldmia r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
+ subs r2, r2, #32
+ stmia r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
+ bge 3b
+
+5: ands ip, r2, #28
+ rsb ip, ip, #32
+ addne pc, pc, ip @ C is always clear here
+ b 7f
+6: nop
+ ldr r3, [r1], #4
+ ldr r4, [r1], #4
+ ldr r5, [r1], #4
+ ldr r6, [r1], #4
+ ldr r7, [r1], #4
+ ldr r8, [r1], #4
+ ldr lr, [r1], #4
+
+ add pc, pc, ip
+ nop
+ nop
+ str r3, [r0], #4
+ str r4, [r0], #4
+ str r5, [r0], #4
+ str r6, [r0], #4
+ str r7, [r0], #4
+ str r8, [r0], #4
+ str lr, [r0], #4
+
+7: ldmfd sp!, {r5 - r8}
+
+8: movs r2, r2, lsl #31
+ ldrneb r3, [r1], #1
+ ldrcsb r4, [r1], #1
+ ldrcsb ip, [r1]
+ strneb r3, [r0], #1
+ strcsb r4, [r0], #1
+ strcsb ip, [r0]
+
+ ldmpc regs="r0, r4"
+
+9: rsb ip, ip, #4
+ cmp ip, #2
+ ldrgtb r3, [r1], #1
+ ldrgeb r4, [r1], #1
+ ldrb lr, [r1], #1
+ strgtb r3, [r0], #1
+ strgeb r4, [r0], #1
+ subs r2, r2, ip
+ strb lr, [r0], #1
+ blt 8b
+ ands ip, r1, #3
+ beq 1b
+
+10: bic r1, r1, #3
+ cmp ip, #2
+ ldr lr, [r1], #4
+ beq 17f
+ bgt 18f
+
+
+ .macro forward_copy_shift pull push
+
+ subs r2, r2, #28
+ blt 14f
+
+11: stmfd sp!, {r5 - r9}
+
+12:
+13: ldmia r1!, {r4, r5, r6, r7}
+ mov r3, lr, pull #\pull
+ subs r2, r2, #32
+ ldmia r1!, {r8, r9, ip, lr}
+ orr r3, r3, r4, push #\push
+ mov r4, r4, pull #\pull
+ orr r4, r4, r5, push #\push
+ mov r5, r5, pull #\pull
+ orr r5, r5, r6, push #\push
+ mov r6, r6, pull #\pull
+ orr r6, r6, r7, push #\push
+ mov r7, r7, pull #\pull
+ orr r7, r7, r8, push #\push
+ mov r8, r8, pull #\pull
+ orr r8, r8, r9, push #\push
+ mov r9, r9, pull #\pull
+ orr r9, r9, ip, push #\push
+ mov ip, ip, pull #\pull
+ orr ip, ip, lr, push #\push
+ stmia r0!, {r3, r4, r5, r6, r7, r8, r9, ip}
+ bge 12b
+
+ ldmfd sp!, {r5 - r9}
+
+14: ands ip, r2, #28
+ beq 16f
+
+15: mov r3, lr, pull #\pull
+ ldr lr, [r1], #4
+ subs ip, ip, #4
+ orr r3, r3, lr, push #\push
+ str r3, [r0], #4
+ bgt 15b
+
+16: sub r1, r1, #(\push / 8)
+ b 8b
+
+ .endm
+
+
+ forward_copy_shift pull=8 push=24
+
+17: forward_copy_shift pull=16 push=16
+
+18: forward_copy_shift pull=24 push=8
+
diff --git a/firmware/asm/arm/memmove.S b/firmware/asm/arm/memmove.S
new file mode 100644
index 0000000000..d8cab048be
--- /dev/null
+++ b/firmware/asm/arm/memmove.S
@@ -0,0 +1,190 @@
+/***************************************************************************
+ * __________ __ ___.
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id$
+ *
+ * Copyright (C) 2006 Free Software Foundation, Inc.
+ * This file was originally part of the GNU C Library
+ * Contributed to glibc by MontaVista Software, Inc. (written by Nicolas Pitre)
+ * Adapted for Rockbox by Daniel Ankers
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "config.h"
+
+/*
+ * Endian independent macros for shifting bytes within registers.
+ */
+#ifndef __ARMEB__
+#define pull lsr
+#define push lsl
+#else
+#define pull lsl
+#define push lsr
+#endif
+
+ .text
+
+/*
+ * Prototype: void *memmove(void *dest, const void *src, size_t n);
+ *
+ * Note:
+ *
+ * If the memory regions don't overlap, we simply branch to memcpy which is
+ * normally a bit faster. Otherwise the copy is done going downwards.
+ */
+
+ .section .icode,"ax",%progbits
+
+ .align 2
+ .global memmove
+ .type memmove,%function
+
+memmove:
+
+ subs ip, r0, r1
+ cmphi r2, ip
+ bls memcpy
+
+ stmfd sp!, {r0, r4, lr}
+ add r1, r1, r2
+ add r0, r0, r2
+ subs r2, r2, #4
+ blt 8f
+ ands ip, r0, #3
+ bne 9f
+ ands ip, r1, #3
+ bne 10f
+
+1: subs r2, r2, #(28)
+ stmfd sp!, {r5 - r8}
+ blt 5f
+
+2:
+3:
+4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
+ subs r2, r2, #32
+ stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
+ bge 3b
+
+5: ands ip, r2, #28
+ rsb ip, ip, #32
+ addne pc, pc, ip @ C is always clear here
+ b 7f
+6: nop
+ ldr r3, [r1, #-4]!
+ ldr r4, [r1, #-4]!
+ ldr r5, [r1, #-4]!
+ ldr r6, [r1, #-4]!
+ ldr r7, [r1, #-4]!
+ ldr r8, [r1, #-4]!
+ ldr lr, [r1, #-4]!
+
+ add pc, pc, ip
+ nop
+ nop
+ str r3, [r0, #-4]!
+ str r4, [r0, #-4]!
+ str r5, [r0, #-4]!
+ str r6, [r0, #-4]!
+ str r7, [r0, #-4]!
+ str r8, [r0, #-4]!
+ str lr, [r0, #-4]!
+
+7: ldmfd sp!, {r5 - r8}
+
+8: movs r2, r2, lsl #31
+ ldrneb r3, [r1, #-1]!
+ ldrcsb r4, [r1, #-1]!
+ ldrcsb ip, [r1, #-1]
+ strneb r3, [r0, #-1]!
+ strcsb r4, [r0, #-1]!
+ strcsb ip, [r0, #-1]
+ ldmpc regs="r0, r4"
+
+9: cmp ip, #2
+ ldrgtb r3, [r1, #-1]!
+ ldrgeb r4, [r1, #-1]!
+ ldrb lr, [r1, #-1]!
+ strgtb r3, [r0, #-1]!
+ strgeb r4, [r0, #-1]!
+ subs r2, r2, ip
+ strb lr, [r0, #-1]!
+ blt 8b
+ ands ip, r1, #3
+ beq 1b
+
+10: bic r1, r1, #3
+ cmp ip, #2
+ ldr r3, [r1, #0]
+ beq 17f
+ blt 18f
+
+
+ .macro backward_copy_shift push pull
+
+ subs r2, r2, #28
+ blt 14f
+
+11: stmfd sp!, {r5 - r9}
+
+12:
+13: ldmdb r1!, {r7, r8, r9, ip}
+ mov lr, r3, push #\push
+ subs r2, r2, #32
+ ldmdb r1!, {r3, r4, r5, r6}
+ orr lr, lr, ip, pull #\pull
+ mov ip, ip, push #\push
+ orr ip, ip, r9, pull #\pull
+ mov r9, r9, push #\push
+ orr r9, r9, r8, pull #\pull
+ mov r8, r8, push #\push
+ orr r8, r8, r7, pull #\pull
+ mov r7, r7, push #\push
+ orr r7, r7, r6, pull #\pull
+ mov r6, r6, push #\push
+ orr r6, r6, r5, pull #\pull
+ mov r5, r5, push #\push
+ orr r5, r5, r4, pull #\pull
+ mov r4, r4, push #\push
+ orr r4, r4, r3, pull #\pull
+ stmdb r0!, {r4 - r9, ip, lr}
+ bge 12b
+
+ ldmfd sp!, {r5 - r9}
+
+14: ands ip, r2, #28
+ beq 16f
+
+15: mov lr, r3, push #\push
+ ldr r3, [r1, #-4]!
+ subs ip, ip, #4
+ orr lr, lr, r3, pull #\pull
+ str lr, [r0, #-4]!
+ bgt 15b
+
+16: add r1, r1, #(\pull / 8)
+ b 8b
+
+ .endm
+
+
+ backward_copy_shift push=8 pull=24
+
+17: backward_copy_shift push=16 pull=16
+
+18: backward_copy_shift push=24 pull=8
+
+
diff --git a/firmware/asm/arm/memset.S b/firmware/asm/arm/memset.S
new file mode 100644
index 0000000000..682da874ce
--- /dev/null
+++ b/firmware/asm/arm/memset.S
@@ -0,0 +1,98 @@
+/***************************************************************************
+ * __________ __ ___.
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id$
+ *
+ * Copyright (C) 2006 by Thom Johansen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#include "config.h"
+
+ .section .icode,"ax",%progbits
+
+ .align 2
+
+/* The following code is based on code found in Linux kernel version 2.6.15.3
+ * linux/arch/arm/lib/memset.S
+ *
+ * Copyright (C) 1995-2000 Russell King
+ */
+
+/* This code will align a pointer for memset, if needed */
+1: cmp r2, #4 @ 1 do we have enough
+ blt 5f @ 1 bytes to align with?
+ cmp r3, #2 @ 1
+ strgtb r1, [r0, #-1]! @ 1
+ strgeb r1, [r0, #-1]! @ 1
+ strb r1, [r0, #-1]! @ 1
+ sub r2, r2, r3 @ 1 r2 = r2 - r3
+ b 2f
+
+ .global memset
+ .type memset,%function
+memset:
+ add r0, r0, r2 @ we'll write backwards in memory
+ ands r3, r0, #3 @ 1 unaligned?
+ bne 1b @ 1
+2:
+/*
+ * we know that the pointer in r0 is aligned to a word boundary.
+ */
+ orr r1, r1, r1, lsl #8
+ orr r1, r1, r1, lsl #16
+ mov r3, r1
+ cmp r2, #16
+ blt 5f
+/*
+ * We need an extra register for this loop - save the return address and
+ * use the LR
+ */
+ str lr, [sp, #-4]!
+ mov ip, r1
+ mov lr, r1
+
+3: subs r2, r2, #64
+ stmgedb r0!, {r1, r3, ip, lr} @ 64 bytes at a time.
+ stmgedb r0!, {r1, r3, ip, lr}
+ stmgedb r0!, {r1, r3, ip, lr}
+ stmgedb r0!, {r1, r3, ip, lr}
+ bgt 3b
+ ldrpc cond=eq @ Now <64 bytes to go.
+/*
+ * No need to correct the count; we're only testing bits from now on
+ */
+ tst r2, #32
+ stmnedb r0!, {r1, r3, ip, lr}
+ stmnedb r0!, {r1, r3, ip, lr}
+ tst r2, #16
+ stmnedb r0!, {r1, r3, ip, lr}
+ ldr lr, [sp], #4
+
+5: tst r2, #8
+ stmnedb r0!, {r1, r3}
+ tst r2, #4
+ strne r1, [r0, #-4]!
+/*
+ * When we get here, we've got less than 4 bytes to zero. We
+ * may have an unaligned pointer as well.
+ */
+6: tst r2, #2
+ strneb r1, [r0, #-1]!
+ strneb r1, [r0, #-1]!
+ tst r2, #1
+ strneb r1, [r0, #-1]!
+ bx lr
+.end:
+ .size memset,.end-memset
diff --git a/firmware/asm/arm/memset16.S b/firmware/asm/arm/memset16.S
new file mode 100644
index 0000000000..5c787b1bed
--- /dev/null
+++ b/firmware/asm/arm/memset16.S
@@ -0,0 +1,82 @@
+/***************************************************************************
+ * __________ __ ___.
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id$
+ *
+ * Copyright (C) 2006 by Thom Johansen
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+#include "config.h"
+
+ .section .icode,"ax",%progbits
+
+ .align 2
+
+/* The following code is based on code from the Linux kernel version 2.6.15.3,
+ * linux/arch/arm/lib/memset.S
+ *
+ * Copyright (C) 1995-2000 Russell King
+ */
+
+ .global memset16
+ .type memset16,%function
+memset16:
+ tst r0, #2 @ unaligned?
+ cmpne r2, #0
+ strneh r1, [r0], #2 @ store one halfword to align
+ subne r2, r2, #1
+
+/*
+ * we know that the pointer in r0 is aligned to a word boundary.
+ */
+ orr r1, r1, r1, lsl #16
+ mov r3, r1
+ cmp r2, #8
+ blt 4f
+/*
+ * We need an extra register for this loop - save the return address and
+ * use the LR
+ */
+ str lr, [sp, #-4]!
+ mov ip, r1
+ mov lr, r1
+
+2: subs r2, r2, #32
+ stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time.
+ stmgeia r0!, {r1, r3, ip, lr}
+ stmgeia r0!, {r1, r3, ip, lr}
+ stmgeia r0!, {r1, r3, ip, lr}
+ bgt 2b
+ ldrpc cond=eq @ Now <64 bytes to go.
+/*
+ * No need to correct the count; we're only testing bits from now on
+ */
+ tst r2, #16
+ stmneia r0!, {r1, r3, ip, lr}
+ stmneia r0!, {r1, r3, ip, lr}
+ tst r2, #8
+ stmneia r0!, {r1, r3, ip, lr}
+ ldr lr, [sp], #4
+
+4: tst r2, #4
+ stmneia r0!, {r1, r3}
+ tst r2, #2
+ strne r1, [r0], #4
+
+ tst r2, #1
+ strneh r1, [r0], #2
+ bx lr
+.end:
+ .size memset16,.end-memset16