summaryrefslogtreecommitdiffstats
path: root/firmware/common/memset16_a.S
blob: 9ab1bdcb5b8c12ca693f8a2be9fe5839e51f1886 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 * $Id$
 *
 * Copyright (C) 2006 by Jens Arnold
 *
 * All files in this archive are subject to the GNU General Public License.
 * See the file COPYING in the source tree root for full license agreement.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/
#include "config.h"

    .section    .icode,"ax",@progbits

#ifdef CPU_COLDFIRE
    .global     memset16
    .type       memset16,@function

/* Fills a memory region with specified word value
 * Start address must be word aligned, length is in words
 * This version is optimized for speed
 *
 * arguments:
 *  (4,%sp)  - start address
 *  (8,%sp)  - data
 *  (12,%sp) - length
 *
 * return value:
 *  %d0 - start address
 *
 * register usage:
 *  %d0 - data (spread to both words when using long stores)
 *  %d1 - temporary / data (for burst transfer)
 *  %d2 - data (for burst transfer)
 *  %d3 - data (for burst transfer)
 *  %a0 - start address
 *  %a1 - current address (runs down from end to start)
 *
 * For maximum speed this routine uses both long stores and burst mode,
 * storing whole lines with movem.l. The routine fills memory from end
 * to start in order to ease returning the start address.
 */
memset16:
    move.l  (4,%sp),%a0     /* start address */
    move.l  (8,%sp),%d0     /* data */
    move.l  (12,%sp),%a1    /* length */
    add.l   %a1,%a1
    add.l   %a0,%a1         /* %a1 = end address */

    move.l  %a0,%d1
    addq.l  #6,%d1
    and.l   #0xFFFFFFFC,%d1 /* %d1 = first long bound + 4 */
    cmp.l   %d1,%a1         /* at least one aligned longword to fill? */
    blo.b   .no_longs       /* no, jump directly to word loop */

    and.l   #0xFFFF,%d0     /* start: spread data to both words */
    move.l  %d0,%d1
    swap    %d1
    or.l    %d1,%d0         /* data now in both words */

    move.l  %a1,%d1
    and.l   #0xFFFFFFFC,%d1 /* %d1 = last long bound */
    cmp.l   %d1,%a1         /* one extra word? */
    bls.b   .end_w1         /* no: skip */

    move.w  %d0,-(%a1)      /* set leading word */

.end_w1:
    moveq.l #30,%d1
    add.l   %a0,%d1
    and.l   #0xFFFFFFF0,%d1 /* %d1 = first line bound + 16 */
    cmp.l   %d1,%a1         /* at least one full line to fill? */
    blo.b   .no_lines       /* no, jump to longword loop */

    mov.l   %a1,%d1
    and.l   #0xFFFFFFF0,%d1 /* %d1 = last line bound */
    cmp.l   %d1,%a1         /* any longwords to set? */
    bls.b   .end_l1         /* no: skip longword loop */

    /* leading longword loop: sets 0..3 longwords */
.loop_l1:
    move.l  %d0,-(%a1)      /* store longword */
    cmp.l   %d1,%a1         /* runs %a1 down to last line bound */
    bhi.b   .loop_l1

.end_l1:
    move.l  %d2,-(%sp)      /* free some registers */
    move.l  %d3,-(%sp)

    move.l  %d0,%d1         /* spread data to 4 data registers */
    move.l  %d0,%d2
    move.l  %d0,%d3
    lea.l   (14,%a0),%a0    /* start address += 14, acct. for trl. data */
    
    /* main loop: set whole lines utilising burst mode */
.loop_line:
    lea.l   (-16,%a1),%a1   /* pre-decrement */
    movem.l %d0-%d3,(%a1)   /* store line */
    cmp.l   %a0,%a1         /* runs %a1 down to first line bound */
    bhi.b   .loop_line

    lea.l   (-14,%a0),%a0   /* correct start address */
    move.l  (%sp)+,%d3      /* restore registers */
    move.l  (%sp)+,%d2

    move.l  %a0,%d1         /* %d1 = start address ... */
    addq.l  #2,%d1          /* ... +2, account for possible trailing word */
    cmp.l   %d1,%a1         /* any longwords left */
    bhi.b   .loop_l2        /* yes: jump to longword loop */
    bra.b   .no_longs       /* no: skip loop */

.no_lines:
    move.l  %a0,%d1         /* %d1 = start address ... */
    addq.l  #2,%d1          /* ... +2, account for possible trailing word */

    /* trailing longword loop */
.loop_l2:
    move.l  %d0,-(%a1)      /* store longword */
    cmp.l   %d1,%a1         /* runs %a1 down to first long bound */
    bhi.b   .loop_l2

.no_longs:
    cmp.l   %a0,%a1         /* any words left? */
    bls.b   .end_w2         /* no: skip loop */

    /* trailing word loop */
.loop_w2:
    move.w  %d0,-(%a1)      /* store word */
    cmp.l   %a0,%a1         /* runs %a1 down to start address */
    bhi.b   .loop_w2

.end_w2:
    move.l  %a0,%d0         /* return start address */
    rts

.end:
    .size   memset16,.end-memset16
#endif