/* Source: firmware/bitswap.S (blob da628a3b7f088a0c405f60d495d4e881f6d6c811) */
/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 * $Id$
 *
 * Copyright (C) 2002 by Magnus Holmgren
 *
 * All files in this archive are subject to the GNU General Public License.
 * See the file COPYING in the source tree root for full license agreement.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/

    .section    .icode,"ax",@progbits
    .align      4
    .global     _bitswap
    .type       _bitswap,@function

/* _bitswap - reverse the bit order of every byte in a buffer, in place.
 *
 * Each byte is replaced by its entry in the 256-byte flip table.  An odd
 * start address is handled by flipping one byte first, so that the main
 * loop can work on aligned 16-bit words; a trailing odd byte is handled
 * after the loop.
 *
 * In:
 *   r4   Data   (address of the first byte; may be odd)
 *   r5   Length (in bytes; compared as a signed value, so a length <= 0
 *               leaves the buffer untouched)
 *   NOTE(review): presumably called from C as
 *   void bitswap(unsigned char *data, int length) - confirm the prototype.
 *
 * Out:
 *   Nothing is returned; the buffer is modified in place.
 *
 * Registers used (all are overwritten, as is the T bit):
 *
 * r0   Temporary (required by some instructions)
 * r1   Low byte
 * r2   High byte
 * r3   Result after flip
 * r4   Data
 * r5   Length
 * r6   1
 * r7   Flip table
 */

_bitswap:
    mov.l   .fliptable,r7   /* r7 = address of the flip table */
    mov     #1,r6           /* constant 1 for the length compares */
    mov     r4,r0           /* tst #imm only works on r0 */
    tst     #1,r0           /* odd address? (T = 1 when bit 0 is clear) */
    bt      .init           /* no, address is even */

    /* Odd start address.  Make sure there is at least one byte before
     * touching memory; without this check a zero length would still
     * flip one byte.  (The check is two instructions, i.e. one longword,
     * so the alignment of the loop below is unchanged.)
     */
    cmp/pl  r5              /* length > 0? */
    bf      .exit           /* no, nothing to do */

    mov.b   @r4,r0          /* swap first byte */
    extu.b  r0,r0           /* mov.b sign extends; make it a table index */
    mov.b   @(r0,r7),r0
    mov.b   r0,@r4
    add     #1,r4           /* data pointer is now even */
    bra     .init
    add     #-1,r5          /* (delay slot) one byte less to do */

    /* The instruction order below is a bit strange, because:
     * 1) Keeping load/stores on longword boundaries means the instruction
     *    fetch won't compete with the memory access (because instructions
     *    are fetched in pairs).
     * 2) Using the result of a fetch in the next instruction causes a 
     *    stall (except in certain circumstances).
     * See the SH-1 programming manual for details.
     */

.loop:
    mov.w   @r4,r1          /* data to flip */
    add     #-2,r5
    swap.b  r1,r2           /* get high byte */
    extu.b  r2,r0           /* prepare high byte */
    mov.b   @(r0,r7),r2     /* swap high byte */
    extu.b  r1,r0           /* prepare low byte */
    mov.b   @(r0,r7),r1     /* swap low byte */
    extu.b  r2,r2           /* zero extend high byte */
    swap.b  r2,r3           /* put high byte in result */
    extu.b  r1,r0           /* zero extend low byte */
    or      r0,r3           /* put low byte in result */
    mov.w   r3,@r4          /* store result */
    add     #2,r4
.init:
    cmp/gt  r6,r5           /* while [bytes remaining] > 1 */
    bt      .loop           /* (at least 2 bytes left) */

    cmp/eq  r6,r5
    bf  .exit               /* if not 1 byte left, exit */

    mov.b   @r4,r0          /* swap last byte */
    extu.b  r0,r0
    mov.b   @(r0,r7),r0
    mov.b   r0,@r4
.exit:
    rts
    nop                     /* (delay slot) */

    .align  4

/* Literal pool entry: the address of the table below.  _bitswap reads it
 * with a PC-relative mov.l.
 */
.fliptable:
    .long   _fliptable

/* Bit reversal lookup table, 256 one-byte entries.  Entry n holds n with
 * its eight bits in the opposite order (0x01 -> 0x80, 0x02 -> 0x40, ...).
 * Each row below covers the sixteen indices that share a high nibble.
 */
_fliptable:
    .byte   0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0  /* 0x00 - 0x0f */
    .byte   0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8  /* 0x10 - 0x1f */
    .byte   0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4, 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4  /* 0x20 - 0x2f */
    .byte   0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc  /* 0x30 - 0x3f */
    .byte   0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2, 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2  /* 0x40 - 0x4f */
    .byte   0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea, 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa  /* 0x50 - 0x5f */
    .byte   0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6  /* 0x60 - 0x6f */
    .byte   0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee, 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe  /* 0x70 - 0x7f */
    .byte   0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1, 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1  /* 0x80 - 0x8f */
    .byte   0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9  /* 0x90 - 0x9f */
    .byte   0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5, 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5  /* 0xa0 - 0xaf */
    .byte   0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed, 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd  /* 0xb0 - 0xbf */
    .byte   0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3  /* 0xc0 - 0xcf */
    .byte   0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb, 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb  /* 0xd0 - 0xdf */
    .byte   0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7, 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7  /* 0xe0 - 0xef */
    .byte   0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff  /* 0xf0 - 0xff */

/* The size recorded for _bitswap spans everything from its entry label up
 * to here, i.e. the code plus the literal pool entry and the table.
 */
.end:
    .size   _bitswap,.end-_bitswap