summary | refs | log | tree | commit | diff | stats
path: root/apps
diff options
context:
space:
mode:
author: Jens Arnold <amiconn@rockbox.org> 2005-09-11 21:44:48 +0000
committer: Jens Arnold <amiconn@rockbox.org> 2005-09-11 21:44:48 +0000
commit: 4a2feaa30d96a7b38407551c9bece8b73a3a2aac (patch)
tree: b621fbd61532ddf603c7f9364b86b44617914703 /apps
parent: 2c0d04cc943f7ef2dd67c49ea419e7db378c01d0 (diff)
download: rockbox-4a2feaa30d96a7b38407551c9bece8b73a3a2aac.tar.gz
rockbox-4a2feaa30d96a7b38407551c9bece8b73a3a2aac.zip
Rockboy: Asm-optimised updatepatpix() for ColdFire. The vertical-mirroring pattern copy benefits from burst mode (line-aligned movem).
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@7509 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'apps')
-rw-r--r-- apps/plugins/rockboy/lcd.c 119
1 files changed, 117 insertions, 2 deletions
diff --git a/apps/plugins/rockboy/lcd.c b/apps/plugins/rockboy/lcd.c
index 9a7ead798f..872311d0e6 100644
--- a/apps/plugins/rockboy/lcd.c
+++ b/apps/plugins/rockboy/lcd.c
@@ -41,7 +41,11 @@ struct scan scan IDATA_ATTR;
#define WT (scan.wt)
#define WV (scan.wv)
-byte patpix[4096][8][8];
+byte patpix[4096][8][8]
+#if CONFIG_CPU == MCF5249 && !defined(SIMULATOR)
+ __attribute__ ((aligned(16))) /* to profit from burst mode */
+#endif
+ ;
byte patdirty[1024];
byte anydirty;
@@ -96,7 +100,7 @@ static byte *vdest;
void updatepatpix(void)
{
int i, j;
-#if CONFIG_CPU != SH7034 || defined(SIMULATOR)
+#if ((CONFIG_CPU != SH7034) && (CONFIG_CPU != MCF5249)) || defined(SIMULATOR)
int k, a, c;
#endif
byte *vram = lcd.vbank[0];
@@ -179,6 +183,70 @@ void updatepatpix(void)
: /* clobbers */
"r0", "r1", "r2"
);
+#elif CONFIG_CPU == MCF5249 && !defined(SIMULATOR)
+ asm volatile (
+ "move.b (%2),%%d2 \n"
+ "move.b (1,%2),%%d1 \n"
+
+ "addq.l #8,%1 \n"
+ "clr.l %%d0 \n"
+ "lsr.l #1,%%d1 \n"
+ "addx.l %%d0,%%d0 \n"
+ "lsr.l #1,%%d2 \n"
+ "addx.l %%d0,%%d0 \n"
+ "move.b %%d0,-(%1) \n"
+ "lsl.l #6,%%d0 \n"
+ "lsr.l #1,%%d1 \n"
+ "addx.l %%d0,%%d0 \n"
+ "lsr.l #1,%%d2 \n"
+ "addx.l %%d0,%%d0 \n"
+ "move.b %%d0,-(%1) \n"
+ "lsl.l #6,%%d0 \n"
+ "lsr.l #1,%%d1 \n"
+ "addx.l %%d0,%%d0 \n"
+ "lsr.l #1,%%d2 \n"
+ "addx.l %%d0,%%d0 \n"
+ "move.b %%d0,-(%1) \n"
+ "lsl.l #6,%%d0 \n"
+ "lsr.l #1,%%d1 \n"
+ "addx.l %%d0,%%d0 \n"
+ "lsr.l #1,%%d2 \n"
+ "addx.l %%d0,%%d0 \n"
+ "move.l %%d0,(%0) \n"
+ "move.b %%d0,-(%1) \n"
+ "clr.l %%d0 \n"
+ "lsr.l #1,%%d1 \n"
+ "addx.l %%d0,%%d0 \n"
+ "lsr.l #1,%%d2 \n"
+ "addx.l %%d0,%%d0 \n"
+ "move.b %%d0,-(%1) \n"
+ "lsl.l #6,%%d0 \n"
+ "lsr.l #1,%%d1 \n"
+ "addx.l %%d0,%%d0 \n"
+ "lsr.l #1,%%d2 \n"
+ "addx.l %%d0,%%d0 \n"
+ "move.b %%d0,-(%1) \n"
+ "lsl.l #6,%%d0 \n"
+ "lsr.l #1,%%d1 \n"
+ "addx.l %%d0,%%d0 \n"
+ "lsr.l #1,%%d2 \n"
+ "addx.l %%d0,%%d0 \n"
+ "move.b %%d0,-(%1) \n"
+ "lsl.l #6,%%d0 \n"
+ "lsr.l #1,%%d1 \n"
+ "addx.l %%d0,%%d0 \n"
+ "lsr.l #1,%%d2 \n"
+ "addx.l %%d0,%%d0 \n"
+ "move.l %%d0,(4,%0) \n"
+ "move.b %%d0,-(%1) \n"
+ : /* outputs */
+ : /* inputs */
+ /* %0 */ "a"(patpix[i+1024][j]),
+ /* %1 */ "a"(patpix[i][j]),
+ /* %2 */ "a"(&vram[(i<<4)|(j<<1)])
+ : /* clobbers */
+ "d0", "d1", "d2"
+ );
#else
a = ((i<<4) | (j<<1));
for (k = 0; k < 8; k++)
@@ -270,6 +338,53 @@ void updatepatpix(void)
: /* clobbers */
"r0", "r1"
);
+#elif CONFIG_CPU == MCF5249 && !defined(SIMULATOR)
+ asm volatile (
+ "movem.l (%0),%%d0-%%d3 \n"
+ "move.l %%d0,%%d4 \n"
+ "move.l %%d1,%%d5 \n"
+ "movem.l %%d2-%%d5,(48,%1) \n"
+ "movem.l (16,%0),%%d0-%%d3 \n"
+ "move.l %%d0,%%d4 \n"
+ "move.l %%d1,%%d5 \n"
+ "movem.l %%d2-%%d5,(32,%1) \n"
+ "movem.l (32,%0),%%d0-%%d3 \n"
+ "move.l %%d0,%%d4 \n"
+ "move.l %%d1,%%d5 \n"
+ "movem.l %%d2-%%d5,(16,%1) \n"
+ "movem.l (48,%0),%%d0-%%d3 \n"
+ "move.l %%d0,%%d4 \n"
+ "move.l %%d1,%%d5 \n"
+ "movem.l %%d2-%%d5,(%1) \n"
+
+ "move.l %2,%%d0 \n"
+ "add.l %%d0,%0 \n"
+ "add.l %%d0,%1 \n"
+
+ "movem.l (%0),%%d0-%%d3 \n"
+ "move.l %%d0,%%d4 \n"
+ "move.l %%d1,%%d5 \n"
+ "movem.l %%d2-%%d5,(48,%1) \n"
+ "movem.l (16,%0),%%d0-%%d3 \n"
+ "move.l %%d0,%%d4 \n"
+ "move.l %%d1,%%d5 \n"
+ "movem.l %%d2-%%d5,(32,%1) \n"
+ "movem.l (32,%0),%%d0-%%d3 \n"
+ "move.l %%d0,%%d4 \n"
+ "move.l %%d1,%%d5 \n"
+ "movem.l %%d2-%%d5,(16,%1) \n"
+ "movem.l (48,%0),%%d0-%%d3 \n"
+ "move.l %%d0,%%d4 \n"
+ "move.l %%d1,%%d5 \n"
+ "movem.l %%d2-%%d5,(%1) \n"
+ : /* outputs */
+ : /* inputs */
+ /* %0 */ "a"(patpix[i][0]),
+ /* %1 */ "a"(patpix[i+2048][0]),
+ /* %2 */ "i"(1024*64)
+ : /* clobbers */
+ "d0", "d1", "d2", "d3", "d4", "d5"
+ );
#else
for (j = 0; j < 8; j++)
{