summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJörg Hohensohn <hohensoh@rockbox.org>2004-01-26 22:26:54 +0000
committerJörg Hohensohn <hohensoh@rockbox.org>2004-01-26 22:26:54 +0000
commitc546b70e18b7e0390c032e0fe0de15441cd7ed1b (patch)
tree3e68491e48f17030dc256aa22893480d764fdcf3
parentbd7c07a600acba0e32dbab746812c9e6fc5e9446 (diff)
downloadrockbox-c546b70e18b7e0390c032e0fe0de15441cd7ed1b.tar.gz
rockbox-c546b70e18b7e0390c032e0fe0de15441cd7ed1b.zip
Really squeezed lcd_write_data() with tricky assembler optimization. Now it does 114 fps when playing video from memory (way more than the ~75 fps needed, I need to slow down the video player), leaving headroom for audio.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@4278 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--firmware/drivers/lcd.c116
1 files changed, 110 insertions, 6 deletions
diff --git a/firmware/drivers/lcd.c b/firmware/drivers/lcd.c
index 4925ea63d2..5c2c514f10 100644
--- a/firmware/drivers/lcd.c
+++ b/firmware/drivers/lcd.c
@@ -7,7 +7,7 @@
* \/ \/ \/ \/ \/
* $Id$
*
- * Copyright (C) 2002 by Alan Korr
+ * Copyright (C) 2002 by Alan Korr, speedup by Jörg Hohensohn
*
* All files in this archive are subject to the GNU General Public License.
* See the file COPYING in the source tree root for full license agreement.
@@ -180,9 +180,10 @@ void lcd_write(bool command, int byte)
Ultimately, all calls to lcd_write(false, xxx) should be substituted by
this, it will be most efficient if the LCD buffer is tilted to have the
X row as consecutive bytes, so we can write a whole row */
-/* FixMe: somehow the red LED is affected by this, although I don't touch
- any other bit. Therefore not used yet, except for lcd_blit() */
void lcd_write_data(unsigned char* p_bytes, int count) __attribute__ ((section (".icode")));
+
+#ifdef HAVE_LCD_CHARCELLS
+/* This version works for both Player and Recorder models */
void lcd_write_data(unsigned char* p_bytes, int count)
{
do
@@ -193,7 +194,8 @@ void lcd_write_data(unsigned char* p_bytes, int count)
byte = *p_bytes++ << 24; /* fetch to MSB position */
- cli(); /* make port modifications atomic */
+ cli(); /* make port modifications atomic, in case an IRQ uses PBDRL */
+ /* (currently not the case, so this could be optimized away) */
/* precalculate the values for later bit toggling, init data write */
asm (
@@ -268,9 +270,9 @@ void lcd_write_data(unsigned char* p_bytes, int count)
"mov.b %3,@%4\n"
"bf 1f\n"
- "mov.b %1,@%4\n"
+ "mov.b %1,@%4\n" /* set SD high, SC low still */
"1: \n"
- "or.b %2, @(r0,gbr)\n"
+ "or.b %2, @(r0,gbr)\n" /* rise SC (independent of SD level) */
"or.b %5, @(r0,gbr)\n" /* restore port */
:
@@ -287,3 +289,105 @@ void lcd_write_data(unsigned char* p_bytes, int count)
} while (--count); /* tail loop is faster */
}
+
+#else /* #ifdef HAVE_LCD_CHARCELLS */
+/* A further optimized version, exploits that SD is on bit 0 for recorders */
+void lcd_write_data(unsigned char* p_bytes, int count)
+{
+ do
+ {
+ unsigned byte;
+ unsigned sda1; /* precalculated SC=low,SD=1 */
+
+ /* take inverse data, so I can use the NEGC instruction below, it is
+ the only carry add/sub which does not destroy a source register */
+ byte = ~(*p_bytes++ << 24); /* fetch to MSB position */
+
+ cli(); /* make port modifications atomic, in case an IRQ uses PBDRL */
+ /* (currently not the case, so this could be optimized away) */
+
+ /* precalculate the values for later bit toggling, init data write */
+ asm (
+ "mov.b @%1,r0\n" /* r0 = PBDRL */
+ "or %3,r0\n" /* r0 |= LCD_DS | LCD_SD DS and SD high, */
+ "and %2,r0\n" /* r0 &= ~(LCD_CS | LCD_SC) CS and SC low */
+ "mov.b r0,@%1\n" /* PBDRL = r0 */
+ "neg r0,%0\n" /* sda1 = 0-r0 */
+ : /* outputs: */
+ /* %0 */ "=r"(sda1)
+ : /* inputs: */
+ /* %1 */ "r"(LCDR),
+ /* %2 */ "I"(~(LCD_CS | LCD_SC)),
+ /* %3 */ "I"(LCD_DS | LCD_SD)
+ : /* trashed */
+ "r0"
+ );
+
+ /* unrolled loop to serialize the byte */
+ asm (
+ "shll %0 \n" /* shift the MSB into carry */
+ "negc %1, r0\n" /* carry to SD, SC low */
+ "mov.b r0,@%3\n" /* set data to port */
+ "or %2, r0\n" /* rise SC (independent of SD level) */
+ "mov.b r0,@%3\n" /* set to port */
+
+ "shll %0 \n"
+ "negc %1, r0\n"
+ "mov.b r0,@%3\n"
+ "or %2, r0\n"
+ "mov.b r0,@%3\n"
+
+ "shll %0 \n"
+ "negc %1, r0\n"
+ "mov.b r0,@%3\n"
+ "or %2, r0\n"
+ "mov.b r0,@%3\n"
+
+ "shll %0 \n"
+ "negc %1, r0\n"
+ "mov.b r0,@%3\n"
+ "or %2, r0\n"
+ "mov.b r0,@%3\n"
+
+ "shll %0 \n"
+ "negc %1, r0\n"
+ "mov.b r0,@%3\n"
+ "or %2, r0\n"
+ "mov.b r0,@%3\n"
+
+ "shll %0 \n"
+ "negc %1, r0\n"
+ "mov.b r0,@%3\n"
+ "or %2, r0\n"
+ "mov.b r0,@%3\n"
+
+ "shll %0 \n"
+ "negc %1, r0\n"
+ "mov.b r0,@%3\n"
+ "or %2, r0\n"
+ "mov.b r0,@%3\n"
+
+ "shll %0 \n"
+ "negc %1, r0\n"
+ "mov.b r0,@%3\n"
+ "or %2, r0\n"
+ "mov.b r0,@%3\n"
+
+ "or %4, r0\n" /* restore port */
+ "mov.b r0,@%3\n"
+ : /* outputs: */
+ : /* inputs: */
+ /* %0 */ "r"(byte),
+ /* %1 */ "r"(sda1),
+ /* %2 */ "I"(LCD_SC),
+ /* %3 */ "r"(LCDR),
+ /* %4 */ "I"(LCD_CS|LCD_DS|LCD_SD|LCD_SC)
+ : /* trashed: */
+ "r0"
+ );
+
+ sti(); /* end of atomic port modifications */
+
+ } while (--count); /* tail loop is faster */
+}
+#endif /* #ifdef HAVE_LCD_CHARCELLS */