summary | refs | log | tree | commit | diff | stats
path: root/firmware/target/coldfire/iriver
diff options
context:
space:
mode:
author: Jens Arnold <amiconn@rockbox.org> 2007-10-14 23:05:56 +0000
committer: Jens Arnold <amiconn@rockbox.org> 2007-10-14 23:05:56 +0000
commit: 99f955088149d5938ce4c9ca5624377f464b1380 (patch)
tree: 403178f0198f8ef0b57c49be3b25f085be52aa8a /firmware/target/coldfire/iriver
parent: 57418b2192f4eb4decab716d50e09232ba22f7f4 (diff)
download: rockbox-99f955088149d5938ce4c9ca5624377f464b1380.tar.gz
download: rockbox-99f955088149d5938ce4c9ca5624377f464b1380.zip
H300, X5: Optimised lcd_yuv_blit(), using line-pair zig-zag writing to the LCD controller. ~7% speedup on H300, ~5% speedup on X5.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@15111 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'firmware/target/coldfire/iriver')
-rw-r--r-- firmware/target/coldfire/iriver/h300/lcd-as-h300.S 129
-rw-r--r-- firmware/target/coldfire/iriver/h300/lcd-h300.c 45
2 files changed, 121 insertions, 53 deletions
diff --git a/firmware/target/coldfire/iriver/h300/lcd-as-h300.S b/firmware/target/coldfire/iriver/h300/lcd-as-h300.S
index 1873b905c6..9106e22c1c 100644
--- a/firmware/target/coldfire/iriver/h300/lcd-as-h300.S
+++ b/firmware/target/coldfire/iriver/h300/lcd-as-h300.S
@@ -44,6 +44,23 @@
* |B| |19611723 33976259 0| |Cr - 128| >> 27
*
* Needs EMAC set to saturated, signed integer mode.
+ *
+ * register usage:
+ * %a0 - LCD data port
+ * %a1 - Y pointer
+ * %a2 - C pointer
+ * %a3 - C width
+ * %a4 - Y end address
+ * %a5 - Y factor
+ * %a6 - BU factor
+ * %d0 - scratch
+ * %d1 - B, previous Y \ alternating
+ * %d2 - U / B, previous Y /
+ * %d3 - V / G
+ * %d4 - R / output pixel
+ * %d5 - GU factor
+ * %d6 - GV factor
+ * %d7 - RGB signed -> unsigned conversion mask
*/
.align 2
.global lcd_write_yuv420_lines
@@ -52,10 +69,10 @@
lcd_write_yuv420_lines:
lea.l (-44, %sp), %sp /* free up some registers */
movem.l %d2-%d7/%a2-%a6, (%sp)
-
+
lea.l 0xf0000002, %a0 /* LCD data port */
- movem.l (44+4, %sp), %a1-%a4 /* Y data, Cb data, Cr data, width */
- lea.l (%a1, %a4), %a4 /* end address */
+ movem.l (44+4, %sp), %a1-%a3 /* Y data, C data, C width */
+ lea.l (%a1, %a3*2), %a4 /* Y end address */
move.l #19611723, %a5 /* y factor */
move.l #33976259, %a6 /* bu factor */
@@ -64,11 +81,11 @@ lcd_write_yuv420_lines:
move.l #0x8410, %d7 /* bitmask for signed->unsigned conversion
* of R, G and B within RGB565 at once */
- /* chroma for (very) first & second pixel */
+ /* chroma for first 2x2 pixel block */
+ clr.l %d3 /* load v component */
+ move.b (%a2, %a3), %d3
clr.l %d2 /* load u component */
move.b (%a2)+, %d2
- clr.l %d3 /* load v component */
- move.b (%a3)+, %d3
moveq.l #-128, %d0
add.l %d0, %d2
add.l %d0, %d3
@@ -79,9 +96,9 @@ lcd_write_yuv420_lines:
move.l #26881894, %d0 /* rv factor */
mac.l %d0, %d3, %acc2 /* rv */
- /* luma for (very) first pixel */
+ /* luma for very first pixel (top left) */
clr.l %d1
- move.b (%a1)+, %d1
+ move.b (%a1, %a3*2), %d1
moveq.l #-126, %d0
add.l %d1, %d0 /* y' (-0.5 ... +0.5) */
mac.l %a5, %d0, %acc0
@@ -91,11 +108,11 @@ lcd_write_yuv420_lines:
bra.b .yuv_line_entry
.yuv_line_loop:
- /* chroma for first & second pixel */
+ /* chroma for 2x2 pixel block */
+ clr.l %d3 /* load v component */
+ move.b (%a2, %a3), %d3
clr.l %d2 /* load u component */
move.b (%a2)+, %d2
- clr.l %d3 /* load v component */
- move.b (%a3)+, %d3
moveq.l #-128, %d0
add.l %d0, %d2
add.l %d0, %d3
@@ -106,16 +123,16 @@ lcd_write_yuv420_lines:
move.l #26881894, %d0 /* rv factor */
mac.l %d0, %d3, %acc2 /* rv */
- /* luma for first pixel */
+ /* luma for first pixel (top left) */
clr.l %d1
- move.b (%a1)+, %d1
+ move.b (%a1, %a3*2), %d1
moveq.l #-126, %d0
add.l %d1, %d0 /* y' (-0.5 ... +0.5) */
mac.l %a5, %d0, %acc0
mac.l %a5, %d0, %acc1
mac.l %a5, %d0, %acc2
-
- move.w %d4, (%a0)
+
+ move.w %d4, (%a0)
/* LCD write is delayed one pixel to use it for filling the EMAC latency */
/* convert to RGB565, pack and output */
@@ -134,22 +151,50 @@ lcd_write_yuv420_lines:
or.l %d2, %d4
eor.l %d7, %d4
- /* luma for second pixel as delta from the first */
- clr.l %d0
- move.b (%a1)+, %d0
+ /* luma for second pixel (bottom left) as delta from the first */
+ clr.l %d2
+ move.b (%a1)+, %d2
+ move.l %d2, %d0
sub.l %d1, %d0
mac.l %a5, %d0, %acc0
mac.l %a5, %d0, %acc1
mac.l %a5, %d0, %acc2
- move.w %d4, (%a0)
+ move.w %d4, (%a0)
/* LCD write is delayed one pixel to use it for filling the EMAC latency */
/* convert to RGB565, pack and output */
moveq.l #27, %d0
- movclr.l %acc0, %d2
- movclr.l %acc1, %d3
- movclr.l %acc2, %d4
+ move.l %acc0, %d1
+ move.l %acc1, %d3
+ move.l %acc2, %d4
+ lsr.l %d0, %d1
+ lsr.l %d0, %d4
+ moveq.l #26, %d0
+ lsr.l %d0, %d3
+ lsl.l #6, %d4
+ or.l %d3, %d4
+ lsl.l #5, %d4
+ or.l %d1, %d4
+ eor.l %d7, %d4
+
+ /* luma for third pixel (top right) as delta from the second */
+ clr.l %d1
+ move.b (%a1, %a3*2), %d1
+ move.l %d1, %d0
+ sub.l %d2, %d0
+ mac.l %a5, %d0, %acc0
+ mac.l %a5, %d0, %acc1
+ mac.l %a5, %d0, %acc2
+
+ move.w %d4, (%a0)
+ /* LCD write is delayed one pixel to use it for filling the EMAC latency */
+
+ /* convert to RGB565, pack and output */
+ moveq.l #27, %d0
+ move.l %acc0, %d2
+ move.l %acc1, %d3
+ move.l %acc2, %d4
lsr.l %d0, %d2
lsr.l %d0, %d4
moveq.l #26, %d0
@@ -160,24 +205,40 @@ lcd_write_yuv420_lines:
or.l %d2, %d4
eor.l %d7, %d4
+ /* luma for fourth pixel (bottom right) as delta from the third */
+ clr.l %d2
+ move.b (%a1)+, %d2
+ move.l %d2, %d0
+ sub.l %d1, %d0
+ mac.l %a5, %d0, %acc0
+ mac.l %a5, %d0, %acc1
+ mac.l %a5, %d0, %acc2
+
+ move.w %d4, (%a0)
+ /* LCD write is delayed one pixel to use it for filling the EMAC latency */
+
+ /* convert to RGB565, pack and output */
+ moveq.l #27, %d0
+ movclr.l %acc0, %d1
+ movclr.l %acc1, %d3
+ movclr.l %acc2, %d4
+ lsr.l %d0, %d1
+ lsr.l %d0, %d4
+ moveq.l #26, %d0
+ lsr.l %d0, %d3
+ lsl.l #6, %d4
+ or.l %d3, %d4
+ lsl.l #5, %d4
+ or.l %d1, %d4
+ eor.l %d7, %d4
+
cmp.l %a1, %a4 /* run %a1 up to end of line */
bhi.w .yuv_line_loop
-
- tst.l (44+4, %sp) /* use original Y pointer as a flag to */
- beq.b .yuv_exit /* distinguish between first and second */
- clr.l (44+4, %sp) /* pixel line */
-
- /* Rewind chroma pointers */
- movem.l (44+8, %sp), %a2-%a4 /* Cb data, Cr data, width */
- lea.l (%a1, %a4), %a4 /* end address */
- bra.w .yuv_line_loop
-
-.yuv_exit:
+
move.w %d4, (%a0) /* write (very) last pixel */
movem.l (%sp), %d2-%d7/%a2-%a6
lea.l (44, %sp), %sp /* restore registers */
-
rts
.yuv_end:
.size lcd_write_yuv420_lines, .yuv_end - lcd_write_yuv420_lines
diff --git a/firmware/target/coldfire/iriver/h300/lcd-h300.c b/firmware/target/coldfire/iriver/h300/lcd-h300.c
index 8f76d5255a..00662e16f9 100644
--- a/firmware/target/coldfire/iriver/h300/lcd-h300.c
+++ b/firmware/target/coldfire/iriver/h300/lcd-h300.c
@@ -80,6 +80,9 @@ static int xoffset = 0; /* needed for flip */
#define LCD_CMD (*(volatile unsigned short *)0xf0000000)
#define LCD_DATA (*(volatile unsigned short *)0xf0000002)
+#define R_ENTRY_MODE_HORZ 0x7030
+#define R_ENTRY_MODE_VERT 0x7038
+
/* called very frequently - inline! */
static inline void lcd_write_reg(int reg, int val)
{
@@ -307,13 +310,12 @@ void lcd_blit(const fb_data* data, int x, int by, int width,
}
/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420.
- * y should have two lines of Y back to back.
- * bu and rv should contain the Cb and Cr data for the two lines of Y.
+ * y should have two lines of Y back to back, 2nd line first.
+ * c should contain the Cb and Cr data for the two lines of Y back to back.
* Needs EMAC set to saturated, signed integer mode.
*/
extern void lcd_write_yuv420_lines(const unsigned char *y,
- const unsigned char *bu,
- const unsigned char *rv, int width);
+ const unsigned char *c, int cwidth);
/* Performance function to blit a YUV bitmap directly to the LCD
* src_x, src_y, width and height should be even
@@ -325,8 +327,7 @@ void lcd_yuv_blit(unsigned char * const src[3],
{
 /* IRAM Y, Cb and Cr buffers. */
unsigned char y_ibuf[LCD_WIDTH*2];
- unsigned char bu_ibuf[LCD_WIDTH/2];
- unsigned char rv_ibuf[LCD_WIDTH/2];
+ unsigned char c_ibuf[LCD_WIDTH];
const unsigned char *ysrc, *usrc, *vsrc;
const unsigned char *ysrc_max;
@@ -336,11 +337,9 @@ void lcd_yuv_blit(unsigned char * const src[3],
width &= ~1; /* stay on the safe side */
height &= ~1;
+ lcd_write_reg(R_ENTRY_MODE, R_ENTRY_MODE_HORZ);
/* Set start position and window */
- lcd_write_reg(R_VERT_RAM_ADDR_POS,((x+xoffset+width-1) << 8) | (x+xoffset));
- lcd_write_reg(R_RAM_ADDR_SET, ((x+xoffset) << 8) | y);
-
- lcd_begin_write_gram();
+ lcd_write_reg(R_VERT_RAM_ADDR_POS, ((xoffset + 219) << 8) | xoffset);
ysrc = src[0] + src_y * stride + src_x;
usrc = src[1] + (src_y * stride >> 2) + (src_x >> 1);
@@ -350,11 +349,17 @@ void lcd_yuv_blit(unsigned char * const src[3],
coldfire_set_macsr(EMAC_SATURATE);
do
{
- memcpy(y_ibuf, ysrc, width);
- memcpy(y_ibuf + width, ysrc + stride, width);
- memcpy(bu_ibuf, usrc, width >> 1);
- memcpy(rv_ibuf, vsrc, width >> 1);
- lcd_write_yuv420_lines(y_ibuf, bu_ibuf, rv_ibuf, width);
+ lcd_write_reg(R_HORIZ_RAM_ADDR_POS, ((y + 1) << 8) | y);
+ lcd_write_reg(R_RAM_ADDR_SET, ((x+xoffset) << 8) | y);
+ lcd_begin_write_gram();
+
+ memcpy(y_ibuf + width, ysrc, width);
+ memcpy(y_ibuf, ysrc + stride, width);
+ memcpy(c_ibuf, usrc, width >> 1);
+ memcpy(c_ibuf + (width >> 1), vsrc, width >> 1);
+ lcd_write_yuv420_lines(y_ibuf, c_ibuf, width >> 1);
+
+ y += 2;
ysrc += 2 * stride;
usrc += stride >> 1;
vsrc += stride >> 1;
@@ -368,11 +373,12 @@ void lcd_update(void) ICODE_ATTR;
void lcd_update(void)
{
if(display_on){
- /* reset update window */
+ lcd_write_reg(R_ENTRY_MODE, R_ENTRY_MODE_VERT);
+ /* set start position and window */
+ lcd_write_reg(R_HORIZ_RAM_ADDR_POS, 175 << 8);
lcd_write_reg(R_VERT_RAM_ADDR_POS,((xoffset+219)<<8) | xoffset);
-
- /* Copy display bitmap to hardware */
lcd_write_reg(R_RAM_ADDR_SET, xoffset << 8);
+
lcd_begin_write_gram();
DAR3 = 0xf0000002;
@@ -403,8 +409,9 @@ void lcd_update_rect(int x, int y, int width, int height)
if(y + height > LCD_HEIGHT)
height = LCD_HEIGHT - y;
+ lcd_write_reg(R_ENTRY_MODE, R_ENTRY_MODE_VERT);
/* set update window */
-
+ lcd_write_reg(R_HORIZ_RAM_ADDR_POS, 175 << 8);
lcd_write_reg(R_VERT_RAM_ADDR_POS,((x+xoffset+width-1) << 8) | (x+xoffset));
lcd_write_reg(R_RAM_ADDR_SET, ((x+xoffset) << 8) | y);
lcd_begin_write_gram();