summaryrefslogtreecommitdiffstats
path: root/firmware/target/arm/ipod
diff options
context:
space:
mode:
authorAndree Buschmann <AndreeBuschmann@t-online.de>2010-12-29 23:17:47 +0000
committerAndree Buschmann <AndreeBuschmann@t-online.de>2010-12-29 23:17:47 +0000
commitb04d676706c6e306754a3d1223d52de0037638bf (patch)
treebf2e34a68ca26c6c2397c51c91d4e06801d009e3 /firmware/target/arm/ipod
parent1980fc3a61db6b85760ff044900094a231568936 (diff)
downloadrockbox-b04d676706c6e306754a3d1223d52de0037638bf.tar.gz
rockbox-b04d676706c6e306754a3d1223d52de0037638bf.tar.bz2
rockbox-b04d676706c6e306754a3d1223d52de0037638bf.zip
Speed up of iPod nano 1G and iPod color LCD. Use HDD6330 asm part for YUV blitting, introduce special handling for full width screen updates. Speed up is about +30% for YUV on both color/nano1G.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@28930 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'firmware/target/arm/ipod')
-rwxr-xr-xfirmware/target/arm/ipod/lcd-as-color-nano.S152
-rw-r--r--firmware/target/arm/ipod/lcd-color_nano.c132
2 files changed, 182 insertions, 102 deletions
diff --git a/firmware/target/arm/ipod/lcd-as-color-nano.S b/firmware/target/arm/ipod/lcd-as-color-nano.S
new file mode 100755
index 0000000000..d4df4d496a
--- /dev/null
+++ b/firmware/target/arm/ipod/lcd-as-color-nano.S
@@ -0,0 +1,152 @@
+/***************************************************************************
+ * __________ __ ___.
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id:$
+ *
+ * Copyright (C) 2010 by Andree Buschmann
+ *
+ * Generic asm helper function used by YUV blitting.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "config.h"
+#include "cpu.h"
+
+ .section .icode, "ax", %progbits
+
+/****************************************************************************
+* void lcd_yuv_write_inner_loop(unsigned char const * const ysrc,
+* unsigned char const * const usrc,
+* unsigned char const * const vsrc,
+* int width);
+*
+* YUV -> RGB565 conversion
+* |R| |1.000000 -0.000001 1.402000| |Y'|
+* |G| = |1.000000 -0.334136 -0.714136| |Pb|
+* |B| |1.000000 1.772000 0.000000| |Pr|
+* Scaled, normalized, rounded and tweaked to yield RGB 565:
+* |R| |74 0 101| |Y' - 16| >> 9
+* |G| = |74 -24 -51| |Cb - 128| >> 8
+* |B| |74 128 0| |Cr - 128| >> 9
+*
+*/
+ .align 2
+ .global lcd_yuv_write_inner_loop
+ .type lcd_yuv_write_inner_loop, %function
+
+lcd_yuv_write_inner_loop:
+ @ r0 = ysrc (luma, two bytes consumed per loop iteration)
+ @ r1 = usrc (Cb, one byte consumed per loop iteration)
+ @ r2 = vsrc (Cr, one byte consumed per loop iteration)
+ @ r3 = width (pixels left; decremented by 2 per iteration)
+ stmfd sp!, { r4-r11, lr } @ save regs
+ mov r4, #0x70000000 @ r4 = LCD2_BLOCK_CTRL - 0x20
+ add r4, r4, #0x8a00 @ r4 = 0x70008a00
+ add r5, r4, #0x100 @ r5 = LCD2_BLOCK_DATA
+10: @ loop: one Cb/Cr pair is shared by the two pixels converted below
+
+ ldrb r7, [r1], #1 @ *usrc++
+ ldrb r8, [r2], #1 @ *vsrc++
+
+ sub r7, r7, #128 @ Cb -= 128
+ sub r8, r8, #128 @ Cr -= 128
+
+ add r10, r8, r8, asl #2 @ Cr*101, built as Cr*5
+ add r10, r10, r8, asl #5 @ + Cr*32
+ add r10, r10, r8, asl #6 @ + Cr*64
+
+ add r11, r8, r8, asl #1 @ Cr*51 + Cb*24, starting with Cr*3
+ add r11, r11, r11, asl #4 @ Cr*3*17 = Cr*51
+ add r11, r11, r7, asl #3 @ + Cb*8
+ add r11, r11, r7, asl #4 @ + Cb*16
+
+ add r12, r7, #2 @ r12 = bu = (Cb*128 + 256) >> 9
+ mov r12, r12, asr #2 @ computed in reduced form as (Cb + 2) >> 2
+ add r10, r10, #256 @ r10 = rv = (Cr*101 + 256) >> 9
+ mov r10, r10, asr #9
+ rsb r11, r11, #128 @ r11 = guv = (-r11 + 128) >> 8
+ mov r11, r11, asr #8
+
+@ pixel_1
+ ldrb r7, [r0], #1 @ *ysrc++
+ sub r7, r7, #16 @ Y = (Y' - 16) * 37
+ add r8, r7, r7, asl #2 @ r8 = (Y' - 16) * 5
+ add r7, r8, r7, asl #5 @ r7 = (Y' - 16) * 37, i.e. half of the 74 factor
+
+ add r9, r10, r7, asr #8 @ R = (Y >> 8) + rv
+ add r8, r11, r7, asr #7 @ G = (Y >> 7) + guv
+ add r7, r12, r7, asr #8 @ B = (Y >> 8) + bu
+
+ cmp r9, #31 @ clamp R (unsigned compare also catches negatives)
+ mvnhi r9, r9, asr #31 @ negative -> 0, too large -> all ones
+ andhi r9, r9, #31 @ all ones -> 31
+
+ cmp r8, #63 @ clamp G
+ mvnhi r8, r8, asr #31
+ andhi r8, r8, #63
+
+ cmp r7, #31 @ clamp B
+ mvnhi r7, r7, asr #31
+ andhi r7, r7, #31
+
+ orr r6, r7, r8, lsl #5 @ pack pixel: B | G << 5
+ orr r6, r6, r9, lsl #11 @ | R << 11
+
+ mov r7, r6, lsl #8 @ swap bytes
+ and r7, r7, #0xff00 @ r7 = low byte moved to bits 8-15
+ add r6, r7, r6, lsr #8 @ r6 = byte-swapped pixel 1 in bits 0-15
+
+@ pixel_2
+ ldrb r7, [r0], #1 @ *ysrc++
+ sub r7, r7, #16 @ Y = (Y' - 16) * 37
+ add r8, r7, r7, asl #2 @ r8 = (Y' - 16) * 5
+ add r7, r8, r7, asl #5 @ r7 = (Y' - 16) * 37
+
+ add r9, r10, r7, asr #8 @ R = (Y >> 8) + rv
+ add r8, r11, r7, asr #7 @ G = (Y >> 7) + guv
+ add r7, r12, r7, asr #8 @ B = (Y >> 8) + bu
+
+ cmp r9, #31 @ clamp R
+ mvnhi r9, r9, asr #31
+ andhi r9, r9, #31
+
+ cmp r8, #63 @ clamp G
+ mvnhi r8, r8, asr #31
+ andhi r8, r8, #63
+
+ cmp r7, #31 @ clamp B
+ mvnhi r7, r7, asr #31
+ andhi r7, r7, #31
+
+ orr r7, r7, r8, lsl #5 @ pack pixel
+ orr r7, r7, r9, lsl #11
+
+ orr r6, r6, r7, lsl #24 @ swap bytes and add pixels simultaneously
+ mov r7, r7, lsr #8 @ r7 = high byte of pixel 2
+ orr r6, r6, r7, lsl #16 @ r6 = swapped pixel 2 << 16 | swapped pixel 1
+#if 1
+11: @ while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_TXOK));
+ ldr r11, [r4, #0x20] @ r11 reused as scratch; guv is recomputed next iteration
+ tst r11, #0x1000000 @ test the TXOK bit
+ beq 11b @ bit clear: poll again
+#endif
+ str r6, [r5] @ send two pixels
+
+ subs r3, r3, #2 @ decrease width
+ bgt 10b @ loop (body runs at least once; an odd width rounds up)
+
+ ldmpc regs=r4-r11 @ restore regs (macro defined elsewhere; presumably also returns)
+ .ltorg @ dump constant pool
+ .size lcd_yuv_write_inner_loop, .-lcd_yuv_write_inner_loop
diff --git a/firmware/target/arm/ipod/lcd-color_nano.c b/firmware/target/arm/ipod/lcd-color_nano.c
index 7d004cb0f2..e3b9ea8eb6 100644
--- a/firmware/target/arm/ipod/lcd-color_nano.c
+++ b/firmware/target/arm/ipod/lcd-color_nano.c
@@ -121,38 +121,14 @@ void lcd_init_device(void)
}
/*** update functions ***/
+extern void lcd_yuv_write_inner_loop(unsigned char const * const ysrc,
+ unsigned char const * const usrc,
+ unsigned char const * const vsrc,
+ int width);
#define CSUB_X 2
#define CSUB_Y 2
-/* YUV- > RGB565 conversion
- * |R| |1.000000 -0.000001 1.402000| |Y'|
- * |G| = |1.000000 -0.334136 -0.714136| |Pb|
- * |B| |1.000000 1.772000 0.000000| |Pr|
- * Scaled, normalized, rounded and tweaked to yield RGB 565:
- * |R| |74 0 101| |Y' - 16| >> 9
- * |G| = |74 -24 -51| |Cb - 128| >> 8
- * |B| |74 128 0| |Cr - 128| >> 9
-*/
-
-#define RGBYFAC 74 /* 1.0 */
-#define RVFAC 101 /* 1.402 */
-#define GVFAC (-51) /* -0.714136 */
-#define GUFAC (-24) /* -0.334136 */
-#define BUFAC 128 /* 1.772 */
-
-/* ROUNDOFFS contain constant for correct round-offs as well as
- constant parts of the conversion matrix (e.g. (Y'-16)*RGBYFAC
- -> constant part = -16*RGBYFAC). Through extraction of these
- constant parts we save at leat 4 substractions in the conversion
- loop */
-#define ROUNDOFFSR (256 - 16*RGBYFAC - 128*RVFAC)
-#define ROUNDOFFSG (128 - 16*RGBYFAC - 128*GVFAC - 128*GUFAC)
-#define ROUNDOFFSB (256 - 16*RGBYFAC - 128*BUFAC)
-
-#define MAX_5BIT 0x1f
-#define MAX_6BIT 0x3f
-
/* Performance function to blit a YUV bitmap directly to the LCD */
void lcd_blit_yuv(unsigned char * const src[3],
int src_x, int src_y, int stride,
@@ -222,7 +198,8 @@ void lcd_blit_yuv(unsigned char * const src[3],
const int stride_div_csub_x = stride/CSUB_X;
h=0;
- while (1) {
+ while (1)
+ {
/* upsampling, YUV->RGB conversion and reduction to RGB565 in one go */
const unsigned char *ysrc = src[0] + stride * src_y + src_x;
@@ -231,17 +208,11 @@ void lcd_blit_yuv(unsigned char * const src[3],
const unsigned char *usrc = src[1] + uvoffset;
const unsigned char *vsrc = src[2] + uvoffset;
- const unsigned char *row_end = ysrc + width;
-
- int yp, up, vp;
- int red1, green1, blue1;
- int red2, green2, blue2;
- int rc, gc, bc;
int pixels_to_write;
- fb_data pixel1,pixel2;
- if (h==0) {
+ if (h==0)
+ {
while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_READY));
LCD2_BLOCK_CONFIG = 0;
@@ -251,7 +222,8 @@ void lcd_blit_yuv(unsigned char * const src[3],
h = height;
/* calculate how much we can do in one go */
- if (pixels_to_write > 0x10000) {
+ if (pixels_to_write > 0x10000)
+ {
h = (0x10000/2) / width;
pixels_to_write = (width * h) * 2;
}
@@ -262,61 +234,7 @@ void lcd_blit_yuv(unsigned char * const src[3],
LCD2_BLOCK_CTRL = 0x34000000;
}
- do
- {
- up = *usrc++;
- vp = *vsrc++;
- rc = RVFAC * vp + ROUNDOFFSR;
- gc = GVFAC * vp + GUFAC * up + ROUNDOFFSG;
- bc = BUFAC * up + ROUNDOFFSB;
-
- /* Pixel 1 -> RGB565 */
- yp = *ysrc++ * RGBYFAC;
- red1 = (yp + rc) >> 9;
- green1 = (yp + gc) >> 8;
- blue1 = (yp + bc) >> 9;
-
- /* Pixel 2 -> RGB565 */
- yp = *ysrc++ * RGBYFAC;
- red2 = (yp + rc) >> 9;
- green2 = (yp + gc) >> 8;
- blue2 = (yp + bc) >> 9;
-
- /* Since out of bounds errors are relatively rare, we check two
- pixels at once to see if any components are out of bounds, and
- then fix whichever is broken. This works due to high values and
- negative values both being !=0 when bitmasking them.
- We first check for red and blue components (5bit range). */
- if ((red1 | blue1 | red2 | blue2) & ~MAX_5BIT)
- {
- if (red1 & ~MAX_5BIT)
- red1 = (red1 >> 31) ? 0 : MAX_5BIT;
- if (blue1 & ~MAX_5BIT)
- blue1 = (blue1 >> 31) ? 0 : MAX_5BIT;
- if (red2 & ~MAX_5BIT)
- red2 = (red2 >> 31) ? 0 : MAX_5BIT;
- if (blue2 & ~MAX_5BIT)
- blue2 = (blue2 >> 31) ? 0 : MAX_5BIT;
- }
- /* We second check for green component (6bit range) */
- if ((green1 | green2) & ~MAX_6BIT)
- {
- if (green1 & ~MAX_6BIT)
- green1 = (green1 >> 31) ? 0 : MAX_6BIT;
- if (green2 & ~MAX_6BIT)
- green2 = (green2 >> 31) ? 0 : MAX_6BIT;
- }
-
- pixel1 = swap16((red1 << 11) | (green1 << 5) | blue1);
-
- pixel2 = swap16((red2 << 11) | (green2 << 5) | blue2);
-
- while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_TXOK));
-
- /* output 2 pixels */
- LCD2_BLOCK_DATA = (pixel2 << 16) | pixel1;
- }
- while (ysrc < row_end);
+ lcd_yuv_write_inner_loop(ysrc,usrc,vsrc,width);
src_y++;
h--;
@@ -415,16 +333,26 @@ void lcd_update_rect(int x, int y, int width, int height)
LCD2_BLOCK_CONFIG = 0xc0010000 | (pixels_to_write - 1);
LCD2_BLOCK_CTRL = 0x34000000;
- /* for each row */
- for (r = 0; r < h; r++) {
- /* for each column */
- for (c = 0; c < width; c += 2) {
- while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_TXOK));
-
- /* output 2 pixels */
- LCD2_BLOCK_DATA = *addr++;
+ if (LCD_WIDTH == width) {
+ /* for each row and column in a single loop */
+ for (r = 0; r < h*width; r += 2) {
+ while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_TXOK));
+
+ /* output 2 pixels */
+ LCD2_BLOCK_DATA = *addr++;
+ }
+ } else {
+ /* for each row */
+ for (r = 0; r < h; r++) {
+ /* for each column */
+ for (c = 0; c < width; c += 2) {
+ while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_TXOK));
+
+ /* output 2 pixels */
+ LCD2_BLOCK_DATA = *addr++;
+ }
+ addr += (LCD_WIDTH - width)/2;
}
- addr += (LCD_WIDTH - width)/2;
}
while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_READY));