summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSolomon Peachy <pizza@shaftnet.org>2022-10-13 11:03:53 -0400
committerSolomon Peachy <pizza@shaftnet.org>2022-10-13 11:08:06 -0400
commit418169aff8faf2cf90124cd95dba0af821cea73d (patch)
tree4fc206af57c9a07fba6cbcb3517fa946a7b84668
parentf9ea1fc79d6aaff9949a5b11ae011b4e04e0e9d9 (diff)
downloadrockbox-418169aff8.tar.gz
rockbox-418169aff8.zip
Revert "Remove YUV blitting functions and LCD modes"
This reverts commit fe6aa21e9eb88f49005863efd2003d0982920048. Change-Id: I8bb1e5d6c52ed1478002d2140ef494ec5d62b8e3
-rw-r--r--apps/plugin.c9
-rw-r--r--apps/plugin.h11
-rw-r--r--apps/plugins/test_fps.c91
-rw-r--r--docs/PLUGIN_API19
-rw-r--r--firmware/SOURCES15
-rw-r--r--firmware/asm/arm/lcd-as-memframe.S591
-rw-r--r--firmware/asm/lcd-as-memframe.c168
-rw-r--r--firmware/drivers/lcd-color-common.c189
-rw-r--r--firmware/drivers/lcd-memframe.c98
-rw-r--r--firmware/export/config/mrobe500.h2
-rw-r--r--firmware/export/lcd.h11
-rw-r--r--firmware/target/arm/as3525/lcd-as-e200v2-fuze-fuzev2.S550
-rw-r--r--firmware/target/arm/as3525/lcd-fuze.c80
-rw-r--r--firmware/target/arm/as3525/sansa-e200v2/lcd-e200v2.c98
-rw-r--r--firmware/target/arm/ipod/lcd-as-color-nano.S287
-rw-r--r--firmware/target/arm/ipod/lcd-color_nano.c56
-rw-r--r--firmware/target/arm/ipod/video/lcd-as-video.S237
-rw-r--r--firmware/target/arm/ipod/video/lcd-video.c47
-rw-r--r--firmware/target/arm/iriver/h10/lcd-as-h10.S538
-rw-r--r--firmware/target/arm/iriver/h10/lcd-h10_20gb.c90
-rw-r--r--firmware/target/arm/iriver/h10/lcd-h10_5gb.c162
-rw-r--r--firmware/target/arm/lcd-c200_c200v2.c77
-rw-r--r--firmware/target/arm/pbell/vibe500/lcd-as-vibe500.S556
-rw-r--r--firmware/target/arm/pbell/vibe500/lcd-vibe500.c75
-rw-r--r--firmware/target/arm/philips/hdd1630/lcd-as-hdd1630.S570
-rw-r--r--firmware/target/arm/philips/hdd1630/lcd-hdd1630.c81
-rw-r--r--firmware/target/arm/philips/hdd6330/lcd-as-hdd6330.S140
-rw-r--r--firmware/target/arm/philips/hdd6330/lcd-hdd6330.c98
-rw-r--r--firmware/target/arm/philips/sa9200/lcd-as-sa9200.S590
-rw-r--r--firmware/target/arm/philips/sa9200/lcd-sa9200.c82
-rw-r--r--firmware/target/arm/rk27xx/ihifi/lcd-ihifi.c15
-rw-r--r--firmware/target/arm/rk27xx/ihifi2/lcd-ihifi770.c15
-rw-r--r--firmware/target/arm/rk27xx/ihifi2/lcd-ihifi770c.c15
-rw-r--r--firmware/target/arm/rk27xx/ihifi2/lcd-ihifi800.c15
-rw-r--r--firmware/target/arm/rk27xx/lcd-hifiman.c19
-rw-r--r--firmware/target/arm/rk27xx/ma/lcd-ma.c15
-rw-r--r--firmware/target/arm/rk27xx/rk27generic/lcd-rk27generic.c19
-rw-r--r--firmware/target/arm/s5l8700/ipodnano2g/lcd-asm-nano2g.S228
-rw-r--r--firmware/target/arm/s5l8700/ipodnano2g/lcd-nano2g.c33
-rw-r--r--firmware/target/arm/s5l8700/meizu-m3/lcd-m3.c8
-rw-r--r--firmware/target/arm/s5l8700/meizu-m6sp/lcd-m6sp.c16
-rw-r--r--firmware/target/arm/s5l8702/ipod6g/lcd-6g.c46
-rw-r--r--firmware/target/arm/s5l8702/ipod6g/lcd-asm-6g.S1013
-rw-r--r--firmware/target/arm/samsung/yh820/lcd-as-yh820.S550
-rw-r--r--firmware/target/arm/samsung/yh820/lcd-yh820.c74
-rw-r--r--firmware/target/arm/samsung/yh925/lcd-as-yh925.S538
-rw-r--r--firmware/target/arm/samsung/yh925/lcd-yh925.c94
-rw-r--r--firmware/target/arm/sandisk/sansa-c200/lcd-as-c200.S550
-rw-r--r--firmware/target/arm/tms320dm320/mrobe-500/lcd-mr500.c86
-rw-r--r--firmware/target/coldfire/iaudio/x5/lcd-as-x5.S242
-rw-r--r--firmware/target/coldfire/iaudio/x5/lcd-x5.c63
-rw-r--r--firmware/target/coldfire/iriver/h300/lcd-as-h300.S246
-rw-r--r--firmware/target/coldfire/iriver/h300/lcd-h300.c61
-rw-r--r--firmware/target/mips/ingenic_jz47xx/lcd-jz4740.c62
54 files changed, 9638 insertions, 3 deletions
diff --git a/apps/plugin.c b/apps/plugin.c
index 2b3b58a654..888a9e109c 100644
--- a/apps/plugin.c
+++ b/apps/plugin.c
@@ -222,6 +222,15 @@ static const struct plugin_api rockbox_api = {
#if LCD_DEPTH >= 16
lcd_bitmap_transparent_part,
lcd_bitmap_transparent,
+#if MEMORYSIZE > 2
+ lcd_blit_yuv,
+#if defined(TOSHIBA_GIGABEAT_F) || defined(SANSA_E200) || defined(SANSA_C200) \
+ || defined(IRIVER_H10) || defined(COWON_D2) || defined(PHILIPS_HDD1630) \
+ || defined(SANSA_FUZE) || defined(SANSA_E200V2) || defined(SANSA_FUZEV2) \
+ || defined(TOSHIBA_GIGABEAT_S) || defined(PHILIPS_SA9200)
+ lcd_yuv_set_options,
+#endif
+#endif /* MEMORYSIZE > 2 */
#elif (LCD_DEPTH < 4) && (CONFIG_PLATFORM & PLATFORM_NATIVE)
lcd_blit_mono,
lcd_blit_grey_phase,
diff --git a/apps/plugin.h b/apps/plugin.h
index 681200608c..2ac333a19f 100644
--- a/apps/plugin.h
+++ b/apps/plugin.h
@@ -242,6 +242,17 @@ struct plugin_api {
int x, int y, int width, int height);
void (*lcd_bitmap_transparent)(const fb_data *src, int x, int y,
int width, int height);
+#if MEMORYSIZE > 2
+ void (*lcd_blit_yuv)(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height);
+#if defined(TOSHIBA_GIGABEAT_F) || defined(SANSA_E200) || defined(SANSA_C200) \
+ || defined(IRIVER_H10) || defined(COWON_D2) || defined(PHILIPS_HDD1630) \
+ || defined(SANSA_FUZE) || defined(SANSA_E200V2) || defined(SANSA_FUZEV2) \
+ || defined(TOSHIBA_GIGABEAT_S) || defined(PHILIPS_SA9200)
+ void (*lcd_yuv_set_options)(unsigned options);
+#endif
+#endif /* MEMORYSIZE > 2 */
#elif (LCD_DEPTH < 4) && (CONFIG_PLATFORM & PLATFORM_NATIVE)
void (*lcd_blit_mono)(const unsigned char *data, int x, int by, int width,
int bheight, int stride);
diff --git a/apps/plugins/test_fps.c b/apps/plugins/test_fps.c
index b2fc957dc1..ddf938ac25 100644
--- a/apps/plugins/test_fps.c
+++ b/apps/plugins/test_fps.c
@@ -123,6 +123,94 @@ static void time_main_update(void)
log_text(str);
}
+#if defined(HAVE_LCD_COLOR) && (MEMORYSIZE > 2)
+
+#if LCD_WIDTH >= LCD_HEIGHT
+#define YUV_WIDTH LCD_WIDTH
+#define YUV_HEIGHT LCD_HEIGHT
+#else /* Assume the screen is rotated on portrait LCDs */
+#define YUV_WIDTH LCD_HEIGHT
+#define YUV_HEIGHT LCD_WIDTH
+#endif
+
+static unsigned char ydata[YUV_HEIGHT][YUV_WIDTH];
+static unsigned char udata[YUV_HEIGHT/2][YUV_WIDTH/2];
+static unsigned char vdata[YUV_HEIGHT/2][YUV_WIDTH/2];
+
+static unsigned char * const yuvbuf[3] = {
+ (void*)ydata,
+ (void*)udata,
+ (void*)vdata
+};
+
+static void make_gradient_rect(int width, int height)
+{
+ unsigned char vline[YUV_WIDTH/2];
+ int x, y;
+
+ width /= 2;
+ height /= 2;
+
+ for (x = 0; x < width; x++)
+ vline[x] = (x << 8) / width;
+ for (y = 0; y < height; y++)
+ {
+ rb->memset(udata[y], (y << 8) / height, width);
+ rb->memcpy(vdata[y], vline, width);
+ }
+}
+
+static void time_main_yuv(void)
+{
+ char str[32]; /* text buffer */
+ long time_start; /* start tickcount */
+ long time_end; /* end tickcount */
+ int frame_count;
+ int fps;
+
+ const int part14_x = YUV_WIDTH/4; /* x-offset for 1/4 update test */
+ const int part14_w = YUV_WIDTH/2; /* x-size for 1/4 update test */
+ const int part14_y = YUV_HEIGHT/4; /* y-offset for 1/4 update test */
+ const int part14_h = YUV_HEIGHT/2; /* y-size for 1/4 update test */
+
+ log_text("Main LCD YUV");
+
+ rb->memset(ydata, 128, sizeof(ydata)); /* medium grey */
+
+ /* Test 1: full LCD update */
+ make_gradient_rect(YUV_WIDTH, YUV_HEIGHT);
+
+ frame_count = 0;
+ rb->sleep(0); /* sync to tick */
+ time_start = *rb->current_tick;
+ while((time_end = *rb->current_tick) - time_start < DURATION)
+ {
+ rb->lcd_blit_yuv(yuvbuf, 0, 0, YUV_WIDTH,
+ 0, 0, YUV_WIDTH, YUV_HEIGHT);
+ frame_count++;
+ }
+ fps = calc_tenth_fps(frame_count, time_end - time_start);
+ rb->snprintf(str, sizeof(str), "1/1: %d.%d fps", fps / 10, fps % 10);
+ log_text(str);
+
+ /* Test 2: quarter LCD update */
+ make_gradient_rect(YUV_WIDTH/2, YUV_HEIGHT/2);
+
+ frame_count = 0;
+ rb->sleep(0); /* sync to tick */
+ time_start = *rb->current_tick;
+ while((time_end = *rb->current_tick) - time_start < DURATION)
+ {
+ rb->lcd_blit_yuv(yuvbuf, 0, 0, YUV_WIDTH,
+ part14_x, part14_y, part14_w, part14_h);
+ frame_count++;
+ }
+ fps = calc_tenth_fps(frame_count, time_end - time_start);
+ rb->snprintf(str, sizeof(str), "1/4: %d.%d fps", fps / 10, fps % 10);
+ log_text(str);
+}
+#endif
+
#ifdef HAVE_REMOTE_LCD
static void time_remote_update(void)
{
@@ -318,6 +406,9 @@ enum plugin_status plugin_start(const void* parameter)
#endif
time_main_update();
rb->sleep(HZ);
+#if defined(HAVE_LCD_COLOR) && (MEMORYSIZE > 2)
+ time_main_yuv();
+#endif
#if LCD_DEPTH < 4
time_greyscale();
#endif
diff --git a/docs/PLUGIN_API b/docs/PLUGIN_API
index 0a256ff147..834b3522be 100644
--- a/docs/PLUGIN_API
+++ b/docs/PLUGIN_API
@@ -832,6 +832,19 @@ void lcd_blit_mono(const unsigned char *data, int x, int by, int width, int bhei
\param stride
\description
+void lcd_blit_yuv(unsigned char * const src[3], int src_x, int src_y, int stride, int x, int y, int width, int height)
+ \group lcd
+ \conditions (LCD_DEPTH >= 16)
+ \param src[3]
+ \param src_x
+ \param src_y
+ \param stride
+ \param x
+ \param y
+ \param width
+ \param height
+ \description
+
void lcd_clear_display(void)
\group lcd
\description Clears the LCD and the framebuffer
@@ -1226,6 +1239,12 @@ void lcd_vline(int x, int y1, int y2)
\param y2 Y end coordinate
\description Draws a vertical line at (=x=, =y1=) -> (=x=, =y2=) within current drawing mode
+void lcd_yuv_set_options(unsigned options)
+ \group lcd
+ \conditions (LCD_DEPTH >= 16) && (defined(TOSHIBA_GIGABEAT_F) || defined(SANSA_E200) || defined(SANSA_C200) || defined(IRIVER_H10) || defined(COWON_D2))
+ \param options
+ \description
+
void led(bool on)
\param on
\description
diff --git a/firmware/SOURCES b/firmware/SOURCES
index 9c273246be..abce59fe04 100644
--- a/firmware/SOURCES
+++ b/firmware/SOURCES
@@ -947,6 +947,7 @@ target/arm/sandisk/sansa-e200/powermgmt-e200.c
#endif /* SANSA_E200 */
#ifdef SANSA_C200
+target/arm/sandisk/sansa-c200/lcd-as-c200.S
target/arm/sandisk/sansa-c200/button-c200.c
target/arm/sandisk/sansa-c200/powermgmt-c200.c
#endif /* SANSA_C200 */
@@ -971,6 +972,7 @@ target/arm/philips/piezo.c
target/arm/philips/sa9200/backlight-sa9200.c
target/arm/philips/sa9200/button-sa9200.c
target/arm/philips/sa9200/lcd-sa9200.c
+target/arm/philips/sa9200/lcd-as-sa9200.S
target/arm/philips/sa9200/power-sa9200.c
target/arm/philips/sa9200/powermgmt-sa9200.c
#endif /* PHILIPS_SA9200 */
@@ -988,6 +990,7 @@ target/arm/philips/fmradio_i2c-hdd.c
target/arm/philips/hdd1630/backlight-hdd1630.c
target/arm/philips/hdd1630/button-hdd1630.c
target/arm/philips/hdd1630/lcd-hdd1630.c
+target/arm/philips/hdd1630/lcd-as-hdd1630.S
target/arm/philips/hdd1630/powermgmt-hdd1630.c
#endif /* PHILIPS_HDD1630 */
@@ -995,6 +998,7 @@ target/arm/philips/hdd1630/powermgmt-hdd1630.c
target/arm/philips/hdd6330/backlight-hdd6330.c
target/arm/philips/hdd6330/button-hdd6330.c
target/arm/philips/hdd6330/lcd-hdd6330.c
+target/arm/philips/hdd6330/lcd-as-hdd6330.S
target/arm/philips/hdd6330/powermgmt-hdd6330.c
#endif /* PHILIPS_HDD6330 */
@@ -1073,6 +1077,7 @@ target/coldfire/iriver/h300/adc-h300.c
target/coldfire/iriver/h300/backlight-h300.c
target/coldfire/iriver/h300/button-h300.c
target/coldfire/iriver/h300/pcf50606-h300.c
+target/coldfire/iriver/h300/lcd-as-h300.S
target/coldfire/iriver/h300/lcd-h300.c
target/coldfire/iriver/h300/power-h300.c
target/coldfire/iriver/h300/powermgmt-h300.c
@@ -1105,6 +1110,7 @@ target/arm/iriver/h10/powermgmt-h10.c
#ifdef IRIVER_H10
target/arm/iriver/h10/lcd-h10_20gb.c
+target/arm/iriver/h10/lcd-as-h10.S
#endif /* IRIVER_H10 */
#ifdef IRIVER_H10_5GB
@@ -1308,11 +1314,13 @@ target/arm/ipod/button-clickwheel.c
#ifdef IPOD_COLOR
target/arm/ipod/backlight-4g_color.c
target/arm/ipod/button-clickwheel.c
+target/arm/ipod/lcd-as-color-nano.S
#endif /* IPOD_COLOR */
#ifdef IPOD_NANO
target/arm/ipod/backlight-nano_video.c
target/arm/ipod/button-clickwheel.c
+target/arm/ipod/lcd-as-color-nano.S
#endif /* IPOD_NANO */
#ifdef IPOD_VIDEO
@@ -1384,6 +1392,7 @@ target/arm/as3525/backlight-e200v2-fuze.c
target/arm/as3525/dbop-as3525.c
#ifndef BOOTLOADER
target/arm/as3525/sansa-e200v2/powermgmt-e200v2.c
+target/arm/as3525/lcd-as-e200v2-fuze-fuzev2.S
#endif /* !BOOTLOADER */
#endif /* SANSA_E200V2 */
@@ -1410,6 +1419,7 @@ target/arm/as3525/backlight-e200v2-fuze.c
target/arm/as3525/dbop-as3525.c
#ifndef BOOTLOADER
target/arm/as3525/sansa-fuze/powermgmt-fuze.c
+target/arm/as3525/lcd-as-e200v2-fuze-fuzev2.S
#endif /* !BOOTLOADER */
#endif /* SANSA_FUZE */
@@ -1420,6 +1430,7 @@ target/arm/as3525/sansa-fuzev2/button-fuzev2.c
target/arm/as3525/dbop-as3525.c
#ifndef BOOTLOADER
target/arm/as3525/sansa-fuzev2/powermgmt-fuzev2.c
+target/arm/as3525/lcd-as-e200v2-fuze-fuzev2.S
#endif /* !BOOTLOADER */
#endif /* SANSA_FUZEV2 */
@@ -1540,6 +1551,7 @@ target/arm/s5l8702/pl080.c
target/arm/s5l8702/dma-s5l8702.c
target/arm/s5l8702/clocking-s5l8702.c
target/arm/s5l8702/ipod6g/lcd-6g.c
+target/arm/s5l8702/ipod6g/lcd-asm-6g.S
target/arm/s5l8702/ipod6g/piezo-6g.c
#if 0 //TODO
target/arm/s5l8702/postmortemstub.S
@@ -1756,6 +1768,7 @@ target/arm/samsung/power-yh82x_yh92x.c
#ifdef SAMSUNG_YH820
target/arm/samsung/yh820/backlight-yh820.c
target/arm/samsung/yh820/lcd-yh820.c
+target/arm/samsung/yh820/lcd-as-yh820.S
target/arm/samsung/yh820/powermgmt-yh820.c
#endif /* SAMSUNG_YH820 */
@@ -1770,6 +1783,7 @@ target/arm/samsung/fmradio-yh92x.c
#ifdef SAMSUNG_YH925
target/arm/samsung/yh925/backlight-yh925.c
target/arm/samsung/yh925/lcd-yh925.c
+target/arm/samsung/yh925/lcd-as-yh925.S
target/arm/samsung/yh925/powermgmt-yh925.c
#endif /* SAMSUNG_YH925 */
@@ -1788,6 +1802,7 @@ target/arm/pbell/vibe500/lcd-vibe500.c
target/arm/pbell/vibe500/button-vibe500.c
target/arm/pbell/vibe500/power-vibe500.c
target/arm/pbell/vibe500/backlight-vibe500.c
+target/arm/pbell/vibe500/lcd-as-vibe500.S
target/arm/pbell/vibe500/powermgmt-vibe500.c
#endif
diff --git a/firmware/asm/arm/lcd-as-memframe.S b/firmware/asm/arm/lcd-as-memframe.S
index 4bbae6fc0a..52ab0447c2 100644
--- a/firmware/asm/arm/lcd-as-memframe.S
+++ b/firmware/asm/arm/lcd-as-memframe.S
@@ -99,3 +99,594 @@ lcd_copy_buffer_rect: @
bgt 10b @ copy line @
ldmpc regs=r4-r11 @ restore regs and return
.size lcd_copy_buffer_rect, .-lcd_copy_buffer_rect
+
+
+/****************************************************************************
+ * void lcd_write_yuv420_lines(fb_data *dst,
+ * unsigned char const * const src[3],
+ * int width,
+ * int stride);
+ *
+ * |R| |1.000000 -0.000001 1.402000| |Y'|
+ * |G| = |1.000000 -0.334136 -0.714136| |Pb|
+ * |B| |1.000000 1.772000 0.000000| |Pr|
+ * Scaled, normalized, rounded and tweaked to yield RGB 565:
+ * |R| |74 0 101| |Y' - 16| >> 9
+ * |G| = |74 -24 -51| |Cb - 128| >> 8
+ * |B| |74 128 0| |Cr - 128| >> 9
+ *
+ * Write four RGB565 pixels in the following order on each loop:
+ * 1 3 + > down
+ * 2 4 \/ left
+ */
+ .section .icode.lcd_write_yuv420_lines, "ax", %progbits
+ .align 2
+ .global lcd_write_yuv420_lines
+ .type lcd_write_yuv420_lines, %function
+lcd_write_yuv420_lines:
+ @ r0 = dst
+ @ r1 = yuv_src
+ @ r2 = width
+ @ r3 = stride
+ stmfd sp!, { r4-r10, lr } @ save non-scratch
+ ldmia r1, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
+ @ r5 = yuv_src[1] = Cb_p
+ @ r6 = yuv_src[2] = Cr_p
+ @ r1 = scratch
+ sub r3, r3, #1 @
+10: @ loop line @
+ ldrb r7, [r4], #1 @ r7 = *Y'_p++;
+ ldrb r8, [r5], #1 @ r8 = *Cb_p++;
+ ldrb r9, [r6], #1 @ r9 = *Cr_p++;
+ @
+ sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right
+ add r7, r12, r7, asl #5 @ by one less when adding - same for all
+ @
+ sub r8, r8, #128 @ Cb -= 128
+ sub r9, r9, #128 @ Cr -= 128
+ @
+ add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24
+ add r10, r10, r10, asl #4 @
+ add r10, r10, r8, asl #3 @
+ add r10, r10, r8, asl #4 @
+ @
+ add lr, r9, r9, asl #2 @ r9 = Cr*101
+ add lr, lr, r9, asl #5 @
+ add r9, lr, r9, asl #6 @
+ @
+ add r8, r8, #2 @ r8 = bu = (Cb*128 + 128) >> 8
+ mov r8, r8, asr #2 @
+ add r9, r9, #256 @ r9 = rv = (r9 + 256) >> 9
+ mov r9, r9, asr #9 @
+ rsb r10, r10, #128 @ r10 = guv = (-r10 + 128) >> 8
+ mov r10, r10, asr #8 @
+ @ compute R, G, and B
+ add r1, r8, r7, asr #8 @ r1 = b = (Y >> 9) + bu
+ add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+#if ARM_ARCH >= 6
+ usat r1, #5, r1 @ clamp b
+ usat lr, #5, lr @ clamp r
+ usat r7, #6, r7 @ clamp g
+#else
+ orr r12, r1, lr @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r1, #31 @ clamp b
+ mvnhi r1, r1, asr #31 @
+ andhi r1, r1, #31 @
+ cmp lr, #31 @ clamp r
+ mvnhi lr, lr, asr #31 @
+ andhi lr, lr, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+#endif
+ @
+ ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
+ @
+ orr r1, r1, r7, lsl #5 @ r4 |= (g << 5)
+ orr r1, r1, lr, lsl #11 @ r4 = b | (r << 11)
+
+#if LCD_WIDTH >= LCD_HEIGHT
+ strh r1, [r0] @
+#elif LCD_WIDTH < 256
+ strh r1, [r0], #LCD_WIDTH @ store pixel
+#else
+ strh r1, [r0] @
+#endif
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @
+ add r7, r12, r7, asl #5 @
+ @ compute R, G, and B
+ add r1, r8, r7, asr #8 @ r1 = b = (Y >> 9) + bu
+ add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+#if ARM_ARCH >= 6
+ usat r1, #5, r1 @ clamp b
+ usat lr, #5, lr @ clamp r
+ usat r7, #6, r7 @ clamp g
+#else
+ orr r12, r1, lr @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r1, #31 @ clamp b
+ mvnhi r1, r1, asr #31 @
+ andhi r1, r1, #31 @
+ cmp lr, #31 @ clamp r
+ mvnhi lr, lr, asr #31 @
+ andhi lr, lr, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+#endif
+ @
+ ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
+ @
+ orr r1, r1, lr, lsl #11 @ r1 = b | (r << 11)
+ orr r1, r1, r7, lsl #5 @ r1 |= (g << 5)
+
+#if LCD_WIDTH >= LCD_HEIGHT
+ add r0, r0, #2*LCD_WIDTH @
+ strh r1, [r0] @ store pixel
+ sub r0, r0, #2*LCD_WIDTH @
+#elif LCD_WIDTH < 256
+ strh r1, [r0, #-LCD_WIDTH-2] @ store pixel
+#else
+ strh r1, [r0, #-2] @
+ add r0, r0, #LCD_WIDTH @
+#endif
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @
+ add r7, r12, r7, asl #5 @
+ @ compute R, G, and B
+ add r1, r8, r7, asr #8 @ r1 = b = (Y >> 9) + bu
+ add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+#if ARM_ARCH >= 6
+ usat r1, #5, r1 @ clamp b
+ usat lr, #5, lr @ clamp r
+ usat r7, #6, r7 @ clamp g
+#else
+ orr r12, r1, lr @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r1, #31 @ clamp b
+ mvnhi r1, r1, asr #31 @
+ andhi r1, r1, #31 @
+ cmp lr, #31 @ clamp r
+ mvnhi lr, lr, asr #31 @
+ andhi lr, lr, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+#endif
+ @
+ ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
+ @
+ orr r1, r1, r7, lsl #5 @ r1 = b | (g << 5)
+ orr r1, r1, lr, lsl #11 @ r1 |= (r << 11)
+
+#if LCD_WIDTH >= LCD_HEIGHT
+ strh r1, [r0, #2]
+#elif LCD_WIDTH < 256
+ strh r1, [r0, #LCD_WIDTH]! @ store pixel
+#else
+ strh r1, [r0] @
+#endif
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @
+ add r7, r12, r7, asl #5 @
+ @ compute R, G, and B
+ add r1, r8, r7, asr #8 @ r1 = b = (Y >> 9) + bu
+ add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+#if ARM_ARCH >= 6
+ usat r1, #5, r1 @ clamp b
+ usat lr, #5, lr @ clamp r
+ usat r7, #6, r7 @ clamp g
+#else
+ orr r12, r1, lr @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r1, #31 @ clamp b
+ mvnhi r1, r1, asr #31 @
+ andhi r1, r1, #31 @
+ cmp lr, #31 @ clamp r
+ mvnhi lr, lr, asr #31 @
+ andhi lr, lr, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+#endif
+ @
+ orr r12, r1, lr, lsl #11 @ r12 = b | (r << 11)
+ orr r12, r12, r7, lsl #5 @ r12 |= (g << 5)
+
+#if LCD_WIDTH >= LCD_HEIGHT
+ add r0, r0, #2*LCD_WIDTH
+ strh r12, [r0, #2]
+#if LCD_WIDTH <= 512
+ sub r0, r0, #(2*LCD_WIDTH)-4
+#else
+ sub r0, r0, #(2*LCD_WIDTH)
+ add r0, r0, #4
+#endif
+#else
+ strh r12, [r0, #-2] @ store pixel
+#if LCD_WIDTH < 256
+ add r0, r0, #2*LCD_WIDTH @
+#else
+ add r0, r0, #LCD_WIDTH @
+#endif
+#endif
+ @
+ subs r2, r2, #2 @ subtract block from width
+ bgt 10b @ loop line @
+ @
+ ldmpc regs=r4-r10 @ restore registers and return
+ .ltorg @ dump constant pool
+ .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
+
+
+/****************************************************************************
+ * void lcd_write_yuv420_lines_odither(fb_data *dst,
+ * unsigned char const * const src[3],
+ * int width,
+ * int stride,
+ * int x_screen,
+ * int y_screen);
+ *
+ * |R| |1.000000 -0.000001 1.402000| |Y'|
+ * |G| = |1.000000 -0.334136 -0.714136| |Pb|
+ * |B| |1.000000 1.772000 0.000000| |Pr|
+ * Red scaled at twice g & b but at same precision to place it in correct
+ * bit position after multiply and leave instruction count lower.
+ * |R| |258 0 408| |Y' - 16|
+ * |G| = |149 -49 -104| |Cb - 128|
+ * |B| |149 258 0| |Cr - 128|
+ *
+ * Write four RGB565 pixels in the following order on each loop:
+ * 1 3 + > down
+ * 2 4 \/ left
+ *
+ * Kernel pattern (raw|rotated|use order):
+ * 5 3 4 2 2 6 3 7 row0 row2 > down
+ * 1 7 0 6 | 4 0 5 1 | 2 4 6 0 3 5 7 1 col0 left
+ * 4 2 5 3 | 3 7 2 6 | 3 5 7 1 2 4 6 0 col2 \/
+ * 0 6 1 7 5 1 4 0
+ */
+ .section .icode.lcd_write_yuv420_lines_odither, "ax", %progbits
+ .align 2
+ .global lcd_write_yuv420_lines_odither
+ .type lcd_write_yuv420_lines_odither, %function
+lcd_write_yuv420_lines_odither:
+ @ r0 = dst
+ @ r1 = yuv_src
+ @ r2 = width
+ @ r3 = stride
+ @ [sp] = x_screen
+ @ [sp+4] = y_screen
+ stmfd sp!, { r4-r11, lr } @ save non-scratch
+ ldmia r1, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
+ @ r5 = yuv_src[1] = Cb_p
+ @ r6 = yuv_src[2] = Cr_p
+ @
+ sub r3, r3, #1 @
+ add r1, sp, #36 @ Line up pattern and kernel quadrant
+ ldmia r1, { r12, r14 } @
+ eor r14, r14, r12 @
+ and r14, r14, #0x2 @
+ mov r14, r14, lsl #6 @ 0x00 or 0x80
+10: @ loop line @
+ @
+ ldrb r7, [r4], #1 @ r7 = *Y'_p++;
+ ldrb r8, [r5], #1 @ r8 = *Cb_p++;
+ ldrb r9, [r6], #1 @ r9 = *Cr_p++;
+ @
+ eor r14, r14, #0x80 @ flip pattern quadrant
+ @
+ sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @
+ sub r8, r8, #128 @ Cb -= 128
+ sub r9, r9, #128 @ Cr -= 128
+ @
+ add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49
+ add r10, r10, r8, asl #5 @
+ add r10, r10, r9, asl #3 @
+ add r10, r10, r9, asl #5 @
+ add r10, r10, r9, asl #6 @
+ @
+ mov r8, r8, asl #1 @ r8 = bu = Cb*258
+ add r8, r8, r8, asl #7 @
+ @
+ add r9, r9, r9, asl #1 @ r9 = rv = Cr*408
+ add r9, r9, r9, asl #4 @
+ mov r9, r9, asl #3 @
+ @
+ @ compute R, G, and B
+ add r1, r8, r7 @ r1 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ @ r8 = bu, r9 = rv, r10 = guv
+ @
+ sub r12, r1, r1, lsr #5 @ r1 = 31/32*b + b/256
+ add r1, r12, r1, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
+ add r7, r12, r7, lsr #8 @
+ @
+ add r12, r14, #0x100 @
+ @
+ add r1, r1, r12 @ b = r1 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+ @
+#if ARM_ARCH >= 6
+ usat r11, #5, r11, asr #11 @ clamp r
+ usat r7, #6, r7, asr #9 @ clamp g
+ usat r1, #5, r1, asr #10 @ clamp b
+ @
+ ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
+ @
+ orr r1, r1, r11, lsl #11 @ r1 = b | (r << 11)
+ orr r1, r1, r7, lsl #5 @ r1 |= (g << 5)
+#else
+ orr r12, r1, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r1, asr #15 @ clamp b
+ mvnne r1, r12, lsr #15 @
+ andne r1, r1, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
+ @
+ and r11, r11, #0xf800 @ pack pixel
+ and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) |
+ orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
+ orr r1, r11, r1, lsr #10 @ (b >> 10)
+#endif
+ @
+#if LCD_WIDTH >= LCD_HEIGHT
+ strh r1, [r0] @
+#elif LCD_WIDTH < 256
+ strh r1, [r0], #LCD_WIDTH @ store pixel
+#else
+ strh r1, [r0] @
+#endif
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @ compute R, G, and B
+ add r1, r8, r7 @ r1 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ sub r12, r1, r1, lsr #5 @ r1 = 31/32*b' + b'/256
+ add r1, r12, r1, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
+ add r7, r12, r7, lsr #8 @
+ @
+ add r12, r14, #0x200 @
+ @
+ add r1, r1, r12 @ b = r1 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+ @
+#if ARM_ARCH >= 6
+ usat r11, #5, r11, asr #11 @ clamp r
+ usat r7, #6, r7, asr #9 @ clamp g
+ usat r1, #5, r1, asr #10 @ clamp b
+ @
+ ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
+ @
+ orr r1, r1, r11, lsl #11 @ r1 = b | (r << 11)
+ orr r1, r1, r7, lsl #5 @ r1 |= (g << 5)
+#else
+ orr r12, r1, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r1, asr #15 @ clamp b
+ mvnne r1, r12, lsr #15 @
+ andne r1, r1, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
+ @
+ and r11, r11, #0xf800 @ pack pixel
+ and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) |
+ orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
+ orr r1, r11, r1, lsr #10 @ (b >> 10)
+#endif
+ @
+#if LCD_WIDTH >= LCD_HEIGHT
+ add r0, r0, #2*LCD_WIDTH @
+ strh r1, [r0] @ store pixel
+ sub r0, r0, #2*LCD_WIDTH @
+#elif LCD_WIDTH < 256
+ strh r1, [r0, #-LCD_WIDTH-2] @ store pixel
+#else
+ strh r1, [r0, #-2] @ store pixel
+ add r0, r0, #LCD_WIDTH @
+#endif
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @ compute R, G, and B
+ add r1, r8, r7 @ r1 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ @ r8 = bu, r9 = rv, r10 = guv
+ @
+ sub r12, r1, r1, lsr #5 @ r1 = 31/32*b' + b'/256
+ add r1, r12, r1, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
+ add r7, r12, r7, lsr #8 @
+ @
+ add r12, r14, #0x300 @
+ @
+ add r1, r1, r12 @ b = r1 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+ @
+#if ARM_ARCH >= 6
+ usat r11, #5, r11, asr #11 @ clamp r
+ usat r7, #6, r7, asr #9 @ clamp g
+ usat r1, #5, r1, asr #10 @ clamp b
+ @
+ ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
+ @
+ orr r1, r1, r11, lsl #11 @ r1 = b | (r << 11)
+ orr r1, r1, r7, lsl #5 @ r1 |= (g << 5)
+#else
+ orr r12, r1, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r1, asr #15 @ clamp b
+ mvnne r1, r12, lsr #15 @
+ andne r1, r1, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
+ @
+ and r11, r11, #0xf800 @ pack pixel
+ and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) |
+ orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
+ orr r1, r11, r1, lsr #10 @ (b >> 10)
+#endif
+ @
+#if LCD_WIDTH >= LCD_HEIGHT
+ strh r1, [r0, #2]
+#elif LCD_WIDTH < 256
+ strh r1, [r0, #LCD_WIDTH]! @ store pixel
+#else
+ strh r1, [r0] @
+#endif
+
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @ compute R, G, and B
+ add r1, r8, r7 @ r1 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ sub r12, r1, r1, lsr #5 @ r1 = 31/32*b + b/256
+ add r1, r12, r1, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
+ add r7, r12, r7, lsr #8 @
+ @
+ @ This element is zero - use r14 @
+ @
+ add r1, r1, r14 @ b = r1 + delta
+ add r11, r11, r14, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r14, lsr #1 @ g = r7 + delta/2
+ @
+#if ARM_ARCH >= 6
+ usat r11, #5, r11, asr #11 @ clamp r
+ usat r7, #6, r7, asr #9 @ clamp g
+ usat r1, #5, r1, asr #10 @ clamp b
+ @
+ orr r1, r1, r11, lsl #11 @ r1 = b | (r << 11)
+ orr r1, r1, r7, lsl #5 @ r1 |= (g << 5)
+#else
+ orr r12, r1, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r1, asr #15 @ clamp b
+ mvnne r1, r12, lsr #15 @
+ andne r1, r1, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ and r11, r11, #0xf800 @ pack pixel
+ and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) |
+ orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
+ orr r1, r11, r1, lsr #10 @ (b >> 10)
+#endif
+ @
+#if LCD_WIDTH >= LCD_HEIGHT
+ add r0, r0, #2*LCD_WIDTH
+ strh r1, [r0, #2] @ store pixel
+#if LCD_WIDTH <= 512
+ sub r0, r0, #(2*LCD_WIDTH)-4
+#else
+ sub r0, r0, #(2*LCD_WIDTH)
+ add r0, r0, #4
+#endif
+#else
+ strh r1, [r0, #-2] @ store pixel
+#if LCD_WIDTH < 256
+ add r0, r0, #2*LCD_WIDTH @
+#else
+ add r0, r0, #LCD_WIDTH @
+#endif
+#endif
+ @
+ subs r2, r2, #2 @ subtract block from width
+ bgt 10b @ loop line @
+ @
+ ldmpc regs=r4-r11 @ restore registers and return
+ .ltorg @ dump constant pool
+ .size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither
diff --git a/firmware/asm/lcd-as-memframe.c b/firmware/asm/lcd-as-memframe.c
index f7f3473fad..fb31fa1953 100644
--- a/firmware/asm/lcd-as-memframe.c
+++ b/firmware/asm/lcd-as-memframe.c
@@ -9,3 +9,171 @@ void lcd_copy_buffer_rect(fb_data *dst, fb_data *src, int width, int height)
dst += LCD_WIDTH;
} while (--height);
}
+
+#define YFAC (74)
+#define RVFAC (101)
+#define GUFAC (-24)
+#define GVFAC (-51)
+#define BUFAC (128)
+
+static inline int clamp(int val, int min, int max)
+{
+ if (val < min)
+ val = min;
+ else if (val > max)
+ val = max;
+ return val;
+}
+
+extern void lcd_write_yuv420_lines(fb_data *dst,
+ unsigned char const * const src[3],
+ int width,
+ int stride)
+{
+ /* Draw a partial YUV colour bitmap - similar behavior to lcd_blit_yuv
+ in the core */
+ const unsigned char *ysrc, *usrc, *vsrc;
+ fb_data *row_end;
+
+ /* width and height must be >= 2 and an even number */
+ width &= ~1;
+
+#if LCD_WIDTH >= LCD_HEIGHT
+ row_end = dst + width;
+#else
+ row_end = dst + LCD_WIDTH * width;
+#endif
+
+ ysrc = src[0];
+ usrc = src[1];
+ vsrc = src[2];
+
+ /* stride => amount to jump from end of last row to start of next */
+ stride -= width;
+
+ /* upsampling, YUV->RGB conversion and reduction to RGB in one go */
+
+ do
+ {
+ int y, cb, cr, rv, guv, bu, r, g, b;
+
+ y = YFAC*(*ysrc++ - 16);
+ cb = *usrc++ - 128;
+ cr = *vsrc++ - 128;
+
+ rv = RVFAC*cr;
+ guv = GUFAC*cb + GVFAC*cr;
+ bu = BUFAC*cb;
+
+ r = y + rv;
+ g = y + guv;
+ b = y + bu;
+
+ if ((unsigned)(r | g | b) > 64*256-1)
+ {
+ r = clamp(r, 0, 64*256-1);
+ g = clamp(g, 0, 64*256-1);
+ b = clamp(b, 0, 64*256-1);
+ }
+
+ *dst = FB_RGBPACK(r >> 6, g >> 6, b >> 6);
+
+#if LCD_WIDTH >= LCD_HEIGHT
+ dst++;
+#else
+ dst += LCD_WIDTH;
+#endif
+
+ y = YFAC*(*ysrc++ - 16);
+ r = y + rv;
+ g = y + guv;
+ b = y + bu;
+
+ if ((unsigned)(r | g | b) > 64*256-1)
+ {
+ r = clamp(r, 0, 64*256-1);
+ g = clamp(g, 0, 64*256-1);
+ b = clamp(b, 0, 64*256-1);
+ }
+
+ *dst = FB_RGBPACK(r >> 6, g >> 6, b >> 6);
+
+#if LCD_WIDTH >= LCD_HEIGHT
+ dst++;
+#else
+ dst += LCD_WIDTH;
+#endif
+ }
+ while (dst < row_end);
+
+ ysrc += stride;
+ usrc -= width >> 1;
+ vsrc -= width >> 1;
+
+#if LCD_WIDTH >= LCD_HEIGHT
+ row_end += LCD_WIDTH;
+ dst += LCD_WIDTH - width;
+#else
+ row_end -= 1;
+ dst -= LCD_WIDTH*width + 1;
+#endif
+
+ do
+ {
+ int y, cb, cr, rv, guv, bu, r, g, b;
+
+ y = YFAC*(*ysrc++ - 16);
+ cb = *usrc++ - 128;
+ cr = *vsrc++ - 128;
+
+ rv = RVFAC*cr;
+ guv = GUFAC*cb + GVFAC*cr;
+ bu = BUFAC*cb;
+
+ r = y + rv;
+ g = y + guv;
+ b = y + bu;
+
+ if ((unsigned)(r | g | b) > 64*256-1)
+ {
+ r = clamp(r, 0, 64*256-1);
+ g = clamp(g, 0, 64*256-1);
+ b = clamp(b, 0, 64*256-1);
+ }
+
+ *dst = FB_RGBPACK(r >> 6, g >> 6, b >> 6);
+
+#if LCD_WIDTH >= LCD_HEIGHT
+ dst++;
+#else
+ dst += LCD_WIDTH;
+#endif
+
+ y = YFAC*(*ysrc++ - 16);
+ r = y + rv;
+ g = y + guv;
+ b = y + bu;
+
+ if ((unsigned)(r | g | b) > 64*256-1)
+ {
+ r = clamp(r, 0, 64*256-1);
+ g = clamp(g, 0, 64*256-1);
+ b = clamp(b, 0, 64*256-1);
+ }
+
+ *dst = FB_RGBPACK(r >> 6, g >> 6, b >> 6);
+
+#if LCD_WIDTH >= LCD_HEIGHT
+ dst++;
+#else
+ dst += LCD_WIDTH;
+#endif
+ }
+ while (dst < row_end);
+}
+
+void lcd_write_yuv420_lines_odither(fb_data *dst,
+ unsigned char const * const src[3],
+ int width, int stride,
+ int x_screen, int y_screen)
+__attribute__((alias("lcd_write_yuv420_lines")));
diff --git a/firmware/drivers/lcd-color-common.c b/firmware/drivers/lcd-color-common.c
index cce0bbbf4e..ae0fe519bc 100644
--- a/firmware/drivers/lcd-color-common.c
+++ b/firmware/drivers/lcd-color-common.c
@@ -220,6 +220,195 @@ static inline int clamp(int val, int min, int max)
return val;
}
+#ifndef _WIN32
+/*
+ * weak attribute doesn't work for win32 as of gcc 4.6.2 and binutils 2.21.52
+ * When building win32 simulators, we won't be using an optimized version of
+ * lcd_blit_yuv(), so just don't use the weak attribute.
+ */
+__attribute__((weak))
+#endif
+void lcd_yuv_set_options(unsigned options)
+{
+ (void)options;
+}
+
+/* Draw a partial YUV colour bitmap */
+#ifndef _WIN32
+__attribute__((weak))
+#endif
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ const unsigned char *ysrc, *usrc, *vsrc;
+ int linecounter;
+ fb_data *dst, *row_end;
+ long z;
+
+ /* width and height must be >= 2 and an even number */
+ width &= ~1;
+ linecounter = height >> 1;
+
+#if LCD_WIDTH >= LCD_HEIGHT
+ dst = FBADDR(x, y);
+ row_end = dst + width;
+#else
+ dst = FBADDR(LCD_WIDTH - y - 1, x);
+ row_end = dst + LCD_WIDTH * width;
+#endif
+
+ z = stride * src_y;
+ ysrc = src[0] + z + src_x;
+ usrc = src[1] + (z >> 2) + (src_x >> 1);
+ vsrc = src[2] + (usrc - src[1]);
+
+ /* stride => amount to jump from end of last row to start of next */
+ stride -= width;
+
+ /* upsampling, YUV->RGB conversion and reduction to RGB565 in one go */
+
+ do
+ {
+ do
+ {
+ int y, cb, cr, rv, guv, bu, r, g, b;
+
+ y = YFAC*(*ysrc++ - 16);
+ cb = *usrc++ - 128;
+ cr = *vsrc++ - 128;
+
+ rv = RVFAC*cr;
+ guv = GUFAC*cb + GVFAC*cr;
+ bu = BUFAC*cb;
+
+ r = y + rv;
+ g = y + guv;
+ b = y + bu;
+
+ if ((unsigned)(r | g | b) > 64*256-1)
+ {
+ r = clamp(r, 0, 64*256-1);
+ g = clamp(g, 0, 64*256-1);
+ b = clamp(b, 0, 64*256-1);
+ }
+
+ *dst = FB_RGBPACK(r >> 6, g >> 6, b >> 6);
+
+#if LCD_WIDTH >= LCD_HEIGHT
+ dst++;
+#else
+ dst += LCD_WIDTH;
+#endif
+
+ y = YFAC*(*ysrc++ - 16);
+ r = y + rv;
+ g = y + guv;
+ b = y + bu;
+
+ if ((unsigned)(r | g | b) > 64*256-1)
+ {
+ r = clamp(r, 0, 64*256-1);
+ g = clamp(g, 0, 64*256-1);
+ b = clamp(b, 0, 64*256-1);
+ }
+
+ *dst = FB_RGBPACK(r >> 6, g >> 6, b >> 6);
+
+#if LCD_WIDTH >= LCD_HEIGHT
+ dst++;
+#else
+ dst += LCD_WIDTH;
+#endif
+ }
+ while (dst < row_end);
+
+ ysrc += stride;
+ usrc -= width >> 1;
+ vsrc -= width >> 1;
+
+#if LCD_WIDTH >= LCD_HEIGHT
+ row_end += LCD_WIDTH;
+ dst += LCD_WIDTH - width;
+#else
+ row_end -= 1;
+ dst -= LCD_WIDTH*width + 1;
+#endif
+
+ do
+ {
+ int y, cb, cr, rv, guv, bu, r, g, b;
+
+ y = YFAC*(*ysrc++ - 16);
+ cb = *usrc++ - 128;
+ cr = *vsrc++ - 128;
+
+ rv = RVFAC*cr;
+ guv = GUFAC*cb + GVFAC*cr;
+ bu = BUFAC*cb;
+
+ r = y + rv;
+ g = y + guv;
+ b = y + bu;
+
+ if ((unsigned)(r | g | b) > 64*256-1)
+ {
+ r = clamp(r, 0, 64*256-1);
+ g = clamp(g, 0, 64*256-1);
+ b = clamp(b, 0, 64*256-1);
+ }
+
+ *dst = FB_RGBPACK(r >> 6, g >> 6, b >> 6);
+
+#if LCD_WIDTH >= LCD_HEIGHT
+ dst++;
+#else
+ dst += LCD_WIDTH;
+#endif
+
+ y = YFAC*(*ysrc++ - 16);
+ r = y + rv;
+ g = y + guv;
+ b = y + bu;
+
+ if ((unsigned)(r | g | b) > 64*256-1)
+ {
+ r = clamp(r, 0, 64*256-1);
+ g = clamp(g, 0, 64*256-1);
+ b = clamp(b, 0, 64*256-1);
+ }
+
+ *dst = FB_RGBPACK(r >> 6, g >> 6, b >> 6);
+
+#if LCD_WIDTH >= LCD_HEIGHT
+ dst++;
+#else
+ dst += LCD_WIDTH;
+#endif
+ }
+ while (dst < row_end);
+
+ ysrc += stride;
+ usrc += stride >> 1;
+ vsrc += stride >> 1;
+
+#if LCD_WIDTH >= LCD_HEIGHT
+ row_end += LCD_WIDTH;
+ dst += LCD_WIDTH - width;
+#else
+ row_end -= 1;
+ dst -= LCD_WIDTH*width + 1;
+#endif
+ }
+ while (--linecounter > 0);
+
+#if LCD_WIDTH >= LCD_HEIGHT
+ lcd_update_rect(x, y, width, height);
+#else
+ lcd_update_rect(LCD_WIDTH - y - height, x, height, width);
+#endif
+}
+
/* Fill a rectangle with a gradient. This function draws only the partial
* gradient. It assumes the original gradient is src_height high and skips
* the first few rows. This is useful for drawing only the bottom half of
diff --git a/firmware/drivers/lcd-memframe.c b/firmware/drivers/lcd-memframe.c
index 357b4af32a..bb1682b074 100644
--- a/firmware/drivers/lcd-memframe.c
+++ b/firmware/drivers/lcd-memframe.c
@@ -110,3 +110,101 @@ void lcd_update_rect(int x, int y, int width, int height)
}
}
#endif /* LCD_OPTIMIZED_UPDATE_RECT */
+
+
+/*** YUV functions ***/
+static unsigned lcd_yuv_options SHAREDBSS_ATTR = 0;
+
+
+/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. */
+extern void lcd_write_yuv420_lines(fb_data *dst,
+ unsigned char const * const src[3],
+ int width,
+ int stride);
+extern void lcd_write_yuv420_lines_odither(fb_data *dst,
+ unsigned char const * const src[3],
+ int width,
+ int stride,
+ int x_screen, /* To align dither pattern */
+ int y_screen);
+
+void lcd_yuv_set_options(unsigned options)
+{
+ lcd_yuv_options = options;
+}
+
+#ifndef LCD_OPTIMIZED_BLIT_YUV
+/* Performance function to blit a YUV bitmap directly to the LCD
+ * src_x, src_y, width and height should be even and within the LCD's
+ * boundaries.
+ *
+ * For portrait LCDs, show it rotated counterclockwise by 90 degrees
+ */
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ /* Macrofy the bits that change between orientations */
+#if CONFIG_ORIENTATION == SCREEN_PORTRAIT
+ #define LCD_FRAMEBUF_ADDR_ORIENTED(col, row) \
+ LCD_FRAMEBUF_ADDR(row, col)
+ #define lcd_write_yuv420_lines_odither_oriented(dst, src, w, s, col, row) \
+ lcd_write_yuv420_lines_odither(dst, src, w, s, row, col)
+ #define YUV_NEXTLINE() dst -= 2
+ #define YUV_DITHER_NEXTLINE() dst -= 2, y -= 2
+#else
+ #define LCD_FRAMEBUF_ADDR_ORIENTED(col, row) \
+ LCD_FRAMEBUF_ADDR(col, row)
+ #define lcd_write_yuv420_lines_odither_oriented(dst, src, w, s, col, row) \
+ lcd_write_yuv420_lines_odither(dst, src, w, s, col, row)
+ #define YUV_NEXTLINE() dst += 2*LCD_FBWIDTH
+ #define YUV_DITHER_NEXTLINE() dst += 2*LCD_FBWIDTH, y += 2
+#endif
+
+ if (!lcd_write_enabled())
+ return;
+
+ /* Sorry, but width and height must be >= 2 or else */
+ width &= ~1;
+ height >>= 1;
+
+#if CONFIG_ORIENTATION == SCREEN_PORTRAIT
+ /* Adjust portrait coordinates to make (0, 0) the upper right corner */
+ y = LCD_WIDTH - 1 - y;
+#endif
+
+ fb_data *dst = LCD_FRAMEBUF_ADDR_ORIENTED(x, y);
+ int z = stride*src_y;
+
+ unsigned char const * yuv_src[3];
+ yuv_src[0] = src[0] + z + src_x;
+ yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1);
+ yuv_src[2] = src[2] + (yuv_src[1] - src[1]);
+
+ if (lcd_yuv_options & LCD_YUV_DITHER)
+ {
+ do
+ {
+ lcd_write_yuv420_lines_odither_oriented(dst, yuv_src, width,
+ stride, x, y);
+ yuv_src[0] += stride << 1; /* Skip down two luma lines */
+ yuv_src[1] += stride >> 1; /* Skip down one chroma line */
+ yuv_src[2] += stride >> 1;
+ YUV_DITHER_NEXTLINE();
+ }
+ while (--height > 0);
+ }
+ else
+ {
+ do
+ {
+ lcd_write_yuv420_lines(dst, yuv_src, width, stride);
+ yuv_src[0] += stride << 1; /* Skip down two luma lines */
+ yuv_src[1] += stride >> 1; /* Skip down one chroma line */
+ yuv_src[2] += stride >> 1;
+ YUV_NEXTLINE();
+ }
+ while (--height > 0);
+ }
+}
+#endif /* LCD_OPTIMIZED_BLIT_YUV */
diff --git a/firmware/export/config/mrobe500.h b/firmware/export/config/mrobe500.h
index 0ecec80e02..621c0b75ca 100644
--- a/firmware/export/config/mrobe500.h
+++ b/firmware/export/config/mrobe500.h
@@ -220,7 +220,7 @@
#define HAVE_USB_HID_MOUSE
/* Define this if hardware supports alternate blitting */
-#define HAVE_LCD_MODES (LCD_MODE_RGB565 | LCD_MODE_PAL256)
+#define HAVE_LCD_MODES (LCD_MODE_RGB565 | LCD_MODE_YUV | LCD_MODE_PAL256)
#define CONFIG_CPU DM320
diff --git a/firmware/export/lcd.h b/firmware/export/lcd.h
index 29c34b698b..67b22190ad 100644
--- a/firmware/export/lcd.h
+++ b/firmware/export/lcd.h
@@ -145,6 +145,7 @@ struct scrollinfo;
#if defined(HAVE_LCD_MODES)
void lcd_set_mode(int mode);
#define LCD_MODE_RGB565 0x00000001
+#define LCD_MODE_YUV 0x00000002
#define LCD_MODE_PAL256 0x00000004
#if HAVE_LCD_MODES & LCD_MODE_PAL256
@@ -235,7 +236,15 @@ extern bool lcd_putsxy_scroll_func(int x, int y, const unsigned char *string,
void *data, int x_offset);
/* performance function */
-#if !defined(HAVE_LCD_COLOR)
+#if defined(HAVE_LCD_COLOR)
+#if MEMORYSIZE > 2
+#define LCD_YUV_DITHER 0x1
+ extern void lcd_yuv_set_options(unsigned options);
+ extern void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height);
+#endif /* MEMORYSIZE > 2 */
+#else
extern void lcd_blit_mono(const unsigned char *data, int x, int by, int width,
int bheight, int stride);
extern void lcd_blit_grey_phase(unsigned char *values, unsigned char *phases,
diff --git a/firmware/target/arm/as3525/lcd-as-e200v2-fuze-fuzev2.S b/firmware/target/arm/as3525/lcd-as-e200v2-fuze-fuzev2.S
new file mode 100644
index 0000000000..feffe6fb96
--- /dev/null
+++ b/firmware/target/arm/as3525/lcd-as-e200v2-fuze-fuzev2.S
@@ -0,0 +1,550 @@
+/***************************************************************************
+ * __________ __ ___.
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id$
+ *
+ * Copyright (C) 2007 by Jens Arnold
+ * Heavily based on lcd-as-memframe.c by Michael Sevakis
+ * Adapted for Sansa Fuze/e200v2 by Rafaël Carré
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "config.h"
+#include "cpu.h"
+
+#define DBOP_BUSY (1<<10)
+
+/****************************************************************************
+ * void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ * int width,
+ * int stride);
+ *
+ * |R| |1.000000 -0.000001 1.402000| |Y'|
+ * |G| = |1.000000 -0.334136 -0.714136| |Pb|
+ * |B| |1.000000 1.772000 0.000000| |Pr|
+ * Scaled, normalized, rounded and tweaked to yield RGB 565:
+ * |R| |74 0 101| |Y' - 16| >> 9
+ * |G| = |74 -24 -51| |Cb - 128| >> 8
+ * |B| |74 128 0| |Cr - 128| >> 9
+ *
+ * Write four RGB565 pixels in the following order on each loop:
+ * 1 3 + > down
+ * 2 4 \/ left
+ */
+ .section .icode, "ax", %progbits
+ .align 2
+ .global lcd_write_yuv420_lines
+ .type lcd_write_yuv420_lines, %function
+lcd_write_yuv420_lines:
+ @ r0 = yuv_src
+ @ r1 = width
+ @ r2 = stride
+ stmfd sp!, { r4-r11, lr } @ save non-scratch
+
+ mov r3, #0xC8000000 @
+ orr r3, r3, #0x120000 @ r3 = DBOP_BASE
+
+ ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
+ @ r5 = yuv_src[1] = Cb_p
+ @ r6 = yuv_src[2] = Cr_p
+ @ r0 = scratch
+ ldr r12, [r3, #8] @
+ sub r2, r2, #1 @ stride -= 1
+ orr r12, r12, #3<<13 @ DBOP_CTRL |= (1<<13|1<<14) (32bit mode)
+#ifdef SANSA_FUZEV2
+ bic r12, r12, #1<<13 @ DBOP_CTRL &= ~(1<<13),still 32bit mode
+#endif
+ str r12, [r3, #8] @
+10: @ loop line @
+ ldrb r7, [r4], #1 @ r7 = *Y'_p++;
+ ldrb r8, [r5], #1 @ r8 = *Cb_p++;
+ ldrb r9, [r6], #1 @ r9 = *Cr_p++;
+ @
+ sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right
+ add r7, r12, r7, asl #5 @ by one less when adding - same for all
+ @
+ sub r8, r8, #128 @ Cb -= 128
+ sub r9, r9, #128 @ Cr -= 128
+ @
+ add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24
+ add r10, r10, r10, asl #4 @
+ add r10, r10, r8, asl #3 @
+ add r10, r10, r8, asl #4 @
+ @
+ add lr, r9, r9, asl #2 @ r9 = Cr*101
+ add lr, lr, r9, asl #5 @
+ add r9, lr, r9, asl #6 @
+ @
+ add r8, r8, #2 @ r8 = bu = (Cb*128 + 128) >> 8
+ mov r8, r8, asr #2 @
+ add r9, r9, #256 @ r9 = rv = (r9 + 256) >> 9
+ mov r9, r9, asr #9 @
+ rsb r10, r10, #128 @ r10 = guv = (-r10 + 128) >> 8
+ mov r10, r10, asr #8 @
+ @ compute R, G, and B
+ add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
+ add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r0, lr @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r0, #31 @ clamp b
+ mvnhi r0, r0, asr #31 @
+ andhi r0, r0, #31 @
+ cmp lr, #31 @ clamp r
+ mvnhi lr, lr, asr #31 @
+ andhi lr, lr, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ orr r0, r0, lr, lsl #11 @ r0 = (r << 11) | b
+ orr r11, r0, r7, lsl #5 @ r11 = (r << 11) | (g << 5) | b
+ orr r11, r0, r7, lsl #5 @ NOTE(review): duplicate of previous line - redundant but harmless (same unchanged sources)
+#ifdef SANSA_FUZEV2
+ mov r0, r11, lsr #8 @
+ bic r11, r11, #0xff00 @
+ orr r11, r0, r11, lsl #8 @ swap bytes
+#endif
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @
+ add r7, r12, r7, asl #5 @
+ @ compute R, G, and B
+ add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
+ add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r0, lr @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r0, #31 @ clamp b
+ mvnhi r0, r0, asr #31 @
+ andhi r0, r0, #31 @
+ cmp lr, #31 @ clamp r
+ mvnhi lr, lr, asr #31 @
+ andhi lr, lr, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
+ @
+ orr r0, r0, lr, lsl #11 @ r0 = (r << 11) | b
+ orr r0, r0, r7, lsl #5 @ r0 = (r << 11) | (g << 5) | b
+
+#ifdef SANSA_FUZEV2
+ mov r7, r0, lsr #8 @
+ bic r7, r7, #0xff00 @
+ orr r0, r7, r0, lsl #8 @ swap bytes
+#endif
+
+ orr r0, r11, r0, lsl#16 @ pack with 2nd pixel
+ str r0, [r3, #0x10] @ write pixel
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @
+ add r7, r12, r7, asl #5 @
+ @ compute R, G, and B
+ add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
+ add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r0, lr @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r0, #31 @ clamp b
+ mvnhi r0, r0, asr #31 @
+ andhi r0, r0, #31 @
+ cmp lr, #31 @ clamp r
+ mvnhi lr, lr, asr #31 @
+ andhi lr, lr, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ @
+ orr r0, r0, lr, lsl #11 @ r0 = (r << 11) | b
+ orr r11, r0, r7, lsl #5 @ r11 = (r << 11) | (g << 5) | b
+
+#ifdef SANSA_FUZEV2
+ mov r0, r11, lsr #8 @
+ bic r11, r11, #0xff00 @
+ orr r11, r0, r11, lsl #8 @ swap byte
+#endif
+
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @
+ add r7, r12, r7, asl #5 @
+ @ compute R, G, and B
+ add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
+ add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r0, lr @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r0, #31 @ clamp b
+ mvnhi r0, r0, asr #31 @
+ andhi r0, r0, #31 @
+ cmp lr, #31 @ clamp r
+ mvnhi lr, lr, asr #31 @
+ andhi lr, lr, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ orr r0, r0, lr, lsl #11 @ r0 = (r << 11) | b
+ orr r0, r0, r7, lsl #5 @ r0 = (r << 11) | (g << 5) | b
+
+#ifdef SANSA_FUZEV2
+ mov r7, r0, lsr #8 @
+ bic r7, r7, #0xff00 @
+ orr r0, r7, r0, lsl #8 @ swap bytes
+#endif
+
+ orr r0, r11, r0, lsl#16 @ pack with 2nd pixel
+ str r0, [r3, #0x10] @ write pixel
+ @
+ subs r1, r1, #2 @ subtract block from width
+ bgt 10b @ loop line @
+ @
+1: @ busy
+ @ writing at max 110*32 (LCD_WIDTH/2), the fifo is bigger
+ @ so polling fifo empty only after each line is safe
+ ldr r7, [r3,#0xc] @ r7 = DBOP_STATUS
+ tst r7, #DBOP_BUSY @ fifo not empty?
+ beq 1b @
+
+ ldmpc regs=r4-r11 @ restore registers and return
+ .ltorg @ dump constant pool
+ .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
+
+/****************************************************************************
+ * void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
+ * int width,
+ * int stride,
+ * int x_screen,
+ * int y_screen);
+ *
+ * |R| |1.000000 -0.000001 1.402000| |Y'|
+ * |G| = |1.000000 -0.334136 -0.714136| |Pb|
+ * |B| |1.000000 1.772000 0.000000| |Pr|
+ * Red scaled at twice g & b but at same precision to place it in correct
+ * bit position after multiply and leave instruction count lower.
+ * |R| |258 0 408| |Y' - 16|
+ * |G| = |149 -49 -104| |Cb - 128|
+ * |B| |149 258 0| |Cr - 128|
+ *
+ * Write four RGB565 pixels in the following order on each loop:
+ * 1 3 + > down
+ * 2 4 \/ left
+ *
+ * Kernel pattern (raw|rotated|use order):
+ * 5 3 4 2 2 6 3 7 row0 row2 > down
+ * 1 7 0 6 | 4 0 5 1 | 2 4 6 0 3 5 7 1 col0 left
+ * 4 2 5 3 | 3 7 2 6 | 3 5 7 1 2 4 6 0 col2 \/
+ * 0 6 1 7 5 1 4 0
+ */
+ .section .icode, "ax", %progbits
+ .align 2
+ .global lcd_write_yuv420_lines_odither
+ .type lcd_write_yuv420_lines_odither, %function
+lcd_write_yuv420_lines_odither:
+ @ r0 = yuv_src
+ @ r1 = width
+ @ r2 = stride
+ @ r3 = x_screen
+ @ [sp] = y_screen
+ stmfd sp!, { r4-r11, lr } @ save non-scratch
+ ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
+ @ r5 = yuv_src[1] = Cb_p
+ @ r6 = yuv_src[2] = Cr_p
+ @
+ ldr r14, [sp, #36] @ Line up pattern and kernel quadrant
+ sub r2, r2, #1 @ stride -= 1
+ eor r14, r14, r3 @
+ and r14, r14, #0x2 @
+ mov r14, r14, lsl #6 @ 0x00 or 0x80
+
+ mov r3, #0xC8000000 @
+ orr r3, r3, #0x120000 @ r3 = DBOP_BASE, need to be redone
+ @ due to lack of registers
+ ldr r12, [r3, #8] @
+ orr r12, r12, #3<<13 @ DBOP_CTRL |= (1<<13|1<<14)
+#ifdef SANSA_FUZEV2
+ bic r12, r12, #1<<13 @ DBOP_CTRL &= ~(1<<13), still 32bit mode
+#endif
+ str r12, [r3, #8] @ (32bit mode)
+10: @ loop line @
+ @
+ ldrb r7, [r4], #1 @ r7 = *Y'_p++;
+ ldrb r8, [r5], #1 @ r8 = *Cb_p++;
+ ldrb r9, [r6], #1 @ r9 = *Cr_p++;
+ @
+ eor r14, r14, #0x80 @ flip pattern quadrant
+ @
+ sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @
+ sub r8, r8, #128 @ Cb -= 128
+ sub r9, r9, #128 @ Cr -= 128
+ @
+ add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49
+ add r10, r10, r8, asl #5 @
+ add r10, r10, r9, asl #3 @
+ add r10, r10, r9, asl #5 @
+ add r10, r10, r9, asl #6 @
+ @
+ mov r8, r8, asl #1 @ r8 = bu = Cb*258
+ add r8, r8, r8, asl #7 @
+ @
+ add r9, r9, r9, asl #1 @ r9 = rv = Cr*408
+ add r9, r9, r9, asl #4 @
+ mov r9, r9, asl #3 @
+ @
+ @ compute R, G, and B
+ add r0, r8, r7 @ r0 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ @ r8 = bu, r9 = rv, r10 = guv
+ @
+ sub r12, r0, r0, lsr #5 @ r0 = 31/32*b + b/256
+ add r0, r12, r0, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
+ add r7, r12, r7, lsr #8 @
+ @
+ add r12, r14, #0x100 @
+ @
+ add r0, r0, r12 @ b = r0 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r0, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r0, asr #15 @ clamp b
+ mvnne r0, r12, lsr #15 @
+ andne r0, r0, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ and r11, r11, #0xf800 @ pack pixel
+ and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
+ orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
+ orr r3, r11, r0, lsr #10 @ (b >> 10)
+#ifdef SANSA_FUZEV2
+ mov r7, r3, lsr #8 @
+ bic r3, r3, #0xff00 @
+ orr r3, r7, r3, lsl #8 @ swap pixel
+#endif
+ @ save pixel
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @ compute R, G, and B
+ add r0, r8, r7 @ r0 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ sub r12, r0, r0, lsr #5 @ r0 = 31/32*b' + b'/256
+ add r0, r12, r0, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
+ add r7, r12, r7, lsr #8 @
+ @
+ add r12, r14, #0x200 @
+ @
+ add r0, r0, r12 @ b = r0 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r0, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r0, asr #15 @ clamp b
+ mvnne r0, r12, lsr #15 @
+ andne r0, r0, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
+
+ and r11, r11, #0xf800 @ pack pixel
+ and r7, r7, #0x7e00 @ r0 = pixel = (r & 0xf800) |
+ orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
+ orr r0, r11, r0, lsr #10 @ (b >> 10)
+#ifdef SANSA_FUZEV2
+ mov r7, r0, lsr #8 @
+ bic r0, r0, #0xff00 @
+ orr r0, r7, r0, lsl #8 @ swap pixel
+#endif
+ orr r3, r3, r0, lsl#16 @ pack with 2nd pixel
+ mov r0, #0xC8000000 @
+ orr r0, r0, #0x120000 @ r0 = DBOP_BASE
+
+ str r3, [r0, #0x10] @ write pixel
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @ compute R, G, and B
+ add r0, r8, r7 @ r0 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ @ r8 = bu, r9 = rv, r10 = guv
+ @
+ sub r12, r0, r0, lsr #5 @ r0 = 31/32*b' + b'/256
+ add r0, r12, r0, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
+ add r7, r12, r7, lsr #8 @
+ @
+ add r12, r14, #0x300 @
+ @
+ add r0, r0, r12 @ b = r0 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r0, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r0, asr #15 @ clamp b
+ mvnne r0, r12, lsr #15 @
+ andne r0, r0, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ and r11, r11, #0xf800 @ pack pixel
+ and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
+ orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
+ orr r3, r11, r0, lsr #10 @ (b >> 10)
+#ifdef SANSA_FUZEV2
+ mov r7, r3, lsr #8 @
+ bic r3, r3, #0xff00 @
+ orr r3, r7, r3, lsl #8 @ swap pixel
+#endif
+ @ save pixel
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @ compute R, G, and B
+ add r0, r8, r7 @ r0 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ sub r12, r0, r0, lsr #5 @ r0 = 31/32*b + b/256
+ add r0, r12, r0, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
+ add r7, r12, r7, lsr #8 @
+ @
+ @ This element is zero - use r14 @
+ @
+ add r0, r0, r14 @ b = r0 + delta
+ add r11, r11, r14, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r14, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r0, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r0, asr #15 @ clamp b
+ mvnne r0, r12, lsr #15 @
+ andne r0, r0, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ and r11, r11, #0xf800 @ pack pixel
+ and r7, r7, #0x7e00 @ r0 = pixel = (r & 0xf800) |
+ orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
+ orr r0, r11, r0, lsr #10 @ (b >> 10)
+#ifdef SANSA_FUZEV2
+ mov r7, r0, lsr #8 @
+ bic r0, r0, #0xff00 @
+ orr r0, r7, r0, lsl #8 @ swap pixel
+#endif
+ orr r3, r3, r0, lsl#16 @ pack with 2nd pixel
+ mov r0, #0xC8000000 @
+ orr r0, r0, #0x120000 @ r0 = DBOP_BASE
+
+ str r3, [r0, #0x10] @ write pixel
+ @
+ subs r1, r1, #2 @ subtract block from width
+ bgt 10b @ loop line @
+ @
+1: @ busy @
+ @ writing at max 110*32 (LCD_WIDTH/2), the fifo is bigger (128*32)
+ @ so polling fifo empty only after each line is safe
+ ldr r7, [r0,#0xc] @ r7 = DBOP_STATUS
+ tst r7, #DBOP_BUSY @ fifo not empty?
+ beq 1b @
+
+ ldmpc regs=r4-r11 @ restore registers and return
+ .ltorg @ dump constant pool
+ .size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither
diff --git a/firmware/target/arm/as3525/lcd-fuze.c b/firmware/target/arm/as3525/lcd-fuze.c
index b1f62a1c95..a1ccea348d 100644
--- a/firmware/target/arm/as3525/lcd-fuze.c
+++ b/firmware/target/arm/as3525/lcd-fuze.c
@@ -197,6 +197,86 @@ static void lcd_window_y(int ymin, int ymax)
lcd_write_reg(R_RAM_ADDR_SET, ymin);
}
+static unsigned lcd_yuv_options = 0;
+
+void lcd_yuv_set_options(unsigned options)
+{
+ lcd_yuv_options = options;
+}
+
+
+#ifndef BOOTLOADER
+/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. */
+extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ int width,
+ int stride);
+extern void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
+ int width,
+ int stride,
+ int x_screen, /* To align dither pattern */
+ int y_screen);
+
+/* Performance function to blit a YUV bitmap directly to the LCD
+ * src_x, src_y, width and height should be even
+ * x, y, width and height have to be within LCD bounds
+ */
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ unsigned char const * yuv_src[3];
+ off_t z;
+
+ /* Sorry, but width and height must be >= 2 or else */
+ width &= ~1;
+ height >>= 1;
+
+ z = stride*src_y;
+ yuv_src[0] = src[0] + z + src_x;
+ yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1);
+ yuv_src[2] = src[2] + (yuv_src[1] - src[1]);
+
+ lcd_write_reg(R_ENTRY_MODE, R_ENTRY_MODE_VIDEO);
+
+ lcd_window_x(x, x + width - 1);
+
+ if (lcd_yuv_options & LCD_YUV_DITHER)
+ {
+ do
+ {
+ lcd_window_y(y, y + 1);
+
+ lcd_write_cmd(R_WRITE_DATA_2_GRAM);
+
+ lcd_write_yuv420_lines_odither(yuv_src, width, stride, x, y);
+ yuv_src[0] += stride << 1; /* Skip down two luma lines */
+ yuv_src[1] += stride >> 1; /* Skip down one chroma line */
+ yuv_src[2] += stride >> 1;
+ y += 2;
+ }
+ while (--height > 0);
+ }
+ else
+ {
+ do
+ {
+ lcd_window_y(y, y + 1);
+
+ lcd_write_cmd(R_WRITE_DATA_2_GRAM);
+
+ lcd_write_yuv420_lines(yuv_src, width, stride);
+ yuv_src[0] += stride << 1; /* Skip down two luma lines */
+ yuv_src[1] += stride >> 1; /* Skip down one chroma line */
+ yuv_src[2] += stride >> 1;
+ y += 2;
+ }
+ while (--height > 0);
+ }
+}
+
+#endif
+
+
/* Update the display.
This must be called after all other LCD functions that change the display. */
void lcd_update(void)
diff --git a/firmware/target/arm/as3525/sansa-e200v2/lcd-e200v2.c b/firmware/target/arm/as3525/sansa-e200v2/lcd-e200v2.c
index 141340c003..f69ad48793 100644
--- a/firmware/target/arm/as3525/sansa-e200v2/lcd-e200v2.c
+++ b/firmware/target/arm/as3525/sansa-e200v2/lcd-e200v2.c
@@ -336,6 +336,104 @@ bool lcd_active(void)
/*** update functions ***/
+static unsigned lcd_yuv_options = 0;
+
+void lcd_yuv_set_options(unsigned options)
+{
+ lcd_yuv_options = options;
+}
+
+
+#ifndef BOOTLOADER
+static void lcd_window_blit(int xmin, int ymin, int xmax, int ymax)
+{
+ if (!display_flipped)
+ {
+ lcd_write_reg(R_HORIZ_RAM_ADDR_POS,
+ ((LCD_WIDTH-1 - xmin) << 8) | (LCD_WIDTH-1 - xmax));
+ lcd_write_reg(R_VERT_RAM_ADDR_POS, (ymax << 8) | ymin);
+ lcd_write_reg(R_RAM_ADDR_SET,
+ (ymin << 8) | (LCD_WIDTH-1 - xmin));
+ }
+ else
+ {
+ lcd_write_reg(R_HORIZ_RAM_ADDR_POS, (xmax << 8) | xmin);
+ lcd_write_reg(R_VERT_RAM_ADDR_POS, (ymax << 8) | ymin);
+ lcd_write_reg(R_RAM_ADDR_SET, (ymax << 8) | xmin);
+ }
+}
+
+/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. */
+extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ int width,
+ int stride);
+extern void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
+ int width,
+ int stride,
+ int x_screen, /* To align dither pattern */
+ int y_screen);
+
+/* Performance function to blit a YUV bitmap directly to the LCD
+ * src_x, src_y, width and height should be even
+ * x, y, width and height have to be within LCD bounds
+ */
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ unsigned char const * yuv_src[3];
+ off_t z;
+
+ /* Sorry, but width and height must be >= 2 or else */
+ width &= ~1;
+ height >>= 1;
+
+ z = stride*src_y;
+ yuv_src[0] = src[0] + z + src_x;
+ yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1);
+ yuv_src[2] = src[2] + (yuv_src[1] - src[1]);
+
+ lcd_write_reg(R_ENTRY_MODE,
+ display_flipped ? R_ENTRY_MODE_VIDEO_FLIPPED : R_ENTRY_MODE_VIDEO_NORMAL
+ );
+
+ if (lcd_yuv_options & LCD_YUV_DITHER)
+ {
+ do
+ {
+ lcd_window_blit(y, x, y+1, x+width-1);
+
+ lcd_write_cmd(R_WRITE_DATA_2_GRAM);
+
+ lcd_write_yuv420_lines_odither(yuv_src, width, stride, x, y);
+ yuv_src[0] += stride << 1; /* Skip down two luma lines */
+ yuv_src[1] += stride >> 1; /* Skip down one chroma line */
+ yuv_src[2] += stride >> 1;
+ y += 2;
+ }
+ while (--height > 0);
+ }
+ else
+ {
+ do
+ {
+ lcd_window_blit(y, x, y+1, x+width-1);
+
+ lcd_write_cmd(R_WRITE_DATA_2_GRAM);
+
+ lcd_write_yuv420_lines(yuv_src, width, stride);
+ yuv_src[0] += stride << 1; /* Skip down two luma lines */
+ yuv_src[1] += stride >> 1; /* Skip down one chroma line */
+ yuv_src[2] += stride >> 1;
+ y += 2;
+ }
+ while (--height > 0);
+ }
+}
+
+#endif
+
+
/* Update the display.
This must be called after all other LCD functions that change the display. */
void lcd_update(void)
diff --git a/firmware/target/arm/ipod/lcd-as-color-nano.S b/firmware/target/arm/ipod/lcd-as-color-nano.S
new file mode 100644
index 0000000000..f6f9cc5be3
--- /dev/null
+++ b/firmware/target/arm/ipod/lcd-as-color-nano.S
@@ -0,0 +1,287 @@
+/***************************************************************************
+ * __________ __ ___.
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id:$
+ *
+ * Copyright (C) 2010-2011 by Andree Buschmann
+ *
+ * Generic asm helper function used by YUV blitting.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "config.h"
+#include "cpu.h"
+
+/****************************************************************************
+ * #define FORCE_FIFO_WAIT
+ *
+ * This is not needed in YUV blitting when the LCD IF is fast enough. In this
+ * case YUV-to-RGB conversion per pixel needs longer than the transfer of a
+ * pixel via the LCD IF.
+ ****************************************************************************/
+
+#include "config.h"
+
+/* Set FIFO wait for both iPod Color and iPod nano1G until we know for which
+ * devices we can switch this off. */
+#define FORCE_FIFO_WAIT
+
+ .section .icode, "ax", %progbits
+
+/****************************************************************************
+ * extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ * const unsigned LCD_BASE,
+ * int width,
+ * int stride);
+ *
+ * Conversion from Motion JPEG and MPEG Y'PbPr to RGB is:
+ * |R| |1.164 0.000 1.596| |Y' - 16|
+ * |G| = |1.164 -0.391 -0.813| |Pb - 128|
+ * |B| |1.164 2.018 0.000| |Pr - 128|
+ *
+ * Scaled, normalized, rounded and tweaked to yield RGB 565:
+ * |R| |74 0 101| |Y' - 16| >> 9
+ * |G| = |74 -24 -51| |Cb - 128| >> 8
+ * |B| |74 128 0| |Cr - 128| >> 9
+ *
+ * Converts two lines from YUV to RGB565 and writes to LCD at once. First loop
+ * loads Cb/Cr, calculates the chroma offset and saves them to buffer. Within
+ * the second loop these chroma offsets are reloaded from buffer. Within each
+ * loop two pixels are calculated and written to LCD.
+ */
+ .align 2
+ .global lcd_write_yuv420_lines
+ .type lcd_write_yuv420_lines, %function
+lcd_write_yuv420_lines:
+ /* r0 = src = yuv_src */
+ /* r1 = dst = LCD_BASE */
+ /* r2 = width */
+ /* r3 = stride */
+ stmfd sp!, { r4-r10, lr } /* save non-scratch */
+ ldmia r0, { r9, r10, r12 } /* r9 = yuv_src[0] = Y'_p */
+ /* r10 = yuv_src[1] = Cb_p */
+ /* r12 = yuv_src[2] = Cr_p */
+ add r3, r9, r3 /* r3 = &ysrc[stride] */
+ add r4, r2, r2, asr #1 /* chroma buffer length = width/2 *3 */
+ mov r4, r4, asl #2 /* use words for str/ldm possibility */
+ add r4, r4, #19 /* plus room for 4 additional words, */
+ bic r4, r4, #3 /* rounded up to multiples of 4 byte */
+ sub sp, sp, r4 /* and allocate on stack */
+ stmia sp, {r1-r4} /* LCD_BASE, width, &ysrc[stride], stack_alloc */
+
+ mov r7, r2 /* r7 = loop count */
+ add r8, sp, #16 /* chroma buffer */
+ add lr, r1, #0x100 /* LCD data port = LCD2_BASE + 0x100 */
+
+ /* 1st loop start */
+10: /* loop start */
+
+ ldrb r0, [r10], #1 /* r0 = *usrc++ = *Cb_p++ */
+ ldrb r1, [r12], #1 /* r1 = *vsrc++ = *Cr_p++ */
+
+ sub r0, r0, #128 /* r0 = Cb-128 */
+ sub r1, r1, #128 /* r1 = Cr-128 */
+
+ add r2, r1, r1, asl #1 /* r2 = Cr*51 + Cb*24 */
+ add r2, r2, r2, asl #4
+ add r2, r2, r0, asl #3
+ add r2, r2, r0, asl #4
+
+ add r4, r1, r1, asl #2 /* r1 = Cr*101 */
+ add r4, r4, r1, asl #5
+ add r1, r4, r1, asl #6
+
+ add r1, r1, #256 /* r1 = rv = (r1 + 256) >> 9 */
+ mov r1, r1, asr #9
+ rsb r2, r2, #128 /* r2 = guv = (-r2 + 128) >> 8 */
+ mov r2, r2, asr #8
+ add r0, r0, #2 /* r0 = bu = (Cb*128 + 256) >> 9 */
+ mov r0, r0, asr #2
+ stmia r8!, {r0-r2} /* store r0, r1 and r2 to chroma buffer */
+
+ /* 1st loop, first pixel */
+ ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
+ sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
+ add r3, r5, r5, asl #2
+ add r5, r3, r5, asl #5
+
+ add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
+ add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
+ add r4, r0, r5, asr #8 /* r4 = b = (Y >> 9) + bu */
+
+ orr r5, r6, r4 /* check if clamping is needed... */
+ orr r5, r5, r3, asr #1 /* ...at all */
+ cmp r5, #31
+ bls 15f /* -> no clamp */
+ cmp r6, #31 /* clamp r */
+ mvnhi r6, r6, asr #31
+ andhi r6, r6, #31
+ cmp r3, #63 /* clamp g */
+ mvnhi r3, r3, asr #31
+ andhi r3, r3, #63
+ cmp r4, #31 /* clamp b */
+ mvnhi r4, r4, asr #31
+ andhi r4, r4, #31
+15: /* no clamp */
+
+ /* calculate pixel_1 and save to r4 for later pixel packing */
+ orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */
+ orr r4, r4, r6, lsl #11 /* r4 = pixel_1 */
+
+ /* 1st loop, second pixel */
+ ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
+ sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
+ add r3, r5, r5, asl #2
+ add r5, r3, r5, asl #5
+
+ add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
+ add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
+ add r5, r0, r5, asr #8 /* r5 = b = (Y >> 9) + bu */
+
+ orr r0, r6, r5 /* check if clamping is needed... */
+ orr r0, r0, r3, asr #1 /* ...at all */
+ cmp r0, #31
+ bls 15f /* -> no clamp */
+ cmp r6, #31 /* clamp r */
+ mvnhi r6, r6, asr #31
+ andhi r6, r6, #31
+ cmp r3, #63 /* clamp g */
+ mvnhi r3, r3, asr #31
+ andhi r3, r3, #63
+ cmp r5, #31 /* clamp b */
+ mvnhi r5, r5, asr #31
+ andhi r5, r5, #31
+15: /* no clamp */
+
+ /* calculate pixel_2 and pack with pixel_1 before writing */
+ orr r5, r5, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
+ orr r5, r5, r6, lsl #11 /* r5 = pixel_2 */
+#ifdef FORCE_FIFO_WAIT
+ /* wait for FIFO half full */
+.fifo_wait1:
+ ldr r3, [lr, #-0xE0] /* while !(LCD2_BLOCK_CTRL & 0x1000000); */
+ tst r3, #0x1000000
+ beq .fifo_wait1
+#endif
+
+ mov r3, r4, lsl #8 /* swap pixel_1 */
+ and r3, r3, #0xff00
+ add r4, r3, r4, lsr #8
+
+ orr r4, r4, r5, lsl #24 /* swap pixel_2 and pack with pixel_1 */
+ mov r5, r5, lsr #8
+ orr r4, r4, r5, lsl #16
+
+ str r4, [lr] /* write pixel_1 and pixel_2 */
+
+ subs r7, r7, #2 /* check for loop end */
+ bgt 10b /* back to beginning */
+ /* 1st loop end */
+
+ /* Reload several registers for pointer rewinding for next loop */
+ add r8, sp, #16 /* chroma buffer */
+ ldmia sp, { r1, r7, r9} /* r1 = LCD_BASE */
+ /* r7 = loop count */
+ /* r9 = &ysrc[stride] */
+
+ /* 2nd loop start */
+20: /* loop start */
+ /* restore r0 (bu), r1 (rv) and r2 (guv) from chroma buffer */
+ ldmia r8!, {r0-r2}
+
+ /* 2nd loop, first pixel */
+ ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
+ sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
+ add r3, r5, r5, asl #2
+ add r5, r3, r5, asl #5
+
+ add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
+ add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
+ add r4, r0, r5, asr #8 /* r4 = b = (Y >> 9) + bu */
+
+ orr r5, r6, r4 /* check if clamping is needed... */
+ orr r5, r5, r3, asr #1 /* ...at all */
+ cmp r5, #31
+ bls 15f /* -> no clamp */
+ cmp r6, #31 /* clamp r */
+ mvnhi r6, r6, asr #31
+ andhi r6, r6, #31
+ cmp r3, #63 /* clamp g */
+ mvnhi r3, r3, asr #31
+ andhi r3, r3, #63
+ cmp r4, #31 /* clamp b */
+ mvnhi r4, r4, asr #31
+ andhi r4, r4, #31
+15: /* no clamp */
+ /* calculate pixel_1 and save to r4 for later pixel packing */
+ orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */
+ orr r4, r4, r6, lsl #11 /* r4 = pixel_1 */
+
+ /* 2nd loop, second pixel */
+ ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
+ sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
+ add r3, r5, r5, asl #2
+ add r5, r3, r5, asl #5
+
+ add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
+ add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
+ add r5, r0, r5, asr #8 /* r5 = b = (Y >> 9) + bu */
+
+ orr r0, r6, r5 /* check if clamping is needed... */
+ orr r0, r0, r3, asr #1 /* ...at all */
+ cmp r0, #31
+ bls 15f /* -> no clamp */
+ cmp r6, #31 /* clamp r */
+ mvnhi r6, r6, asr #31
+ andhi r6, r6, #31
+ cmp r3, #63 /* clamp g */
+ mvnhi r3, r3, asr #31
+ andhi r3, r3, #63
+ cmp r5, #31 /* clamp b */
+ mvnhi r5, r5, asr #31
+ andhi r5, r5, #31
+15: /* no clamp */
+
+ /* calculate pixel_2 and pack with pixel_1 before writing */
+ orr r5, r5, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
+ orr r5, r5, r6, lsl #11 /* r5 = pixel_2 */
+#ifdef FORCE_FIFO_WAIT
+ /* wait for FIFO half full */
+.fifo_wait2:
+ ldr r3, [lr, #-0xE0] /* while !(LCD2_BLOCK_CTRL & 0x1000000); */
+ tst r3, #0x1000000
+ beq .fifo_wait2
+#endif
+
+ mov r3, r4, lsl #8 /* swap pixel_1 */
+ and r3, r3, #0xff00
+ add r4, r3, r4, lsr #8
+
+ orr r4, r4, r5, lsl #24 /* swap pixel_2 and pack with pixel_1 */
+ mov r5, r5, lsr #8
+ orr r4, r4, r5, lsl #16
+
+ str r4, [lr] /* write pixel_1 and pixel_2 */
+
+ subs r7, r7, #2 /* check for loop end */
+ bgt 20b /* back to beginning */
+ /* 2nd loop end */
+
+ ldr r3, [sp, #12]
+ add sp, sp, r3 /* deallocate buffer */
+ ldmpc regs=r4-r10 /* restore registers */
+
+ .ltorg
+ .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
diff --git a/firmware/target/arm/ipod/lcd-color_nano.c b/firmware/target/arm/ipod/lcd-color_nano.c
index 71ae22cb23..67d26aa862 100644
--- a/firmware/target/arm/ipod/lcd-color_nano.c
+++ b/firmware/target/arm/ipod/lcd-color_nano.c
@@ -202,6 +202,62 @@ static void lcd_setup_drawing_region(int x, int y, int width, int height)
}
}
+/* Line write helper function for lcd_yuv_blit. Writes two lines of yuv420. */
+extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ const unsigned int lcd_baseadress,
+ int width,
+ int stride);
+
+/* Performance function to blit a YUV bitmap directly to the LCD */
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ int z;
+ unsigned char const * yuv_src[3];
+
+ width = (width + 1) & ~1; /* ensure width is even */
+ height = (height + 1) & ~1; /* ensure height is even */
+
+ lcd_setup_drawing_region(x, y, width, height);
+
+ z = stride * src_y;
+ yuv_src[0] = src[0] + z + src_x;
+ yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1);
+ yuv_src[2] = src[2] + (yuv_src[1] - src[1]);
+
+ while (height > 0) {
+ int r, h, pixels_to_write;
+
+ pixels_to_write = (width * height) * 2;
+ h = height;
+
+ /* calculate how much we can do in one go */
+ if (pixels_to_write > 0x10000) {
+ h = ((0x10000/2) / width) & ~1; /* ensure h is even */
+ pixels_to_write = (width * h) * 2;
+ }
+
+ LCD2_BLOCK_CTRL = 0x10000080;
+ LCD2_BLOCK_CONFIG = 0xc0010000 | (pixels_to_write - 1);
+ LCD2_BLOCK_CTRL = 0x34000000;
+
+ r = h>>1; /* lcd_write_yuv420_lines writes two lines at once */
+ do {
+ lcd_write_yuv420_lines(yuv_src, LCD2_BASE, width, stride);
+ yuv_src[0] += stride << 1;
+ yuv_src[1] += stride >> 1;
+ yuv_src[2] += stride >> 1;
+ } while (--r > 0);
+
+ /* transfer of pixels_to_write bytes finished */
+ while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_READY));
+ LCD2_BLOCK_CONFIG = 0;
+
+ height -= h;
+ }
+}
+
/* Helper function writes 'count' consecutive pixels from src to LCD IF */
static void lcd_write_line(int count, unsigned long *src)
{
diff --git a/firmware/target/arm/ipod/video/lcd-as-video.S b/firmware/target/arm/ipod/video/lcd-as-video.S
index 1b982c75ce..47155b8c75 100644
--- a/firmware/target/arm/ipod/video/lcd-as-video.S
+++ b/firmware/target/arm/ipod/video/lcd-as-video.S
@@ -63,3 +63,240 @@ lcd_write_data: /* r1 = pixel count, must be even */
strne r3, [lr]
ldmpc regs=r4
+
+/****************************************************************************
+ * extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ * unsigned bcmaddr
+ * int width,
+ * int stride);
+ *
+ * Conversion from Motion JPEG and MPEG Y'PbPr to RGB is:
+ * |R| |1.164 0.000 1.596| |Y' - 16|
+ * |G| = |1.164 -0.391 -0.813| |Pb - 128|
+ * |B| |1.164 2.018 0.000| |Pr - 128|
+ *
+ * Scaled, normalized, rounded and tweaked to yield RGB 565:
+ * |R| |74 0 101| |Y' - 16| >> 9
+ * |G| = |74 -24 -51| |Cb - 128| >> 8
+ * |B| |74 128 0| |Cr - 128| >> 9
+ *
+ * Converts two lines from YUV to RGB565 and writes to BCM at once. First loop
+ * loads Cb/Cr, calculates the chroma offset and saves them to buffer. Within
+ * the second loop these chroma offsets are reloaded from buffer.
+ * Within each loop two pixels are calculated and written to BCM. Before each
+ * loop the desired destination address is transmitted to BCM.
+ */
+ .align 2
+ .global lcd_write_yuv420_lines
+ .type lcd_write_yuv420_lines, %function
+lcd_write_yuv420_lines:
+ /* r0 = src = yuv_src */
+ /* r1 = dst = bcmaddr */
+ /* r2 = width */
+ /* r3 = stride */
+ stmfd sp!, { r4-r10, lr } /* save non-scratch */
+ ldmia r0, { r9, r10, r12 } /* r9 = yuv_src[0] = Y'_p */
+ /* r10 = yuv_src[1] = Cb_p */
+ /* r12 = yuv_src[2] = Cr_p */
+ add r3, r9, r3 /* r3 = &ysrc[stride] */
+ add r4, r2, r2, asr #1 /* chroma buffer length = width/2 *3 */
+ mov r4, r4, asl #2 /* use words for str/ldm possibility */
+ add r4, r4, #19 /* plus room for 4 additional words, */
+ bic r4, r4, #3 /* rounded up to multiples of 4 byte */
+ sub sp, sp, r4 /* and allocate on stack */
+ stmia sp, {r1-r4} /* bcmaddr, width, &ysrc[stride], stack_alloc */
+
+ mov r7, r2 /* r7 = loop count */
+ add r8, sp, #16 /* chroma buffer */
+ mov lr, #0x30000000 /* LCD data port */
+
+ /* The following writes dest address to BCM and waits for write ready */
+ orr r2, lr, #0x00010000 /* r2 = BCM_WR_ADDR32 */
+ orr r6, lr, #0x00030000 /* r6 = BCM_CONTROL */
+ str r1, [r2] /* BCM_WR_ADDR32 = bcmaddr */
+.busy_1:
+ ldrh r1, [r6] /* while (!(BCM_CONTROL & 0x2)) */
+ tst r1, #0x2
+ beq .busy_1
+
+ /* 1st loop start */
+10: /* loop start */
+
+ ldrb r0, [r10], #1 /* r0 = *usrc++ = *Cb_p++ */
+ ldrb r1, [r12], #1 /* r1 = *vsrc++ = *Cr_p++ */
+
+ sub r0, r0, #128 /* r0 = Cb-128 */
+ sub r1, r1, #128 /* r1 = Cr-128 */
+
+ add r2, r1, r1, asl #1 /* r2 = Cr*51 + Cb*24 */
+ add r2, r2, r2, asl #4
+ add r2, r2, r0, asl #3
+ add r2, r2, r0, asl #4
+
+ add r4, r1, r1, asl #2 /* r1 = Cr*101 */
+ add r4, r4, r1, asl #5
+ add r1, r4, r1, asl #6
+
+ add r1, r1, #256 /* r1 = rv = (r1 + 256) >> 9 */
+ mov r1, r1, asr #9
+ rsb r2, r2, #128 /* r2 = guv = (-r2 + 128) >> 8 */
+ mov r2, r2, asr #8
+ add r0, r0, #2 /* r0 = bu = (Cb*128 + 256) >> 9 */
+ mov r0, r0, asr #2
+ stmia r8!, {r0-r2} /* store r0, r1 and r2 to chroma buffer */
+
+ /* 1st loop, first pixel */
+ ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
+ sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
+ add r3, r5, r5, asl #2
+ add r5, r3, r5, asl #5
+
+ add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
+ add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
+ add r4, r0, r5, asr #8 /* r4 = b = (Y >> 9) + bu */
+
+ orr r5, r6, r4 /* check if clamping is needed... */
+ orr r5, r5, r3, asr #1 /* ...at all */
+ cmp r5, #31
+ bls 15f /* -> no clamp */
+ cmp r6, #31 /* clamp r */
+ mvnhi r6, r6, asr #31
+ andhi r6, r6, #31
+ cmp r3, #63 /* clamp g */
+ mvnhi r3, r3, asr #31
+ andhi r3, r3, #63
+ cmp r4, #31 /* clamp b */
+ mvnhi r4, r4, asr #31
+ andhi r4, r4, #31
+15: /* no clamp */
+
+ /* calculate pixel_1 and save to r5 for later pixel packing */
+ orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */
+ orr r5, r4, r6, lsl #11 /* r5 = pixel_1 */
+
+ /* 1st loop, second pixel */
+ ldrb r4, [r9], #1 /* r4 = *ysrc++ = *Y'_p++ */
+ sub r4, r4, #16 /* r4 = (Y'-16) * 74 */
+ add r3, r4, r4, asl #2
+ add r4, r3, r4, asl #5
+
+ add r6, r1, r4, asr #8 /* r6 = r = (Y >> 9) + rv */
+ add r3, r2, r4, asr #7 /* r3 = g = (Y >> 8) + guv */
+ add r4, r0, r4, asr #8 /* r4 = b = (Y >> 9) + bu */
+
+ orr r0, r6, r4 /* check if clamping is needed... */
+ orr r0, r0, r3, asr #1 /* ...at all */
+ cmp r0, #31
+ bls 15f /* -> no clamp */
+ cmp r6, #31 /* clamp r */
+ mvnhi r6, r6, asr #31
+ andhi r6, r6, #31
+ cmp r3, #63 /* clamp g */
+ mvnhi r3, r3, asr #31
+ andhi r3, r3, #63
+ cmp r4, #31 /* clamp b */
+ mvnhi r4, r4, asr #31
+ andhi r4, r4, #31
+15: /* no clamp */
+
+ /* calculate pixel_2 and pack with pixel_1 before writing */
+ orr r4, r4, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
+ orr r4, r4, r6, lsl #11 /* r4 = pixel_2 */
+ orr r4, r5, r4, lsl #16 /* r4 = pixel_2<<16 | pixel_1 */
+ str r4, [lr] /* write packed pixels */
+
+ subs r7, r7, #2 /* check for loop end */
+ bgt 10b /* back to beginning */
+ /* 1st loop end */
+
+ /* Reload several registers for pointer rewinding for next loop */
+ add r8, sp, #16 /* chroma buffer */
+ ldmia sp, { r1, r7, r9} /* r1 = bcmaddr */
+ /* r7 = loop count */
+ /* r9 = &ysrc[stride] */
+
+ /* The following writes dest address to BCM and waits for write ready */
+ orr r2, lr, #0x00010000 /* r2 = BCM_WR_ADDR32 */
+ orr r6, lr, #0x00030000 /* r6 = BCM_CONTROL */
+ add r1, r1, #640 /* dst += (LCD_WIDTH*2) */
+ str r1, [r2] /* BCM_WR_ADDR32 = dst */
+.busy_2:
+ ldrh r1, [r6] /* while (!(BCM_CONTROL & 0x2)) */
+ tst r1, #0x2
+ beq .busy_2
+
+
+ /* 2nd loop start */
+20: /* loop start */
+ /* restore r0 (bu), r1 (rv) and r2 (guv) from chroma buffer */
+ ldmia r8!, {r0-r2}
+
+ /* 2nd loop, first pixel */
+ ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
+ sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
+ add r3, r5, r5, asl #2
+ add r5, r3, r5, asl #5
+
+ add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
+ add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
+ add r4, r0, r5, asr #8 /* r4 = b = (Y >> 9) + bu */
+
+ orr r5, r6, r4 /* check if clamping is needed... */
+ orr r5, r5, r3, asr #1 /* ...at all */
+ cmp r5, #31
+ bls 15f /* -> no clamp */
+ cmp r6, #31 /* clamp r */
+ mvnhi r6, r6, asr #31
+ andhi r6, r6, #31
+ cmp r3, #63 /* clamp g */
+ mvnhi r3, r3, asr #31
+ andhi r3, r3, #63
+ cmp r4, #31 /* clamp b */
+ mvnhi r4, r4, asr #31
+ andhi r4, r4, #31
+15: /* no clamp */
+ /* calculate pixel_1 and save to r5 for later pixel packing */
+ orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */
+ orr r5, r4, r6, lsl #11 /* r5 = pixel_1 */
+
+ /* 2nd loop, second pixel */
+ ldrb r4, [r9], #1 /* r4 = *ysrc++ = *Y'_p++ */
+ sub r4, r4, #16 /* r4 = (Y'-16) * 74 */
+ add r3, r4, r4, asl #2
+ add r4, r3, r4, asl #5
+
+ add r6, r1, r4, asr #8 /* r6 = r = (Y >> 9) + rv */
+ add r3, r2, r4, asr #7 /* r3 = g = (Y >> 8) + guv */
+ add r4, r0, r4, asr #8 /* r4 = b = (Y >> 9) + bu */
+
+ orr r0, r6, r4 /* check if clamping is needed... */
+ orr r0, r0, r3, asr #1 /* ...at all */
+ cmp r0, #31
+ bls 15f /* -> no clamp */
+ cmp r6, #31 /* clamp r */
+ mvnhi r6, r6, asr #31
+ andhi r6, r6, #31
+ cmp r3, #63 /* clamp g */
+ mvnhi r3, r3, asr #31
+ andhi r3, r3, #63
+ cmp r4, #31 /* clamp b */
+ mvnhi r4, r4, asr #31
+ andhi r4, r4, #31
+15: /* no clamp */
+
+ /* calculate pixel_2 and pack with pixel_1 before writing */
+ orr r4, r4, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
+ orr r4, r4, r6, lsl #11 /* r4 = pixel_2 */
+ orr r4, r5, r4, lsl #16 /* r4 = pixel_2<<16 | pixel_1 */
+ str r4, [lr] /* write packed pixels */
+
+ subs r7, r7, #2 /* check for loop end */
+ bgt 20b /* back to beginning */
+ /* 2nd loop end */
+
+ ldr r3, [sp, #12]
+ add sp, sp, r3 /* deallocate buffer */
+ ldmpc regs=r4-r10 /* restore registers */
+
+ .ltorg
+ .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
diff --git a/firmware/target/arm/ipod/video/lcd-video.c b/firmware/target/arm/ipod/video/lcd-video.c
index 494bec8429..27d889aafa 100644
--- a/firmware/target/arm/ipod/video/lcd-video.c
+++ b/firmware/target/arm/ipod/video/lcd-video.c
@@ -439,6 +439,53 @@ void lcd_update(void)
lcd_update_rect(0, 0, LCD_WIDTH, LCD_HEIGHT);
}
+/* Line write helper function for lcd_yuv_blit. Writes two lines of yuv420. */
+extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ unsigned bcmaddr,
+ int width,
+ int stride);
+
+/* Performance function to blit a YUV bitmap directly to the LCD */
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ unsigned bcmaddr;
+ off_t z;
+ unsigned char const * yuv_src[3];
+
+#ifdef HAVE_LCD_SLEEP
+ if (!lcd_state.display_on)
+ return;
+#endif
+
+ /* Sorry, but width and height must be >= 2 or else */
+ width &= ~1;
+
+ z = stride * src_y;
+ yuv_src[0] = src[0] + z + src_x;
+ yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1);
+ yuv_src[2] = src[2] + (yuv_src[1] - src[1]);
+
+ /* Prevent the tick from triggering BCM updates while we're writing. */
+ lcd_block_tick();
+
+ bcmaddr = BCMA_CMDPARAM + (LCD_WIDTH*2) * y + (x << 1);
+ height >>= 1;
+
+ do
+ {
+ lcd_write_yuv420_lines(yuv_src, bcmaddr, width, stride);
+ bcmaddr += (LCD_WIDTH*4); /* Skip up two lines */
+ yuv_src[0] += stride << 1;
+ yuv_src[1] += stride >> 1; /* Skip down one chroma line */
+ yuv_src[2] += stride >> 1;
+ }
+ while (--height > 0);
+
+ lcd_unblock_and_update();
+}
+
#ifdef HAVE_LCD_SLEEP
/* Executes a BCM command immediately and waits for it to complete.
Other BCM commands (eg. LCD updates or lcd_tick) must not interfere.
diff --git a/firmware/target/arm/iriver/h10/lcd-as-h10.S b/firmware/target/arm/iriver/h10/lcd-as-h10.S
new file mode 100644
index 0000000000..8ac8b4289f
--- /dev/null
+++ b/firmware/target/arm/iriver/h10/lcd-as-h10.S
@@ -0,0 +1,538 @@
+/***************************************************************************
+ * __________ __ ___.
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id$
+ *
+ * Copyright (C) 2007-2008 by Michael Sevakis
+ *
+ * H10 20GB LCD assembly routines
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "config.h"
+#include "cpu.h"
+
+/****************************************************************************
+ * void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ * int width,
+ * int stride);
+ *
+ * |R| |1.000000 -0.000001 1.402000| |Y'|
+ * |G| = |1.000000 -0.334136 -0.714136| |Pb|
+ * |B| |1.000000 1.772000 0.000000| |Pr|
+ * Scaled, normalized, rounded and tweaked to yield RGB 565:
+ * |R| |74 0 101| |Y' - 16| >> 9
+ * |G| = |74 -24 -51| |Cb - 128| >> 8
+ * |B| |74 128 0| |Cr - 128| >> 9
+ *
+ * Write four RGB565 pixels in the following order on each loop:
+ * 1 3 + > down
+ * 2 4 \/ left
+ */
+ .section .icode, "ax", %progbits
+ .align 2
+ .global lcd_write_yuv420_lines
+ .type lcd_write_yuv420_lines, %function
+lcd_write_yuv420_lines:
+ @ r0 = yuv_src
+ @ r1 = width
+ @ r2 = stride
+ stmfd sp!, { r4-r11, lr } @ save non-scratch
+ ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
+ @ r5 = yuv_src[1] = Cb_p
+ @ r6 = yuv_src[2] = Cr_p
+ @
+ mov r0, #0x7000000c @ r0 = &LCD2_PORT = 0x70008a0c
+ add r0, r0, #0x8a00 @
+ mov r14, #LCD2_DATA_MASK @
+ @
+ sub r2, r2, #1 @ Adjust stride because of increment
+10: @ loop line @
+ ldrb r7, [r4], #1 @ r7 = *Y'_p++;
+ ldrb r8, [r5], #1 @ r8 = *Cb_p++;
+ ldrb r9, [r6], #1 @ r9 = *Cr_p++;
+ @
+ sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right
+ add r7, r12, r7, asl #5 @ by one less when adding - same for all
+ @
+ sub r8, r8, #128 @ Cb -= 128
+ sub r9, r9, #128 @ Cr -= 128
+ @
+ add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24
+ add r10, r10, r10, asl #4 @
+ add r10, r10, r8, asl #3 @
+ add r10, r10, r8, asl #4 @
+ @
+ add r11, r9, r9, asl #2 @ r9 = Cr*101
+ add r11, r11, r9, asl #5 @
+ add r9, r11, r9, asl #6 @
+ @
+ add r8, r8, #2 @ r8 = bu = (Cb*128 + 128) >> 8
+ mov r8, r8, asr #2 @
+ add r9, r9, #256 @ r9 = rv = (r8 + 256) >> 9
+ mov r9, r9, asr #9 @
+ rsb r10, r10, #128 @ r10 = guv = (-r9 + 128) >> 8
+ mov r10, r10, asr #8 @
+ @ compute R, G, and B
+ add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
+ add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r3, r11 @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r3, #31 @ clamp b
+ mvnhi r3, r3, asr #31 @
+ andhi r3, r3, #31 @
+ cmp r11, #31 @ clamp r
+ mvnhi r11, r11, asr #31 @
+ andhi r11, r11, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
+ orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
+ @
+ orr r7, r14, r3, lsr #8 @ store pixel
+ orr r11, r14, r3 @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r7, [r0] @
+ str r11, [r0] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @
+ add r7, r12, r7, asl #5 @
+ @ compute R, G, and B
+ add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
+ add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r3, r11 @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r3, #31 @ clamp b
+ mvnhi r3, r3, asr #31 @
+ andhi r3, r3, #31 @
+ cmp r11, #31 @ clamp r
+ mvnhi r11, r11, asr #31 @
+ andhi r11, r11, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
+ @
+ orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
+ orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
+ @
+ orr r7, r14, r3, lsr #8 @ store pixel
+ orr r11, r14, r3 @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r7, [r0] @
+ str r11, [r0] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @
+ add r7, r12, r7, asl #5 @
+ @ compute R, G, and B
+ add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
+ add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r3, r11 @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r3, #31 @ clamp b
+ mvnhi r3, r3, asr #31 @
+ andhi r3, r3, #31 @
+ cmp r11, #31 @ clamp r
+ mvnhi r11, r11, asr #31 @
+ andhi r11, r11, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ orr r3, r3, r7, lsl #5 @ r3 = b | (g << 5)
+ orr r3, r3, r11, lsl #11 @ r3 |= (r << 11)
+ @
+ orr r7, r14, r3, lsr #8 @ store pixel
+ orr r11, r14, r3 @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r7, [r0] @
+ str r11, [r0] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @
+ add r7, r12, r7, asl #5 @
+ @ compute R, G, and B
+ add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
+ add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r3, r11 @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r3, #31 @ clamp b
+ mvnhi r3, r3, asr #31 @
+ andhi r3, r3, #31 @
+ cmp r11, #31 @ clamp r
+ mvnhi r11, r11, asr #31 @
+ andhi r11, r11, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
+ orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
+ @
+ orr r7, r14, r3, lsr #8 @ store pixel
+ orr r11, r14, r3 @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r7, [r0] @
+ str r11, [r0] @
+ @
+ subs r1, r1, #2 @ subtract block from width
+ bgt 10b @ loop line @
+ @
+ ldmpc regs=r4-r11 @ restore registers and return
+ .ltorg @ dump constant pool
+ .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
+
+
+/****************************************************************************
+ * void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
+ * int width,
+ * int stride,
+ * int x_screen,
+ * int y_screen);
+ *
+ * |R| |1.000000 -0.000001 1.402000| |Y'|
+ * |G| = |1.000000 -0.334136 -0.714136| |Pb|
+ * |B| |1.000000 1.772000 0.000000| |Pr|
+ * Red scaled at twice g & b but at same precision to place it in correct
+ * bit position after multiply and leave instruction count lower.
+ * |R| |258 0 408| |Y' - 16|
+ * |G| = |149 -49 -104| |Cb - 128|
+ * |B| |149 258 0| |Cr - 128|
+ *
+ * Write four RGB565 pixels in the following order on each loop:
+ * 1 3 + > down
+ * 2 4 \/ left
+ *
+ * Kernel pattern (raw|use order):
+ * 5 3 4 2 row0 row2 > down
+ * 1 7 0 6 | 5 1 3 7 4 0 2 6 col0 left
+ * 4 2 5 3 | 4 0 2 6 5 1 3 7 col2 \/
+ * 0 6 1 7
+ */
+ .section .icode, "ax", %progbits
+ .align 2
+ .global lcd_write_yuv420_lines_odither
+ .type lcd_write_yuv420_lines_odither, %function
+lcd_write_yuv420_lines_odither:
+ @ r0 = yuv_src
+ @ r1 = width
+ @ r2 = stride
+ @ r3 = x_screen
+ @ [sp] = y_screen
+ stmfd sp!, { r4-r11, lr } @ save non-scratch
+ ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
+ @ r5 = yuv_src[1] = Cb_p
+ @ r6 = yuv_src[2] = Cr_p
+ @
+ ldr r0, [sp, #36] @ Line up pattern and kernel quadrant
+ eor r14, r3, r0 @
+ and r14, r14, #0x2 @
+ mov r14, r14, lsl #6 @ 0x00 or 0x80
+ @
+ mov r0, #0x7000000c @ r0 = &LCD2_PORT = 0x70008a0c
+ add r0, r0, #0x8a00 @
+ @
+ sub r2, r2, #1 @ Adjust stride because of increment
+10: @ loop line @
+ @
+ ldrb r7, [r4], #1 @ r7 = *Y'_p++;
+ ldrb r8, [r5], #1 @ r8 = *Cb_p++;
+ ldrb r9, [r6], #1 @ r9 = *Cr_p++;
+ @
+ eor r14, r14, #0x80 @ flip pattern quadrant
+ @
+ sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @
+ sub r8, r8, #128 @ Cb -= 128
+ sub r9, r9, #128 @ Cr -= 128
+ @
+ add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49
+ add r10, r10, r8, asl #5 @
+ add r10, r10, r9, asl #3 @
+ add r10, r10, r9, asl #5 @
+ add r10, r10, r9, asl #6 @
+ @
+ mov r8, r8, asl #1 @ r8 = bu = Cb*258
+ add r8, r8, r8, asl #7 @
+ @
+ add r9, r9, r9, asl #1 @ r9 = rv = Cr*408
+ add r9, r9, r9, asl #4 @
+ mov r9, r9, asl #3 @
+ @
+ @ compute R, G, and B
+ add r3, r8, r7 @ r3 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ @ r8 = bu, r9 = rv, r10 = guv
+ @
+ sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256
+ add r3, r12, r3, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
+ add r7, r12, r7, lsr #8 @
+ @
+ add r12, r14, #0x200 @
+ @
+ add r3, r3, r12 @ b = r3 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r3, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r3, asr #15 @ clamp b
+ mvnne r3, r12, lsr #15 @
+ andne r3, r3, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ and r11, r11, #0xf800 @ pack pixel
+ and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
+ orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
+ orr r3, r11, r3, lsr #10 @ (b >> 10)
+ @
+ mov r11, #LCD2_DATA_MASK @ store pixel
+ orr r7, r11, r3, lsr #8 @
+ orr r11, r11, r3 @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r7, [r0] @
+ str r11, [r0] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @ compute R, G, and B
+ add r3, r8, r7 @ r3 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256
+ add r3, r12, r3, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
+ add r7, r12, r7, lsr #8 @
+ @
+ @ This element is zero - use r14 @
+ @
+ add r3, r3, r14 @ b = r3 + delta
+ add r11, r11, r14, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r14, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r3, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r3, asr #15 @ clamp b
+ mvnne r3, r12, lsr #15 @
+ andne r3, r3, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
+ @
+ and r11, r11, #0xf800 @ pack pixel
+ and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
+ orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
+ orr r3, r11, r3, lsr #10 @ (b >> 10)
+ @
+ mov r11, #LCD2_DATA_MASK @ store pixel
+ orr r7, r11, r3, lsr #8 @
+ orr r11, r11, r3 @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r7, [r0] @
+ str r11, [r0] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @ compute R, G, and B
+ add r3, r8, r7 @ r3 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ @ r8 = bu, r9 = rv, r10 = guv
+ @
+ sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256
+ add r3, r12, r3, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
+ add r7, r12, r7, lsr #8 @
+ @
+ add r12, r14, #0x100 @
+ @
+ add r3, r3, r12 @ b = r3 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r3, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r3, asr #15 @ clamp b
+ mvnne r3, r12, lsr #15 @
+ andne r3, r3, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ and r11, r11, #0xf800 @ pack pixel
+ and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
+ orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
+ orr r3, r11, r3, lsr #10 @ (b >> 10)
+ @
+ mov r11, #LCD2_DATA_MASK @ store pixel
+ orr r7, r11, r3, lsr #8 @
+ orr r11, r11, r3 @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r7, [r0] @
+ str r11, [r0] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @ compute R, G, and B
+ add r3, r8, r7 @ r3 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256
+ add r3, r12, r3, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
+ add r7, r12, r7, lsr #8 @
+ @
+ add r12, r14, #0x300 @
+ @
+ add r3, r3, r12 @ b = r3 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r3, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r3, asr #15 @ clamp b
+ mvnne r3, r12, lsr #15 @
+ andne r3, r3, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ and r11, r11, #0xf800 @ pack pixel
+ and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
+ orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
+ orr r3, r11, r3, lsr #10 @ (b >> 10)
+ @
+ mov r11, #LCD2_DATA_MASK @ store pixel
+ orr r7, r11, r3, lsr #8 @
+ orr r11, r11, r3 @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r7, [r0] @
+ str r11, [r0] @
+ @
+ subs r1, r1, #2 @ subtract block from width
+ bgt 10b @ loop line @
+ @
+ ldmpc regs=r4-r11 @ restore registers and return
+ .ltorg @ dump constant pool
+ .size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither
diff --git a/firmware/target/arm/iriver/h10/lcd-h10_20gb.c b/firmware/target/arm/iriver/h10/lcd-h10_20gb.c
index 403c1c19e0..c7e339295d 100644
--- a/firmware/target/arm/iriver/h10/lcd-h10_20gb.c
+++ b/firmware/target/arm/iriver/h10/lcd-h10_20gb.c
@@ -36,6 +36,8 @@ static unsigned short disp_control_rev;
/* Contrast setting << 8 */
static int lcd_contrast;
+static unsigned lcd_yuv_options SHAREDBSS_ATTR = 0;
+
/* Forward declarations */
#if defined(HAVE_LCD_ENABLE) || defined(HAVE_LCD_SLEEP)
static void lcd_display_off(void);
@@ -396,6 +398,94 @@ bool lcd_active(void)
/*** update functions ***/
+void lcd_yuv_set_options(unsigned options)
+{
+ lcd_yuv_options = options;
+}
+
+/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. */
+extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ int width,
+ int stride);
+extern void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
+ int width,
+ int stride,
+ int x_screen, /* To align dither pattern */
+ int y_screen);
+
+/* Performance function to blit a YUV bitmap directly to the LCD */
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ const unsigned char *yuv_src[3];
+ const unsigned char *ysrc_max;
+ int y0;
+ int options;
+
+ if (!display_on)
+ return;
+
+ width &= ~1;
+ height &= ~1;
+
+ /* calculate the drawing region */
+
+ /* The 20GB LCD is actually 128x160 but rotated 90 degrees so the origin
+ * is actually the bottom left and horizontal and vertical are swapped.
+ * Rockbox expects the origin to be the top left so we need to use
+ * 127 - y instead of just y */
+
+ /* max vert << 8 | start vert */
+ lcd_write_reg(R_VERT_RAM_ADDR_POS, ((x + width - 1) << 8) | x);
+
+ y0 = LCD_HEIGHT - 1 - y + y_offset;
+
+ /* DIT=0, BGR=1, HWM=0, I/D1-0=10, AM=0, LG2-0=000 */
+ lcd_write_reg(R_ENTRY_MODE, 0x1020);
+
+ yuv_src[0] = src[0] + src_y * stride + src_x;
+ yuv_src[1] = src[1] + (src_y * stride >> 2) + (src_x >> 1);
+ yuv_src[2] = src[2] + (yuv_src[1] - src[1]);
+ ysrc_max = yuv_src[0] + height * stride;
+
+ options = lcd_yuv_options;
+
+ do
+ {
+ /* max horiz << 8 | start horiz */
+ lcd_write_reg(R_HORIZ_RAM_ADDR_POS, (y0 << 8) | (y0 - 1));
+
+ /* position cursor (set AD0-AD15) */
+ /* start vert << 8 | start horiz */
+ lcd_write_reg(R_RAM_ADDR_SET, (x << 8) | y0);
+
+ /* start drawing */
+ lcd_send_cmd(R_WRITE_DATA_2_GRAM);
+
+ if (options & LCD_YUV_DITHER)
+ {
+ lcd_write_yuv420_lines_odither(yuv_src, width, stride,
+ x, y);
+ y -= 2;
+ }
+ else
+ {
+ lcd_write_yuv420_lines(yuv_src, width, stride);
+ }
+
+ y0 -= 2;
+ yuv_src[0] += stride << 1;
+ yuv_src[1] += stride >> 1;
+ yuv_src[2] += stride >> 1;
+ }
+ while (yuv_src[0] < ysrc_max);
+
+ /* DIT=0, BGR=1, HWM=0, I/D1-0=10, AM=1, LG2-0=000 */
+ lcd_write_reg(R_ENTRY_MODE, 0x1028);
+}
+
+
/* Update a fraction of the display. */
void lcd_update_rect(int x0, int y0, int width, int height)
{
diff --git a/firmware/target/arm/iriver/h10/lcd-h10_5gb.c b/firmware/target/arm/iriver/h10/lcd-h10_5gb.c
index 4386e1670c..5e1ad9ce23 100644
--- a/firmware/target/arm/iriver/h10/lcd-h10_5gb.c
+++ b/firmware/target/arm/iriver/h10/lcd-h10_5gb.c
@@ -118,6 +118,168 @@ void lcd_init_device(void)
/*** update functions ***/
+#define CSUB_X 2
+#define CSUB_Y 2
+
+#define RYFAC (31*257)
+#define GYFAC (31*257)
+#define BYFAC (31*257)
+#define RVFAC 11170 /* 31 * 257 * 1.402 */
+#define GVFAC (-5690) /* 31 * 257 * -0.714136 */
+#define GUFAC (-2742) /* 31 * 257 * -0.344136 */
+#define BUFAC 14118 /* 31 * 257 * 1.772 */
+
+#define ROUNDOFFS (127*257)
+#define ROUNDOFFSG (63*257)
+
+/* Performance function to blit a YUV bitmap directly to the LCD */
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ int y0, x0, y1, x1;
+ int ymax;
+
+ width = (width + 1) & ~1;
+
+ /* calculate the drawing region */
+ x0 = x;
+ x1 = x + width - 1;
+ y0 = y;
+ y1 = y + height - 1;
+
+ /* max horiz << 8 | start horiz */
+ lcd_write_reg(R_HORIZ_RAM_ADDR_POS, (x1 << 8) | x0);
+
+ /* max vert << 8 | start vert */
+ lcd_write_reg(R_VERT_RAM_ADDR_POS, (y1 << 8) | y0);
+
+ /* start vert << 8 | start horiz */
+ lcd_write_reg(R_RAM_ADDR_SET, (y0 << 8) | x0);
+
+ /* start drawing */
+ lcd_send_cmd(R_WRITE_DATA_2_GRAM);
+
+ ymax = y + height - 1 ;
+
+ const int stride_div_csub_x = stride/CSUB_X;
+
+ for (; y <= ymax ; y++)
+ {
+ /* upsampling, YUV->RGB conversion and reduction to RGB565 in one go */
+ const unsigned char *ysrc = src[0] + stride * src_y + src_x;
+
+ const int uvoffset = stride_div_csub_x * (src_y/CSUB_Y) +
+ (src_x/CSUB_X);
+
+ const unsigned char *usrc = src[1] + uvoffset;
+ const unsigned char *vsrc = src[2] + uvoffset;
+ const unsigned char *row_end = ysrc + width;
+
+ int y, u, v;
+ int red1, green1, blue1;
+ int red2, green2, blue2;
+ unsigned rbits, gbits, bbits;
+
+ int rc, gc, bc;
+
+ do
+ {
+ u = *usrc++ - 128;
+ v = *vsrc++ - 128;
+ rc = RVFAC * v + ROUNDOFFS;
+ gc = GVFAC * v + GUFAC * u + ROUNDOFFSG;
+ bc = BUFAC * u + ROUNDOFFS;
+
+ /* Pixel 1 */
+ y = *ysrc++;
+
+ red1 = RYFAC * y + rc;
+ green1 = GYFAC * y + gc;
+ blue1 = BYFAC * y + bc;
+
+ /* Pixel 2 */
+ y = *ysrc++;
+ red2 = RYFAC * y + rc;
+ green2 = GYFAC * y + gc;
+ blue2 = BYFAC * y + bc;
+
+ /* Since out of bounds errors are relatively rare, we check two
+ pixels at once to see if any components are out of bounds, and
+ then fix whichever is broken. This works due to high values and
+ negative values both becoming larger than the cutoff when
+ casted to unsigned. And ORing them together checks all of them
+ simultaneously. */
+ if (((unsigned)(red1 | green1 | blue1 |
+ red2 | green2 | blue2)) > (RYFAC*255+ROUNDOFFS)) {
+ if (((unsigned)(red1 | green1 | blue1)) >
+ (RYFAC*255+ROUNDOFFS)) {
+ if ((unsigned)red1 > (RYFAC*255+ROUNDOFFS))
+ {
+ if (red1 < 0)
+ red1 = 0;
+ else
+ red1 = (RYFAC*255+ROUNDOFFS);
+ }
+ if ((unsigned)green1 > (GYFAC*255+ROUNDOFFSG))
+ {
+ if (green1 < 0)
+ green1 = 0;
+ else
+ green1 = (GYFAC*255+ROUNDOFFSG);
+ }
+ if ((unsigned)blue1 > (BYFAC*255+ROUNDOFFS))
+ {
+ if (blue1 < 0)
+ blue1 = 0;
+ else
+ blue1 = (BYFAC*255+ROUNDOFFS);
+ }
+ }
+
+ if (((unsigned)(red2 | green2 | blue2)) >
+ (RYFAC*255+ROUNDOFFS)) {
+ if ((unsigned)red2 > (RYFAC*255+ROUNDOFFS))
+ {
+ if (red2 < 0)
+ red2 = 0;
+ else
+ red2 = (RYFAC*255+ROUNDOFFS);
+ }
+ if ((unsigned)green2 > (GYFAC*255+ROUNDOFFSG))
+ {
+ if (green2 < 0)
+ green2 = 0;
+ else
+ green2 = (GYFAC*255+ROUNDOFFSG);
+ }
+ if ((unsigned)blue2 > (BYFAC*255+ROUNDOFFS))
+ {
+ if (blue2 < 0)
+ blue2 = 0;
+ else
+ blue2 = (BYFAC*255+ROUNDOFFS);
+ }
+ }
+ }
+
+ rbits = red1 >> 16 ;
+ gbits = green1 >> 15 ;
+ bbits = blue1 >> 16 ;
+ lcd_send_data((rbits << 11) | (gbits << 5) | bbits);
+
+ rbits = red2 >> 16 ;
+ gbits = green2 >> 15 ;
+ bbits = blue2 >> 16 ;
+ lcd_send_data((rbits << 11) | (gbits << 5) | bbits);
+ }
+ while (ysrc < row_end);
+
+ src_y++;
+ }
+}
+
+
/* Update a fraction of the display. */
void lcd_update_rect(int x0, int y0, int width, int height)
{
diff --git a/firmware/target/arm/lcd-c200_c200v2.c b/firmware/target/arm/lcd-c200_c200v2.c
index 665c82f292..38877ccac9 100644
--- a/firmware/target/arm/lcd-c200_c200v2.c
+++ b/firmware/target/arm/lcd-c200_c200v2.c
@@ -30,6 +30,9 @@
#endif
/* Display status */
+#if MEMORYSIZE > 2
+static unsigned lcd_yuv_options SHAREDBSS_ATTR = 0;
+#endif
static bool is_lcd_enabled = true;
/* LCD command set for Samsung S6B33B2 */
@@ -298,6 +301,80 @@ void lcd_set_flip(bool yesno)
/*** update functions ***/
+#if MEMORYSIZE > 2 /* not for C200V2 */
+void lcd_yuv_set_options(unsigned options)
+{
+ lcd_yuv_options = options;
+}
+
+/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. */
+extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ int width,
+ int stride);
+extern void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
+ int width,
+ int stride,
+ int x_screen, /* To align dither pattern */
+ int y_screen);
+/* Performance function to blit a YUV bitmap directly to the LCD */
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ unsigned char const * yuv_src[3];
+ off_t z;
+
+ /* Sorry, but width and height must be >= 2 or else */
+ width &= ~1;
+ height >>= 1;
+
+ y += 0x1a;
+
+ z = stride*src_y;
+ yuv_src[0] = src[0] + z + src_x;
+ yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1);
+ yuv_src[2] = src[2] + (yuv_src[1] - src[1]);
+
+ lcd_send_command(R_ENTRY_MODE, 0x80);
+
+ lcd_send_command(R_X_ADDR_AREA, x);
+ lcd_send_command(x + width - 1, 0);
+
+ if (lcd_yuv_options & LCD_YUV_DITHER)
+ {
+ do
+ {
+ lcd_send_command(R_Y_ADDR_AREA, y);
+ lcd_send_command(y + 1, 0);
+
+ lcd_write_yuv420_lines_odither(yuv_src, width, stride, x, y);
+
+ yuv_src[0] += stride << 1; /* Skip down two luma lines */
+ yuv_src[1] += stride >> 1; /* Skip down one chroma line */
+ yuv_src[2] += stride >> 1;
+ y += 2;
+ }
+ while (--height > 0);
+ }
+ else
+ {
+ do
+ {
+ lcd_send_command(R_Y_ADDR_AREA, y);
+ lcd_send_command(y + 1, 0);
+
+ lcd_write_yuv420_lines(yuv_src, width, stride);
+
+ yuv_src[0] += stride << 1; /* Skip down two luma lines */
+ yuv_src[1] += stride >> 1; /* Skip down one chroma line */
+ yuv_src[2] += stride >> 1;
+ y += 2;
+ }
+ while (--height > 0);
+ }
+}
+#endif /* MEMORYSIZE > 2 */
+
/* Update the display.
This must be called after all other LCD functions that change the display. */
void lcd_update(void)
diff --git a/firmware/target/arm/pbell/vibe500/lcd-as-vibe500.S b/firmware/target/arm/pbell/vibe500/lcd-as-vibe500.S
new file mode 100644
index 0000000000..e03011c168
--- /dev/null
+++ b/firmware/target/arm/pbell/vibe500/lcd-as-vibe500.S
@@ -0,0 +1,556 @@
+/***************************************************************************
+ * __________ __ ___.
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id:$
+ *
+ * Copyright (C) 2007-2008 by Michael Sevakis
+ * Adapted for the Packard Bell Vibe 500 by Szymon Dziok
+ *
+ * Packard Bell Vibe 500 LCD assembly routines
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "config.h"
+#include "cpu.h"
+
+/****************************************************************************
+ * void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ * int width,
+ * int stride);
+ *
+ * |R| |1.000000 -0.000001 1.402000| |Y'|
+ * |G| = |1.000000 -0.344136 -0.714136| |Pb|
+ * |B| |1.000000 1.772000 0.000000| |Pr|
+ * Scaled, normalized, rounded and tweaked to yield RGB 565:
+ * |R| |74 0 101| |Y' - 16| >> 9
+ * |G| = |74 -24 -51| |Cb - 128| >> 8
+ * |B| |74 128 0| |Cr - 128| >> 9
+ *
+ * Write four RGB565 pixels in the following order on each loop:
+ * 1 3 + > down
+ * 2 4 \/ left
+ */
+ .section .icode, "ax", %progbits
+ .align 2
+ .global lcd_write_yuv420_lines
+ .type lcd_write_yuv420_lines, %function
+lcd_write_yuv420_lines:
+ @ r0 = yuv_src
+ @ r1 = width
+ @ r2 = stride
+ stmfd sp!, { r4-r11, lr } @ save non-scratch
+ ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
+ @ r5 = yuv_src[1] = Cb_p
+ @ r6 = yuv_src[2] = Cr_p
+ @
+ ldr r0, =LCD1_BASE @
+ @
+ sub r2, r2, #1 @ Adjust stride because of increment
+10: @ loop line @
+ ldrb r7, [r4], #1 @ r7 = *Y'_p++;
+ ldrb r8, [r5], #1 @ r8 = *Cb_p++;
+ ldrb r9, [r6], #1 @ r9 = *Cr_p++;
+ @
+ sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right
+ add r7, r12, r7, asl #5 @ by one less when adding - same for all
+ @
+ sub r8, r8, #128 @ Cb -= 128
+ sub r9, r9, #128 @ Cr -= 128
+ @
+ add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24
+ add r10, r10, r10, asl #4 @
+ add r10, r10, r8, asl #3 @
+ add r10, r10, r8, asl #4 @
+ @
+ add r11, r9, r9, asl #2 @ r9 = Cr*101
+ add r11, r11, r9, asl #5 @
+ add r9, r11, r9, asl #6 @
+ @
+ add r8, r8, #2 @ r8 = bu = (Cb*128 + 256) >> 9
+ mov r8, r8, asr #2 @
+ add r9, r9, #256 @ r9 = rv = (Cr*101 + 256) >> 9
+ mov r9, r9, asr #9 @
+ rsb r10, r10, #128 @ r10 = guv = (-r10 + 128) >> 8
+ mov r10, r10, asr #8 @
+ @ compute R, G, and B
+ add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
+ add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r3, r11 @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r3, #31 @ clamp b
+ mvnhi r3, r3, asr #31 @
+ andhi r3, r3, #31 @
+ cmp r11, #31 @ clamp r
+ mvnhi r11, r11, asr #31 @
+ andhi r11, r11, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
+ orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
+ @
+ movs r7, r3, lsr #8 @ store pixel
+20: @
+ ldr r11, [r0] @
+ tst r11, #LCD1_BUSY_MASK @
+ bne 20b @
+ str r7, [r0, #0x10] @
+25: @
+ ldr r11, [r0] @
+ tst r11, #LCD1_BUSY_MASK @
+ bne 25b @
+ str r3, [r0, #0x10] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @
+ add r7, r12, r7, asl #5 @
+ @ compute R, G, and B
+ add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
+ add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r3, r11 @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r3, #31 @ clamp b
+ mvnhi r3, r3, asr #31 @
+ andhi r3, r3, #31 @
+ cmp r11, #31 @ clamp r
+ mvnhi r11, r11, asr #31 @
+ andhi r11, r11, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
+ @
+ orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
+ orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
+ @
+ movs r7, r3, lsr #8 @ store pixel
+20: @
+ ldr r11, [r0] @
+ tst r11, #LCD1_BUSY_MASK @
+ bne 20b @
+ str r7, [r0, #0x10] @
+25: @
+ ldr r11, [r0] @
+ tst r11, #LCD1_BUSY_MASK @
+ bne 25b @
+ str r3, [r0, #0x10] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @
+ add r7, r12, r7, asl #5 @
+ @ compute R, G, and B
+ add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
+ add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r3, r11 @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r3, #31 @ clamp b
+ mvnhi r3, r3, asr #31 @
+ andhi r3, r3, #31 @
+ cmp r11, #31 @ clamp r
+ mvnhi r11, r11, asr #31 @
+ andhi r11, r11, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ orr r3, r3, r7, lsl #5 @ r3 = b | (g << 5)
+ orr r3, r3, r11, lsl #11 @ r3 |= (r << 11)
+ @
+ movs r7, r3, lsr #8 @ store pixel
+20: @
+ ldr r11, [r0] @
+ tst r11, #LCD1_BUSY_MASK @
+ bne 20b @
+ str r7, [r0, #0x10] @
+25: @
+ ldr r11, [r0] @
+ tst r11, #LCD1_BUSY_MASK @
+ bne 25b @
+ str r3, [r0, #0x10] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @
+ add r7, r12, r7, asl #5 @
+ @ compute R, G, and B
+ add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
+ add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r3, r11 @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r3, #31 @ clamp b
+ mvnhi r3, r3, asr #31 @
+ andhi r3, r3, #31 @
+ cmp r11, #31 @ clamp r
+ mvnhi r11, r11, asr #31 @
+ andhi r11, r11, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
+ orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
+ @
+ movs r7, r3, lsr #8 @ store pixel
+20: @
+ ldr r11, [r0] @
+ tst r11, #LCD1_BUSY_MASK @
+ bne 20b @
+ str r7, [r0, #0x10] @
+25: @
+ ldr r11, [r0] @
+ tst r11, #LCD1_BUSY_MASK @
+ bne 25b @
+ str r3, [r0, #0x10] @
+ @
+ subs r1, r1, #2 @ subtract block from width
+ bgt 10b @ loop line @
+ @
+ ldmpc regs=r4-r11 @ restore registers and return
+ .ltorg @ dump constant pool
+ .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
+
+
+/****************************************************************************
+ * void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
+ * int width,
+ * int stride,
+ * int x_screen,
+ * int y_screen);
+ *
+ * |R| |1.000000 -0.000001 1.402000| |Y'|
+ * |G| = |1.000000 -0.344136 -0.714136| |Pb|
+ * |B| |1.000000 1.772000 0.000000| |Pr|
+ * Red scaled at twice g & b but at same precision to place it in correct
+ * bit position after multiply and leave instruction count lower.
+ * |R| |258 0 408| |Y' - 16|
+ * |G| = |149 -49 -104| |Cb - 128|
+ * |B| |149 258 0| |Cr - 128|
+ *
+ * Write four RGB565 pixels in the following order on each loop:
+ * 1 3 + > down
+ * 2 4 \/ left
+ *
+ * Kernel pattern (raw|use order):
+ * 5 3 4 2 row0 row2 > down
+ * 1 7 0 6 | 5 1 3 7 4 0 2 6 col0 left
+ * 4 2 5 3 | 4 0 2 6 5 1 3 7 col2 \/
+ * 0 6 1 7
+ */
+ .section .icode, "ax", %progbits
+ .align 2
+ .global lcd_write_yuv420_lines_odither
+ .type lcd_write_yuv420_lines_odither, %function
+lcd_write_yuv420_lines_odither:
+ @ r0 = yuv_src
+ @ r1 = width
+ @ r2 = stride
+ @ r3 = x_screen
+ @ [sp] = y_screen
+ stmfd sp!, { r4-r11, lr } @ save non-scratch
+ ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
+ @ r5 = yuv_src[1] = Cb_p
+ @ r6 = yuv_src[2] = Cr_p
+ @
+ ldr r0, [sp, #36] @ Line up pattern and kernel quadrant
+ eor r14, r3, r0 @
+ and r14, r14, #0x2 @
+ mov r14, r14, lsl #6 @ 0x00 or 0x80
+ @
+ ldr r0, =LCD1_BASE @
+ @
+ sub r2, r2, #1 @ Adjust stride because of increment
+10: @ loop line @
+ @
+ ldrb r7, [r4], #1 @ r7 = *Y'_p++;
+ ldrb r8, [r5], #1 @ r8 = *Cb_p++;
+ ldrb r9, [r6], #1 @ r9 = *Cr_p++;
+ @
+ eor r14, r14, #0x80 @ flip pattern quadrant
+ @
+ sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @
+ sub r8, r8, #128 @ Cb -= 128
+ sub r9, r9, #128 @ Cr -= 128
+ @
+ add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49
+ add r10, r10, r8, asl #5 @
+ add r10, r10, r9, asl #3 @
+ add r10, r10, r9, asl #5 @
+ add r10, r10, r9, asl #6 @
+ @
+ mov r8, r8, asl #1 @ r8 = bu = Cb*258
+ add r8, r8, r8, asl #7 @
+ @
+ add r9, r9, r9, asl #1 @ r9 = rv = Cr*408
+ add r9, r9, r9, asl #4 @
+ mov r9, r9, asl #3 @
+ @
+ @ compute R, G, and B
+ add r3, r8, r7 @ r3 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ @ r8 = bu, r9 = rv, r10 = guv
+ @
+ sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256
+ add r3, r12, r3, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
+ add r7, r12, r7, lsr #8 @
+ @
+ add r12, r14, #0x200 @
+ @
+ add r3, r3, r12 @ b = r3 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r3, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r3, asr #15 @ clamp b
+ mvnne r3, r12, lsr #15 @
+ andne r3, r3, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ and r11, r11, #0xf800 @ pack pixel
+ and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
+ orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
+ orr r3, r11, r3, lsr #10 @ (b >> 10)
+ @
+ movs r7, r3, lsr #8 @ store pixel
+20: @
+ ldr r11, [r0] @
+ tst r11, #LCD1_BUSY_MASK @
+ bne 20b @
+ str r7, [r0, #0x10] @
+25: @
+ ldr r11, [r0] @
+ tst r11, #LCD1_BUSY_MASK @
+ bne 25b @
+ str r3, [r0, #0x10] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @ compute R, G, and B
+ add r3, r8, r7 @ r3 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256
+ add r3, r12, r3, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
+ add r7, r12, r7, lsr #8 @
+ @
+ @ This element is zero - use r14 @
+ @
+ add r3, r3, r14 @ b = r3 + delta
+ add r11, r11, r14, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r14, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r3, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r3, asr #15 @ clamp b
+ mvnne r3, r12, lsr #15 @
+ andne r3, r3, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
+ @
+ and r11, r11, #0xf800 @ pack pixel
+ and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
+ orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
+ orr r3, r11, r3, lsr #10 @ (b >> 10)
+ @
+ movs r7, r3, lsr #8 @ store pixel
+20: @
+ ldr r11, [r0] @
+ tst r11, #LCD1_BUSY_MASK @
+ bne 20b @
+ str r7, [r0, #0x10] @
+25: @
+ ldr r11, [r0] @
+ tst r11, #LCD1_BUSY_MASK @
+ bne 25b @
+ str r3, [r0, #0x10] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @ compute R, G, and B
+ add r3, r8, r7 @ r3 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ @ r8 = bu, r9 = rv, r10 = guv
+ @
+ sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256
+ add r3, r12, r3, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
+ add r7, r12, r7, lsr #8 @
+ @
+ add r12, r14, #0x100 @
+ @
+ add r3, r3, r12 @ b = r3 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r3, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r3, asr #15 @ clamp b
+ mvnne r3, r12, lsr #15 @
+ andne r3, r3, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ and r11, r11, #0xf800 @ pack pixel
+ and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
+ orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
+ orr r3, r11, r3, lsr #10 @ (b >> 10)
+ @
+ movs r7, r3, lsr #8 @ store pixel
+20: @
+ ldr r11, [r0] @
+ tst r11, #LCD1_BUSY_MASK @
+ bne 20b @
+ str r7, [r0, #0x10] @
+25: @
+ ldr r11, [r0] @
+ tst r11, #LCD1_BUSY_MASK @
+ bne 25b @
+ str r3, [r0, #0x10] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @ compute R, G, and B
+ add r3, r8, r7 @ r3 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256
+ add r3, r12, r3, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
+ add r7, r12, r7, lsr #8 @
+ @
+ add r12, r14, #0x300 @
+ @
+ add r3, r3, r12 @ b = r3 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r3, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r3, asr #15 @ clamp b
+ mvnne r3, r12, lsr #15 @
+ andne r3, r3, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ and r11, r11, #0xf800 @ pack pixel
+ and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
+ orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
+ orr r3, r11, r3, lsr #10 @ (b >> 10)
+ @
+ movs r7, r3, lsr #8 @ store pixel
+20: @
+ ldr r11, [r0] @
+ tst r11, #LCD1_BUSY_MASK @
+ bne 20b @
+ str r7, [r0, #0x10] @
+25: @
+ ldr r11, [r0] @
+ tst r11, #LCD1_BUSY_MASK @
+ bne 25b @
+ str r3, [r0, #0x10] @
+ @
+ subs r1, r1, #2 @ subtract block from width
+ bgt 10b @ loop line @
+ @
+ ldmpc regs=r4-r11 @ restore registers and return
+ .ltorg @ dump constant pool
+ .size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither
diff --git a/firmware/target/arm/pbell/vibe500/lcd-vibe500.c b/firmware/target/arm/pbell/vibe500/lcd-vibe500.c
index 2daa5def74..047ef2bf53 100644
--- a/firmware/target/arm/pbell/vibe500/lcd-vibe500.c
+++ b/firmware/target/arm/pbell/vibe500/lcd-vibe500.c
@@ -35,6 +35,8 @@ static unsigned short disp_control_rev;
/* Contrast setting << 8 */
static int lcd_contrast;
+static unsigned lcd_yuv_options SHAREDBSS_ATTR = 0;
+
/* Forward declarations */
#if defined(HAVE_LCD_ENABLE) || defined(HAVE_LCD_SLEEP)
static void lcd_display_off(void);
@@ -375,6 +377,79 @@ bool lcd_active(void)
/*** update functions ***/
+void lcd_yuv_set_options(unsigned options)
+{
+ lcd_yuv_options = options;
+}
+
+/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. */
+
+extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ int width,
+ int stride);
+extern void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
+ int width,
+ int stride,
+ int x_screen, /* To align dither pattern */
+ int y_screen);
+
+/* Performance function to blit a YUV bitmap directly to the LCD */
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ const unsigned char *yuv_src[3];
+ const unsigned char *ysrc_max;
+ int y0;
+ int options;
+
+ if (!display_on)
+ return;
+
+ width &= ~1;
+ height &= ~1;
+
+ lcd_write_reg(R_VERT_RAM_ADDR_POS, ((LCD_WIDTH - 1 - x) << 8) |
+ ((LCD_WIDTH-1) - (x + width - 1)));
+
+ y0 = LCD_HEIGHT - 1 - y;
+
+ lcd_write_reg(R_ENTRY_MODE,0x1000);
+
+ yuv_src[0] = src[0] + src_y * stride + src_x;
+ yuv_src[1] = src[1] + (src_y * stride >> 2) + (src_x >> 1);
+ yuv_src[2] = src[2] + (yuv_src[1] - src[1]);
+ ysrc_max = yuv_src[0] + height * stride;
+
+ options = lcd_yuv_options;
+
+ do
+ {
+ lcd_write_reg(R_HORIZ_RAM_ADDR_POS, (y0 << 8) | (y0 - 1));
+ lcd_write_reg(R_RAM_ADDR_SET, ((LCD_WIDTH - 1 - x) << 8) | y0);
+
+ /* start drawing */
+ lcd_send_cmd(R_WRITE_DATA_2_GRAM);
+
+ if (options & LCD_YUV_DITHER)
+ {
+ lcd_write_yuv420_lines_odither(yuv_src, width, stride,x, y);
+ y -= 2;
+ }
+ else
+ {
+ lcd_write_yuv420_lines(yuv_src, width, stride);
+ }
+
+ y0 -= 2;
+ yuv_src[0] += stride << 1;
+ yuv_src[1] += stride >> 1;
+ yuv_src[2] += stride >> 1;
+ }
+ while (yuv_src[0] < ysrc_max);
+ lcd_write_reg(R_ENTRY_MODE,0x1008);
+}
+
/* Update a fraction of the display. */
void lcd_update_rect(int x0, int y0, int width, int height)
{
diff --git a/firmware/target/arm/philips/hdd1630/lcd-as-hdd1630.S b/firmware/target/arm/philips/hdd1630/lcd-as-hdd1630.S
new file mode 100644
index 0000000000..3bb3530917
--- /dev/null
+++ b/firmware/target/arm/philips/hdd1630/lcd-as-hdd1630.S
@@ -0,0 +1,570 @@
+/***************************************************************************
+ * __________ __ ___.
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id$
+ *
+ * Copyright (C) 2007-2008 by Michael Sevakis
+ *
+ * Philips HDD1630 LCD assembly routines
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "config.h"
+#include "cpu.h"
+
+/****************************************************************************
+ * void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ * int width,
+ * int stride);
+ *
+ * |R| |1.000000 -0.000001 1.402000| |Y'|
+ * |G| = |1.000000 -0.334136 -0.714136| |Pb|
+ * |B| |1.000000 1.772000 0.000000| |Pr|
+ * Scaled, normalized, rounded and tweaked to yield RGB 565:
+ * |R| |74 0 101| |Y' - 16| >> 9
+ * |G| = |74 -24 -51| |Cb - 128| >> 8
+ * |B| |74 128 0| |Cr - 128| >> 9
+ *
+ * Write four RGB565 pixels in the following order on each loop:
+ * 1 3 + > down
+ * 2 4 \/ left
+ */
+ .section .icode, "ax", %progbits
+ .align 2
+ .global lcd_write_yuv420_lines
+ .type lcd_write_yuv420_lines, %function
+lcd_write_yuv420_lines:
+ @ r0 = yuv_src
+ @ r1 = width
+ @ r2 = stride
+ stmfd sp!, { r4-r11, lr } @ save non-scratch
+ ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
+ @ r5 = yuv_src[1] = Cb_p
+ @ r6 = yuv_src[2] = Cr_p
+ @
+ mov r0, #0x7000000c @ r0 = &LCD2_PORT = 0x70008a0c
+ add r0, r0, #0x8a00 @
+ mov r14, #LCD2_DATA_MASK @
+ @
+ sub r2, r2, #1 @ Adjust stride because of increment
+10: @ loop line @
+ ldrb r7, [r4], #1 @ r7 = *Y'_p++;
+ ldrb r8, [r5], #1 @ r8 = *Cb_p++;
+ ldrb r9, [r6], #1 @ r9 = *Cr_p++;
+ @
+ sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right
+ add r7, r12, r7, asl #5 @ by one less when adding - same for all
+ @
+ sub r8, r8, #128 @ Cb -= 128
+ sub r9, r9, #128 @ Cr -= 128
+ @
+ add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24
+ add r10, r10, r10, asl #4 @
+ add r10, r10, r8, asl #3 @
+ add r10, r10, r8, asl #4 @
+ @
+ add r11, r9, r9, asl #2 @ r9 = Cr*101
+ add r11, r11, r9, asl #5 @
+ add r9, r11, r9, asl #6 @
+ @
+ add r8, r8, #2 @ r8 = bu = (Cb*128 + 256) >> 9
+ mov r8, r8, asr #2 @
+ add r9, r9, #256 @ r9 = rv = (Cr*101 + 256) >> 9
+ mov r9, r9, asr #9 @
+ rsb r10, r10, #128 @ r10 = guv = (-(Cr*51 + Cb*24) + 128) >> 8
+ mov r10, r10, asr #8 @
+ @ compute R, G, and B
+ add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
+ add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r3, r11 @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r3, #31 @ clamp b
+ mvnhi r3, r3, asr #31 @
+ andhi r3, r3, #31 @
+ cmp r11, #31 @ clamp r
+ mvnhi r11, r11, asr #31 @
+ andhi r11, r11, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
+ orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
+ @
+ orr r7, r14, r3, lsr #8 @ store pixel
+ orr r11, r14, r3 @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r7, [r0] @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r11, [r0] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @
+ add r7, r12, r7, asl #5 @
+ @ compute R, G, and B
+ add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
+ add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r3, r11 @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r3, #31 @ clamp b
+ mvnhi r3, r3, asr #31 @
+ andhi r3, r3, #31 @
+ cmp r11, #31 @ clamp r
+ mvnhi r11, r11, asr #31 @
+ andhi r11, r11, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
+ @
+ orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
+ orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
+ @
+ orr r7, r14, r3, lsr #8 @ store pixel
+ orr r11, r14, r3 @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r7, [r0] @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r11, [r0] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @
+ add r7, r12, r7, asl #5 @
+ @ compute R, G, and B
+ add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
+ add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r3, r11 @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r3, #31 @ clamp b
+ mvnhi r3, r3, asr #31 @
+ andhi r3, r3, #31 @
+ cmp r11, #31 @ clamp r
+ mvnhi r11, r11, asr #31 @
+ andhi r11, r11, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ orr r3, r3, r7, lsl #5 @ r3 = b | (g << 5)
+ orr r3, r3, r11, lsl #11 @ r3 |= (r << 11)
+ @
+ orr r7, r14, r3, lsr #8 @ store pixel
+ orr r11, r14, r3 @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r7, [r0] @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r11, [r0] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @
+ add r7, r12, r7, asl #5 @
+ @ compute R, G, and B
+ add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
+ add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r3, r11 @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r3, #31 @ clamp b
+ mvnhi r3, r3, asr #31 @
+ andhi r3, r3, #31 @
+ cmp r11, #31 @ clamp r
+ mvnhi r11, r11, asr #31 @
+ andhi r11, r11, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
+ orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
+ @
+ orr r7, r14, r3, lsr #8 @ store pixel
+ orr r11, r14, r3 @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r7, [r0] @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r11, [r0] @
+ @
+ subs r1, r1, #2 @ subtract block from width
+ bgt 10b @ loop line @
+ @
+ ldmpc regs=r4-r11 @ restore registers and return
+ .ltorg @ dump constant pool
+ .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
+
+
+/****************************************************************************
+ * void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
+ * int width,
+ * int stride,
+ * int x_screen,
+ * int y_screen);
+ *
+ * |R| |1.000000 -0.000001 1.402000| |Y'|
+ * |G| = |1.000000 -0.334136 -0.714136| |Pb|
+ * |B| |1.000000 1.772000 0.000000| |Pr|
+ * Red scaled at twice g & b but at same precision to place it in correct
+ * bit position after multiply and leave instruction count lower.
+ * |R| |258 0 408| |Y' - 16|
+ * |G| = |149 -49 -104| |Cb - 128|
+ * |B| |149 258 0| |Cr - 128|
+ *
+ * Write four RGB565 pixels in the following order on each loop:
+ * 1 3 + > down
+ * 2 4 \/ left
+ *
+ * Kernel pattern (raw|use order):
+ * 5 3 4 2 row0 row2 > down
+ * 1 7 0 6 | 5 1 3 7 4 0 2 6 col0 left
+ * 4 2 5 3 | 4 0 2 6 5 1 3 7 col2 \/
+ * 0 6 1 7
+ */
+ .section .icode, "ax", %progbits
+ .align 2
+ .global lcd_write_yuv420_lines_odither
+ .type lcd_write_yuv420_lines_odither, %function
+lcd_write_yuv420_lines_odither:
+ @ r0 = yuv_src
+ @ r1 = width
+ @ r2 = stride
+ @ r3 = x_screen
+ @ [sp] = y_screen
+ stmfd sp!, { r4-r11, lr } @ save non-scratch
+ ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
+ @ r5 = yuv_src[1] = Cb_p
+ @ r6 = yuv_src[2] = Cr_p
+ @
+ ldr r0, [sp, #36] @ Line up pattern and kernel quadrant
+ eor r14, r3, r0 @
+ and r14, r14, #0x2 @
+ mov r14, r14, lsl #6 @ 0x00 or 0x80
+ @
+ mov r0, #0x7000000c @ r0 = &LCD2_PORT = 0x70008a0c
+ add r0, r0, #0x8a00 @
+ @
+ sub r2, r2, #1 @ Adjust stride because of increment
+10: @ loop line @
+ @
+ ldrb r7, [r4], #1 @ r7 = *Y'_p++;
+ ldrb r8, [r5], #1 @ r8 = *Cb_p++;
+ ldrb r9, [r6], #1 @ r9 = *Cr_p++;
+ @
+ eor r14, r14, #0x80 @ flip pattern quadrant
+ @
+ sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @
+ sub r8, r8, #128 @ Cb -= 128
+ sub r9, r9, #128 @ Cr -= 128
+ @
+ add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49
+ add r10, r10, r8, asl #5 @
+ add r10, r10, r9, asl #3 @
+ add r10, r10, r9, asl #5 @
+ add r10, r10, r9, asl #6 @
+ @
+ mov r8, r8, asl #1 @ r8 = bu = Cb*258
+ add r8, r8, r8, asl #7 @
+ @
+ add r9, r9, r9, asl #1 @ r9 = rv = Cr*408
+ add r9, r9, r9, asl #4 @
+ mov r9, r9, asl #3 @
+ @
+ @ compute R, G, and B
+ add r3, r8, r7 @ r3 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ @ r8 = bu, r9 = rv, r10 = guv
+ @
+ sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256
+ add r3, r12, r3, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
+ add r7, r12, r7, lsr #8 @
+ @
+ add r12, r14, #0x200 @
+ @
+ add r3, r3, r12 @ b = r3 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r3, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r3, asr #15 @ clamp b
+ mvnne r3, r12, lsr #15 @
+ andne r3, r3, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ and r11, r11, #0xf800 @ pack pixel
+ and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
+ orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
+ orr r3, r11, r3, lsr #10 @ (b >> 10)
+ @
+ mov r11, #LCD2_DATA_MASK @ store pixel
+ orr r7, r11, r3, lsr #8 @
+ orr r11, r11, r3 @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r7, [r0] @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r11, [r0] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @ compute R, G, and B
+ add r3, r8, r7 @ r3 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256
+ add r3, r12, r3, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
+ add r7, r12, r7, lsr #8 @
+ @
+ @ This element is zero - use r14 @
+ @
+ add r3, r3, r14 @ b = r3 + delta
+ add r11, r11, r14, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r14, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r3, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r3, asr #15 @ clamp b
+ mvnne r3, r12, lsr #15 @
+ andne r3, r3, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
+ @
+ and r11, r11, #0xf800 @ pack pixel
+ and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
+ orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
+ orr r3, r11, r3, lsr #10 @ (b >> 10)
+ @
+ mov r11, #LCD2_DATA_MASK @ store pixel
+ orr r7, r11, r3, lsr #8 @
+ orr r11, r11, r3 @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r7, [r0] @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r11, [r0] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @ compute R, G, and B
+ add r3, r8, r7 @ r3 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ @ r8 = bu, r9 = rv, r10 = guv
+ @
+ sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256
+ add r3, r12, r3, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
+ add r7, r12, r7, lsr #8 @
+ @
+ add r12, r14, #0x100 @
+ @
+ add r3, r3, r12 @ b = r3 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r3, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r3, asr #15 @ clamp b
+ mvnne r3, r12, lsr #15 @
+ andne r3, r3, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ and r11, r11, #0xf800 @ pack pixel
+ and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
+ orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
+ orr r3, r11, r3, lsr #10 @ (b >> 10)
+ @
+ mov r11, #LCD2_DATA_MASK @ store pixel
+ orr r7, r11, r3, lsr #8 @
+ orr r11, r11, r3 @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r7, [r0] @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r11, [r0] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @ compute R, G, and B
+ add r3, r8, r7 @ r3 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256
+ add r3, r12, r3, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
+ add r7, r12, r7, lsr #8 @
+ @
+ add r12, r14, #0x300 @
+ @
+ add r3, r3, r12 @ b = r3 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r3, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r3, asr #15 @ clamp b
+ mvnne r3, r12, lsr #15 @
+ andne r3, r3, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ and r11, r11, #0xf800 @ pack pixel
+ and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
+ orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
+ orr r3, r11, r3, lsr #10 @ (b >> 10)
+ @
+ mov r11, #LCD2_DATA_MASK @ store pixel
+ orr r7, r11, r3, lsr #8 @
+ orr r11, r11, r3 @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r7, [r0] @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r11, [r0] @
+ @
+ subs r1, r1, #2 @ subtract block from width
+ bgt 10b @ loop line @
+ @
+ ldmpc regs=r4-r11 @ restore registers and return
+ .ltorg @ dump constant pool
+ .size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither
diff --git a/firmware/target/arm/philips/hdd1630/lcd-hdd1630.c b/firmware/target/arm/philips/hdd1630/lcd-hdd1630.c
index d9570600bc..c26c0bc963 100644
--- a/firmware/target/arm/philips/hdd1630/lcd-hdd1630.c
+++ b/firmware/target/arm/philips/hdd1630/lcd-hdd1630.c
@@ -81,6 +81,7 @@
static bool lcd_enabled;
/* Display status */
+static unsigned lcd_yuv_options SHAREDBSS_ATTR = 0;
static unsigned mad_ctrl = 0;
/* wait for LCD */
@@ -312,6 +313,86 @@ void lcd_set_flip(bool yesno)
lcd_send_data(mad_ctrl);
}
+void lcd_yuv_set_options(unsigned options)
+{
+ lcd_yuv_options = options;
+}
+
+/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. */
+extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ int width, int stride);
+
+extern void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
+ int width, int stride,
+ int x_screen, int y_screen);
+
+/* Performance function to blit a YUV bitmap directly to the LCD */
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ unsigned char const * yuv_src[3];
+ off_t z;
+
+ /* Sorry, but width and height must be >= 2 or else */
+ width &= ~1;
+ height >>= 1;
+
+ z = stride*src_y;
+ yuv_src[0] = src[0] + z + src_x;
+ yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1);
+ yuv_src[2] = src[2] + (yuv_src[1] - src[1]);
+
+ /* Set vertical address mode */
+ lcd_send_cmd(MADCTR);
+ lcd_send_data(mad_ctrl | (1<<5));
+
+ lcd_send_cmd(RASET);
+ lcd_send_data(x);
+ lcd_send_data(x + width - 1);
+
+ if (lcd_yuv_options & LCD_YUV_DITHER)
+ {
+ do
+ {
+ lcd_send_cmd(CASET);
+ lcd_send_data(y);
+ lcd_send_data(y + 1);
+
+ lcd_send_cmd(RAMWR);
+
+ lcd_write_yuv420_lines_odither(yuv_src, width, stride, x, y);
+ yuv_src[0] += stride << 1; /* Skip down two luma lines */
+ yuv_src[1] += stride >> 1; /* Skip down one chroma line */
+ yuv_src[2] += stride >> 1;
+ y += 2;
+ }
+ while (--height > 0);
+ }
+ else
+ {
+ do
+ {
+ lcd_send_cmd(CASET);
+ lcd_send_data(y);
+ lcd_send_data(y + 1);
+
+ lcd_send_cmd(RAMWR);
+
+ lcd_write_yuv420_lines(yuv_src, width, stride);
+ yuv_src[0] += stride << 1; /* Skip down two luma lines */
+ yuv_src[1] += stride >> 1; /* Skip down one chroma line */
+ yuv_src[2] += stride >> 1;
+ y += 2;
+ }
+ while (--height > 0);
+ }
+
+ /* Restore the address mode */
+ lcd_send_cmd(MADCTR);
+ lcd_send_data(mad_ctrl);
+}
+
/* Update the display.
This must be called after all other LCD functions that change the display. */
void lcd_update(void)
diff --git a/firmware/target/arm/philips/hdd6330/lcd-as-hdd6330.S b/firmware/target/arm/philips/hdd6330/lcd-as-hdd6330.S
new file mode 100644
index 0000000000..c3a7992a2e
--- /dev/null
+++ b/firmware/target/arm/philips/hdd6330/lcd-as-hdd6330.S
@@ -0,0 +1,140 @@
+/***************************************************************************
+ * __________ __ ___.
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id:$
+ *
+ * Copyright (C) 2010 by Szymon Dziok
+ *
+ * Philips Gogear HDD6330 LCD assembly routine
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "config.h"
+#include "cpu.h"
+
+/****************************************************************************
+ void lcd_yuv_write_inner_loop(unsigned char const * const ysrc,
+ unsigned char const * const usrc,
+ unsigned char const * const vsrc,
+ int width);
+*/
+ .section .icode, "ax", %progbits
+ .align 2
+ .global lcd_yuv_write_inner_loop
+ .type lcd_yuv_write_inner_loop, %function
+lcd_yuv_write_inner_loop:
+ @ r0 = ysrc
+ @ r1 = usrc
+ @ r2 = vsrc
+ @ r3 = width
+ stmfd sp!, { r4-r11, lr } @ save regs
+ mov r4, #0x70000000 @ r4 = LCD2_BLOCK_CTRL - 0x20
+ add r4, r4, #0x8a00 @
+ add r5, r4, #0x100 @ r5 = LCD2_BLOCK_DATA
+10: @ loop
+
+ ldrb r7, [r1], #1 @ *usrc++
+ ldrb r8, [r2], #1 @ *vsrc++
+
+ sub r7, r7, #128 @ Cb -= 128
+ sub r8, r8, #128 @ Cr -= 128
+
+ add r10, r8, r8, asl #2 @ Cr*101
+ add r10, r10, r8, asl #5
+ add r10, r10, r8, asl #6
+
+ add r11, r8, r8, asl #1 @ Cr*51 + Cb*24
+ add r11, r11, r11, asl #4
+ add r11, r11, r7, asl #3
+ add r11, r11, r7, asl #4
+
+ add r12, r7, #2 @ r12 = bu = (Cb*128 + 256) >> 9
+ mov r12, r12, asr #2
+ add r10, r10, #256 @ r10 = rv = (Cr*101 + 256) >> 9
+ mov r10, r10, asr #9
+ rsb r11, r11, #128 @ r11 = guv = (-r11 + 128) >> 8
+ mov r11, r11, asr #8
+
+@ pixel_1
+ ldrb r7, [r0], #1 @ *ysrc++
+ sub r7, r7, #16 @ Y = (Y' - 16) * 37
+ add r8, r7, r7, asl #2
+ add r7, r8, r7, asl #5
+
+ add r9, r10, r7, asr #8 @ R = (Y >> 8) + rv
+ add r8, r11, r7, asr #7 @ G = (Y >> 7) + guv
+ add r7, r12, r7, asr #8 @ B = (Y >> 8) + bu
+
+ cmp r9, #31 @ clamp R
+ mvnhi r9, r9, asr #31
+ andhi r9, r9, #31
+
+ cmp r8, #63 @ clamp G
+ mvnhi r8, r8, asr #31
+ andhi r8, r8, #63
+
+ cmp r7, #31 @ clamp B
+ mvnhi r7, r7, asr #31
+ andhi r7, r7, #31
+
+ orr r6, r7, r8, lsl #5 @ pack pixel
+ orr r6, r6, r9, lsl #11
+
+ mov r7, r6, lsl #8 @ swap bytes
+ and r7, r7, #0xff00
+ add r6, r7, r6, lsr #8
+
+@ pixel_2
+ ldrb r7, [r0], #1 @ *ysrc++
+ sub r7, r7, #16 @ Y = (Y' - 16) * 37
+ add r8, r7, r7, asl #2
+ add r7, r8, r7, asl #5
+
+ add r9, r10, r7, asr #8 @ R = (Y >> 8) + rv
+ add r8, r11, r7, asr #7 @ G = (Y >> 7) + guv
+ add r7, r12, r7, asr #8 @ B = (Y >> 8) + bu
+
+ cmp r9, #31 @ clamp R
+ mvnhi r9, r9, asr #31
+ andhi r9, r9, #31
+
+ cmp r8, #63 @ clamp G
+ mvnhi r8, r8, asr #31
+ andhi r8, r8, #63
+
+ cmp r7, #31 @ clamp B
+ mvnhi r7, r7, asr #31
+ andhi r7, r7, #31
+
+ orr r7, r7, r8, lsl #5 @ pack pixel
+ orr r7, r7, r9, lsl #11
+
+ orr r6, r6, r7, lsl #24 @ swap bytes and add pixels simultaneously
+ mov r7, r7, lsr #8
+ orr r6, r6, r7, lsl #16
+
+11: @ while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_TXOK));
+ ldr r11, [r4, #0x20] @
+ tst r11, #0x1000000 @
+ beq 11b @
+
+ str r6, [r5] @ send two pixels
+
+ subs r3, r3, #2 @ decrease width
+ bgt 10b @ loop
+
+ ldmpc regs=r4-r11 @ restore regs
+ .ltorg @ dump constant pool
+ .size lcd_yuv_write_inner_loop, .-lcd_yuv_write_inner_loop
diff --git a/firmware/target/arm/philips/hdd6330/lcd-hdd6330.c b/firmware/target/arm/philips/hdd6330/lcd-hdd6330.c
index 9d2fdc8519..cdd3064bba 100644
--- a/firmware/target/arm/philips/hdd6330/lcd-hdd6330.c
+++ b/firmware/target/arm/philips/hdd6330/lcd-hdd6330.c
@@ -37,6 +37,9 @@
/* whether the lcd is currently enabled or not */
static bool lcd_enabled;
+/* Display status */
+static unsigned lcd_yuv_options SHAREDBSS_ATTR = 0;
+
/* Value used for flipping. Must be remembered when display is turned off. */
static unsigned short flip;
@@ -144,6 +147,101 @@ void lcd_set_flip(bool yesno)
lcd_send_data(0x08 | flip);
}
+void lcd_yuv_set_options(unsigned options)
+{
+ lcd_yuv_options = options;
+}
+
+#define CSUB_X 2
+#define CSUB_Y 2
+
+/* YUV -> RGB565 conversion
+ * |R| |1.000000 -0.000001 1.402000| |Y'|
+ * |G| = |1.000000 -0.334136 -0.714136| |Pb|
+ * |B| |1.000000 1.772000 0.000000| |Pr|
+ * Scaled, normalized, rounded and tweaked to yield RGB 565:
+ * |R| |74 0 101| |Y' - 16| >> 9
+ * |G| = |74 -24 -51| |Cb - 128| >> 8
+ * |B| |74 128 0| |Cr - 128| >> 9
+*/
+
+extern void lcd_yuv_write_inner_loop(unsigned char const * const ysrc,
+ unsigned char const * const usrc,
+ unsigned char const * const vsrc,
+ int width);
+
+/* Performance function to blit a YUV bitmap directly to the LCD */
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ int h;
+
+ width = (width + 1) & ~1;
+
+ lcd_send_reg(LCD_REG_HORIZ_ADDR_START);
+ lcd_send_data(y);
+
+ lcd_send_reg(LCD_REG_HORIZ_ADDR_END);
+ lcd_send_data(y + height - 1);
+
+ lcd_send_reg(LCD_REG_VERT_ADDR_START);
+ lcd_send_data(x + x_offset);
+
+ lcd_send_reg(LCD_REG_VERT_ADDR_END);
+ lcd_send_data(x + width - 1 + x_offset);
+
+ lcd_send_reg(LCD_REG_WRITE_DATA_2_GRAM);
+
+ const int stride_div_csub_x = stride/CSUB_X;
+
+ h=0;
+ while (1)
+ {
+ /* upsampling, YUV->RGB conversion and reduction to RGB565 in one go */
+ const unsigned char *ysrc = src[0] + stride * src_y + src_x;
+
+ const int uvoffset = stride_div_csub_x * (src_y/CSUB_Y) +
+ (src_x/CSUB_X);
+
+ const unsigned char *usrc = src[1] + uvoffset;
+ const unsigned char *vsrc = src[2] + uvoffset;
+
+ int pixels_to_write;
+
+ if (h==0)
+ {
+ while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_READY));
+ LCD2_BLOCK_CONFIG = 0;
+
+ if (height == 0) break;
+
+ pixels_to_write = (width * height) * 2;
+ h = height;
+
+ /* calculate how much we can do in one go */
+ if (pixels_to_write > 0x10000)
+ {
+ h = (0x10000/2) / width;
+ pixels_to_write = (width * h) * 2;
+ }
+
+ height -= h;
+ LCD2_BLOCK_CTRL = 0x10000080;
+ LCD2_BLOCK_CONFIG = 0xc0010000 | (pixels_to_write - 1);
+ LCD2_BLOCK_CTRL = 0x34000000;
+ }
+
+ lcd_yuv_write_inner_loop(ysrc,usrc,vsrc,width);
+
+ src_y++;
+ h--;
+ }
+
+ while (!(LCD2_BLOCK_CTRL & LCD2_BLOCK_READY));
+ LCD2_BLOCK_CONFIG = 0;
+}
+
/* Update the display.
This must be called after all other LCD functions that change the display. */
void lcd_update(void)
diff --git a/firmware/target/arm/philips/sa9200/lcd-as-sa9200.S b/firmware/target/arm/philips/sa9200/lcd-as-sa9200.S
new file mode 100644
index 0000000000..d99222b9df
--- /dev/null
+++ b/firmware/target/arm/philips/sa9200/lcd-as-sa9200.S
@@ -0,0 +1,590 @@
+/***************************************************************************
+ * __________ __ ___.
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id$
+ *
+ * Copyright (C) 2007-2011 by Michael Sevakis
+ *
+ * Philips GoGear SA9200 LCD assembly routines
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+ /* This code should work in general for a Renesas type LCD interface
+ * connected to the "mono" bridge. TODO: Share it where possible.
+ *
+ * Dither is already prepared to be built for upright and rotated
+ * orientations. */
+
+#include "config.h"
+#include "cpu.h"
+
+/****************************************************************************
+ * void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ * int width,
+ * int stride);
+ *
+ * |R| |1.000000 -0.000001 1.402000| |Y'|
+ * |G| = |1.000000 -0.334136 -0.714136| |Pb|
+ * |B| |1.000000 1.772000 0.000000| |Pr|
+ * Scaled, normalized, rounded and tweaked to yield RGB 565:
+ * |R| |74 0 101| |Y' - 16| >> 9
+ * |G| = |74 -24 -51| |Cb - 128| >> 8
+ * |B| |74 128 0| |Cr - 128| >> 9
+ *
+ * Write four RGB565 pixels in the following order on each loop:
+ * 1 3 + > down
+ * 2 4 \/ left
+ */
+ .section .icode, "ax", %progbits
+ .align 2
+ .global lcd_write_yuv420_lines
+ .type lcd_write_yuv420_lines, %function
+lcd_write_yuv420_lines:
+ @ r0 = yuv_src
+ @ r1 = width
+ @ r2 = stride
+ stmfd sp!, { r4-r10, lr } @ save non-scratch
+ ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
+ @ r5 = yuv_src[1] = Cb_p
+ @ r6 = yuv_src[2] = Cr_p
+ @
+ mov r0, #0x70000000 @ r0 = LCD1_BASE_ADDR = 0x70003000
+ orr r0, r0, #0x3000 @
+ @
+ sub r2, r2, #1 @ Adjust stride because of increment
+10: @ loop line @
+ ldrb r7, [r4], #1 @ r7 = *Y'_p++;
+ ldrb r8, [r5], #1 @ r8 = *Cb_p++;
+ ldrb r9, [r6], #1 @ r9 = *Cr_p++;
+ @
+ sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right
+ add r7, r12, r7, asl #5 @ by one less when adding - same for all
+ @
+ sub r8, r8, #128 @ Cb -= 128
+ sub r9, r9, #128 @ Cr -= 128
+ @
+ add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24
+ add r10, r10, r10, asl #4 @
+ add r10, r10, r8, asl #3 @
+ add r10, r10, r8, asl #4 @
+ @
+ add r14, r9, r9, asl #2 @ r9 = Cr*101
+ add r14, r14, r9, asl #5 @
+ add r9, r14, r9, asl #6 @
+ @
+ add r8, r8, #2 @ r8 = bu = (Cb*128 + 128) >> 8
+ mov r8, r8, asr #2 @
+ add r9, r9, #256 @ r9 = rv = (r8 + 256) >> 9
+ mov r9, r9, asr #9 @
+ rsb r10, r10, #128 @ r10 = guv = (-r9 + 128) >> 8
+ mov r10, r10, asr #8 @
+ @ compute R, G, and B
+ add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
+ add r14, r9, r7, asr #8 @ r14 = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r3, r14 @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r3, #31 @ clamp b
+ mvnhi r3, r3, asr #31 @
+ andhi r3, r3, #31 @
+ cmp r14, #31 @ clamp r
+ mvnhi r14, r14, asr #31 @
+ andhi r14, r14, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ orr r7, r3, r7, lsl #5 @ r7 = |00000000|00000000|00000ggg|gggbbbbb|
+ orr r7, r7, r14, lsl #11 @ r7 = |00000000|00000000|rrrrrggg|gggbbbbb|
+ mov r14, r7, lsr #8 @ r14 = |00000000|00000000|00000000|rrrrrggg|
+ @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD1_BUSY_MASK @
+ bne 20b @
+ strb r14, [r0, #0x10] @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD1_BUSY_MASK @
+ bne 20b @
+ strb r7, [r0, #0x10] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @
+ add r7, r12, r7, asl #5 @
+ @ compute R, G, and B
+ add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
+ add r14, r9, r7, asr #8 @ r14 = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r3, r14 @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r3, #31 @ clamp b
+ mvnhi r3, r3, asr #31 @
+ andhi r3, r3, #31 @
+ cmp r14, #31 @ clamp r
+ mvnhi r14, r14, asr #31 @
+ andhi r14, r14, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
+ @
+ orr r7, r3, r7, lsl #5 @ r7 = |00000000|00000000|00000ggg|gggbbbbb|
+ orr r7, r7, r14, lsl #11 @ r7 = |00000000|00000000|rrrrrggg|gggbbbbb|
+ mov r14, r7, lsr #8 @ r14 = |00000000|00000000|00000000|rrrrrggg|
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD1_BUSY_MASK @
+ bne 20b @
+ strb r14, [r0, #0x10] @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD1_BUSY_MASK @
+ bne 20b @
+ strb r7, [r0, #0x10] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @
+ add r7, r12, r7, asl #5 @
+ @ compute R, G, and B
+ add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
+ add r14, r9, r7, asr #8 @ r14 = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r3, r14 @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r3, #31 @ clamp b
+ mvnhi r3, r3, asr #31 @
+ andhi r3, r3, #31 @
+ cmp r14, #31 @ clamp r
+ mvnhi r14, r14, asr #31 @
+ andhi r14, r14, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ orr r7, r3, r7, lsl #5 @ r7 = |00000000|00000000|00000ggg|gggbbbbb|
+ orr r7, r7, r14, lsl #11 @ r7 = |00000000|00000000|rrrrrggg|gggbbbbb|
+ mov r14, r7, lsr #8 @ r14 = |00000000|00000000|00000000|rrrrrggg|
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD1_BUSY_MASK @
+ bne 20b @
+ strb r14, [r0, #0x10] @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD1_BUSY_MASK @
+ bne 20b @
+ strb r7, [r0, #0x10] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @
+ add r7, r12, r7, asl #5 @
+ @ compute R, G, and B
+ add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
+ add r14, r9, r7, asr #8 @ r14 = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r3, r14 @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r3, #31 @ clamp b
+ mvnhi r3, r3, asr #31 @
+ andhi r3, r3, #31 @
+ cmp r14, #31 @ clamp r
+ mvnhi r14, r14, asr #31 @
+ andhi r14, r14, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ orr r7, r3, r7, lsl #5 @ r7 = |00000000|00000000|00000ggg|gggbbbbb|
+ orr r7, r7, r14, lsl #11 @ r7 = |00000000|00000000|rrrrrggg|gggbbbbb|
+ mov r14, r7, lsr #8 @ r14 = |00000000|00000000|00000000|rrrrrggg|
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD1_BUSY_MASK @
+ bne 20b @
+ strb r14, [r0, #0x10] @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD1_BUSY_MASK @
+ bne 20b @
+ strb r7, [r0, #0x10] @
+ @
+ subs r1, r1, #2 @ subtract block from width
+ bgt 10b @ loop line @
+ @
+ ldmpc regs=r4-r10 @ restore registers and return
+ .ltorg @ dump constant pool
+ .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
+
+
+/****************************************************************************
+ * void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
+ * int width,
+ * int stride,
+ * int x_screen,
+ * int y_screen);
+ *
+ * |R| |1.000000 -0.000001 1.402000| |Y'|
+ * |G| = |1.000000 -0.334136 -0.714136| |Pb|
+ * |B| |1.000000 1.772000 0.000000| |Pr|
+ * Red scaled at twice g & b but at same precision to place it in correct
+ * bit position after multiply and leave instruction count lower.
+ * |R| |258 0 408| |Y' - 16|
+ * |G| = |149 -49 -104| |Cb - 128|
+ * |B| |149 258 0| |Cr - 128|
+ *
+ * Write four RGB565 pixels in the following order on each loop:
+ * 1 3 + > right/down
+ * 2 4 \/ down/left
+ *
+ * Kernel pattern for upright display:
+ * 5 3 4 2 +-> right
+ * 1 7 0 6 | down
+ * 4 2 5 3 \/
+ * 0 6 1 7
+ *
+ * Kernel pattern for clockwise rotated display:
+ * 2 6 3 7 +-> down
+ * 4 0 5 1 | left
+ * 3 7 2 6 \/
+ * 5 1 4 0
+ */
+ .section .icode, "ax", %progbits
+ .align 2
+ .global lcd_write_yuv420_lines_odither
+ .type lcd_write_yuv420_lines_odither, %function
+lcd_write_yuv420_lines_odither:
+ @ r0 = yuv_src
+ @ r1 = width
+ @ r2 = strideS
+ @ r3 = x_screen
+ @ [sp] = y_screen
+ stmfd sp!, { r4-r11, lr } @ save non-scratch
+ ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
+ @ r5 = yuv_src[1] = Cb_p
+ @ r6 = yuv_src[2] = Cr_p
+ @
+ ldr r0, [sp, #36] @ Line up pattern and kernel quadrant
+ eor r14, r3, r0 @
+ and r14, r14, #0x2 @
+ mov r14, r14, lsl #6 @ 0x00 or 0x80
+ @
+ mov r0, #0x70000000 @ r0 = LCD1_BASE_ADDR = 0x70003000
+ orr r0, r0, #0x3000 @
+ @
+ sub r2, r2, #1 @ Adjust stride because of increment
+10: @ loop line @
+ @
+ ldrb r7, [r4], #1 @ r7 = *Y'_p++;
+ ldrb r8, [r5], #1 @ r8 = *Cb_p++;
+ ldrb r9, [r6], #1 @ r9 = *Cr_p++;
+ @
+ eor r14, r14, #0x80 @ flip pattern quadrant
+ @
+ sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @
+ sub r8, r8, #128 @ Cb -= 128
+ sub r9, r9, #128 @ Cr -= 128
+ @
+ add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49
+ add r10, r10, r8, asl #5 @
+ add r10, r10, r9, asl #3 @
+ add r10, r10, r9, asl #5 @
+ add r10, r10, r9, asl #6 @
+ @
+ mov r8, r8, asl #1 @ r8 = bu = Cb*258
+ add r8, r8, r8, asl #7 @
+ @
+ add r9, r9, r9, asl #1 @ r9 = rv = Cr*408
+ add r9, r9, r9, asl #4 @
+ mov r9, r9, asl #3 @
+ @
+ @ compute R, G, and B
+ add r3, r8, r7 @ r3 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ @ r8 = bu, r9 = rv, r10 = guv
+ @
+ sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256
+ add r3, r12, r3, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
+ add r7, r12, r7, lsr #8 @
+ @
+#if LCD_WIDTH >= LCD_HEIGHT
+ add r12, r14, #0x200 @
+#else
+ add r12, r14, #0x100 @
+#endif
+ @
+ add r3, r3, r12 @ b = r3 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r3, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r3, asr #15 @ clamp b
+ mvnne r3, r12, lsr #15 @
+ andne r3, r3, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ and r11, r11, #0xf800 @ r11 = |00000000|00000000|rrrrrggg|gggbbbbb|
+ and r7, r7, #0x7e00 @
+ orr r11, r11, r7, lsr #4 @
+ orr r11, r11, r3, lsr #10 @
+ mov r7, r11, lsr #8 @ r7 = |00000000|00000000|00000000|rrrrrggg|
+ @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD1_BUSY_MASK @
+ bne 20b @
+ strb r7, [r0, #0x10] @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD1_BUSY_MASK @
+ bne 20b @
+ strb r11, [r0, #0x10] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @ compute R, G, and B
+ add r3, r8, r7 @ r3 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256
+ add r3, r12, r3, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
+ add r7, r12, r7, lsr #8 @
+ @
+#if LCD_WIDTH >= LCD_HEIGHT
+ @ This element is zero - use r14 @
+ @
+ add r3, r3, r14 @ b = r3 + delta
+ add r11, r11, r14, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r14, lsr #1 @ g = r7 + delta/2
+#else
+ add r12, r14, #0x200 @
+ @
+ add r3, r3, r12 @ b = r3 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+#endif
+ @
+ orr r12, r3, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r3, asr #15 @ clamp b
+ mvnne r3, r12, lsr #15 @
+ andne r3, r3, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
+ @
+ and r11, r11, #0xf800 @ r11 = |00000000|00000000|rrrrrggg|gggbbbbb|
+ and r7, r7, #0x7e00 @
+ orr r11, r11, r7, lsr #4 @
+ orr r11, r11, r3, lsr #10 @
+ mov r7, r11, lsr #8 @ r7 = |00000000|00000000|00000000|rrrrrggg|
+ @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD1_BUSY_MASK @
+ bne 20b @
+ strb r7, [r0, #0x10] @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD1_BUSY_MASK @
+ bne 20b @
+ strb r11, [r0, #0x10] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @ compute R, G, and B
+ add r3, r8, r7 @ r3 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ @ r8 = bu, r9 = rv, r10 = guv
+ @
+ sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256
+ add r3, r12, r3, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
+ add r7, r12, r7, lsr #8 @
+ @
+#if LCD_WIDTH >= LCD_HEIGHT
+ add r12, r14, #0x100 @
+#else
+ add r12, r14, #0x300 @
+#endif
+ @
+ add r3, r3, r12 @ b = r3 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r3, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r3, asr #15 @ clamp b
+ mvnne r3, r12, lsr #15 @
+ andne r3, r3, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ and r11, r11, #0xf800 @ r11 = |00000000|00000000|rrrrrggg|gggbbbbb|
+ and r7, r7, #0x7e00 @
+ orr r11, r11, r7, lsr #4 @
+ orr r11, r11, r3, lsr #10 @
+ mov r7, r11, lsr #8 @ r7 = |00000000|00000000|00000000|rrrrrggg|
+ @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD1_BUSY_MASK @
+ bne 20b @
+ strb r7, [r0, #0x10] @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD1_BUSY_MASK @
+ bne 20b @
+ strb r11, [r0, #0x10] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @ compute R, G, and B
+ add r3, r8, r7 @ r3 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256
+ add r3, r12, r3, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
+ add r7, r12, r7, lsr #8 @
+ @
+#if LCD_WIDTH >= LCD_HEIGHT
+ add r12, r14, #0x300 @
+ @
+ add r3, r3, r12 @ b = r3 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+#else
+ @ This element is zero - use r14 @
+ @
+ add r3, r3, r14 @ b = r3 + delta
+ add r11, r11, r14, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r14, lsr #1 @ g = r7 + delta/2
+#endif
+ @
+ orr r12, r3, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r3, asr #15 @ clamp b
+ mvnne r3, r12, lsr #15 @
+ andne r3, r3, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ and r11, r11, #0xf800 @ r11 = |00000000|00000000|rrrrrggg|gggbbbbb|
+ and r7, r7, #0x7e00 @
+ orr r11, r11, r7, lsr #4 @
+ orr r11, r11, r3, lsr #10 @
+ mov r7, r11, lsr #8 @ r7 = |00000000|00000000|00000000|rrrrrggg|
+ @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD1_BUSY_MASK @
+ bne 20b @
+ strb r7, [r0, #0x10] @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD1_BUSY_MASK @
+ bne 20b @
+ strb r11, [r0, #0x10] @
+ @
+ subs r1, r1, #2 @ subtract block from width
+ bgt 10b @ loop line @
+ @
+ ldmpc regs=r4-r11 @ restore registers and return
+ .ltorg @ dump constant pool
+ .size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither
diff --git a/firmware/target/arm/philips/sa9200/lcd-sa9200.c b/firmware/target/arm/philips/sa9200/lcd-sa9200.c
index c6c297e6ca..e30a298045 100644
--- a/firmware/target/arm/philips/sa9200/lcd-sa9200.c
+++ b/firmware/target/arm/philips/sa9200/lcd-sa9200.c
@@ -75,6 +75,9 @@ static void lcd_display_off(void);
#define R_GATE_OUT_PERIOD_CTRL 0x71
#define R_SOFTWARE_RESET 0x72
+/* Display status */
+static unsigned lcd_yuv_options SHAREDBSS_ATTR = 0;
+
/* wait for LCD */
static inline void lcd_wait_write(void)
{
@@ -404,6 +407,85 @@ void lcd_set_flip(bool yesno)
lcd_write_reg(R_DRV_OUTPUT_CONTROL, flip ? 0x090c : 0x0a0c);
}
+void lcd_yuv_set_options(unsigned options)
+{
+ lcd_yuv_options = options;
+}
+
+/* Performance function to blit a YUV bitmap directly to the LCD */
+void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ int width,
+ int stride);
+void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
+ int width,
+ int stride,
+ int x_screen,
+ int y_screen);
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ const unsigned char *yuv_src[3];
+ const unsigned char *ysrc_max;
+ int options;
+
+ if (!display_on)
+ return;
+
+ width &= ~1;
+ height &= ~1;
+
+ /* calculate the drawing region */
+ lcd_write_reg(R_VERT_RAM_ADDR_POS, ((x + width - 1) << 8) | x);
+
+ /* convert YUV coordinates to screen coordinates */
+ y = LCD_WIDTH - 1 - y;
+
+ /* 2px strip: cursor moves left, then down in gram */
+ /* BGR=1, MDT1-0=00, I/D1-0=10, AM=0 */
+ lcd_write_reg(R_ENTRY_MODE, 0x1020);
+
+ yuv_src[0] = src[0] + src_y * stride + src_x;
+ yuv_src[1] = src[1] + (src_y * stride >> 2) + (src_x >> 1);
+ yuv_src[2] = src[2] + (yuv_src[1] - src[1]);
+ ysrc_max = yuv_src[0] + height * stride;
+
+ /* cache options setting */
+ options = lcd_yuv_options;
+
+ do
+ {
+ /* max horiz << 8 | start horiz */
+ lcd_write_reg(R_HORIZ_RAM_ADDR_POS, (y << 8) | (y - 1));
+
+ /* position cursor (set AD0-AD15) */
+ lcd_write_reg(R_RAM_ADDR_SET, (x << 8) | y);
+
+ /* start drawing */
+ lcd_send_command(R_WRITE_DATA_2_GRAM);
+
+ if (options & LCD_YUV_DITHER)
+ {
+ lcd_write_yuv420_lines_odither(yuv_src, width, stride,
+ y, x);
+ }
+ else
+ {
+ lcd_write_yuv420_lines(yuv_src, width, stride);
+ }
+
+ y -= 2; /* move strip by "down" 2 px */
+ yuv_src[0] += stride << 1;
+ yuv_src[1] += stride >> 1;
+ yuv_src[2] += stride >> 1;
+ }
+ while (yuv_src[0] < ysrc_max);
+
+ /* back to normal right, then down cursor in gram */
+ /* BGR=1, MDT1-0=00, I/D1-0=11, AM=0 */
+ lcd_write_reg(R_ENTRY_MODE, 0x1030);
+}
+
/* Update the display.
This must be called after all other LCD functions that change the display. */
void lcd_update(void)
diff --git a/firmware/target/arm/rk27xx/ihifi/lcd-ihifi.c b/firmware/target/arm/rk27xx/ihifi/lcd-ihifi.c
index 9596dca98b..d5906b9dd5 100644
--- a/firmware/target/arm/rk27xx/ihifi/lcd-ihifi.c
+++ b/firmware/target/arm/rk27xx/ihifi/lcd-ihifi.c
@@ -207,3 +207,18 @@ bool lcd_active()
{
return display_on;
}
+
+/* Blit a YUV bitmap directly to the LCD */
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ (void)src;
+ (void)src_x;
+ (void)src_y;
+ (void)stride;
+ (void)x;
+ (void)y;
+ (void)width;
+ (void)height;
+}
diff --git a/firmware/target/arm/rk27xx/ihifi2/lcd-ihifi770.c b/firmware/target/arm/rk27xx/ihifi2/lcd-ihifi770.c
index cef0186de5..23505d9fa0 100644
--- a/firmware/target/arm/rk27xx/ihifi2/lcd-ihifi770.c
+++ b/firmware/target/arm/rk27xx/ihifi2/lcd-ihifi770.c
@@ -268,3 +268,18 @@ bool lcd_active()
{
return display_on;
}
+
+/* Blit a YUV bitmap directly to the LCD */
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ (void)src;
+ (void)src_x;
+ (void)src_y;
+ (void)stride;
+ (void)x;
+ (void)y;
+ (void)width;
+ (void)height;
+}
diff --git a/firmware/target/arm/rk27xx/ihifi2/lcd-ihifi770c.c b/firmware/target/arm/rk27xx/ihifi2/lcd-ihifi770c.c
index e2436e9b90..311b8057cb 100644
--- a/firmware/target/arm/rk27xx/ihifi2/lcd-ihifi770c.c
+++ b/firmware/target/arm/rk27xx/ihifi2/lcd-ihifi770c.c
@@ -231,3 +231,18 @@ bool lcd_active()
{
return display_on;
}
+
+/* Blit a YUV bitmap directly to the LCD */
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ (void)src;
+ (void)src_x;
+ (void)src_y;
+ (void)stride;
+ (void)x;
+ (void)y;
+ (void)width;
+ (void)height;
+}
diff --git a/firmware/target/arm/rk27xx/ihifi2/lcd-ihifi800.c b/firmware/target/arm/rk27xx/ihifi2/lcd-ihifi800.c
index 8520715650..821b52dcb6 100644
--- a/firmware/target/arm/rk27xx/ihifi2/lcd-ihifi800.c
+++ b/firmware/target/arm/rk27xx/ihifi2/lcd-ihifi800.c
@@ -211,3 +211,18 @@ bool lcd_active()
{
return display_on;
}
+
+/* Blit a YUV bitmap directly to the LCD */
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ (void)src;
+ (void)src_x;
+ (void)src_y;
+ (void)stride;
+ (void)x;
+ (void)y;
+ (void)width;
+ (void)height;
+}
diff --git a/firmware/target/arm/rk27xx/lcd-hifiman.c b/firmware/target/arm/rk27xx/lcd-hifiman.c
index 95486b02e4..bde1d3546f 100644
--- a/firmware/target/arm/rk27xx/lcd-hifiman.c
+++ b/firmware/target/arm/rk27xx/lcd-hifiman.c
@@ -350,3 +350,22 @@ bool lcd_active()
{
return display_on;
}
+
+/* Blit a YUV bitmap directly to the LCD
+ * provided by generic fallback in lcd-16bit-common.c
+ */
+#if 0
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ (void)src;
+ (void)src_x;
+ (void)src_y;
+ (void)stride;
+ (void)x;
+ (void)y;
+ (void)width;
+ (void)height;
+}
+#endif
diff --git a/firmware/target/arm/rk27xx/ma/lcd-ma.c b/firmware/target/arm/rk27xx/ma/lcd-ma.c
index 8dfe874b44..fa3ccc5aa0 100644
--- a/firmware/target/arm/rk27xx/ma/lcd-ma.c
+++ b/firmware/target/arm/rk27xx/ma/lcd-ma.c
@@ -253,3 +253,18 @@ bool lcd_active()
{
return display_on;
}
+
+/* Blit a YUV bitmap directly to the LCD */
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ (void)src;
+ (void)src_x;
+ (void)src_y;
+ (void)stride;
+ (void)x;
+ (void)y;
+ (void)width;
+ (void)height;
+}
diff --git a/firmware/target/arm/rk27xx/rk27generic/lcd-rk27generic.c b/firmware/target/arm/rk27xx/rk27generic/lcd-rk27generic.c
index e5cefd282d..b40f2860d7 100644
--- a/firmware/target/arm/rk27xx/rk27generic/lcd-rk27generic.c
+++ b/firmware/target/arm/rk27xx/rk27generic/lcd-rk27generic.c
@@ -178,3 +178,22 @@ void lcd_set_gram_area(int x_start, int y_start,
lcd_cmd(GRAM_WRITE);
LCDC_CTRL &= ~RGB24B;
}
+
+/* Blit a YUV bitmap directly to the LCD
+ * provided by generic fallback in lcd-16bit-common.c
+ */
+#if 0
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ (void)src;
+ (void)src_x;
+ (void)src_y;
+ (void)stride;
+ (void)x;
+ (void)y;
+ (void)width;
+ (void)height;
+}
+#endif
diff --git a/firmware/target/arm/s5l8700/ipodnano2g/lcd-asm-nano2g.S b/firmware/target/arm/s5l8700/ipodnano2g/lcd-asm-nano2g.S
index 50104a73e8..af338eef16 100644
--- a/firmware/target/arm/s5l8700/ipodnano2g/lcd-asm-nano2g.S
+++ b/firmware/target/arm/s5l8700/ipodnano2g/lcd-asm-nano2g.S
@@ -65,3 +65,231 @@ lcd_write_line: /* r2 = LCD_BASE */
bgt .loop
ldmpc regs=r4-r6
+
+/****************************************************************************
+ * extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ * const unsigned LCD_BASE,
+ * int width,
+ * int stride);
+ *
+ * Conversion from Motion JPEG and MPEG Y'PbPr to RGB is:
+ * |R| |1.164 0.000 1.596| |Y' - 16|
+ * |G| = |1.164 -0.391 -0.813| |Pb - 128|
+ * |B| |1.164 2.018 0.000| |Pr - 128|
+ *
+ * Scaled, normalized, rounded and tweaked to yield RGB 565:
+ * |R| |74 0 101| |Y' - 16| >> 9
+ * |G| = |74 -24 -51| |Cb - 128| >> 8
+ * |B| |74 128 0| |Cr - 128| >> 9
+ *
+ * Converts two lines from YUV to RGB565 and writes to LCD at once. First loop
+ * loads Cb/Cr, calculates the chroma offset and saves them to buffer. Within
+ * the second loop these chroma offset are reloaded from buffer. Within each
+ * loop two pixels are calculated and written to LCD.
+ */
+ .align 2
+ .global lcd_write_yuv420_lines
+ .type lcd_write_yuv420_lines, %function
+lcd_write_yuv420_lines:
+ /* r0 = src = yuv_src */
+ /* r1 = dst = LCD_BASE */
+ /* r2 = width */
+ /* r3 = stride */
+ stmfd sp!, { r4-r10, lr } /* save non-scratch */
+ ldmia r0, { r9, r10, r12 } /* r9 = yuv_src[0] = Y'_p */
+ /* r10 = yuv_src[1] = Cb_p */
+ /* r12 = yuv_src[2] = Cr_p */
+ add r3, r9, r3 /* r3 = &ysrc[stride] */
+ add r4, r2, r2, asr #1 /* chroma buffer lenght = width/2 *3 */
+ mov r4, r4, asl #2 /* use words for str/ldm possibility */
+ add r4, r4, #19 /* plus room for 4 additional words, */
+ bic r4, r4, #3 /* rounded up to multiples of 4 byte */
+ sub sp, sp, r4 /* and allocate on stack */
+ stmia sp, {r1-r4} /* LCD_BASE, width, &ysrc[stride], stack_alloc */
+
+ mov r7, r2 /* r7 = loop count */
+ add r8, sp, #16 /* chroma buffer */
+ add lr, r1, #0x40 /* LCD data port = LCD_BASE + 0x40 */
+
+ /* 1st loop start */
+10: /* loop start */
+
+ ldrb r0, [r10], #1 /* r0 = *usrc++ = *Cb_p++ */
+ ldrb r1, [r12], #1 /* r1 = *vsrc++ = *Cr_p++ */
+
+ sub r0, r0, #128 /* r0 = Cb-128 */
+ sub r1, r1, #128 /* r1 = Cr-128 */
+
+ add r2, r1, r1, asl #1 /* r2 = Cr*51 + Cb*24 */
+ add r2, r2, r2, asl #4
+ add r2, r2, r0, asl #3
+ add r2, r2, r0, asl #4
+
+ add r4, r1, r1, asl #2 /* r1 = Cr*101 */
+ add r4, r4, r1, asl #5
+ add r1, r4, r1, asl #6
+
+ add r1, r1, #256 /* r1 = rv = (r1 + 256) >> 9 */
+ mov r1, r1, asr #9
+ rsb r2, r2, #128 /* r2 = guv = (-r2 + 128) >> 8 */
+ mov r2, r2, asr #8
+ add r0, r0, #2 /* r0 = bu = (Cb*128 + 256) >> 9 */
+ mov r0, r0, asr #2
+ stmia r8!, {r0-r2} /* store r0, r1 and r2 to chroma buffer */
+
+ /* 1st loop, first pixel */
+ ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
+ sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
+ add r3, r5, r5, asl #2
+ add r5, r3, r5, asl #5
+
+ add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
+ add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
+ add r4, r0, r5, asr #8 /* r4 = b = (Y >> 9) + bu */
+
+ orr r5, r6, r4 /* check if clamping is needed... */
+ orr r5, r5, r3, asr #1 /* ...at all */
+ cmp r5, #31
+ bls 15f /* -> no clamp */
+ cmp r6, #31 /* clamp r */
+ mvnhi r6, r6, asr #31
+ andhi r6, r6, #31
+ cmp r3, #63 /* clamp g */
+ mvnhi r3, r3, asr #31
+ andhi r3, r3, #63
+ cmp r4, #31 /* clamp b */
+ mvnhi r4, r4, asr #31
+ andhi r4, r4, #31
+15: /* no clamp */
+
+ /* calculate pixel_1 and save to r4 for later pixel packing */
+ orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */
+ orr r4, r4, r6, lsl #11 /* r4 = pixel_1 */
+
+ /* 1st loop, second pixel */
+ ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
+ sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
+ add r3, r5, r5, asl #2
+ add r5, r3, r5, asl #5
+
+ add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
+ add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
+ add r5, r0, r5, asr #8 /* r5 = b = (Y >> 9) + bu */
+
+ orr r0, r6, r5 /* check if clamping is needed... */
+ orr r0, r0, r3, asr #1 /* ...at all */
+ cmp r0, #31
+ bls 15f /* -> no clamp */
+ cmp r6, #31 /* clamp r */
+ mvnhi r6, r6, asr #31
+ andhi r6, r6, #31
+ cmp r3, #63 /* clamp g */
+ mvnhi r3, r3, asr #31
+ andhi r3, r3, #63
+ cmp r5, #31 /* clamp b */
+ mvnhi r5, r5, asr #31
+ andhi r5, r5, #31
+15: /* no clamp */
+
+ /* calculate pixel_2 and pack with pixel_1 before writing */
+ orr r5, r5, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
+ orr r5, r5, r6, lsl #11 /* r5 = pixel_2 */
+#ifdef FORCE_FIFO_WAIT
+ /* wait for FIFO half full */
+.fifo_wait1:
+ ldr r3, [lr, #-0x24] /* while (LCD_STATUS & 0x08); */
+ tst r3, #0x8
+ bgt .fifo_wait1
+#endif
+ stmia lr, {r4,r5} /* write pixel_1 and pixel_2 */
+
+ subs r7, r7, #2 /* check for loop end */
+ bgt 10b /* back to beginning */
+ /* 1st loop end */
+
+ /* Reload several registers for pointer rewinding for next loop */
+ add r8, sp, #16 /* chroma buffer */
+ ldmia sp, { r1, r7, r9} /* r1 = LCD_BASE */
+ /* r7 = loop count */
+ /* r9 = &ysrc[stride] */
+
+ /* 2nd loop start */
+20: /* loop start */
+ /* restore r0 (bu), r1 (rv) and r2 (guv) from chroma buffer */
+ ldmia r8!, {r0-r2}
+
+ /* 2nd loop, first pixel */
+ ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
+ sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
+ add r3, r5, r5, asl #2
+ add r5, r3, r5, asl #5
+
+ add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
+ add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
+ add r4, r0, r5, asr #8 /* r4 = b = (Y >> 9) + bu */
+
+ orr r5, r6, r4 /* check if clamping is needed... */
+ orr r5, r5, r3, asr #1 /* ...at all */
+ cmp r5, #31
+ bls 15f /* -> no clamp */
+ cmp r6, #31 /* clamp r */
+ mvnhi r6, r6, asr #31
+ andhi r6, r6, #31
+ cmp r3, #63 /* clamp g */
+ mvnhi r3, r3, asr #31
+ andhi r3, r3, #63
+ cmp r4, #31 /* clamp b */
+ mvnhi r4, r4, asr #31
+ andhi r4, r4, #31
+15: /* no clamp */
+ /* calculate pixel_1 and save to r4 for later pixel packing */
+ orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */
+ orr r4, r4, r6, lsl #11 /* r4 = pixel_1 */
+
+ /* 2nd loop, second pixel */
+ ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
+ sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
+ add r3, r5, r5, asl #2
+ add r5, r3, r5, asl #5
+
+ add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
+ add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
+ add r5, r0, r5, asr #8 /* r5 = b = (Y >> 9) + bu */
+
+ orr r0, r6, r5 /* check if clamping is needed... */
+ orr r0, r0, r3, asr #1 /* ...at all */
+ cmp r0, #31
+ bls 15f /* -> no clamp */
+ cmp r6, #31 /* clamp r */
+ mvnhi r6, r6, asr #31
+ andhi r6, r6, #31
+ cmp r3, #63 /* clamp g */
+ mvnhi r3, r3, asr #31
+ andhi r3, r3, #63
+ cmp r5, #31 /* clamp b */
+ mvnhi r5, r5, asr #31
+ andhi r5, r5, #31
+15: /* no clamp */
+
+ /* calculate pixel_2 and pack with pixel_1 before writing */
+ orr r5, r5, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
+ orr r5, r5, r6, lsl #11 /* r5 = pixel_2 */
+#ifdef FORCE_FIFO_WAIT
+ /* wait for FIFO half full */
+.fifo_wait2:
+ ldr r3, [lr, #-0x24] /* while (LCD_STATUS & 0x08); */
+ tst r3, #0x8
+ bgt .fifo_wait2
+#endif
+ stmia lr, {r4,r5} /* write pixel_1 and pixel_2 */
+
+ subs r7, r7, #2 /* check for loop end */
+ bgt 20b /* back to beginning */
+ /* 2nd loop end */
+
+ ldr r3, [sp, #12]
+ add sp, sp, r3 /* deallocate buffer */
+ ldmpc regs=r4-r10 /* restore registers */
+
+ .ltorg
+ .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
diff --git a/firmware/target/arm/s5l8700/ipodnano2g/lcd-nano2g.c b/firmware/target/arm/s5l8700/ipodnano2g/lcd-nano2g.c
index 2d630886c9..13e5c5c1d4 100644
--- a/firmware/target/arm/s5l8700/ipodnano2g/lcd-nano2g.c
+++ b/firmware/target/arm/s5l8700/ipodnano2g/lcd-nano2g.c
@@ -406,3 +406,36 @@ void lcd_update_rect(int x, int y, int width, int height)
} while (--height > 0 );
}
}
+
+/* Line write helper function for lcd_yuv_blit. Writes two lines of yuv420. */
+extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ const unsigned int lcd_baseadress,
+ int width,
+ int stride);
+
+/* Blit a YUV bitmap directly to the LCD */
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ unsigned int z;
+ unsigned char const * yuv_src[3];
+
+ width = (width + 1) & ~1; /* ensure width is even */
+
+ lcd_setup_drawing_region(x, y, width, height);
+
+ z = stride * src_y;
+ yuv_src[0] = src[0] + z + src_x;
+ yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1);
+ yuv_src[2] = src[2] + (yuv_src[1] - src[1]);
+
+ height >>= 1;
+
+ do {
+ lcd_write_yuv420_lines(yuv_src, LCD_BASE, width, stride);
+ yuv_src[0] += stride << 1;
+ yuv_src[1] += stride >> 1; /* Skip down one chroma line */
+ yuv_src[2] += stride >> 1;
+ } while (--height > 0);
+}
diff --git a/firmware/target/arm/s5l8700/meizu-m3/lcd-m3.c b/firmware/target/arm/s5l8700/meizu-m3/lcd-m3.c
index ced8925999..5ed6c752b7 100644
--- a/firmware/target/arm/s5l8700/meizu-m3/lcd-m3.c
+++ b/firmware/target/arm/s5l8700/meizu-m3/lcd-m3.c
@@ -311,3 +311,11 @@ void lcd_update_rect(int x, int y, int width, int height)
{
lcd_update();
}
+
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ /* stub */
+}
+
diff --git a/firmware/target/arm/s5l8700/meizu-m6sp/lcd-m6sp.c b/firmware/target/arm/s5l8700/meizu-m6sp/lcd-m6sp.c
index bbbfccc11d..5e722d5a87 100644
--- a/firmware/target/arm/s5l8700/meizu-m6sp/lcd-m6sp.c
+++ b/firmware/target/arm/s5l8700/meizu-m6sp/lcd-m6sp.c
@@ -476,3 +476,19 @@ void lcd_update(void)
{
lcd_update_rect(0, 0, LCD_WIDTH, LCD_HEIGHT);
}
+
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ (void)src;
+ (void)src_x;
+ (void)src_y;
+ (void)stride;
+ (void)x;
+ (void)y;
+ (void)width;
+ (void)height;
+ /* TODO: not implemented yet */
+}
+
diff --git a/firmware/target/arm/s5l8702/ipod6g/lcd-6g.c b/firmware/target/arm/s5l8702/ipod6g/lcd-6g.c
index e1406549f4..14647a5697 100644
--- a/firmware/target/arm/s5l8702/ipod6g/lcd-6g.c
+++ b/firmware/target/arm/s5l8702/ipod6g/lcd-6g.c
@@ -530,3 +530,49 @@ void lcd_update_rect(int x, int y, int width, int height)
displaylcd_dma(pixels);
}
+
+/* Line write helper function for lcd_yuv_blit. Writes two lines of yuv420. */
+extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ uint16_t* outbuf,
+ int width,
+ int stride);
+
+/* Blit a YUV bitmap directly to the LCD */
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height) ICODE_ATTR;
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ unsigned int z;
+ unsigned char const * yuv_src[3];
+
+#ifdef HAVE_LCD_SLEEP
+ if (!lcd_active()) return;
+#endif
+
+ width = (width + 1) & ~1; /* ensure width is even */
+
+ int pixels = width * height;
+ uint16_t* out = lcd_dblbuf[0];
+
+ z = stride * src_y;
+ yuv_src[0] = src[0] + z + src_x;
+ yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1);
+ yuv_src[2] = src[2] + (yuv_src[1] - src[1]);
+
+ displaylcd_setup(x, y, width, height);
+
+ height >>= 1;
+
+ do {
+ lcd_write_yuv420_lines(yuv_src, out, width, stride);
+ yuv_src[0] += stride << 1;
+ yuv_src[1] += stride >> 1; /* Skip down one chroma line */
+ yuv_src[2] += stride >> 1;
+ out += width << 1;
+ } while (--height);
+
+ displaylcd_dma(pixels);
+}
diff --git a/firmware/target/arm/s5l8702/ipod6g/lcd-asm-6g.S b/firmware/target/arm/s5l8702/ipod6g/lcd-asm-6g.S
new file mode 100644
index 0000000000..1ed7c4e189
--- /dev/null
+++ b/firmware/target/arm/s5l8702/ipod6g/lcd-asm-6g.S
@@ -0,0 +1,1013 @@
+/***************************************************************************
+ * __________ __ ___.
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id: lcd-as-video.S 26756 2010-06-11 04:41:36Z funman $
+ *
+ * Copyright (C) 2010 by Andree Buschmann
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+/* Version history:
+ *
+ * SVN:
+ * - initial SVN version.
+ *
+ * ARMv4:
+ * - use all available registers to calculate four pixels within each
+ * loop iteration.
+ * - avoid LDR interlocks.
+ *
+ * ARMv5TE:
+ * - use ARMv5TE+ 1-cycle multiply-accumulate instructions.
+ *
+ * ARMv5TE_WST:
+ * - use data tables (256 bytes) for RGB565 saturation.
+ *
+ * All versions are based on current SVN algorithm (round->scale->add)
+ * using the same coefficients, so output results are identical.
+ *
+ * TODO?: SVN coefficients are a very nice approximation for operations
+ * with shift+add instructions. When 16x16+32 MLA instructions are used,
+ * NBR and COEF_N could probably be adjusted to slighly increase accuracy.
+ */
+#define VERSION_SVN 0
+#define VERSION_ARMV4 1
+#define VERSION_ARMV5TE 2
+#define VERSION_ARMV5TE_WST 3
+
+#define YUV2RGB_VERSION VERSION_ARMV5TE_WST
+
+
+#define ASM
+#include "config.h"
+#include "cpu.h"
+
+#if (YUV2RGB_VERSION == VERSION_SVN)
+ .section .icode, "ax", %progbits
+
+
+/****************************************************************************
+ * extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ * uint16_t* out,
+ * int width,
+ * int stride);
+ *
+ * Conversion from Motion JPEG and MPEG Y'PbPr to RGB is:
+ * |R| |1.164 0.000 1.596| |Y' - 16|
+ * |G| = |1.164 -0.391 -0.813| |Pb - 128|
+ * |B| |1.164 2.018 0.000| |Pr - 128|
+ *
+ * Scaled, normalized, rounded and tweaked to yield RGB 565:
+ * |R| |74 0 101| |Y' - 16| >> 9
+ * |G| = |74 -24 -51| |Cb - 128| >> 8
+ * |B| |74 128 0| |Cr - 128| >> 9
+ *
+ * Converts two lines from YUV to RGB565 and writes to LCD at once. First loop
+ * loads Cb/Cr, calculates the chroma offset and saves them to buffer. Within
+ * the second loop these chroma offset are reloaded from buffer. Within each
+ * loop two pixels are calculated and written to LCD.
+ */
+ .align 2
+ .global lcd_write_yuv420_lines
+ .type lcd_write_yuv420_lines, %function
+lcd_write_yuv420_lines:
+ /* r0 = src = yuv_src */
+ /* r1 = dst = out */
+ /* r2 = width */
+ /* r3 = stride */
+ stmfd sp!, { r4-r10, lr } /* save non-scratch */
+ ldmia r0, { r9, r10, r12 } /* r9 = yuv_src[0] = Y'_p */
+ /* r10 = yuv_src[1] = Cb_p */
+ /* r12 = yuv_src[2] = Cr_p */
+ add r3, r9, r3 /* r3 = &ysrc[stride] */
+ add r4, r2, r2, asr #1 /* chroma buffer length = width/2 *3 */
+ mov r4, r4, asl #2 /* use words for str/ldm possibility */
+ add r4, r4, #15 /* plus room for 3 additional words, */
+ bic r4, r4, #3 /* rounded up to multiples of 4 byte */
+ sub sp, sp, r4 /* and allocate on stack */
+ stmia sp, {r2-r4} /* width, &ysrc[stride], stack_alloc */
+
+ mov r7, r2 /* r7 = loop count */
+ add r8, sp, #12 /* chroma buffer */
+ mov lr, r1 /* RGB565 data destination buffer */
+
+ /* 1st loop start */
+10: /* loop start */
+
+ ldrb r0, [r10], #1 /* r0 = *usrc++ = *Cb_p++ */
+ ldrb r1, [r12], #1 /* r1 = *vsrc++ = *Cr_p++ */
+
+ sub r0, r0, #128 /* r0 = Cb-128 */
+ sub r1, r1, #128 /* r1 = Cr-128 */
+
+ add r2, r1, r1, asl #1 /* r2 = Cr*51 + Cb*24 */
+ add r2, r2, r2, asl #4
+ add r2, r2, r0, asl #3
+ add r2, r2, r0, asl #4
+
+ add r4, r1, r1, asl #2 /* r1 = Cr*101 */
+ add r4, r4, r1, asl #5
+ add r1, r4, r1, asl #6
+
+ add r1, r1, #256 /* r1 = rv = (r1 + 256) >> 9 */
+ mov r1, r1, asr #9
+ rsb r2, r2, #128 /* r2 = guv = (-r2 + 128) >> 8 */
+ mov r2, r2, asr #8
+ add r0, r0, #2 /* r0 = bu = (Cb*128 + 256) >> 9 */
+ mov r0, r0, asr #2
+ stmia r8!, {r0-r2} /* store r0, r1 and r2 to chroma buffer */
+
+ /* 1st loop, first pixel */
+ ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
+ sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
+ add r3, r5, r5, asl #2
+ add r5, r3, r5, asl #5
+
+ add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
+ add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
+ add r4, r0, r5, asr #8 /* r4 = b = (Y >> 9) + bu */
+
+ orr r5, r6, r4 /* check if clamping is needed... */
+ orr r5, r5, r3, asr #1 /* ...at all */
+ cmp r5, #31
+ bls 15f /* -> no clamp */
+ cmp r6, #31 /* clamp r */
+ mvnhi r6, r6, asr #31
+ andhi r6, r6, #31
+ cmp r3, #63 /* clamp g */
+ mvnhi r3, r3, asr #31
+ andhi r3, r3, #63
+ cmp r4, #31 /* clamp b */
+ mvnhi r4, r4, asr #31
+ andhi r4, r4, #31
+15: /* no clamp */
+
+ /* calculate pixel_1 and save to r4 for later pixel packing */
+ orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */
+ orr r4, r4, r6, lsl #11 /* r4 = pixel_1 */
+
+ /* 1st loop, second pixel */
+ ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
+ sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
+ add r3, r5, r5, asl #2
+ add r5, r3, r5, asl #5
+
+ add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
+ add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
+ add r5, r0, r5, asr #8 /* r5 = b = (Y >> 9) + bu */
+
+ orr r0, r6, r5 /* check if clamping is needed... */
+ orr r0, r0, r3, asr #1 /* ...at all */
+ cmp r0, #31
+ bls 15f /* -> no clamp */
+ cmp r6, #31 /* clamp r */
+ mvnhi r6, r6, asr #31
+ andhi r6, r6, #31
+ cmp r3, #63 /* clamp g */
+ mvnhi r3, r3, asr #31
+ andhi r3, r3, #63
+ cmp r5, #31 /* clamp b */
+ mvnhi r5, r5, asr #31
+ andhi r5, r5, #31
+15: /* no clamp */
+
+ /* calculate pixel_2 and pack with pixel_1 before writing */
+ orr r5, r5, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
+ orr r5, r5, r6, lsl #11 /* r5 = pixel_2 */
+ orr r4, r4, r5, lsl #16
+ str r4, [lr], #4 /* write pixel_1 and pixel_2 */
+
+ subs r7, r7, #2 /* check for loop end */
+ bgt 10b /* back to beginning */
+ /* 1st loop end */
+
+ /* Reload several registers for pointer rewinding for next loop */
+ add r8, sp, #12 /* chroma buffer */
+ ldmia sp, {r7, r9} /* r7 = loop count */
+ /* r9 = &ysrc[stride] */
+
+ /* 2nd loop start */
+20: /* loop start */
+ /* restore r0 (bu), r1 (rv) and r2 (guv) from chroma buffer */
+ ldmia r8!, {r0-r2}
+
+ /* 2nd loop, first pixel */
+ ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
+ sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
+ add r3, r5, r5, asl #2
+ add r5, r3, r5, asl #5
+
+ add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
+ add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
+ add r4, r0, r5, asr #8 /* r4 = b = (Y >> 9) + bu */
+
+ orr r5, r6, r4 /* check if clamping is needed... */
+ orr r5, r5, r3, asr #1 /* ...at all */
+ cmp r5, #31
+ bls 15f /* -> no clamp */
+ cmp r6, #31 /* clamp r */
+ mvnhi r6, r6, asr #31
+ andhi r6, r6, #31
+ cmp r3, #63 /* clamp g */
+ mvnhi r3, r3, asr #31
+ andhi r3, r3, #63
+ cmp r4, #31 /* clamp b */
+ mvnhi r4, r4, asr #31
+ andhi r4, r4, #31
+15: /* no clamp */
+ /* calculate pixel_1 and save to r4 for later pixel packing */
+ orr r4, r4, r3, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */
+ orr r4, r4, r6, lsl #11 /* r4 = pixel_1 */
+
+ /* 2nd loop, second pixel */
+ ldrb r5, [r9], #1 /* r5 = *ysrc++ = *Y'_p++ */
+ sub r5, r5, #16 /* r5 = (Y'-16) * 74 */
+ add r3, r5, r5, asl #2
+ add r5, r3, r5, asl #5
+
+ add r6, r1, r5, asr #8 /* r6 = r = (Y >> 9) + rv */
+ add r3, r2, r5, asr #7 /* r3 = g = (Y >> 8) + guv */
+ add r5, r0, r5, asr #8 /* r5 = b = (Y >> 9) + bu */
+
+ orr r0, r6, r5 /* check if clamping is needed... */
+ orr r0, r0, r3, asr #1 /* ...at all */
+ cmp r0, #31
+ bls 15f /* -> no clamp */
+ cmp r6, #31 /* clamp r */
+ mvnhi r6, r6, asr #31
+ andhi r6, r6, #31
+ cmp r3, #63 /* clamp g */
+ mvnhi r3, r3, asr #31
+ andhi r3, r3, #63
+ cmp r5, #31 /* clamp b */
+ mvnhi r5, r5, asr #31
+ andhi r5, r5, #31
+15: /* no clamp */
+
+ /* calculate pixel_2 and pack with pixel_1 before writing */
+ orr r5, r5, r3, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
+ orr r5, r5, r6, lsl #11 /* r5 = pixel_2 */
+ orr r4, r4, r5, lsl #16
+ str r4, [lr], #4 /* write pixel_1 and pixel_2 */
+
+ subs r7, r7, #2 /* check for loop end */
+ bgt 20b /* back to beginning */
+ /* 2nd loop end */
+
+ ldr r3, [sp, #8]
+ add sp, sp, r3 /* deallocate buffer */
+ ldmpc regs=r4-r10 /* restore registers */
+
+ .ltorg
+ .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
+
+
+#elif (YUV2RGB_VERSION == VERSION_ARMV4)
+/****************************************************************************
+ * extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ * uint16_t* out,
+ * int width,
+ * int stride);
+ *
+ * Conversion from Motion JPEG and MPEG Y'PbPr to RGB is:
+ * |R| |1.164 0.000 1.596| |Y' - 16|
+ * |G| = |1.164 -0.391 -0.813| |Pb - 128|
+ * |B| |1.164 2.018 0.000| |Pr - 128|
+ *
+ * Scaled, normalized, rounded and tweaked to yield RGB 565:
+ * |R| |74 0 101| |Y' - 16| >> 9
+ * |G| = |74 -24 -51| |Cb - 128| >> 8
+ * |B| |74 128 0| |Cr - 128| >> 9
+ *
+ * Converts two lines from YUV420 to RGB565, within each iteration four
+ * pixels (2 per line) are calculated and written to destination buffer.
+ */
+ .section .icode, "ax", %progbits
+
+ .align 2
+ .global lcd_write_yuv420_lines
+ .type lcd_write_yuv420_lines, %function
+
+lcd_write_yuv420_lines:
+ /* r0 = src = yuv_src */
+ /* r1 = dst = out */
+ /* r2 = width */
+ /* r3 = stride */
+ stmfd sp!, {r4-r11,lr} /* save non-scratch */
+ ldmia r0, {r10-r12} /* r10 = yuv_src[0] = Y'_p */
+ /* r11 = yuv_src[1] = Cb_p */
+ /* r12 = yuv_src[2] = Cr_p */
+ mov r9, r2, lsl #1 /* r9 = 2*width (loop count) */
+ str r9, [sp, #-4]! /* [--sp] = 2*width (constant) */
+ add r8, r10, r3 /* r8 = Y'_p + stride = Y'stride_p */
+ mov lr, r1 /* RGB565 data destination buffer */
+
+10: /* loop start */
+ ldrb r0, [r11], #1 /* r0 = *Cb_p++ */
+ ldrb r1, [r12], #1 /* r1 = *Cr_p++ */
+ ldrb r3, [r8], #1 /* r3 = Y'3 */
+ ldrb r4, [r8], #1 /* r4 = Y'4 */
+
+ sub r0, r0, #128 /* r0 = Cb-128 */
+ sub r1, r1, #128 /* r1 = Cr-128 */
+
+ add r2, r1, r1, asl #1 /* r2 = Cr*51 + Cb*24 */
+ add r2, r2, r2, asl #4
+ add r2, r2, r0, asl #3
+ add r2, r2, r0, asl #4
+
+ add r5, r1, r1, asl #2 /* r1 = Cr*101 */
+ add r5, r5, r1, asl #5
+ add r1, r5, r1, asl #6
+
+ add r1, r1, #256 /* r1 = rv = (r1 + 256) >> 9 */
+ mov r1, r1, asr #9
+ rsb r2, r2, #128 /* r2 = guv = (-r2 + 128) >> 8 */
+ mov r2, r2, asr #8
+ add r0, r0, #2 /* r0 = bu = (Cb*128 + 256) >> 9 */
+ mov r0, r0, asr #2
+
+ /* pixel_3 */
+ sub r3, r3, #16 /* r3 = (Y'-16) * (74/2) */
+ add r7, r3, r3, asl #2
+ add r3, r7, r3, asl #5
+
+ add r6, r1, r3, asr #8 /* r6 = r = (Y >> 9) + rv */
+ add r7, r2, r3, asr #7 /* r7 = g = (Y >> 8) + guv */
+ add r5, r0, r3, asr #8 /* r5 = b = (Y >> 9) + bu */
+
+ orr r3, r6, r5 /* check if clamping is needed... */
+ orr r3, r3, r7, asr #1 /* ...at all */
+ cmp r3, #31
+ bls 15f /* no clamp */
+ cmp r6, #31 /* clamp r */
+ mvnhi r6, r6, asr #31
+ andhi r6, r6, #31
+ cmp r7, #63 /* clamp g */
+ mvnhi r7, r7, asr #31
+ andhi r7, r7, #63
+ cmp r5, #31 /* clamp b */
+ mvnhi r5, r5, asr #31
+ andhi r5, r5, #31
+15: /* no clamp */
+
+ /* calculate pixel_3 and save to r5 for later pixel packing */
+ orr r5, r5, r7, lsl #5 /* pixel_3 = r<<11 | g<<5 | b */
+ orr r5, r5, r6, lsl #11 /* r5 = pixel_3 */
+
+ /* pixel_4 */
+ sub r4, r4, #16 /* r4 = (Y'-16) * (74/2) */
+ add r7, r4, r4, asl #2
+ add r4, r7, r4, asl #5
+
+ add r6, r1, r4, asr #8 /* r6 = r = (Y >> 9) + rv */
+ add r7, r2, r4, asr #7 /* r7 = g = (Y >> 8) + guv */
+ add r4, r0, r4, asr #8 /* r4 = b = (Y >> 9) + bu */
+
+ orr r3, r6, r4 /* check if clamping is needed... */
+ orr r3, r3, r7, asr #1 /* ...at all */
+ cmp r3, #31
+ bls 15f /* no clamp */
+ cmp r6, #31 /* clamp r */
+ mvnhi r6, r6, asr #31
+ andhi r6, r6, #31
+ cmp r7, #63 /* clamp g */
+ mvnhi r7, r7, asr #31
+ andhi r7, r7, #63
+ cmp r4, #31 /* clamp b */
+ mvnhi r4, r4, asr #31
+ andhi r4, r4, #31
+15: /* no clamp */
+
+ /* calculate pixel_4 and pack with pixel_3 before writing */
+ orr r4, r4, r7, lsl #5 /* pixel_4 = r<<11 | g<<5 | b */
+ orr r4, r4, r6, lsl #11 /* r4 = pixel_4 */
+ orr r5, r5, r4, lsl #16 /* r5 = pixel_4<<16 | pixel_3 */
+
+ ldr r7, [sp] /* r7 = 2*width */
+ ldrb r3, [r10], #1 /* r3 = Y'1 */
+ ldrb r4, [r10], #1 /* r4 = Y'2 */
+
+ str r5, [lr, r7] /* write pixel_3 and pixel_4 */
+
+ /* pixel_1 */
+ sub r3, r3, #16 /* r3 = (Y'-16) * (74/2) */
+ add r7, r3, r3, asl #2
+ add r3, r7, r3, asl #5
+
+ add r6, r1, r3, asr #8 /* r6 = r = (Y >> 9) + rv */
+ add r7, r2, r3, asr #7 /* r7 = g = (Y >> 8) + guv */
+ add r5, r0, r3, asr #8 /* r5 = b = (Y >> 9) + bu */
+
+ orr r3, r6, r5 /* check if clamping is needed... */
+ orr r3, r3, r7, asr #1 /* ...at all */
+ cmp r3, #31
+ bls 15f /* no clamp */
+ cmp r6, #31 /* clamp r */
+ mvnhi r6, r6, asr #31
+ andhi r6, r6, #31
+ cmp r7, #63 /* clamp g */
+ mvnhi r7, r7, asr #31
+ andhi r7, r7, #63
+ cmp r5, #31 /* clamp b */
+ mvnhi r5, r5, asr #31
+ andhi r5, r5, #31
+15: /* no clamp */
+
+ /* calculate pixel_1 and save to r5 for later pixel packing */
+ orr r5, r5, r7, lsl #5 /* pixel_1 = r<<11 | g<<5 | b */
+ orr r5, r5, r6, lsl #11 /* r5 = pixel_1 */
+
+ /* pixel_2 */
+ sub r4, r4, #16 /* r4 = (Y'-16) * (74/2) */
+ add r7, r4, r4, asl #2
+ add r4, r7, r4, asl #5
+
+ add r6, r1, r4, asr #8 /* r6 = r = (Y >> 9) + rv */
+ add r7, r2, r4, asr #7 /* r7 = g = (Y >> 8) + guv */
+ add r4, r0, r4, asr #8 /* r4 = b = (Y >> 9) + bu */
+
+ orr r3, r6, r4 /* check if clamping is needed... */
+ orr r3, r3, r7, asr #1 /* ...at all */
+ cmp r3, #31
+ bls 15f /* no clamp */
+ cmp r6, #31 /* clamp r */
+ mvnhi r6, r6, asr #31
+ andhi r6, r6, #31
+ cmp r7, #63 /* clamp g */
+ mvnhi r7, r7, asr #31
+ andhi r7, r7, #63
+ cmp r4, #31 /* clamp b */
+ mvnhi r4, r4, asr #31
+ andhi r4, r4, #31
+15: /* no clamp */
+
+ /* calculate pixel_2 and pack with pixel_1 before writing */
+ orr r4, r4, r7, lsl #5 /* pixel_2 = r<<11 | g<<5 | b */
+ orr r4, r4, r6, lsl #11 /* r4 = pixel_2 */
+ orr r5, r5, r4, lsl #16 /* r5 = pixel_2<<16 | pixel_1 */
+
+ str r5, [lr], #4 /* write pixel_1 and pixel_2 */
+
+ subs r9, r9, #4 /* check for loop end */
+ bgt 10b /* back to beginning */
+
+ /* loop end */
+ add sp, sp, #4 /* deallocate stack */
+ ldmpc regs=r4-r11 /* restore registers */
+
+ .ltorg
+ .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
+
+
+#elif (YUV2RGB_VERSION == VERSION_ARMV5TE)
+/****************************************************************************
+ * How do I encode Y'CBCR components from R'G'B' in [0, +1]? (see ColorFAQ)
+ * |R| |0.00456621 0 0.00625893| |Y' - 16|
+ * |G| = |0.00456621 -0.00153632 -0.00318811| |Pb - 128|
+ * |B| |0.00456621 0.00791071 0 | |Pr - 128|
+ *
+ * Scaled, normalized, rounded and tweaked to yield RGB 565:
+ * |R| |74 0 101| |Y' - 16| >> 9
+ * |G| = |74 -24 -51| |Cb - 128| >> 8
+ * |B| |74 128 0| |Cr - 128| >> 9
+ */
+#define NBR 14 /* 14-bit resolution (SVN) */
+#define COEF_C0 74
+#define COEF_C1 101
+#define COEF_C2 -24
+#define COEF_C3 -51
+#define COEF_C4 128
+#define C4_IS_POW2
+
+/* constant for rounding a NBR number before down-scaling it to RS bits */
+#define ROUND(RS) (1 << (NBR - RS - 1))
+
+/* packed 16-bit coefficients */
+#define COEF_C4_C1 ((COEF_C4 << 16) | (COEF_C1 & 0xffff))
+#define COEF_2C3_2C2 ((COEF_C3 << 17) | ((COEF_C2 << 1) & 0xffff))
+/* 32-bit MLA constants */
+#define CONST_MLA_Y (-16 * COEF_C0)
+
+/****************************************************************************
+ * extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ * uint16_t* out,
+ * int width,
+ * int stride);
+ *
+ * Converts two lines from YUV420 to RGB565, within each iteration four
+ * pixels (2 per line) are calculated and written to destination buffer.
+ *
+ * - use ARMv5TE+ 1-cycle multiply+accumulator instructions.
+ */
+ .section .icode, "ax", %progbits
+
+ .align 2
+ .global lcd_write_yuv420_lines
+ .type lcd_write_yuv420_lines, %function
+
+lcd_write_yuv420_lines:
+ @ r0 = src = yuv_src
+ @ r1 = out = dst_p
+ @ r2 = width
+ @ r3 = stride
+ stmfd sp!, {r4-r11,lr} @ save non-scratch
+ ldmia r0, {r10-r12} @ r10 = yuv_src[0] = Y'_p
+ @ r11 = yuv_src[1] = Cb_p
+ @ r12 = yuv_src[2] = Cr_p
+ adr r0, const_data @ load constants
+ ldmia r0, {r5-r8} @ r5 = COEF_C4_C1
+ @ r6 = COEF_2C3_2C2
+ @ r7 = COEF_C0
+ @ r8 = CONST_MLA_Y
+ sub r4, r12, r11 @ r4 = Cr_p-Cb_p
+ mov r9, r2, asl #1 @ r9 = 2*width
+ stmfd sp!, {r4-r6,r9} @ SP -> Cr_p-Cb_p
+ @ COEF_C4_C1
+ @ COEF_2C3_2C2
+ @ 2*width
+ add r12, r10, r3 @ r12 = Y'_p + stride = Y'stride_p
+ mov lr, r1 @ RGB565 data destination buffer
+ orr r9, r7, r2, lsl #15 @ loop_count = width/2;
+ @ r9 = loop_count<<16 | COEF_C0
+ sub r9, r9, #0x10000 @ loop_count--
+
+10: @ loop_start
+
+ @ register usage:
+ @ r8 = CONST_MLA_Y
+ @ r9 = loop count<<16 | COEF_C0
+ @ r10 = Y'_p
+ @ r11 = Cb_p
+ @ r12 = Y'stride_p
+ @ lr = dst_p
+ @ free: r0-r7
+
+ ldmia sp, {r2-r4} @ r2 = Cr_p-Cb_p
+ @ r3 = COEF_C4_C1
+ @ r4 = COEF_2C3_2C2
+ mov r5, #ROUND(5) @ r5 = round constant
+
+ ldrb r6, [r12], #1 @ r6 = Y'3
+ ldrb r7, [r12], #1 @ r7 = Y'4
+
+ ldrb r1, [r11, r2] @ r1 = Cr = *Cr_p++
+ ldrb r0, [r11], #1 @ r0 = Cb = *Cb_p++
+
+ /* calculate Y3 and Y4 */
+ smlabb r6, r6, r9, r8 @ r6 = Y3 = C0*Y'3 - C0*16
+ smlabb r7, r7, r9, r8 @ r7 = Y4 = C0*Y'4 - C0*16
+
+ /* calculate rv, guv, bu */
+ sub r1, r1, #128 @ r1 = Cr" = Cr-128
+ sub r0, r0, #128 @ r0 = Cb" = Cb-128
+
+ smlabt r2, r1, r4, r5 @ r2 = guv" = Cr"*(2*C2) +
+ smlabb r2, r0, r4, r2 @ Cb"*(2*C3) + round
+ smlabb r1, r1, r3, r5 @ r1 = rv" = Cr"*C1 + round
+ #ifdef C4_IS_POW2
+ add r0, r5, r0, asl #NBR-7 @ r0 = bu" = Cb"*C4 + round
+ #else
+ smlabt r0, r0, r3, r5 @ r0 = bu" = Cb"*C4 + round
+ #endif
+
+ /* scale rv",guv",bu" */
+ mov r2, r2, asr #NBR-5 @ r2 = guv = guv" >> scale
+ mov r1, r1, asr #NBR-5 @ r1 = rv = rv" >> scale
+ mov r0, r0, asr #NBR-5 @ r0 = bu = bu" >> scale
+
+ @ register usage:
+ @ r8-r12,lr: pointers, counters
+ @ r0,r1,r2 = bu,rv,guv (rounded and scaled to RGB565)
+ @ r6,r7 = Y'3,Y'4
+ @ free: r3-r5
+
+ /* pixel_3 */
+ add r5, r1, r6, asr #NBR-5 @ r5 = r = (Y3 >> scale) + rv
+ add r4, r2, r6, asr #NBR-6 @ r4 = g = (Y3 >> scale) + guv
+ add r3, r0, r6, asr #NBR-5 @ r3 = b = (Y3 >> scale) + bu
+
+ orr r6, r5, r3 @ check if clamping is needed...
+ orr r6, r6, r4, asr #1 @ ...at all
+ cmp r6, #31
+ bls 15f @ no clamp
+ cmp r5, #31 @ clamp r
+ mvnhi r5, r5, asr #31
+ andhi r5, r5, #31
+ cmp r4, #63 @ clamp g
+ mvnhi r4, r4, asr #31
+ andhi r4, r4, #63
+ cmp r3, #31 @ clamp b
+ mvnhi r3, r3, asr #31
+ andhi r3, r3, #31
+15: @ no clamp
+
+ /* calculate pixel_3 and save to r3 for later pixel packing */
+ orr r3, r3, r4, lsl #5 @ r3 = pixel_3 = r<<11 | g<<5 | b
+ orr r3, r3, r5, lsl #11
+
+ /* pixel_4 */
+ add r5, r1, r7, asr #NBR-5 @ r5 = r = (Y4 >> scale) + rv
+ add r4, r2, r7, asr #NBR-6 @ r4 = g = (Y4 >> scale) + guv
+ add r7, r0, r7, asr #NBR-5 @ r7 = b = (Y4 >> scale) + bu
+
+ orr r6, r5, r7 @ check if clamping is needed...
+ orr r6, r6, r4, asr #1 @ ...at all
+ cmp r6, #31
+ bls 15f @ no clamp
+ cmp r5, #31 @ clamp r
+ mvnhi r5, r5, asr #31
+ andhi r5, r5, #31
+ cmp r4, #63 @ clamp g
+ mvnhi r4, r4, asr #31
+ andhi r4, r4, #63
+ cmp r7, #31 @ clamp b
+ mvnhi r7, r7, asr #31
+ andhi r7, r7, #31
+15: @ no clamp
+
+ /* calculate pixel_4 and pack with pixel_3 before writing */
+ orr r7, r7, r4, lsl #5 @ r7 = pixel_4 = r<<11 | g<<5 | b
+ orr r7, r7, r5, lsl #11
+ orr r3, r3, r7, lsl #16 @ r3 = pixel_4<<16 | pixel_3
+
+ /* avoid interlocks when writing pixel_3 and pixel_4 */
+ ldr r5, [sp, #12] @ r5 = 2*width
+
+ ldrb r6, [r10], #1 @ r6 = Y'1
+ ldrb r7, [r10], #1 @ r7 = Y'2
+
+ /* write pixel_3 and pixel_4 */
+ str r3, [lr, r5] @ [dst_p + 2*width] = r3
+
+ @ register usage:
+ @ r8-r12,lr: pointers, counters
+ @ r0,r1,r2 = bu,rv,guv (rounded and scaled to RGB565)
+ @ r6,r7 = Y'1,Y'2
+ @ free: r3-r5
+
+ /* calculate Y1 and Y2 */
+ smlabb r6, r6, r9, r8 @ r6 = Y1 = C0*Y'1 - C0*16
+ smlabb r7, r7, r9, r8 @ r7 = Y2 = C0*Y'2 - C0*16
+
+ /* pixel_1 */
+ add r5, r1, r6, asr #NBR-5 @ r5 = r = (Y1 >> scale) + rv
+ add r4, r2, r6, asr #NBR-6 @ r4 = g = (Y1 >> scale) + guv
+ add r3, r0, r6, asr #NBR-5 @ r3 = b = (Y1 >> scale) + bu
+
+ orr r6, r5, r3 @ check if clamping is needed...
+ orr r6, r6, r4, asr #1 @ ...at all
+ cmp r6, #31
+ bls 15f @ no clamp
+ cmp r5, #31 @ clamp r
+ mvnhi r5, r5, asr #31
+ andhi r5, r5, #31
+ cmp r4, #63 @ clamp g
+ mvnhi r4, r4, asr #31
+ andhi r4, r4, #63
+ cmp r3, #31 @ clamp b
+ mvnhi r3, r3, asr #31
+ andhi r3, r3, #31
+15: @ no clamp
+
+ /* calculate pixel_1 and save to r3 for later pixel packing */
+ orr r3, r3, r4, lsl #5 @ r3 = pixel_1 = r<<11 | g<<5 | b
+ orr r3, r3, r5, lsl #11
+
+ /* pixel_2 */
+ add r5, r1, r7, asr #NBR-5 @ r5 = r = (Y2 >> scale) + rv
+ add r4, r2, r7, asr #NBR-6 @ r4 = g = (Y2 >> scale) + guv
+ add r7, r0, r7, asr #NBR-5 @ r7 = b = (Y2 >> scale) + bu
+
+ orr r6, r5, r7 @ check if clamping is needed...
+ orr r6, r6, r4, asr #1 @ ...at all
+ cmp r6, #31
+ bls 15f @ no clamp
+ cmp r5, #31 @ clamp r
+ mvnhi r5, r5, asr #31
+ andhi r5, r5, #31
+ cmp r4, #63 @ clamp g
+ mvnhi r4, r4, asr #31
+ andhi r4, r4, #63
+ cmp r7, #31 @ clamp b
+ mvnhi r7, r7, asr #31
+ andhi r7, r7, #31
+15: @ no clamp
+
+ /* calculate pixel_2 and pack with pixel_1 before writing */
+ orr r7, r7, r4, lsl #5 @ r7 = pixel_2 = r<<11 | g<<5 | b
+ orr r7, r7, r5, lsl #11
+ orr r3, r3, r7, lsl #16 @ r3 = pixel_2 << 16 | pixel_1
+
+ str r3, [lr], #4 @ write pixel_1 and pixel_2
+
+ /* check for loop end */
+ subs r9, r9, #0x10000 @ loop_count--
+ bge 10b @ back to beginning
+
+ /* bye */
+ add sp, sp, #16
+ ldmpc regs=r4-r11 @ restore registers
+
+ .ltorg
+ .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
+
+/* data */
+ .align 2
+const_data:
+ .word COEF_C4_C1
+ .word COEF_2C3_2C2
+ .word COEF_C0
+ .word CONST_MLA_Y
+
+ .size const_data, .-const_data
+
+
+#else /* YUV2RGB_VERSION == VERSION_ARMV5TE_WST */
+/****************************************************************************
+ * How do I encode Y'CBCR components from R'G'B' in [0, +1]? (see ColorFAQ)
+ * |R| |0.00456621 0 0.00625893| |Y' - 16|
+ * |G| = |0.00456621 -0.00153632 -0.00318811| |Pb - 128|
+ * |B| |0.00456621 0.00791071 0 | |Pr - 128|
+ *
+ * Scaled, normalized, rounded and tweaked to yield RGB 565:
+ * |R| |74 0 101| |Y' - 16| >> 9
+ * |G| = |74 -24 -51| |Cb - 128| >> 8
+ * |B| |74 128 0| |Cr - 128| >> 9
+ */
+#define NBR 14 /* 14-bit resolution (SVN) */
+#define COEF_C0 74
+#define COEF_C1 101
+#define COEF_C2 -24
+#define COEF_C3 -51
+#define COEF_C4 128
+#define C4_IS_POW2
+
+/* packed 16-bit coefficients */
+#define COEF_C4_C1 ((COEF_C4 << 16) | (COEF_C1 & 0xffff))
+#define COEF_C3_C2 ((COEF_C3 << 16) | (COEF_C2 & 0xffff))
+
+/* constant for rounding an NBR number before down-scaling it to RS bits */
+#define ROUND(RS) (1 << (NBR - RS - 1))
+
+/* 32-bit MLA constants */
+#define CONST_MLA_Y (-16 * COEF_C0)
+#define CONST_MLA_RV ((-128 * COEF_C1) + ROUND(5))
+#define CONST_MLA_BU ((-128 * COEF_C4) + ROUND(5))
+/* trick to save the register needed for table_sat6 reference:
+ add table_sat6-table_sat5 offset (conveniently scaled) to guv MLA */
+#define CONST_MLA_GUV (-128 * (COEF_C2 + COEF_C3) + ROUND(6) + \
+ ((table_sat6 - table_sat5) << (NBR - 6)))
+
+/****************************************************************************
+ * extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ * uint16_t* out,
+ * int width,
+ * int stride);
+ *
+ * Converts two lines from YUV420 to RGB565, within each iteration four
+ * pixels (2 per line) are calculated and written to destination buffer.
+ *
+ * - use ARMv5TE+ 1-cycle multiply+accumulator instructions.
+ * - use data tables (256 bytes) for RGB565 saturation.
+ */
+ .section .icode, "ax", %progbits
+
+ .align 2
+ .global lcd_write_yuv420_lines
+ .type lcd_write_yuv420_lines, %function
+
+lcd_write_yuv420_lines:
+ @ r0 = src = yuv_src
+ @ r1 = out = dst1_p
+ @ r2 = width
+ @ r3 = stride
+ stmfd sp!, {r4-r11,lr} @ save non-scratch
+ ldmia r0, {r10-r12} @ r10 = yuv_src[0] = Y'_p
+ @ r11 = yuv_src[1] = Cb_p
+ @ r12 = yuv_src[2] = Cr_p
+ /* prepare data and fill stack */
+ adr r0, const_data @ load constants
+ ldmia r0, {r4-r9,lr} @ r4 = COEF_C0
+ @ r5 = CONST_MLA_GUV
+ @ r6 = COEF_C3_C2
+ @ r7 = CONST_MLA_BU
+ @ r8 = COEF_C4_C1
+ @ r9 = CONST_MLA_RV
+ @ lr = table_sat5
+ sub r0, r12, r11 @ r0 = Cr_p-Cb_p
+ #define STACK_SZ 28
+ stmfd sp!, {r0,r5-r9,lr} @ SP -> Cr_p-Cb_p
+ @ CONST_MLA_GUV
+ @ COEF_C3_C2
+ @ CONST_MLA_BU
+ @ COEF_C4_C1
+ @ CONST_MLA_RV
+ @ table_sat5
+ mov r8, r4, lsl #4 @
+ rsb r8, #0 @ r8 = -16*COEF_C0 = CONST_MLA_Y
+ mov lr, r1 @ RGB565 data destination buffer
+ add r9, lr, r2, asl #1 @ r9 = out + 2*width = dst2_p
+ add r12, r3, r10 @ r12 = Y'_p + stride
+ orr r7, r4, r2, lsl #15 @ loop_count = width/2;
+ @ r7 = loop_count<<16 | COEF_C0
+ sub r7, r7, #0x10000 @ loop_count--
+
+ /* align loop code to minimize occupied lines, execution
+ time per loop is optimized ~10% on ARM926EJ-S */
+ .align CACHEALIGN_BITS
+loop_start:
+
+ @ register usage:
+ @ r7 = loop count<<16 | COEF_C0
+ @ r8 = CONST_MLA_Y
+ @ r9 = dst2_p
+ @ r10 = Y'_p
+ @ r11 = Cb_p
+ @ r12 = Y'stride_p
+ @ lr = dst1_p
+ @ free: r0-r6
+
+ /* load constants from stack */
+ ldmia sp, {r1-r3,r6} @ r1 = Cr_p-Cb_p
+ @ r2 = CONST_MLA_GUV
+ @ r3 = COEF_C3_C2
+ @ r6 = CONST_MLA_BU
+
+ /* read Cr", Cb" */
+ ldrb r1, [r11, r1] @ r1 = Cr = *Cr_p++
+ ldrb r0, [r11], #1 @ r0 = Cb = *Cb_p++
+
+ /* load more constants (avoids r1 interlock) */
+ ldrd r4, [sp, #16] @ r4 = COEF_C4_C1
+ @ r5 = CONST_MLA_RV
+
+ /* calculate rv", guv", bu" */
+ smlabt r2, r1, r3, r2 @ r2 = guv" = Cr*C2 + Cb*C3
+ smlabb r2, r0, r3, r2 @ + CONST_MLA_GUV
+ smlabb r1, r1, r4, r5 @ r1 = rv" = Cr*C1 + CONST_MLA_RV
+ #ifdef C4_IS_POW2
+ add r0, r6, r0, asl #NBR-7 @ r0 = bu" = Cb*C4 + CONST_MLA_BU
+ #else
+ smlabt r0, r0, r4, r6 @ r0 = bu" = Cb*C4 + CONST_MLA_BU
+ #endif
+
+ ldr r4, [sp, #STACK_SZ-4] @ r4 = table_sat5
+
+ /* read Y'1 and Y'2 */
+ ldrb r5, [r10], #1 @ r5 = Y'1 = *Y'_p++
+ ldrb r6, [r10], #1 @ r6 = Y'2 = *Y'_p++
+
+ /* scale rv",guv",bu", adding sat5_p here saves instructions later */
+ add r1, r4, r1, asr #NBR-5 @ r1 = rv' = sat5_p + rv">>scale
+ add r2, r4, r2, asr #NBR-6 @ r2 = guv' = sat5_p + guv">>scale
+ add r0, r4, r0, asr #NBR-5 @ r0 = bu' = sat5_p + bu">>scale
+
+ @ register usage:
+ @ r7-r12,lr: pointers, counters, tables
+ @ r0,r1,r2 = (bu,rv,guv) rounded and RGB565 scaled
+ @ r5,r6 = Y'1,Y'2
+ @ free: r3,r4
+
+ /* calculate Y1 and Y2 */
+ smlabb r5, r5, r7, r8 @ r5 = Y1 = C0*Y'1 - 16*C0
+ smlabb r6, r6, r7, r8 @ r6 = Y2 = C0*Y'2 - 16*C0
+
+ /* pixel_1 */
+ ldrb r3, [r0, r5, asr #NBR-5] @ r3 = b = sat5[Y1>>scale + bu']
+ ldrb r4, [r2, r5, asr #NBR-6] @ r4 = g = sat6[Y1>>scale + guv']
+ ldrb r5, [r1, r5, asr #NBR-5] @ r5 = r = sat5[Y1>>scale + rv']
+
+ /* calculate pixel_1 */
+ orr r3, r3, r4, lsl #5 @ r3 = pixel_1 = g<<5 | b
+
+ /* pixel_2 (avoid r5 interlock) */
+ ldrb r4, [r0, r6, asr #NBR-5] @ r4 = b = sat5[Y2>>scale + bu']
+
+ /* calculate pixel_1 and save to r3 for later pixel packing */
+ orr r3, r3, r5, lsl #11 @ r3 = pixel_1 = r<<11 | g<<5 | b
+
+ /* pixel_2 */
+ ldrb r5, [r2, r6, asr #NBR-6] @ r5 = g = sat6[Y2>>scale + guv']
+ ldrb r6, [r1, r6, asr #NBR-5] @ r6 = r = sat5[Y2>>scale + rv']
+
+ /* calculate pixel_2 and pack with pixel_1 before writing */
+ orr r3, r3, r4, lsl #16 @ r3 = pixel_2<<16 | pixel_1
+ orr r3, r3, r5, lsl #21
+ orr r3, r3, r6, lsl #27
+
+ /* read Y'3 and Y'4 */
+ ldrb r5, [r12], #1 @ r5 = Y'3 = *Y'stride_p++
+ ldrb r6, [r12], #1 @ r6 = Y'4 = *Y'stride_p++
+
+ /* write pixel_1 and pixel_2 */
+ str r3, [lr], #4 @ *dst2_p++ = r3
+
+ @ register usage:
+ @ r7-r12,lr: pointers, counters, tables
+ @ r0,r1,r2 = (bu,rv,guv) rounded and RGB565 scaled
+ @ r5,r6 = Y'3,Y'4
+ @ free: r3,r4
+
+ /* calculate Y3 and Y4 */
+ smlabb r5, r5, r7, r8 @ r5 = Y3 = C0*Y'3 - 16*C0
+ smlabb r6, r6, r7, r8 @ r6 = Y4 = C0*Y'4 - 16*C0
+
+ /* pixel_3 */
+ ldrb r3, [r0, r5, asr #NBR-5] @ r3 = b = sat5[Y3>>scale + bu']
+ ldrb r4, [r2, r5, asr #NBR-6] @ r4 = g = sat6[Y3>>scale + guv']
+ ldrb r5, [r1, r5, asr #NBR-5] @ r5 = r = sat5[Y3>>scale + rv']
+
+ /* calculate pixel_3 */
+ orr r3, r3, r4, lsl #5 @ r3 = pixel_3 = g<<5 | b
+
+ /* pixel_4 (avoid r5 interlock) */
+ ldrb r4, [r0, r6, asr #NBR-5] @ r4 = b = sat5[Y4>>scale + bu']
+
+ /* calculate pixel_3 and save to r3 for later pixel packing */
+ orr r3, r3, r5, lsl #11 @ r3 = pixel_3 = r<<11 | g<<5 | b
+
+ /* pixel_4 */
+ ldrb r5, [r2, r6, asr #NBR-6] @ r5 = g = sat6[Y4>>scale + guv']
+ ldrb r6, [r1, r6, asr #NBR-5] @ r6 = r = sat5[Y4>>scale + rv']
+
+ /* calculate pixel_4 and pack with pixel_3 before writing */
+ orr r3, r3, r4, lsl #16 @ r3 = pixel_4 << 16 | pixel_3
+ orr r3, r3, r5, lsl #21
+ orr r3, r3, r6, lsl #27
+
+ /* write pixel_3 and pixel_4 */
+ str r3, [r9], #4 @ *dst1_p++ = r3
+
+ /* check for loop end */
+ subs r7, r7, #0x10000 @ loop_count--
+ bge loop_start @ back to beginning
+
+ /* bye */
+ add sp, sp, #STACK_SZ @ deallocate stack
+ ldmpc regs=r4-r11 @ restore registers
+
+ .ltorg
+ .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
+
+/* data */
+ .align 2
+const_data:
+ .word COEF_C0
+ .word CONST_MLA_GUV
+ .word COEF_C3_C2
+ .word CONST_MLA_BU
+ .word COEF_C4_C1
+ .word CONST_MLA_RV
+ .word table_sat5
+
+ .size const_data, .-const_data
+
+/* saturation tables */
+ /*.section .data*/
+ /* aligned to cache line size to minimize cache usage */
+ .align CACHEALIGN_BITS
+
+saturation_tables:
+ /* 5-bit saturation table [-36..0..+67], size=104 */
+ /* table_sat5[-36..-1] */
+ .byte 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ table_sat5:
+ /* table_sat5[0..67] */
+ .byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+ .byte 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+ .byte 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31
+ .byte 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31
+ .byte 31, 31, 31, 31
+
+ /* 6-bit saturation table [-44..0..+107], size=152 */
+ /* table_sat6[-44..-1] */
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ table_sat6:
+ /* table_sat6[0..107] */
+ .byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+ .byte 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
+ .byte 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47
+ .byte 48, 49, 50, 51, 52, 53 ,54, 55, 56, 57, 58, 59, 60, 61, 62, 63
+ .byte 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63
+ .byte 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63
+ .byte 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63
+
+ .size saturation_tables, .-saturation_tables
+#endif /* YUV2RGB_VERSION */
diff --git a/firmware/target/arm/samsung/yh820/lcd-as-yh820.S b/firmware/target/arm/samsung/yh820/lcd-as-yh820.S
new file mode 100644
index 0000000000..542ceeeb36
--- /dev/null
+++ b/firmware/target/arm/samsung/yh820/lcd-as-yh820.S
@@ -0,0 +1,550 @@
+/***************************************************************************
+ * __________ __ ___.
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id$
+ *
+ * Copyright (C) 2007 by Jens Arnold
+ * Heavily based on lcd-as-memframe.c by Michael Sevakis
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "config.h"
+#include "cpu.h"
+
+/****************************************************************************
+ * void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ * int width,
+ * int stride);
+ *
+ * |R| |1.000000 -0.000001 1.402000| |Y'|
+ * |G| = |1.000000 -0.334136 -0.714136| |Pb|
+ * |B| |1.000000 1.772000 0.000000| |Pr|
+ * Scaled, normalized, rounded and tweaked to yield RGB 565:
+ * |R| |74 0 101| |Y' - 16| >> 9
+ * |G| = |74 -24 -51| |Cb - 128| >> 8
+ * |B| |74 128 0| |Cr - 128| >> 9
+ *
+ * Write four RGB565 pixels in the following order on each loop:
+ * 1 3 + > down
+ * 2 4 \/ left
+ */
+ .section .icode, "ax", %progbits
+ .align 2
+ .global lcd_write_yuv420_lines
+ .type lcd_write_yuv420_lines, %function
+lcd_write_yuv420_lines:
+ @ r0 = yuv_src
+ @ r1 = width
+ @ r2 = stride
+ stmfd sp!, { r4-r10, lr } @ save non-scratch
+ ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
+ @ r5 = yuv_src[1] = Cb_p
+ @ r6 = yuv_src[2] = Cr_p
+ @ r0 = scratch
+ sub r2, r2, #1 @
+ mov r3, #0x70000000 @
+ orr r3, r3, #0x3000 @ r3 = LCD1_BASE
+10: @ loop line @
+ ldrb r7, [r4], #1 @ r7 = *Y'_p++;
+ ldrb r8, [r5], #1 @ r8 = *Cb_p++;
+ ldrb r9, [r6], #1 @ r9 = *Cr_p++;
+ @
+ sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right
+ add r7, r12, r7, asl #5 @ by one less when adding - same for all
+ @
+ sub r8, r8, #128 @ Cb -= 128
+ sub r9, r9, #128 @ Cr -= 128
+ @
+ add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24
+ add r10, r10, r10, asl #4 @
+ add r10, r10, r8, asl #3 @
+ add r10, r10, r8, asl #4 @
+ @
+ add lr, r9, r9, asl #2 @ r9 = Cr*101
+ add lr, lr, r9, asl #5 @
+ add r9, lr, r9, asl #6 @
+ @
+ add r8, r8, #2 @ r8 = bu = (Cb*128 + 128) >> 8
+ mov r8, r8, asr #2 @
+ add r9, r9, #256 @ r9 = rv = (r9 + 256) >> 9
+ mov r9, r9, asr #9 @
+ rsb r10, r10, #128 @ r10 = guv = (-r10 + 128) >> 8
+ mov r10, r10, asr #8 @
+ @ compute R, G, and B
+ add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
+ add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r0, lr @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r0, #31 @ clamp b
+ mvnhi r0, r0, asr #31 @
+ andhi r0, r0, #31 @
+ cmp lr, #31 @ clamp r
+ mvnhi lr, lr, asr #31 @
+ andhi lr, lr, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ mov lr, lr, lsl #3 @
+ orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3)
+ orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str lr, [r3, #0x10] @ send MSB
+1: @busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str r0, [r3, #0x10] @ send LSB
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @
+ add r7, r12, r7, asl #5 @
+ @ compute R, G, and B
+ add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
+ add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r0, lr @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r0, #31 @ clamp b
+ mvnhi r0, r0, asr #31 @
+ andhi r0, r0, #31 @
+ cmp lr, #31 @ clamp r
+ mvnhi lr, lr, asr #31 @
+ andhi lr, lr, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
+ @
+ mov lr, lr, lsl #3 @
+ orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3)
+ orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str lr, [r3, #0x10] @ send MSB
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str r0, [r3, #0x10] @ send LSB
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @
+ add r7, r12, r7, asl #5 @
+ @ compute R, G, and B
+ add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
+ add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r0, lr @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r0, #31 @ clamp b
+ mvnhi r0, r0, asr #31 @
+ andhi r0, r0, #31 @
+ cmp lr, #31 @ clamp r
+ mvnhi lr, lr, asr #31 @
+ andhi lr, lr, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ @
+ mov lr, lr, lsl #3 @
+ orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3)
+ orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str lr, [r3, #0x10] @ send MSB
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str r0, [r3, #0x10] @ send LSB
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @
+ add r7, r12, r7, asl #5 @
+ @ compute R, G, and B
+ add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
+ add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r0, lr @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r0, #31 @ clamp b
+ mvnhi r0, r0, asr #31 @
+ andhi r0, r0, #31 @
+ cmp lr, #31 @ clamp r
+ mvnhi lr, lr, asr #31 @
+ andhi lr, lr, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ mov lr, lr, lsl #3 @
+ orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3)
+ orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str lr, [r3, #0x10] @ send MSB
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str r0, [r3, #0x10] @ send LSB
+ @
+ subs r1, r1, #2 @ subtract block from width
+ bgt 10b @ loop line @
+ @
+ ldmpc regs=r4-r10 @ restore registers and return
+ .ltorg @ dump constant pool
+ .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
+
+/****************************************************************************
+ * void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
+ * int width,
+ * int stride,
+ * int x_screen,
+ * int y_screen);
+ *
+ * |R| |1.000000 -0.000001 1.402000| |Y'|
+ * |G| = |1.000000 -0.334136 -0.714136| |Pb|
+ * |B| |1.000000 1.772000 0.000000| |Pr|
+ * Red scaled at twice g & b but at same precision to place it in correct
+ * bit position after multiply and leave instruction count lower.
+ * |R| |258 0 408| |Y' - 16|
+ * |G| = |149 -49 -104| |Cb - 128|
+ * |B| |149 258 0| |Cr - 128|
+ *
+ * Write four RGB565 pixels in the following order on each loop:
+ * 1 3 + > down
+ * 2 4 \/ left
+ *
+ * Kernel pattern (raw|rotated|use order):
+ * 5 3 4 2 2 6 3 7 row0 row2 > down
+ * 1 7 0 6 | 4 0 5 1 | 2 4 6 0 3 5 7 1 col0 left
+ * 4 2 5 3 | 3 7 2 6 | 3 5 7 1 2 4 6 0 col2 \/
+ * 0 6 1 7 5 1 4 0
+ */
+ .section .icode, "ax", %progbits
+ .align 2
+ .global lcd_write_yuv420_lines_odither
+ .type lcd_write_yuv420_lines_odither, %function
+lcd_write_yuv420_lines_odither:
+ @ r0 = yuv_src
+ @ r1 = width
+ @ r2 = stride
+ @ r3 = x_screen
+ @ [sp] = y_screen
+ stmfd sp!, { r4-r11, lr } @ save non-scratch
+ ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
+ @ r5 = yuv_src[1] = Cb_p
+ @ r6 = yuv_src[2] = Cr_p
+ @
+ sub r2, r2, #1 @
+ ldr r14, [sp, #36] @ Line up pattern and kernel quadrant
+ eor r14, r14, r3 @
+ and r14, r14, #0x2 @
+ mov r14, r14, lsl #6 @ 0x00 or 0x80
+ mov r3, #0x70000000 @
+ orr r3, r3, #0x3000 @ r3 = LCD1_BASE
+10: @ loop line @
+ @
+ ldrb r7, [r4], #1 @ r7 = *Y'_p++;
+ ldrb r8, [r5], #1 @ r8 = *Cb_p++;
+ ldrb r9, [r6], #1 @ r9 = *Cr_p++;
+ @
+ eor r14, r14, #0x80 @ flip pattern quadrant
+ @
+ sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @
+ sub r8, r8, #128 @ Cb -= 128
+ sub r9, r9, #128 @ Cr -= 128
+ @
+ add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49
+ add r10, r10, r8, asl #5 @
+ add r10, r10, r9, asl #3 @
+ add r10, r10, r9, asl #5 @
+ add r10, r10, r9, asl #6 @
+ @
+ mov r8, r8, asl #1 @ r8 = bu = Cb*258
+ add r8, r8, r8, asl #7 @
+ @
+ add r9, r9, r9, asl #1 @ r9 = rv = Cr*408
+ add r9, r9, r9, asl #4 @
+ mov r9, r9, asl #3 @
+ @
+ @ compute R, G, and B
+ add r0, r8, r7 @ r0 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ @ r8 = bu, r9 = rv, r10 = guv
+ @
+ sub r12, r0, r0, lsr #5 @ r0 = 31/32*b + b/256
+ add r0, r12, r0, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
+ add r7, r12, r7, lsr #8 @
+ @
+ add r12, r14, #0x100 @
+ @
+ add r0, r0, r12 @ b = r0 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r0, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r0, asr #15 @ clamp b
+ mvnne r0, r12, lsr #15 @
+ andne r0, r0, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+
+ and r11, r11, #0xf800 @ pack pixel
+ mov r11, r11, lsr #8
+ and r7, r7, #0x7e00
+ orr r11, r11, r7, lsr #12
+ mov r7, r7, lsr#4
+ orr r0, r7, r0, lsr #10
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str r11, [r3, #0x10] @ send MSB
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str r0, [r3, #0x10] @ send LSB
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @ compute R, G, and B
+ add r0, r8, r7 @ r0 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ sub r12, r0, r0, lsr #5 @ r0 = 31/32*b' + b'/256
+ add r0, r12, r0, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
+ add r7, r12, r7, lsr #8 @
+ @
+ add r12, r14, #0x200 @
+ @
+ add r0, r0, r12 @ b = r0 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r0, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r0, asr #15 @ clamp b
+ mvnne r0, r12, lsr #15 @
+ andne r0, r0, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
+
+ and r11, r11, #0xf800 @ pack pixel
+ mov r11, r11, lsr #8
+ and r7, r7, #0x7e00
+ orr r11, r11, r7, lsr #12
+ mov r7, r7, lsr#4
+ orr r0, r7, r0, lsr #10
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str r11, [r3, #0x10] @ send MSB
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str r0, [r3, #0x10] @ send LSB
+
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @ compute R, G, and B
+ add r0, r8, r7 @ r0 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ @ r8 = bu, r9 = rv, r10 = guv
+ @
+ sub r12, r0, r0, lsr #5 @ r0 = 31/32*b' + b'/256
+ add r0, r12, r0, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
+ add r7, r12, r7, lsr #8 @
+ @
+ add r12, r14, #0x300 @
+ @
+ add r0, r0, r12 @ b = r0 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r0, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r0, asr #15 @ clamp b
+ mvnne r0, r12, lsr #15 @
+ andne r0, r0, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+
+ and r11, r11, #0xf800 @ pack pixel
+ mov r11, r11, lsr #8
+ and r7, r7, #0x7e00
+ orr r11, r11, r7, lsr #12
+ mov r7, r7, lsr#4
+ orr r0, r7, r0, lsr #10
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str r11, [r3, #0x10] @ send MSB
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str r0, [r3, #0x10] @ send LSB
+
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @ compute R, G, and B
+ add r0, r8, r7 @ r0 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ sub r12, r0, r0, lsr #5 @ r0 = 31/32*b + b/256
+ add r0, r12, r0, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
+ add r7, r12, r7, lsr #8 @
+ @
+ @ This element is zero - use r14 @
+ @
+ add r0, r0, r14 @ b = r0 + delta
+ add r11, r11, r14, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r14, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r0, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r0, asr #15 @ clamp b
+ mvnne r0, r12, lsr #15 @
+ andne r0, r0, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+
+ and r11, r11, #0xf800 @ pack pixel
+ mov r11, r11, lsr #8
+ and r7, r7, #0x7e00
+ orr r11, r11, r7, lsr #12
+ mov r7, r7, lsr#4
+ orr r0, r7, r0, lsr #10
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str r11, [r3, #0x10] @ send MSB
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str r0, [r3, #0x10] @ send LSB
+
+ subs r1, r1, #2 @ subtract block from width
+ bgt 10b @ loop line @
+ @
+ ldmpc regs=r4-r11 @ restore registers and return
+ .ltorg @ dump constant pool
+ .size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither
diff --git a/firmware/target/arm/samsung/yh820/lcd-yh820.c b/firmware/target/arm/samsung/yh820/lcd-yh820.c
index 25692eb8ac..f4b55ab917 100644
--- a/firmware/target/arm/samsung/yh820/lcd-yh820.c
+++ b/firmware/target/arm/samsung/yh820/lcd-yh820.c
@@ -30,6 +30,8 @@
#endif
/* Display status */
+static unsigned lcd_yuv_options SHAREDBSS_ATTR = 0;
+
#if defined(HAVE_LCD_ENABLE) || defined(HAVE_LCD_SLEEP)
static bool is_lcd_enabled = true;
#endif
@@ -289,6 +291,78 @@ void lcd_set_flip(bool yesno)
/*** update functions ***/
+void lcd_yuv_set_options(unsigned options)
+{
+ lcd_yuv_options = options;
+}
+
+/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. */
+extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ int width,
+ int stride);
+extern void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
+ int width,
+ int stride,
+ int x_screen, /* To align dither pattern */
+ int y_screen);
+/* Performance function to blit a YUV bitmap directly to the LCD */
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ unsigned char const * yuv_src[3];
+ off_t z;
+
+ /* Sorry, but width and height must be >= 2 or else */
+ width &= ~1;
+ height >>= 1;
+
+ z = stride*src_y;
+ yuv_src[0] = src[0] + z + src_x;
+ yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1);
+ yuv_src[2] = src[2] + (yuv_src[1] - src[1]);
+
+ lcd_send_command(R_ENTRY_MODE);
+ lcd_send_command(0x03);
+
+ lcd_send_command(R_Y_ADDR_AREA);
+ lcd_send_command(x + 4);
+ lcd_send_command(x + width - 1 + 4);
+
+ if (lcd_yuv_options & LCD_YUV_DITHER)
+ {
+ do
+ {
+ lcd_send_command(R_X_ADDR_AREA);
+ lcd_send_command(y);
+ lcd_send_command(y + 1);
+
+ lcd_write_yuv420_lines_odither(yuv_src, width, stride, x, y);
+ yuv_src[0] += stride << 1; /* Skip down two luma lines */
+ yuv_src[1] += stride >> 1; /* Skip down one chroma line */
+ yuv_src[2] += stride >> 1;
+ y += 2;
+ }
+ while (--height > 0);
+ }
+ else
+ {
+ do
+ {
+ lcd_send_command(R_X_ADDR_AREA);
+ lcd_send_command(y);
+ lcd_send_command(y + 1);
+
+ lcd_write_yuv420_lines(yuv_src, width, stride);
+ yuv_src[0] += stride << 1; /* Skip down two luma lines */
+ yuv_src[1] += stride >> 1; /* Skip down one chroma line */
+ yuv_src[2] += stride >> 1;
+ y += 2;
+ }
+ while (--height > 0);
+ }
+}
+
/* Update the display.
This must be called after all other LCD functions that change the display. */
void lcd_update(void)
diff --git a/firmware/target/arm/samsung/yh925/lcd-as-yh925.S b/firmware/target/arm/samsung/yh925/lcd-as-yh925.S
new file mode 100644
index 0000000000..8ac8b4289f
--- /dev/null
+++ b/firmware/target/arm/samsung/yh925/lcd-as-yh925.S
@@ -0,0 +1,538 @@
+/***************************************************************************
+ * __________ __ ___.
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id$
+ *
+ * Copyright (C) 2007-2008 by Michael Sevakis
+ *
+ * Samsung YH-925 LCD assembly routines (based on the H10 20GB routines)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "config.h"
+#include "cpu.h"
+
+/****************************************************************************
+ * void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ * int width,
+ * int stride);
+ *
+ * |R| |1.000000 -0.000001 1.402000| |Y'|
+ * |G| = |1.000000 -0.334136 -0.714136| |Pb|
+ * |B| |1.000000 1.772000 0.000000| |Pr|
+ * Scaled, normalized, rounded and tweaked to yield RGB 565:
+ * |R| |74 0 101| |Y' - 16| >> 9
+ * |G| = |74 -24 -51| |Cb - 128| >> 8
+ * |B| |74 128 0| |Cr - 128| >> 9
+ *
+ * Write four RGB565 pixels in the following order on each loop:
+ * 1 3 + > down
+ * 2 4 \/ left
+ */
+ .section .icode, "ax", %progbits
+ .align 2
+ .global lcd_write_yuv420_lines
+ .type lcd_write_yuv420_lines, %function
+lcd_write_yuv420_lines:
+ @ r0 = yuv_src
+ @ r1 = width
+ @ r2 = stride
+ stmfd sp!, { r4-r11, lr } @ save non-scratch
+ ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
+ @ r5 = yuv_src[1] = Cb_p
+ @ r6 = yuv_src[2] = Cr_p
+ @
+ mov r0, #0x7000000c @ r0 = &LCD2_PORT = 0x70008a0c
+ add r0, r0, #0x8a00 @
+ mov r14, #LCD2_DATA_MASK @
+ @
+ sub r2, r2, #1 @ Adjust stride because of increment
+10: @ loop line @
+ ldrb r7, [r4], #1 @ r7 = *Y'_p++;
+ ldrb r8, [r5], #1 @ r8 = *Cb_p++;
+ ldrb r9, [r6], #1 @ r9 = *Cr_p++;
+ @
+ sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right
+ add r7, r12, r7, asl #5 @ by one less when adding - same for all
+ @
+ sub r8, r8, #128 @ Cb -= 128
+ sub r9, r9, #128 @ Cr -= 128
+ @
+ add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24
+ add r10, r10, r10, asl #4 @
+ add r10, r10, r8, asl #3 @
+ add r10, r10, r8, asl #4 @
+ @
+ add r11, r9, r9, asl #2 @ r9 = Cr*101
+ add r11, r11, r9, asl #5 @
+ add r9, r11, r9, asl #6 @
+ @
+ add r8, r8, #2 @ r8 = bu = (Cb*128 + 128) >> 8
+ mov r8, r8, asr #2 @
+ add r9, r9, #256 @ r9 = rv = (r9 + 256) >> 9
+ mov r9, r9, asr #9 @
+ rsb r10, r10, #128 @ r10 = guv = (-r10 + 128) >> 8
+ mov r10, r10, asr #8 @
+ @ compute R, G, and B
+ add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
+ add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r3, r11 @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r3, #31 @ clamp b
+ mvnhi r3, r3, asr #31 @
+ andhi r3, r3, #31 @
+ cmp r11, #31 @ clamp r
+ mvnhi r11, r11, asr #31 @
+ andhi r11, r11, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
+ orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
+ @
+ orr r7, r14, r3, lsr #8 @ store pixel
+ orr r11, r14, r3 @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r7, [r0] @
+ str r11, [r0] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @
+ add r7, r12, r7, asl #5 @
+ @ compute R, G, and B
+ add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
+ add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r3, r11 @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r3, #31 @ clamp b
+ mvnhi r3, r3, asr #31 @
+ andhi r3, r3, #31 @
+ cmp r11, #31 @ clamp r
+ mvnhi r11, r11, asr #31 @
+ andhi r11, r11, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
+ @
+ orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
+ orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
+ @
+ orr r7, r14, r3, lsr #8 @ store pixel
+ orr r11, r14, r3 @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r7, [r0] @
+ str r11, [r0] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @
+ add r7, r12, r7, asl #5 @
+ @ compute R, G, and B
+ add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
+ add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r3, r11 @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r3, #31 @ clamp b
+ mvnhi r3, r3, asr #31 @
+ andhi r3, r3, #31 @
+ cmp r11, #31 @ clamp r
+ mvnhi r11, r11, asr #31 @
+ andhi r11, r11, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ orr r3, r3, r7, lsl #5 @ r3 = b | (g << 5)
+ orr r3, r3, r11, lsl #11 @ r3 |= (r << 11)
+ @
+ orr r7, r14, r3, lsr #8 @ store pixel
+ orr r11, r14, r3 @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r7, [r0] @
+ str r11, [r0] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @
+ add r7, r12, r7, asl #5 @
+ @ compute R, G, and B
+ add r3, r8, r7, asr #8 @ r3 = b = (Y >> 9) + bu
+ add r11, r9, r7, asr #8 @ r11 = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r3, r11 @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r3, #31 @ clamp b
+ mvnhi r3, r3, asr #31 @
+ andhi r3, r3, #31 @
+ cmp r11, #31 @ clamp r
+ mvnhi r11, r11, asr #31 @
+ andhi r11, r11, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ orr r3, r3, r11, lsl #11 @ r3 = b | (r << 11)
+ orr r3, r3, r7, lsl #5 @ r3 |= (g << 5)
+ @
+ orr r7, r14, r3, lsr #8 @ store pixel
+ orr r11, r14, r3 @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r7, [r0] @
+ str r11, [r0] @
+ @
+ subs r1, r1, #2 @ subtract block from width
+ bgt 10b @ loop line @
+ @
+ ldmpc regs=r4-r11 @ restore registers and return
+ .ltorg @ dump constant pool
+ .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
+
+
+/****************************************************************************
+ * void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
+ * int width,
+ * int stride,
+ * int x_screen,
+ * int y_screen);
+ *
+ * |R| |1.000000 -0.000001 1.402000| |Y'|
+ * |G| = |1.000000 -0.334136 -0.714136| |Pb|
+ * |B| |1.000000 1.772000 0.000000| |Pr|
+ * Red scaled at twice g & b but at same precision to place it in correct
+ * bit position after multiply and leave instruction count lower.
+ * |R| |258 0 408| |Y' - 16|
+ * |G| = |149 -49 -104| |Cb - 128|
+ * |B| |149 258 0| |Cr - 128|
+ *
+ * Write four RGB565 pixels in the following order on each loop:
+ * 1 3 + > down
+ * 2 4 \/ left
+ *
+ * Kernel pattern (raw|use order):
+ * 5 3 4 2 row0 row2 > down
+ * 1 7 0 6 | 5 1 3 7 4 0 2 6 col0 left
+ * 4 2 5 3 | 4 0 2 6 5 1 3 7 col2 \/
+ * 0 6 1 7
+ */
+ .section .icode, "ax", %progbits
+ .align 2
+ .global lcd_write_yuv420_lines_odither
+ .type lcd_write_yuv420_lines_odither, %function
+lcd_write_yuv420_lines_odither:
+ @ r0 = yuv_src
+ @ r1 = width
+ @ r2 = stride
+ @ r3 = x_screen
+ @ [sp] = y_screen
+ stmfd sp!, { r4-r11, lr } @ save non-scratch
+ ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
+ @ r5 = yuv_src[1] = Cb_p
+ @ r6 = yuv_src[2] = Cr_p
+ @
+ ldr r0, [sp, #36] @ Line up pattern and kernel quadrant
+ eor r14, r3, r0 @
+ and r14, r14, #0x2 @
+ mov r14, r14, lsl #6 @ 0x00 or 0x80
+ @
+ mov r0, #0x7000000c @ r0 = &LCD2_PORT = 0x70008a0c
+ add r0, r0, #0x8a00 @
+ @
+ sub r2, r2, #1 @ Adjust stride because of increment
+10: @ loop line @
+ @
+ ldrb r7, [r4], #1 @ r7 = *Y'_p++;
+ ldrb r8, [r5], #1 @ r8 = *Cb_p++;
+ ldrb r9, [r6], #1 @ r9 = *Cr_p++;
+ @
+ eor r14, r14, #0x80 @ flip pattern quadrant
+ @
+ sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @
+ sub r8, r8, #128 @ Cb -= 128
+ sub r9, r9, #128 @ Cr -= 128
+ @
+ add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49
+ add r10, r10, r8, asl #5 @
+ add r10, r10, r9, asl #3 @
+ add r10, r10, r9, asl #5 @
+ add r10, r10, r9, asl #6 @
+ @
+ mov r8, r8, asl #1 @ r8 = bu = Cb*258
+ add r8, r8, r8, asl #7 @
+ @
+ add r9, r9, r9, asl #1 @ r9 = rv = Cr*408
+ add r9, r9, r9, asl #4 @
+ mov r9, r9, asl #3 @
+ @
+ @ compute R, G, and B
+ add r3, r8, r7 @ r3 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ @ r8 = bu, r9 = rv, r10 = guv
+ @
+ sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256
+ add r3, r12, r3, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
+ add r7, r12, r7, lsr #8 @
+ @
+ add r12, r14, #0x200 @
+ @
+ add r3, r3, r12 @ b = r3 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r3, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r3, asr #15 @ clamp b
+ mvnne r3, r12, lsr #15 @
+ andne r3, r3, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ and r11, r11, #0xf800 @ pack pixel
+ and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
+ orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
+ orr r3, r11, r3, lsr #10 @ (b >> 10)
+ @
+ mov r11, #LCD2_DATA_MASK @ store pixel
+ orr r7, r11, r3, lsr #8 @
+ orr r11, r11, r3 @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r7, [r0] @
+ str r11, [r0] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @ compute R, G, and B
+ add r3, r8, r7 @ r3 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256
+ add r3, r12, r3, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
+ add r7, r12, r7, lsr #8 @
+ @
+ @ This element is zero - use r14 @
+ @
+ add r3, r3, r14 @ b = r3 + delta
+ add r11, r11, r14, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r14, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r3, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r3, asr #15 @ clamp b
+ mvnne r3, r12, lsr #15 @
+ andne r3, r3, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
+ @
+ and r11, r11, #0xf800 @ pack pixel
+ and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
+ orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
+ orr r3, r11, r3, lsr #10 @ (b >> 10)
+ @
+ mov r11, #LCD2_DATA_MASK @ store pixel
+ orr r7, r11, r3, lsr #8 @
+ orr r11, r11, r3 @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r7, [r0] @
+ str r11, [r0] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @ compute R, G, and B
+ add r3, r8, r7 @ r3 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ @ r8 = bu, r9 = rv, r10 = guv
+ @
+ sub r12, r3, r3, lsr #5 @ r3 = 31/32*b' + b'/256
+ add r3, r12, r3, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
+ add r7, r12, r7, lsr #8 @
+ @
+ add r12, r14, #0x100 @
+ @
+ add r3, r3, r12 @ b = r3 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r3, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r3, asr #15 @ clamp b
+ mvnne r3, r12, lsr #15 @
+ andne r3, r3, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ and r11, r11, #0xf800 @ pack pixel
+ and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
+ orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
+ orr r3, r11, r3, lsr #10 @ (b >> 10)
+ @
+ mov r11, #LCD2_DATA_MASK @ store pixel
+ orr r7, r11, r3, lsr #8 @
+ orr r11, r11, r3 @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r7, [r0] @
+ str r11, [r0] @
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @ compute R, G, and B
+ add r3, r8, r7 @ r3 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ sub r12, r3, r3, lsr #5 @ r3 = 31/32*b + b/256
+ add r3, r12, r3, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
+ add r7, r12, r7, lsr #8 @
+ @
+ add r12, r14, #0x300 @
+ @
+ add r3, r3, r12 @ b = r3 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r3, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r3, asr #15 @ clamp b
+ mvnne r3, r12, lsr #15 @
+ andne r3, r3, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ and r11, r11, #0xf800 @ pack pixel
+ and r7, r7, #0x7e00 @ r3 = pixel = (r & 0xf800) |
+ orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
+ orr r3, r11, r3, lsr #10 @ (b >> 10)
+ @
+ mov r11, #LCD2_DATA_MASK @ store pixel
+ orr r7, r11, r3, lsr #8 @
+ orr r11, r11, r3 @
+20: @
+ ldr r3, [r0] @
+ tst r3, #LCD2_BUSY_MASK @
+ bne 20b @
+ str r7, [r0] @
+ str r11, [r0] @
+ @
+ subs r1, r1, #2 @ subtract block from width
+ bgt 10b @ loop line @
+ @
+ ldmpc regs=r4-r11 @ restore registers and return
+ .ltorg @ dump constant pool
+ .size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither
diff --git a/firmware/target/arm/samsung/yh925/lcd-yh925.c b/firmware/target/arm/samsung/yh925/lcd-yh925.c
index e2b3ae3694..93bfb3a5f2 100644
--- a/firmware/target/arm/samsung/yh925/lcd-yh925.c
+++ b/firmware/target/arm/samsung/yh925/lcd-yh925.c
@@ -37,6 +37,8 @@ static unsigned short disp_control_rev;
/* Contrast setting << 8 */
static int lcd_contrast;
+static unsigned lcd_yuv_options SHAREDBSS_ATTR = 0;
+
/* Forward declarations */
#if defined(HAVE_LCD_ENABLE) || defined(HAVE_LCD_SLEEP)
static void lcd_display_off(void);
@@ -508,6 +510,98 @@ bool lcd_active(void)
/*** update functions ***/
+void lcd_yuv_set_options(unsigned options)
+{
+ lcd_yuv_options = options;
+}
+
+/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420. */
+extern void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ int width,
+ int stride);
+extern void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
+ int width,
+ int stride,
+ int x_screen, /* To align dither pattern */
+ int y_screen);
+
+/* Performance function to blit a YUV bitmap directly to the LCD */
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ const unsigned char *yuv_src[3];
+ const unsigned char *ysrc_max;
+ int y0;
+ int options;
+
+ /* NOT MODIFIED FOR THE YH-925 */
+
+ if (!display_on)
+ return;
+
+ width &= ~1;
+ height &= ~1;
+
+ x += x_offset;
+
+ /* calculate the drawing region */
+
+ /* The 20GB LCD is actually 128x160 but rotated 90 degrees so the origin
+ * is actually the bottom left and horizontal and vertical are swapped.
+ * Rockbox expects the origin to be the top left so we need to use
+ * 127 - y instead of just y */
+
+ /* max vert << 8 | start vert */
+ lcd_write_reg(R_VERT_RAM_ADDR_POS, ((x + width - 1) << 8) | x);
+
+ y0 = LCD_HEIGHT - 1 - y + y_offset;
+
+ /* DIT=0, BGR=1, HWM=0, I/D1-0=10, AM=0, LG2-0=000 */
+ lcd_write_reg(R_ENTRY_MODE, 0x1020);
+
+ yuv_src[0] = src[0] + src_y * stride + src_x;
+ yuv_src[1] = src[1] + (src_y * stride >> 2) + (src_x >> 1);
+ yuv_src[2] = src[2] + (yuv_src[1] - src[1]);
+ ysrc_max = yuv_src[0] + height * stride;
+
+ options = lcd_yuv_options;
+
+ do
+ {
+ /* max horiz << 8 | start horiz */
+ lcd_write_reg(R_HORIZ_RAM_ADDR_POS, (y0 << 8) | (y0 - 1));
+
+ /* position cursor (set AD0-AD15) */
+ /* start vert << 8 | start horiz */
+ lcd_write_reg(R_RAM_ADDR_SET, (x << 8) | y0);
+
+ /* start drawing */
+ lcd_send_cmd(R_WRITE_DATA_2_GRAM);
+
+ if (options & LCD_YUV_DITHER)
+ {
+ lcd_write_yuv420_lines_odither(yuv_src, width, stride,
+ x, y);
+ y -= 2;
+ }
+ else
+ {
+ lcd_write_yuv420_lines(yuv_src, width, stride);
+ }
+
+ y0 -= 2;
+ yuv_src[0] += stride << 1;
+ yuv_src[1] += stride >> 1;
+ yuv_src[2] += stride >> 1;
+ }
+ while (yuv_src[0] < ysrc_max);
+
+ /* DIT=0, BGR=1, HWM=0, I/D1-0=10, AM=1, LG2-0=000 */
+ lcd_write_reg(R_ENTRY_MODE, 0x1028);
+}
+
+
/* Update a fraction of the display. */
void lcd_update_rect(int x0, int y0, int width, int height)
{
diff --git a/firmware/target/arm/sandisk/sansa-c200/lcd-as-c200.S b/firmware/target/arm/sandisk/sansa-c200/lcd-as-c200.S
new file mode 100644
index 0000000000..542ceeeb36
--- /dev/null
+++ b/firmware/target/arm/sandisk/sansa-c200/lcd-as-c200.S
@@ -0,0 +1,550 @@
+/***************************************************************************
+ * __________ __ ___.
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id$
+ *
+ * Copyright (C) 2007 by Jens Arnold
+ * Heavily based on lcd-as-memframe.c by Michael Sevakis
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "config.h"
+#include "cpu.h"
+
+/****************************************************************************
+ * void lcd_write_yuv420_lines(unsigned char const * const src[3],
+ * int width,
+ * int stride);
+ *
+ * |R| |1.000000 -0.000001 1.402000| |Y'|
+ * |G| = |1.000000 -0.334136 -0.714136| |Pb|
+ * |B| |1.000000 1.772000 0.000000| |Pr|
+ * Scaled, normalized, rounded and tweaked to yield RGB 565:
+ * |R| |74 0 101| |Y' - 16| >> 9
+ * |G| = |74 -24 -51| |Cb - 128| >> 8
+ * |B| |74 128 0| |Cr - 128| >> 9
+ *
+ * Write four RGB565 pixels in the following order on each loop:
+ * 1 3 + > down
+ * 2 4 \/ left
+ */
+ .section .icode, "ax", %progbits
+ .align 2
+ .global lcd_write_yuv420_lines
+ .type lcd_write_yuv420_lines, %function
+lcd_write_yuv420_lines:
+ @ r0 = yuv_src
+ @ r1 = width
+ @ r2 = stride
+ stmfd sp!, { r4-r10, lr } @ save non-scratch
+ ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
+ @ r5 = yuv_src[1] = Cb_p
+ @ r6 = yuv_src[2] = Cr_p
+ @ r0 = scratch
+ sub r2, r2, #1 @
+ mov r3, #0x70000000 @
+ orr r3, r3, #0x3000 @ r3 = LCD1_BASE
+10: @ loop line @
+ ldrb r7, [r4], #1 @ r7 = *Y'_p++;
+ ldrb r8, [r5], #1 @ r8 = *Cb_p++;
+ ldrb r9, [r6], #1 @ r9 = *Cr_p++;
+ @
+ sub r7, r7, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @ actually (Y' - 16)*37 and shift right
+ add r7, r12, r7, asl #5 @ by one less when adding - same for all
+ @
+ sub r8, r8, #128 @ Cb -= 128
+ sub r9, r9, #128 @ Cr -= 128
+ @
+ add r10, r9, r9, asl #1 @ r10 = Cr*51 + Cb*24
+ add r10, r10, r10, asl #4 @
+ add r10, r10, r8, asl #3 @
+ add r10, r10, r8, asl #4 @
+ @
+ add lr, r9, r9, asl #2 @ r9 = Cr*101
+ add lr, lr, r9, asl #5 @
+ add r9, lr, r9, asl #6 @
+ @
+ add r8, r8, #2 @ r8 = bu = (Cb*128 + 128) >> 8
+ mov r8, r8, asr #2 @
+ add r9, r9, #256 @ r9 = rv = (r9 + 256) >> 9
+ mov r9, r9, asr #9 @
+ rsb r10, r10, #128 @ r10 = guv = (-r10 + 128) >> 8
+ mov r10, r10, asr #8 @
+ @ compute R, G, and B
+ add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
+ add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r0, lr @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r0, #31 @ clamp b
+ mvnhi r0, r0, asr #31 @
+ andhi r0, r0, #31 @
+ cmp lr, #31 @ clamp r
+ mvnhi lr, lr, asr #31 @
+ andhi lr, lr, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ mov lr, lr, lsl #3 @
+ orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3)
+ orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str lr, [r3, #0x10] @ send MSB
+1: @busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str r0, [r3, #0x10] @ send LSB
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @
+ add r7, r12, r7, asl #5 @
+ @ compute R, G, and B
+ add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
+ add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r0, lr @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r0, #31 @ clamp b
+ mvnhi r0, r0, asr #31 @
+ andhi r0, r0, #31 @
+ cmp lr, #31 @ clamp r
+ mvnhi lr, lr, asr #31 @
+ andhi lr, lr, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
+ @
+ mov lr, lr, lsl #3 @
+ orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3)
+ orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str lr, [r3, #0x10] @ send MSB
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str r0, [r3, #0x10] @ send LSB
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @
+ add r7, r12, r7, asl #5 @
+ @ compute R, G, and B
+ add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
+ add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r0, lr @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r0, #31 @ clamp b
+ mvnhi r0, r0, asr #31 @
+ andhi r0, r0, #31 @
+ cmp lr, #31 @ clamp r
+ mvnhi lr, lr, asr #31 @
+ andhi lr, lr, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+ @
+ mov lr, lr, lsl #3 @
+ orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3)
+ orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str lr, [r3, #0x10] @ send MSB
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str r0, [r3, #0x10] @ send LSB
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*74
+ add r12, r7, r7, asl #2 @
+ add r7, r12, r7, asl #5 @
+ @ compute R, G, and B
+ add r0, r8, r7, asr #8 @ r0 = b = (Y >> 9) + bu
+ add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
+ add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
+ @
+ orr r12, r0, lr @ check if clamping is needed...
+ orr r12, r12, r7, asr #1 @ ...at all
+ cmp r12, #31 @
+ bls 15f @ no clamp @
+ cmp r0, #31 @ clamp b
+ mvnhi r0, r0, asr #31 @
+ andhi r0, r0, #31 @
+ cmp lr, #31 @ clamp r
+ mvnhi lr, lr, asr #31 @
+ andhi lr, lr, #31 @
+ cmp r7, #63 @ clamp g
+ mvnhi r7, r7, asr #31 @
+ andhi r7, r7, #63 @
+15: @ no clamp @
+ @
+ mov lr, lr, lsl #3 @
+ orr lr, lr, r7, lsr #3 @ lr = (r << 3) | (g >> 3)
+ orr r0, r0, r7, lsl #5 @ r0 = (g << 5) | b
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str lr, [r3, #0x10] @ send MSB
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str r0, [r3, #0x10] @ send LSB
+ @
+ subs r1, r1, #2 @ subtract block from width
+ bgt 10b @ loop line @
+ @
+ ldmpc regs=r4-r10 @ restore registers and return
+ .ltorg @ dump constant pool
+ .size lcd_write_yuv420_lines, .-lcd_write_yuv420_lines
+
+/****************************************************************************
+ * void lcd_write_yuv420_lines_odither(unsigned char const * const src[3],
+ * int width,
+ * int stride,
+ * int x_screen,
+ * int y_screen);
+ *
+ * |R| |1.000000 -0.000001 1.402000| |Y'|
+ * |G| = |1.000000 -0.334136 -0.714136| |Pb|
+ * |B| |1.000000 1.772000 0.000000| |Pr|
+ * Red scaled at twice g & b but at same precision to place it in correct
+ * bit position after multiply and leave instruction count lower.
+ * |R| |258 0 408| |Y' - 16|
+ * |G| = |149 -49 -104| |Cb - 128|
+ * |B| |149 258 0| |Cr - 128|
+ *
+ * Write four RGB565 pixels in the following order on each loop:
+ * 1 3 + > down
+ * 2 4 \/ left
+ *
+ * Kernel pattern (raw|rotated|use order):
+ * 5 3 4 2 2 6 3 7 row0 row2 > down
+ * 1 7 0 6 | 4 0 5 1 | 2 4 6 0 3 5 7 1 col0 left
+ * 4 2 5 3 | 3 7 2 6 | 3 5 7 1 2 4 6 0 col2 \/
+ * 0 6 1 7 5 1 4 0
+ */
+ .section .icode, "ax", %progbits
+ .align 2
+ .global lcd_write_yuv420_lines_odither
+ .type lcd_write_yuv420_lines_odither, %function
+lcd_write_yuv420_lines_odither:
+ @ r0 = yuv_src
+ @ r1 = width
+ @ r2 = stride
+ @ r3 = x_screen
+ @ [sp] = y_screen
+ stmfd sp!, { r4-r11, lr } @ save non-scratch
+ ldmia r0, { r4, r5, r6 } @ r4 = yuv_src[0] = Y'_p
+ @ r5 = yuv_src[1] = Cb_p
+ @ r6 = yuv_src[2] = Cr_p
+ @
+ sub r2, r2, #1 @
+ ldr r14, [sp, #36] @ Line up pattern and kernel quadrant
+ eor r14, r14, r3 @
+ and r14, r14, #0x2 @
+ mov r14, r14, lsl #6 @ 0x00 or 0x80
+ mov r3, #0x70000000 @
+ orr r3, r3, #0x3000 @ r3 = LCD1_BASE
+10: @ loop line @
+ @
+ ldrb r7, [r4], #1 @ r7 = *Y'_p++;
+ ldrb r8, [r5], #1 @ r8 = *Cb_p++;
+ ldrb r9, [r6], #1 @ r9 = *Cr_p++;
+ @
+ eor r14, r14, #0x80 @ flip pattern quadrant
+ @
+ sub r7, r7, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @
+ sub r8, r8, #128 @ Cb -= 128
+ sub r9, r9, #128 @ Cr -= 128
+ @
+ add r10, r8, r8, asl #4 @ r10 = guv = Cr*104 + Cb*49
+ add r10, r10, r8, asl #5 @
+ add r10, r10, r9, asl #3 @
+ add r10, r10, r9, asl #5 @
+ add r10, r10, r9, asl #6 @
+ @
+ mov r8, r8, asl #1 @ r8 = bu = Cb*258
+ add r8, r8, r8, asl #7 @
+ @
+ add r9, r9, r9, asl #1 @ r9 = rv = Cr*408
+ add r9, r9, r9, asl #4 @
+ mov r9, r9, asl #3 @
+ @
+ @ compute R, G, and B
+ add r0, r8, r7 @ r0 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ @ r8 = bu, r9 = rv, r10 = guv
+ @
+ sub r12, r0, r0, lsr #5 @ r0 = 31/32*b + b/256
+ add r0, r12, r0, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
+ add r7, r12, r7, lsr #8 @
+ @
+ add r12, r14, #0x100 @
+ @
+ add r0, r0, r12 @ b = r0 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r0, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r0, asr #15 @ clamp b
+ mvnne r0, r12, lsr #15 @
+ andne r0, r0, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+ @
+
+ and r11, r11, #0xf800 @ pack pixel
+ mov r11, r11, lsr #8
+ and r7, r7, #0x7e00
+ orr r11, r11, r7, lsr #12
+ mov r7, r7, lsr#4
+ orr r0, r7, r0, lsr #10
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str r11, [r3, #0x10] @ send MSB
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str r0, [r3, #0x10] @ send LSB
+ @
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @ compute R, G, and B
+ add r0, r8, r7 @ r0 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ sub r12, r0, r0, lsr #5 @ r0 = 31/32*b' + b'/256
+ add r0, r12, r0, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
+ add r7, r12, r7, lsr #8 @
+ @
+ add r12, r14, #0x200 @
+ @
+ add r0, r0, r12 @ b = r0 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r0, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r0, asr #15 @ clamp b
+ mvnne r0, r12, lsr #15 @
+ andne r0, r0, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
+
+ and r11, r11, #0xf800 @ pack pixel
+ mov r11, r11, lsr #8
+ and r7, r7, #0x7e00
+ orr r11, r11, r7, lsr #12
+ mov r7, r7, lsr#4
+ orr r0, r7, r0, lsr #10
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str r11, [r3, #0x10] @ send MSB
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str r0, [r3, #0x10] @ send LSB
+
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @ compute R, G, and B
+ add r0, r8, r7 @ r0 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ @ r8 = bu, r9 = rv, r10 = guv
+ @
+ sub r12, r0, r0, lsr #5 @ r0 = 31/32*b' + b'/256
+ add r0, r12, r0, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r' + r'/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g' + g'/256
+ add r7, r12, r7, lsr #8 @
+ @
+ add r12, r14, #0x300 @
+ @
+ add r0, r0, r12 @ b = r0 + delta
+ add r11, r11, r12, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r12, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r0, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r0, asr #15 @ clamp b
+ mvnne r0, r12, lsr #15 @
+ andne r0, r0, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+ @
+ ldrb r12, [r4, r2] @ r12 = Y' = *(Y'_p + stride)
+
+ and r11, r11, #0xf800 @ pack pixel
+ mov r11, r11, lsr #8
+ and r7, r7, #0x7e00
+ orr r11, r11, r7, lsr #12
+ mov r7, r7, lsr#4
+ orr r0, r7, r0, lsr #10
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str r11, [r3, #0x10] @ send MSB
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str r0, [r3, #0x10] @ send LSB
+
+ sub r7, r12, #16 @ r7 = Y = (Y' - 16)*149
+ add r12, r7, r7, asl #2 @
+ add r12, r12, r12, asl #4 @
+ add r7, r12, r7, asl #6 @
+ @ compute R, G, and B
+ add r0, r8, r7 @ r0 = b' = Y + bu
+ add r11, r9, r7, asl #1 @ r11 = r' = Y*2 + rv
+ rsb r7, r10, r7 @ r7 = g' = Y + guv
+ @
+ sub r12, r0, r0, lsr #5 @ r0 = 31/32*b + b/256
+ add r0, r12, r0, lsr #8 @
+ @
+ sub r12, r11, r11, lsr #5 @ r11 = 31/32*r + r/256
+ add r11, r12, r11, lsr #8 @
+ @
+ sub r12, r7, r7, lsr #6 @ r7 = 63/64*g + g/256
+ add r7, r12, r7, lsr #8 @
+ @
+ @ This element is zero - use r14 @
+ @
+ add r0, r0, r14 @ b = r0 + delta
+ add r11, r11, r14, lsl #1 @ r = r11 + delta*2
+ add r7, r7, r14, lsr #1 @ g = r7 + delta/2
+ @
+ orr r12, r0, r11, asr #1 @ check if clamping is needed...
+ orr r12, r12, r7 @ ...at all
+ movs r12, r12, asr #15 @
+ beq 15f @ no clamp @
+ movs r12, r0, asr #15 @ clamp b
+ mvnne r0, r12, lsr #15 @
+ andne r0, r0, #0x7c00 @ mask b only if clamped
+ movs r12, r11, asr #16 @ clamp r
+ mvnne r11, r12, lsr #16 @
+ movs r12, r7, asr #15 @ clamp g
+ mvnne r7, r12, lsr #15 @
+15: @ no clamp @
+
+ and r11, r11, #0xf800 @ pack pixel
+ mov r11, r11, lsr #8
+ and r7, r7, #0x7e00
+ orr r11, r11, r7, lsr #12
+ mov r7, r7, lsr#4
+ orr r0, r7, r0, lsr #10
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str r11, [r3, #0x10] @ send MSB
+1: @ busy @
+ ldr r7, [r3] @ r7 = LCD1_BASE
+ tst r7, #LCD1_BUSY_MASK @ bridge busy?
+ bne 1b @
+ str r0, [r3, #0x10] @ send LSB
+
+ subs r1, r1, #2 @ subtract block from width
+ bgt 10b @ loop line @
+ @
+ ldmpc regs=r4-r11 @ restore registers and return
+ .ltorg @ dump constant pool
+ .size lcd_write_yuv420_lines_odither, .-lcd_write_yuv420_lines_odither
diff --git a/firmware/target/arm/tms320dm320/mrobe-500/lcd-mr500.c b/firmware/target/arm/tms320dm320/mrobe-500/lcd-mr500.c
index e851c421a6..8620c672e1 100644
--- a/firmware/target/arm/tms320dm320/mrobe-500/lcd-mr500.c
+++ b/firmware/target/arm/tms320dm320/mrobe-500/lcd-mr500.c
@@ -273,7 +273,15 @@ void lcd_init_device(void)
#if defined(HAVE_LCD_MODES)
void lcd_set_mode(int mode)
{
- if(mode==LCD_MODE_RGB565) {
+ if(mode==LCD_MODE_YUV) {
+ /* Turn off the RGB buffer and enable the YUV buffer with zoom */
+ IO_OSD_OSDWINMD0 |= 0x04;
+ IO_OSD_VIDWINMD |= 0x01;
+#if LCD_NATIVE_WIDTH > 240
+ IO_OSD_VIDWINMD |= (0x05<<2); /* This does a 2x zoom */
+#endif
+ memset16(FRAME2, 0x0080, LCD_NATIVE_HEIGHT*(LCD_NATIVE_WIDTH+LCD_FUDGE));
+ } else if(mode==LCD_MODE_RGB565) {
/* Turn on the RGB window, set it to 16 bit and turn YUV window off */
IO_OSD_VIDWINMD &= ~(0x01);
IO_OSD_OSDWIN0OFST = LCD_NATIVE_WIDTH / 16;
@@ -636,6 +644,82 @@ void lcd_pal256_update_pal(fb_data *palette)
}
#endif
+/* Performance function to blit a YUV bitmap directly to the LCD */
+/* Show it rotated so the LCD_WIDTH is now the height */
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ unsigned char const * yuv_src[3];
+
+ if (!lcd_on)
+ return;
+
+ /* y has to be on a 16 pixel boundary */
+ y &= ~0xF;
+
+ if( ((y | x | height | width ) < 0)
+ || y>LCD_NATIVE_HEIGHT || x>LCD_NATIVE_WIDTH )
+ return;
+
+ if(y+height>LCD_NATIVE_WIDTH)
+ {
+ height=LCD_NATIVE_WIDTH-y;
+ }
+ if(x+width>LCD_NATIVE_HEIGHT)
+ {
+ width=LCD_NATIVE_HEIGHT-x;
+ }
+
+ /* Sorry, but width and height must be >= 2 or else */
+ width &= ~1;
+ height>>=1;
+
+ fb_data * dst = FRAME2
+ + ((LCD_NATIVE_WIDTH+LCD_FUDGE)*(LCD_NATIVE_HEIGHT-1))
+ - (LCD_NATIVE_WIDTH+LCD_FUDGE)*x + y ;
+
+ /* Scope z */
+ {
+ off_t z;
+ z = stride*src_y;
+ yuv_src[0] = src[0] + z + src_x;
+ yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1);
+ yuv_src[2] = src[2] + (yuv_src[1] - src[1]);
+ }
+
+ int cbcr_remain=(stride>>1)-(width>>1);
+ int y_remain=(stride<<1)-width;
+ do
+ {
+ register int c_width=width;
+ register unsigned int *c_dst=(unsigned int*)dst;
+ do
+ {
+ register unsigned short Y=*((unsigned short*)yuv_src[0]);
+ register unsigned short Yst=*((unsigned short*)(yuv_src[0]+stride));
+ yuv_src[0]+=2;
+
+ register unsigned char Cb=*yuv_src[1]++;
+ register unsigned char Cr=*yuv_src[2]++;
+
+ *c_dst = (Yst<<24) | (Cr << 16) | ((Y&0xFF)<<8) | Cb;
+ *(c_dst - (LCD_NATIVE_WIDTH+LCD_FUDGE)/2) =
+ ( (Yst&0xFF00)<<16) | (Cr << 16) | (Y&0xFF00) | Cb;
+
+ c_dst -= (LCD_NATIVE_WIDTH+LCD_FUDGE);
+
+ c_width -= 2;
+ } while (c_width);
+
+ yuv_src[0] += y_remain; /* Skip down two luma lines-width */
+ yuv_src[1] += cbcr_remain; /* Skip down one chroma line-width/2 */
+ yuv_src[2] += cbcr_remain;
+
+ dst+=2;
+ } while (--height);
+}
+
void lcd_set_contrast(int val) {
(void) val;
// TODO:
diff --git a/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S b/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S
index b319d745ca..e6621e1dea 100644
--- a/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S
+++ b/firmware/target/coldfire/iaudio/x5/lcd-as-x5.S
@@ -25,6 +25,248 @@
.section .icode,"ax",@progbits
+/* begin lcd_write_yuv420_lines
+ *
+ * See http://en.wikipedia.org/wiki/YCbCr
+ * ITU-R BT.601 (formerly CCIR 601):
+ * |Y'| | 0.299000 0.587000 0.114000| |R|
+ * |Pb| = |-0.168736 -0.331264 0.500000| |G| or 0.564334*(B - Y')
+ * |Pr| | 0.500000 -0.418688 0.081312| |B| or 0.713267*(R - Y')
+ * Scaled, normalized and rounded:
+ * |Y'| | 65 129 25| |R| + 16 : 16->235
+ * |Cb| = |-38 -74 112| |G| + 128 : 16->240
+ * |Cr| |112 -94 -18| |B| + 128 : 16->240
+ *
+ * The inverse:
+ * |R| |1.000000 -0.000001 1.402000| |Y'|
+ * |G| = |1.000000 -0.334136 -0.714136| |Pb|
+ * |B| |1.000000 1.772000 0.000000| |Pr|
+ * Scaled, normalized, rounded and tweaked to yield RGB 666:
+ * |R| |19611723 0 26881894| |Y' - 16| >> 26
+ * |G| = |19611723 -6406711 -13692816| |Cb - 128| >> 26
+ * |B| |19611723 33976259 0| |Cr - 128| >> 26
+ *
+ * Needs EMAC set to saturated, signed integer mode.
+ *
+ * register usage:
+ * %a0 - LCD data port
+ * %a1 - Y pointer
+ * %a2 - C pointer
+ * %a3 - C width
+ * %a4 - Y end address
+ * %a5 - Y factor
+ * %a6 - BU factor
+ * %d0 - scratch
+ * %d1 - B, previous Y \ alternating
+ * %d2 - U / B, previous Y /
+ * %d3 - V / G
+ * %d4 - R / output pixel
+ * %d5 - GU factor
+ * %d6 - GV factor
+ * %d7 - RGB signed -> unsigned conversion mask
+ */
+ .align 2
+ .global lcd_write_yuv420_lines
+ .type lcd_write_yuv420_lines, @function
+
+lcd_write_yuv420_lines:
+ lea.l (-44, %sp), %sp /* free up some registers */
+ movem.l %d2-%d7/%a2-%a6, (%sp)
+
+ lea.l 0xf0008002, %a0 /* LCD data port */
+ movem.l (44+4, %sp), %a1-%a3 /* Y data, C data, C width */
+ lea.l (%a1, %a3*2), %a4 /* Y end address */
+
+ move.l #19611723, %a5 /* y factor */
+ move.l #33976259, %a6 /* bu factor */
+ move.l #-6406711, %d5 /* gu factor */
+ move.l #-13692816, %d6 /* gv factor */
+ move.l #0x01040820, %d7 /* bitmask for signed->unsigned conversion
+ * of R, G and B within RGGB6666 at once */
+
+ /* chroma for first 2x2 block */
+ clr.l %d3 /* load v component */
+ move.b (%a2, %a3), %d3
+ clr.l %d2 /* load u component */
+ move.b (%a2)+, %d2
+ moveq.l #-128, %d0
+ add.l %d0, %d2
+ add.l %d0, %d3
+
+ mac.l %a6, %d2, %acc0 /* bu */
+ mac.l %d5, %d2, %acc1 /* gu */
+ mac.l %d6, %d3, %acc1 /* gv */
+ move.l #26881894, %d0 /* rv factor */
+ mac.l %d0, %d3, %acc2 /* rv */
+
+ /* luma for very first pixel (top left) */
+ clr.l %d1
+ move.b (%a1, %a3*2), %d1
+ moveq.l #-126, %d0
+ add.l %d1, %d0 /* y' (-0.5 ... +0.5) */
+ mac.l %a5, %d0, %acc0
+ mac.l %a5, %d0, %acc1
+ mac.l %a5, %d0, %acc2
+
+ bra.b .yuv_line_entry
+
+.yuv_line_loop:
+ /* chroma for 2x2 pixel block */
+ clr.l %d3 /* load v component */
+ move.b (%a2, %a3), %d3
+ clr.l %d2 /* load u component */
+ move.b (%a2)+, %d2
+ moveq.l #-128, %d0
+ add.l %d0, %d2
+ add.l %d0, %d3
+
+ mac.l %a6, %d2, %acc0 /* bu */
+ mac.l %d5, %d2, %acc1 /* gu */
+ mac.l %d6, %d3, %acc1 /* gv */
+ move.l #26881894, %d0 /* rv factor */
+ mac.l %d0, %d3, %acc2 /* rv */
+
+ /* luma for first pixel (top left) */
+ clr.l %d1
+ move.b (%a1, %a3*2), %d1
+ moveq.l #-126, %d0
+ add.l %d1, %d0 /* y' (-0.5 ... +0.5) */
+ mac.l %a5, %d0, %acc0
+ mac.l %a5, %d0, %acc1
+ mac.l %a5, %d0, %acc2
+
+ move.w %d4, (%a0)
+ /* 2nd LCD write is delayed one pixel to use it for filling the EMAC latency */
+
+ /* convert to RGB666, pack and output */
+.yuv_line_entry:
+ moveq.l #26, %d0
+ move.l %acc0, %d4
+ move.l %acc1, %d3
+ move.l %acc2, %d2
+ lsr.l %d0, %d4
+ lsr.l %d0, %d3
+ lsr.l %d0, %d2
+
+ lsl.l #6, %d2
+ or.l %d3, %d2 /* |00000000|00000000|0000Rrrr|rrGggggg| */
+ lsl.l #7, %d2
+ or.l %d2, %d3 /* |00000000|00000Rrr|rrrGgggg|g0Gggggg| */
+ lsl.l #6, %d3
+ or.l %d3, %d4 /* |0000000R|rrrrrGgg|ggg0Gggg|ggBbbbbb| */
+ eor.l %d7, %d4 /* |0000000r|rrrrrggg|ggg0gggg|ggbbbbbb| */
+ swap %d4
+ move.w %d4, (%a0)
+ swap %d4
+
+ /* luma for second pixel (bottom left) as delta from the first */
+ clr.l %d2
+ move.b (%a1)+, %d2
+ move.l %d2, %d0
+ sub.l %d1, %d0
+ mac.l %a5, %d0, %acc0
+ mac.l %a5, %d0, %acc1
+ mac.l %a5, %d0, %acc2
+
+ move.w %d4, (%a0)
+ /* 2nd LCD write is delayed one pixel to use it for filling the EMAC latency */
+
+ /* convert to RGB666, pack and output */
+ moveq.l #26, %d0
+ move.l %acc0, %d4
+ move.l %acc1, %d3
+ move.l %acc2, %d1
+ lsr.l %d0, %d4
+ lsr.l %d0, %d3
+ lsr.l %d0, %d1
+
+ lsl.l #6, %d1
+ or.l %d3, %d1 /* |00000000|00000000|0000Rrrr|rrGggggg| */
+ lsl.l #7, %d1
+ or.l %d1, %d3 /* |00000000|00000Rrr|rrrGgggg|g0Gggggg| */
+ lsl.l #6, %d3
+ or.l %d3, %d4 /* |0000000R|rrrrrGgg|ggg0Gggg|ggBbbbbb| */
+ eor.l %d7, %d4 /* |0000000r|rrrrrggg|ggg0gggg|ggbbbbbb| */
+ swap %d4
+ move.w %d4, (%a0)
+ swap %d4
+
+ /* luma for third pixel (top right) as delta from the second */
+ clr.l %d1
+ move.b (%a1, %a3*2), %d1
+ move.l %d1, %d0
+ sub.l %d2, %d0
+ mac.l %a5, %d0, %acc0
+ mac.l %a5, %d0, %acc1
+ mac.l %a5, %d0, %acc2
+
+ move.w %d4, (%a0)
+ /* 2nd LCD write is delayed one pixel to use it for filling the EMAC latency */
+
+ /* convert to RGB666, pack and output */
+ moveq.l #26, %d0
+ move.l %acc0, %d4
+ move.l %acc1, %d3
+ move.l %acc2, %d2
+ lsr.l %d0, %d4
+ lsr.l %d0, %d3
+ lsr.l %d0, %d2
+
+ lsl.l #6, %d2
+ or.l %d3, %d2 /* |00000000|00000000|0000Rrrr|rrGggggg| */
+ lsl.l #7, %d2
+ or.l %d2, %d3 /* |00000000|00000Rrr|rrrGgggg|g0Gggggg| */
+ lsl.l #6, %d3
+ or.l %d3, %d4 /* |0000000R|rrrrrGgg|ggg0Gggg|ggBbbbbb| */
+ eor.l %d7, %d4 /* |0000000r|rrrrrggg|ggg0gggg|ggbbbbbb| */
+ swap %d4
+ move.w %d4, (%a0)
+ swap %d4
+
+    /* luma for fourth pixel (bottom right) as delta from the third */
+ clr.l %d2
+ move.b (%a1)+, %d2
+ move.l %d2, %d0
+ sub.l %d1, %d0
+ mac.l %a5, %d0, %acc0
+ mac.l %a5, %d0, %acc1
+ mac.l %a5, %d0, %acc2
+
+ move.w %d4, (%a0)
+ /* 2nd LCD write is delayed one pixel to use it for filling the EMAC latency */
+
+ /* convert to RGB666, pack and output */
+ moveq.l #26, %d0
+ movclr.l %acc0, %d4
+ movclr.l %acc1, %d3
+ movclr.l %acc2, %d1
+ lsr.l %d0, %d4
+ lsr.l %d0, %d3
+ lsr.l %d0, %d1
+
+ lsl.l #6, %d1
+ or.l %d3, %d1 /* |00000000|00000000|0000Rrrr|rrGggggg| */
+ lsl.l #7, %d1
+ or.l %d1, %d3 /* |00000000|00000Rrr|rrrGgggg|g0Gggggg| */
+ lsl.l #6, %d3
+ or.l %d3, %d4 /* |0000000R|rrrrrGgg|ggg0Gggg|ggBbbbbb| */
+ eor.l %d7, %d4 /* |0000000r|rrrrrggg|ggg0gggg|ggbbbbbb| */
+ swap %d4
+ move.w %d4, (%a0)
+ swap %d4
+
+ cmp.l %a1, %a4 /* run %a1 up to end of line */
+ bhi.w .yuv_line_loop
+
+ move.w %d4, (%a0) /* write (very) last 2nd word */
+
+ movem.l (%sp), %d2-%d7/%a2-%a6
+ lea.l (44, %sp), %sp /* restore registers */
+ rts
+.yuv_end:
+ .size lcd_write_yuv420_lines, .yuv_end - lcd_write_yuv420_lines
+
+
/* begin lcd_write_data */
.align 2
.global lcd_write_data
diff --git a/firmware/target/coldfire/iaudio/x5/lcd-x5.c b/firmware/target/coldfire/iaudio/x5/lcd-x5.c
index a6a4fc0176..266a381c40 100644
--- a/firmware/target/coldfire/iaudio/x5/lcd-x5.c
+++ b/firmware/target/coldfire/iaudio/x5/lcd-x5.c
@@ -414,6 +414,69 @@ bool lcd_active(void)
#endif
/*** update functions ***/
+/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420.
+ * y should have two lines of Y back to back, 2nd line first.
+ * c should contain the Cb and Cr data for the two lines of Y back to back.
+ * Needs EMAC set to saturated, signed integer mode.
+ */
+extern void lcd_write_yuv420_lines(const unsigned char *y,
+ const unsigned char *c, int width);
+
+/* Performance function to blit a YUV bitmap directly to the LCD
+ * src_x, src_y, width and height should be even and within the LCD's
+ * boundaries.
+ */
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ /* IRAM Y, Cb/bu, guv and Cb/rv buffers. */
+ unsigned char y_ibuf[LCD_WIDTH*2];
+ unsigned char c_ibuf[LCD_WIDTH];
+ const unsigned char *ysrc, *usrc, *vsrc;
+ const unsigned char *ysrc_max;
+
+ if (!display_on)
+ return;
+
+ width &= ~1; /* stay on the safe side */
+ height &= ~1;
+
+ lcd_write_reg(R_ENTRY_MODE, R_ENTRY_MODE_DIT_HORZ);
+ /* Set start position and window */
+ lcd_write_reg(R_VERT_RAM_ADDR_POS, (LCD_WIDTH-1) << 8);
+
+ ysrc = src[0] + src_y * stride + src_x;
+ usrc = src[1] + (src_y * stride >> 2) + (src_x >> 1);
+ vsrc = src[2] + (src_y * stride >> 2) + (src_x >> 1);
+ ysrc_max = ysrc + height * stride;
+
+ unsigned long macsr = coldfire_get_macsr();
+ coldfire_set_macsr(EMAC_SATURATE);
+
+ do
+ {
+ lcd_write_reg(R_HORIZ_RAM_ADDR_POS, ((y + y_offset + 1) << 8) | (y + y_offset));
+ lcd_write_reg(R_RAM_ADDR_SET, (x << 8) | (y + y_offset));
+ lcd_begin_write_gram();
+
+ memcpy(y_ibuf + width, ysrc, width);
+ memcpy(y_ibuf, ysrc + stride, width);
+ memcpy(c_ibuf, usrc, width >> 1);
+ memcpy(c_ibuf + (width >> 1), vsrc, width >> 1);
+ lcd_write_yuv420_lines(y_ibuf, c_ibuf, width >> 1);
+
+ y += 2;
+ ysrc += 2 * stride;
+ usrc += stride >> 1;
+ vsrc += stride >> 1;
+ }
+ while (ysrc < ysrc_max);
+
+ coldfire_set_macsr(macsr);
+} /* lcd_blit_yuv */
+
+
/* Update the display.
This must be called after all other LCD functions that change the
lcd frame buffer. */
diff --git a/firmware/target/coldfire/iriver/h300/lcd-as-h300.S b/firmware/target/coldfire/iriver/h300/lcd-as-h300.S
new file mode 100644
index 0000000000..223c183860
--- /dev/null
+++ b/firmware/target/coldfire/iriver/h300/lcd-as-h300.S
@@ -0,0 +1,246 @@
+/***************************************************************************
+ * __________ __ ___.
+ * Open \______ \ ____ ____ | | _\_ |__ _______ ___
+ * Source | _// _ \_/ ___\| |/ /| __ \ / _ \ \/ /
+ * Jukebox | | ( <_> ) \___| < | \_\ ( <_> > < <
+ * Firmware |____|_ /\____/ \___ >__|_ \|___ /\____/__/\_ \
+ * \/ \/ \/ \/ \/
+ * $Id$
+ *
+ * Copyright (C) 2006 by Jens Arnold
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
+ * KIND, either express or implied.
+ *
+ ****************************************************************************/
+
+#include "config.h"
+#include "cpu.h"
+
+ .section .icode, "ax", @progbits
+
+/* lcd_write_yuv420_lines()
+ *
+ * See http://en.wikipedia.org/wiki/YCbCr
+ * ITU-R BT.601 (formerly CCIR 601):
+ * |Y'| | 0.299000 0.587000 0.114000| |R|
+ * |Pb| = |-0.168736 -0.331264 0.500000| |G| or 0.564334*(B - Y')
+ * |Pr| | 0.500000 -0.418688 0.081312| |B| or 0.713267*(R - Y')
+ * Scaled, normalized and rounded:
+ * |Y'| | 65 129 25| |R| + 16 : 16->235
+ * |Cb| = |-38 -74 112| |G| + 128 : 16->240
+ * |Cr| |112 -94 -18| |B| + 128 : 16->240
+ *
+ * The inverse:
+ * |R| |1.000000 0.000000 1.402000| |Y'|
+ * |G| = |1.000000 -0.334136 -0.714136| |Pb|
+ * |B| |1.000000 1.772000 0.000000| |Pr|
+ * Scaled, normalized, rounded and tweaked to yield RGB565:
+ * |R| |19611723 0 26881894| |Y' - 16| >> 27
+ * |G| = |19611723 -6406711 -13692816| |Cb - 128| >> 26
+ * |B| |19611723 33976259 0| |Cr - 128| >> 27
+ *
+ * Needs EMAC set to saturated, signed integer mode.
+ *
+ * register usage:
+ * %a0 - LCD data port
+ * %a1 - Y pointer
+ * %a2 - C pointer
+ * %a3 - C width
+ * %a4 - Y end address
+ * %a5 - Y factor
+ * %a6 - BU factor
+ * %d0 - scratch
+ * %d1 - B, previous Y \ alternating
+ * %d2 - U / B, previous Y /
+ * %d3 - V / G
+ * %d4 - R / output pixel
+ * %d5 - GU factor
+ * %d6 - GV factor
+ * %d7 - RGB signed -> unsigned conversion mask
+ */
+ .align 2
+ .global lcd_write_yuv420_lines
+ .type lcd_write_yuv420_lines, @function
+
+lcd_write_yuv420_lines:
+ /* Entry: stack args are (Y data, C data, C width); see the register
+ * usage table in the header comment above. */
+ lea.l (-44, %sp), %sp /* free up some registers */
+ movem.l %d2-%d7/%a2-%a6, (%sp)
+
+ lea.l 0xf0000002, %a0 /* LCD data port */
+ movem.l (44+4, %sp), %a1-%a3 /* Y data, C data, C width */
+ lea.l (%a1, %a3*2), %a4 /* Y end address */
+
+ move.l #19611723, %a5 /* y factor */
+ move.l #33976259, %a6 /* bu factor */
+ move.l #-6406711, %d5 /* gu factor */
+ move.l #-13692816, %d6 /* gv factor */
+ move.l #0x8410, %d7 /* bitmask for signed->unsigned conversion
+ * of R, G and B within RGB565 at once */
+
+ /* chroma for first 2x2 pixel block */
+ clr.l %d3 /* load v component */
+ move.b (%a2, %a3), %d3
+ clr.l %d2 /* load u component */
+ move.b (%a2)+, %d2
+ moveq.l #-128, %d0
+ add.l %d0, %d2
+ add.l %d0, %d3
+
+ mac.l %a6, %d2, %acc0 /* bu */
+ mac.l %d5, %d2, %acc1 /* gu */
+ mac.l %d6, %d3, %acc1 /* gv */
+ move.l #26881894, %d0 /* rv factor */
+ mac.l %d0, %d3, %acc2 /* rv */
+
+ /* luma for very first pixel (top left) */
+ clr.l %d1
+ move.b (%a1, %a3*2), %d1
+ /* NOTE(review): luma bias is -126 rather than the nominal -128;
+ * presumably an intentional rounding/level tweak -- confirm */
+ moveq.l #-126, %d0
+ add.l %d1, %d0 /* y' (-0.5 ... +0.5) */
+ mac.l %a5, %d0, %acc0
+ mac.l %a5, %d0, %acc1
+ mac.l %a5, %d0, %acc2
+
+ bra.b .yuv_line_entry
+
+/* Main loop: one 2x2 pixel block per pass. LCD writes are interleaved
+ * one pixel behind the EMAC computation to hide EMAC result latency. */
+.yuv_line_loop:
+ /* chroma for 2x2 pixel block */
+ clr.l %d3 /* load v component */
+ move.b (%a2, %a3), %d3
+ clr.l %d2 /* load u component */
+ move.b (%a2)+, %d2
+ moveq.l #-128, %d0
+ add.l %d0, %d2
+ add.l %d0, %d3
+
+ mac.l %a6, %d2, %acc0 /* bu */
+ mac.l %d5, %d2, %acc1 /* gu */
+ mac.l %d6, %d3, %acc1 /* gv */
+ move.l #26881894, %d0 /* rv factor */
+ mac.l %d0, %d3, %acc2 /* rv */
+
+ /* luma for first pixel (top left) */
+ clr.l %d1
+ move.b (%a1, %a3*2), %d1
+ moveq.l #-126, %d0
+ add.l %d1, %d0 /* y' (-0.5 ... +0.5) */
+ mac.l %a5, %d0, %acc0
+ mac.l %a5, %d0, %acc1
+ mac.l %a5, %d0, %acc2
+
+ move.w %d4, (%a0)
+ /* LCD write is delayed one pixel to use it for filling the EMAC latency */
+
+ /* convert to RGB565, pack and output */
+.yuv_line_entry:
+ moveq.l #27, %d0
+ move.l %acc0, %d2
+ move.l %acc1, %d3
+ move.l %acc2, %d4
+ lsr.l %d0, %d2 /* B >> 27 */
+ lsr.l %d0, %d4 /* R >> 27 */
+ moveq.l #26, %d0
+ lsr.l %d0, %d3 /* G >> 26 */
+ lsl.l #6, %d4
+ or.l %d3, %d4 /* combine R and G */
+ lsl.l #5, %d4
+ or.l %d2, %d4 /* combine with B -> RGB565 */
+ eor.l %d7, %d4 /* signed -> unsigned per component */
+
+ /* luma for second pixel (bottom left) as delta from the first */
+ clr.l %d2
+ move.b (%a1)+, %d2
+ move.l %d2, %d0
+ sub.l %d1, %d0
+ mac.l %a5, %d0, %acc0
+ mac.l %a5, %d0, %acc1
+ mac.l %a5, %d0, %acc2
+
+ move.w %d4, (%a0)
+ /* LCD write is delayed one pixel to use it for filling the EMAC latency */
+
+ /* convert to RGB565, pack and output */
+ moveq.l #27, %d0
+ move.l %acc0, %d1
+ move.l %acc1, %d3
+ move.l %acc2, %d4
+ lsr.l %d0, %d1
+ lsr.l %d0, %d4
+ moveq.l #26, %d0
+ lsr.l %d0, %d3
+ lsl.l #6, %d4
+ or.l %d3, %d4
+ lsl.l #5, %d4
+ or.l %d1, %d4
+ eor.l %d7, %d4
+
+ /* luma for third pixel (top right) as delta from the second */
+ clr.l %d1
+ move.b (%a1, %a3*2), %d1
+ move.l %d1, %d0
+ sub.l %d2, %d0
+ mac.l %a5, %d0, %acc0
+ mac.l %a5, %d0, %acc1
+ mac.l %a5, %d0, %acc2
+
+ move.w %d4, (%a0)
+ /* LCD write is delayed one pixel to use it for filling the EMAC latency */
+
+ /* convert to RGB565, pack and output */
+ moveq.l #27, %d0
+ move.l %acc0, %d2
+ move.l %acc1, %d3
+ move.l %acc2, %d4
+ lsr.l %d0, %d2
+ lsr.l %d0, %d4
+ moveq.l #26, %d0
+ lsr.l %d0, %d3
+ lsl.l #6, %d4
+ or.l %d3, %d4
+ lsl.l #5, %d4
+ or.l %d2, %d4
+ eor.l %d7, %d4
+
+ /* luma for fourth pixel (bottom right) as delta from the third */
+ clr.l %d2
+ move.b (%a1)+, %d2
+ move.l %d2, %d0
+ sub.l %d1, %d0
+ mac.l %a5, %d0, %acc0
+ mac.l %a5, %d0, %acc1
+ mac.l %a5, %d0, %acc2
+
+ move.w %d4, (%a0)
+ /* LCD write is delayed one pixel to use it for filling the EMAC latency */
+
+ /* convert to RGB565, pack and output */
+ moveq.l #27, %d0
+ movclr.l %acc0, %d1 /* movclr zeroes the accumulators for the next block */
+ movclr.l %acc1, %d3
+ movclr.l %acc2, %d4
+ lsr.l %d0, %d1
+ lsr.l %d0, %d4
+ moveq.l #26, %d0
+ lsr.l %d0, %d3
+ lsl.l #6, %d4
+ or.l %d3, %d4
+ lsl.l #5, %d4
+ or.l %d1, %d4
+ eor.l %d7, %d4
+
+ cmp.l %a1, %a4 /* run %a1 up to end of line */
+ bhi.w .yuv_line_loop
+
+ move.w %d4, (%a0) /* write (very) last pixel */
+
+ movem.l (%sp), %d2-%d7/%a2-%a6
+ lea.l (44, %sp), %sp /* restore registers */
+ rts
diff --git a/firmware/target/coldfire/iriver/h300/lcd-h300.c b/firmware/target/coldfire/iriver/h300/lcd-h300.c
index 8d5370cdcf..7e73ea3905 100644
--- a/firmware/target/coldfire/iriver/h300/lcd-h300.c
+++ b/firmware/target/coldfire/iriver/h300/lcd-h300.c
@@ -325,6 +325,67 @@ bool lcd_active(void)
/*** update functions ***/
+/* Line write helper function for lcd_yuv_blit. Write two lines of yuv420.
+ * y should have two lines of Y back to back, 2nd line first.
+ * c should contain the Cb and Cr data for the two lines of Y back to back.
+ * Needs EMAC set to saturated, signed integer mode.
+ */
+extern void lcd_write_yuv420_lines(const unsigned char *y,
+ const unsigned char *c, int cwidth);
+
+/* Performance function to blit a YUV420 bitmap directly to the LCD.
+ * src[0..2] are the Y, Cb (U) and Cr (V) planes.
+ * src_x, src_y, width and height should be even;
+ * x, y, width and height have to be within LCD bounds.
+ */
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ /* IRAM Y, Cb and Cr buffers. */
+ unsigned char y_ibuf[LCD_WIDTH*2];
+ unsigned char c_ibuf[LCD_WIDTH];
+ const unsigned char *ysrc, *usrc, *vsrc;
+ const unsigned char *ysrc_max;
+
+ if (!display_on)
+ return;
+
+ LCD_MUTEX_LOCK();
+ width &= ~1; /* stay on the safe side */
+ height &= ~1;
+
+ lcd_write_reg(R_ENTRY_MODE, R_ENTRY_MODE_HORZ);
+ /* Set start position and window */
+ lcd_write_reg(R_VERT_RAM_ADDR_POS, ((xoffset + 219) << 8) | xoffset);
+
+ /* Chroma planes are subsampled 2x2 relative to Y. */
+ ysrc = src[0] + src_y * stride + src_x;
+ usrc = src[1] + (src_y * stride >> 2) + (src_x >> 1);
+ vsrc = src[2] + (src_y * stride >> 2) + (src_x >> 1);
+ ysrc_max = ysrc + height * stride;
+
+ /* The asm helper requires the EMAC in saturating mode.
+ * NOTE(review): unlike some sibling drivers, the previous MACSR is
+ * not saved/restored here -- confirm no concurrent EMAC user. */
+ coldfire_set_macsr(EMAC_SATURATE);
+ do
+ {
+ /* Two-row window: each pass converts and emits two LCD lines. */
+ lcd_write_reg(R_HORIZ_RAM_ADDR_POS, ((y + 1) << 8) | y);
+ lcd_write_reg(R_RAM_ADDR_SET, ((x+xoffset) << 8) | y);
+ lcd_begin_write_gram();
+
+ /* Second Y line goes first, as lcd_write_yuv420_lines expects. */
+ memcpy(y_ibuf + width, ysrc, width);
+ memcpy(y_ibuf, ysrc + stride, width);
+ memcpy(c_ibuf, usrc, width >> 1);
+ memcpy(c_ibuf + (width >> 1), vsrc, width >> 1);
+ lcd_write_yuv420_lines(y_ibuf, c_ibuf, width >> 1);
+
+ y += 2;
+ ysrc += 2 * stride;
+ usrc += stride >> 1;
+ vsrc += stride >> 1;
+ }
+ while (ysrc < ysrc_max);
+
+ LCD_MUTEX_UNLOCK();
+}
+
#ifndef BOOTLOADER
/* LCD DMA ISR */
void DMA3(void) __attribute__ ((interrupt_handler, section(".icode")));
diff --git a/firmware/target/mips/ingenic_jz47xx/lcd-jz4740.c b/firmware/target/mips/ingenic_jz47xx/lcd-jz4740.c
index d2a1d759d0..a2d5b73ea8 100644
--- a/firmware/target/mips/ingenic_jz47xx/lcd-jz4740.c
+++ b/firmware/target/mips/ingenic_jz47xx/lcd-jz4740.c
@@ -158,3 +158,65 @@ void lcd_update(void)
lcd_update_rect(0, 0, LCD_WIDTH, LCD_HEIGHT);
}
+
+/* Blit a YUV420 bitmap to the LCD using the JZ4740 IPU for colour-space
+ * conversion; the LCD framebuffer is (mis)used as the IPU's temporary
+ * RGB565 output buffer before lcd_update_rect() pushes it out. */
+void lcd_blit_yuv(unsigned char * const src[3],
+ int src_x, int src_y, int stride,
+ int x, int y, int width, int height)
+{
+ unsigned char const * yuv_src[3];
+ register off_t z;
+
+ if(!lcd_is_on)
+ return;
+
+ /* Chroma planes are subsampled 2x2 relative to Y; the V plane uses
+ * the same offset as U. */
+ z = stride * src_y;
+ yuv_src[0] = src[0] + z + src_x;
+ yuv_src[1] = src[1] + (z >> 2) + (src_x >> 1);
+ yuv_src[2] = src[2] + (yuv_src[1] - src[1]);
+
+ /* Write back/invalidate caches so the IPU sees current source data.
+ * XXX range: flushes everything; restricting to the source/dest
+ * ranges would be cheaper. */
+ commit_discard_dcache(); // XXX range
+
+ __cpm_start_ipu();
+
+ /* Put the IPU into a known idle state before reprogramming it. */
+ IPU_STOP_IPU();
+ IPU_RESET_IPU();
+ IPU_CLEAR_END_FLAG();
+
+ IPU_DISABLE_RSIZE(); /* no resizing: 1:1 blit */
+ IPU_DISABLE_IRQ(); /* completion is polled below, not interrupt-driven */
+
+ IPU_SET_INFMT(INFMT_YUV420);
+ IPU_SET_OUTFMT(OUTFMT_RGB565);
+
+ IPU_SET_IN_FM(width, height);
+ IPU_SET_Y_STRIDE(stride);
+ IPU_SET_UV_STRIDE(stride, stride);
+
+ /* The IPU is a DMA engine: it needs physical addresses. */
+ IPU_SET_Y_ADDR(PHYSADDR((unsigned long)yuv_src[0]));
+ IPU_SET_U_ADDR(PHYSADDR((unsigned long)yuv_src[1]));
+ IPU_SET_V_ADDR(PHYSADDR((unsigned long)yuv_src[2]));
+ IPU_SET_OUT_ADDR(PHYSADDR((unsigned long)FBADDR(y,x)));
+
+ /* NOTE(review): output frame/stride and the update rect below swap
+ * width/height and x/y -- presumably the panel is mounted rotated
+ * relative to the framebuffer; confirm against lcd_update_rect(). */
+ IPU_SET_OUT_FM(height, width);
+ IPU_SET_OUT_STRIDE(height);
+
+ /* BT.601-style YUV->RGB conversion coefficients. */
+ IPU_SET_CSC_C0_COEF(YUV_CSC_C0);
+ IPU_SET_CSC_C1_COEF(YUV_CSC_C1);
+ IPU_SET_CSC_C2_COEF(YUV_CSC_C2);
+ IPU_SET_CSC_C3_COEF(YUV_CSC_C3);
+ IPU_SET_CSC_C4_COEF(YUV_CSC_C4);
+
+ IPU_RUN_IPU();
+
+ /* Busy-wait for conversion to finish (or the IPU to stop). */
+ while(!(IPU_POLLING_END_FLAG()) && IPU_IS_ENABLED());
+
+ IPU_CLEAR_END_FLAG();
+ IPU_STOP_IPU();
+ IPU_RESET_IPU();
+
+ __cpm_stop_ipu();
+
+ /* YUV speed is limited by LCD speed */
+ lcd_update_rect(y, x, height, width);
+}