summary refs log tree commit diff stats
path: root/firmware/target/arm
diff options
context:
space:
mode:
author Jens Arnold <amiconn@rockbox.org> 2009-07-15 22:14:21 +0000
committer Jens Arnold <amiconn@rockbox.org> 2009-07-15 22:14:21 +0000
commit 989021ed3cca4a76a14062bb2b64109cf77027b6 (patch)
tree 7d1de153662e4b120665d674ecb29dd7526a3767 /firmware/target/arm
parent 27f5cdaad81f419570f7e23e5db0026c97d78ecd (diff)
download rockbox-989021ed3cca4a76a14062bb2b64109cf77027b6.tar.gz
rockbox-989021ed3cca4a76a14062bb2b64109cf77027b6.tar.bz2
rockbox-989021ed3cca4a76a14062bb2b64109cf77027b6.zip
Apply some ARMv6 optimisations to YUV blitting. Speeds up mpegplayer on Gigabeat S by ~2% in undithered and ~7.5% in dithered mode.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@21889 a1c6a512-1295-4272-9138-f99709370657
Diffstat (limited to 'firmware/target/arm')
-rw-r--r-- firmware/target/arm/lcd-as-memframe.S 68
1 files changed, 67 insertions, 1 deletions
diff --git a/firmware/target/arm/lcd-as-memframe.S b/firmware/target/arm/lcd-as-memframe.S
index 2d0aff937f..a6f9145fab 100644
--- a/firmware/target/arm/lcd-as-memframe.S
+++ b/firmware/target/arm/lcd-as-memframe.S
@@ -170,6 +170,11 @@ lcd_write_yuv420_lines:
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
+#if ARM_ARCH >= 6
+ usat r1, #5, r1 @ clamp b
+ usat lr, #5, lr @ clamp r
+ usat r7, #6, r7 @ clamp g
+#else
orr r12, r1, lr @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
@@ -184,6 +189,7 @@ lcd_write_yuv420_lines:
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
+#endif
@
ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
@
@@ -206,6 +212,11 @@ lcd_write_yuv420_lines:
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
+#if ARM_ARCH >= 6
+ usat r1, #5, r1 @ clamp b
+ usat lr, #5, lr @ clamp r
+ usat r7, #6, r7 @ clamp g
+#else
orr r12, r1, lr @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
@@ -220,6 +231,7 @@ lcd_write_yuv420_lines:
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
+#endif
@
ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
@
@@ -245,6 +257,11 @@ lcd_write_yuv420_lines:
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
+#if ARM_ARCH >= 6
+ usat r1, #5, r1 @ clamp b
+ usat lr, #5, lr @ clamp r
+ usat r7, #6, r7 @ clamp g
+#else
orr r12, r1, lr @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
@@ -259,6 +276,7 @@ lcd_write_yuv420_lines:
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
+#endif
@
ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
@
@@ -281,6 +299,11 @@ lcd_write_yuv420_lines:
add lr, r9, r7, asr #8 @ lr = r = (Y >> 9) + rv
add r7, r10, r7, asr #7 @ r7 = g = (Y >> 8) + guv
@
+#if ARM_ARCH >= 6
+ usat r1, #5, r1 @ clamp b
+ usat lr, #5, lr @ clamp r
+ usat r7, #6, r7 @ clamp g
+#else
orr r12, r1, lr @ check if clamping is needed...
orr r12, r12, r7, asr #1 @ ...at all
cmp r12, #31 @
@@ -295,6 +318,7 @@ lcd_write_yuv420_lines:
mvnhi r7, r7, asr #31 @
andhi r7, r7, #63 @
15: @ no clamp @
+#endif
@
orr r12, r1, lr, lsl #11 @ r12 = b | (r << 11)
orr r12, r12, r7, lsl #5 @ r12 |= (g << 5)
@@ -425,6 +449,16 @@ lcd_write_yuv420_lines_odither:
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
+#if ARM_ARCH >= 6
+ usat r11, #5, r11, asr #11 @ clamp r
+ usat r7, #6, r7, asr #9 @ clamp g
+ usat r1, #5, r1, asr #10 @ clamp b
+ @
+ ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
+ @
+ orr r1, r1, r11, lsl #11 @ r1 = b | (r << 11)
+ orr r1, r1, r7, lsl #5 @ r1 |= (g << 5)
+#else
orr r12, r1, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
@@ -444,6 +478,7 @@ lcd_write_yuv420_lines_odither:
and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r1, r11, r1, lsr #10 @ (b >> 10)
+#endif
@
#if LCD_WIDTH >= LCD_HEIGHT
strh r1, [r0] @
@@ -477,6 +512,16 @@ lcd_write_yuv420_lines_odither:
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
+#if ARM_ARCH >= 6
+ usat r11, #5, r11, asr #11 @ clamp r
+ usat r7, #6, r7, asr #9 @ clamp g
+ usat r1, #5, r1, asr #10 @ clamp b
+ @
+ ldrb r12, [r4], #1 @ r12 = Y' = *(Y'_p++)
+ @
+ orr r1, r1, r11, lsl #11 @ r1 = b | (r << 11)
+ orr r1, r1, r7, lsl #5 @ r1 |= (g << 5)
+#else
orr r12, r1, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
@@ -496,6 +541,7 @@ lcd_write_yuv420_lines_odither:
and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r1, r11, r1, lsr #10 @ (b >> 10)
+#endif
@
#if LCD_WIDTH >= LCD_HEIGHT
add r0, r0, #2*LCD_WIDTH @
@@ -534,6 +580,16 @@ lcd_write_yuv420_lines_odither:
add r11, r11, r12, lsl #1 @ r = r11 + delta*2
add r7, r7, r12, lsr #1 @ g = r7 + delta/2
@
+#if ARM_ARCH >= 6
+ usat r11, #5, r11, asr #11 @ clamp r
+ usat r7, #6, r7, asr #9 @ clamp g
+ usat r1, #5, r1, asr #10 @ clamp b
+ @
+ ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
+ @
+ orr r1, r1, r11, lsl #11 @ r1 = b | (r << 11)
+ orr r1, r1, r7, lsl #5 @ r1 |= (g << 5)
+#else
orr r12, r1, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
@@ -547,12 +603,13 @@ lcd_write_yuv420_lines_odither:
mvnne r7, r12, lsr #15 @
15: @ no clamp @
@
- ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
+ ldrb r12, [r4, r3] @ r12 = Y' = *(Y'_p + stride)
@
and r11, r11, #0xf800 @ pack pixel
and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r1, r11, r1, lsr #10 @ (b >> 10)
+#endif
@
#if LCD_WIDTH >= LCD_HEIGHT
strh r1, [r0, #2]
@@ -586,6 +643,14 @@ lcd_write_yuv420_lines_odither:
add r11, r11, r14, lsl #1 @ r = r11 + delta*2
add r7, r7, r14, lsr #1 @ g = r7 + delta/2
@
+#if ARM_ARCH >= 6
+ usat r11, #5, r11, asr #11 @ clamp r
+ usat r7, #6, r7, asr #9 @ clamp g
+ usat r1, #5, r1, asr #10 @ clamp b
+ @
+ orr r1, r1, r11, lsl #11 @ r1 = b | (r << 11)
+ orr r1, r1, r7, lsl #5 @ r1 |= (g << 5)
+#else
orr r12, r1, r11, asr #1 @ check if clamping is needed...
orr r12, r12, r7 @ ...at all
movs r12, r12, asr #15 @
@@ -603,6 +668,7 @@ lcd_write_yuv420_lines_odither:
and r7, r7, #0x7e00 @ r1 = pixel = (r & 0xf800) |
orr r11, r11, r7, lsr #4 @ ((g & 0x7e00) >> 4) |
orr r1, r11, r1, lsr #10 @ (b >> 10)
+#endif
@
#if LCD_WIDTH >= LCD_HEIGHT
add r0, r0, #2*LCD_WIDTH