summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Chapman <dave@dchapman.com>2007-06-13 22:02:34 +0000
committerDave Chapman <dave@dchapman.com>2007-06-13 22:02:34 +0000
commit6b713820c180f3796c07c95826e1b1e00fdbca4f (patch)
treeba4308ac951fa4adb4c7185af1f3eb26ac14ed52
parentc7f9ca4067f26ba3d0471d50ed3f06b047171b50 (diff)
downloadrockbox-6b713820c180f3796c07c95826e1b1e00fdbca4f.tar.gz
rockbox-6b713820c180f3796c07c95826e1b1e00fdbca4f.zip
ARM assembler predictor decoding function. This increases the decoding speed of my -c1000 test track from around 94% realtime on an iPod to around 104% realtime, but yields only a tiny speedup (453% to 455%) on the Gigabeat. Including this optimisation, total decoding time for my 245.70s -c1000 test track on an iPod is 236.06s, with the predictor decoding taking 51.40s of that time - meaning the predictor decoding is only about 22% of the total decoding time.
git-svn-id: svn://svn.rockbox.org/rockbox/trunk@13626 a1c6a512-1295-4272-9138-f99709370657
-rw-r--r--apps/codecs/demac/libdemac/SOURCES3
-rw-r--r--apps/codecs/demac/libdemac/parser.h10
-rw-r--r--apps/codecs/demac/libdemac/predictor-arm.S507
-rw-r--r--apps/codecs/demac/libdemac/predictor.c2
4 files changed, 520 insertions, 2 deletions
diff --git a/apps/codecs/demac/libdemac/SOURCES b/apps/codecs/demac/libdemac/SOURCES
index 76b891a90d..c68fff104e 100644
--- a/apps/codecs/demac/libdemac/SOURCES
+++ b/apps/codecs/demac/libdemac/SOURCES
@@ -1,5 +1,8 @@
crc.c
predictor.c
+#ifdef CPU_ARM
+predictor-arm.S
+#endif
entropy.c
decoder.c
parser.c
diff --git a/apps/codecs/demac/libdemac/parser.h b/apps/codecs/demac/libdemac/parser.h
index 301cf4a5e1..4ef0977e6b 100644
--- a/apps/codecs/demac/libdemac/parser.h
+++ b/apps/codecs/demac/libdemac/parser.h
@@ -71,6 +71,10 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
/* Total size of all predictor histories - 50 * sizeof(int32_t) */
#define PREDICTOR_SIZE 50
+
+/* NOTE: This struct is used in predictor-arm.S - any updates need to
+ be reflected there. */
+
struct predictor_t
{
/* Filter histories */
@@ -79,10 +83,12 @@ struct predictor_t
int32_t YlastA;
int32_t XlastA;
- int32_t YfilterA;
- int32_t XfilterA;
+ /* NOTE: The order of the next four fields is important for
+ predictor-arm.S */
int32_t YfilterB;
+ int32_t XfilterA;
int32_t XfilterB;
+ int32_t YfilterA;
/* Adaption co-efficients */
int32_t YcoeffsA[4];
diff --git a/apps/codecs/demac/libdemac/predictor-arm.S b/apps/codecs/demac/libdemac/predictor-arm.S
new file mode 100644
index 0000000000..1a04b5d66a
--- /dev/null
+++ b/apps/codecs/demac/libdemac/predictor-arm.S
@@ -0,0 +1,507 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id: predictor.c 13597 2007-06-08 22:35:26Z dave $
+
+Copyright (C) Dave Chapman 2007
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+ .section .text,"ax",%progbits
+
+ .align 2
+
+ .global predictor_decode_stereo
+ .type predictor_decode_stereo,%function
+
+
+/* NOTE: The following need to be kept in sync with parser.h
+
+   HISTORY_SIZE is a count of int32_t entries; the code below multiplies
+   it by 4 to get a byte distance.  The YDELAYA .. XADAPTCOEFFSB values
+   are byte offsets from p->buf (four times the array index), so that
+   e.g. YDELAYA-12 addresses p->buf[YDELAYA-3] in the C naming. */
+
+#define HISTORY_SIZE 512
+
+#define YDELAYA 200
+#define YDELAYB 168
+#define XDELAYA 136
+#define XDELAYB 104
+#define YADAPTCOEFFSA 72
+#define XADAPTCOEFFSA 56
+#define YADAPTCOEFFSB 40
+#define XADAPTCOEFFSB 20
+
+/* struct predictor_t members:
+
+   Each value is the byte offset of the member from the start of the
+   struct, used as an immediate displacement from the struct pointer.
+   YfilterB/XfilterA and XfilterB/YfilterA are adjacent pairs because
+   each pair is fetched with a single two-register ldmia. */
+#define buf 0 /* int32_t* buf */
+
+#define YlastA 4 /* int32_t YlastA; */
+#define XlastA 8 /* int32_t XlastA; */
+
+#define YfilterB 12 /* int32_t YfilterB; */
+#define XfilterA 16 /* int32_t XfilterA; */
+
+#define XfilterB 20 /* int32_t XfilterB; */
+#define YfilterA 24 /* int32_t YfilterA; */
+
+#define YcoeffsA 28 /* int32_t YcoeffsA[4]; */
+#define XcoeffsA 44 /* int32_t XcoeffsA[4]; */
+#define YcoeffsB 60 /* int32_t YcoeffsB[5]; */
+#define XcoeffsB 80 /* int32_t XcoeffsB[5]; */
+
+#define historybuffer 100 /* int32_t historybuffer[] */
+
+@ int predictor_decode_stereo(struct predictor_t* p,
+@                             int32_t* decoded0,
+@                             int32_t* decoded1,
+@                             int count)
+@
+@ Runs the Y predictor on each sample of decoded0 and the X predictor on
+@ each sample of decoded1, overwriting the samples in place with the new
+@ p->YfilterA / p->XfilterA values and adapting the filter coefficients
+@ held in *p.  Processes count sample pairs.
+@
+@ Returns 0 in r0, like the C implementation.  A count <= 0 returns
+@ immediately without touching *p or the sample buffers.
+@
+@ Register usage inside the loop:
+@
+@ r0-r11 - scratch
+@ r12    - struct predictor_t* p
+@ r14    - int32_t* p->buf (cached; written back to p->buf on exit)
+@
+@ Stack usage inside the loop:
+@
+@ [sp]     - decoded0 (advanced after every sample)
+@ [sp, #4] - decoded1 (advanced after every sample)
+@ [sp, #8] - number of sample pairs still to process
+
+predictor_decode_stereo:
+    cmp     r3, #0                  @ The loop below tests its counter only
+    movle   r0, #0                  @ after the first pass, so reject an
+    movle   pc, lr                  @ empty request here: return 0
+
+    stmdb   sp!, {r1-r11, lr}
+
+    @ r1 (decoded0) is [sp]
+    @ r2 (decoded1) is [sp, #4]
+    @ r3 (count) is [sp, #8]
+
+    mov     r12, r0                 @ r12 := p
+    ldr     r14, [r0]               @ r14 := p->buf
+
+loop:
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@ PREDICTOR Y
+
+@ Predictor Y, Filter A
+
+    ldr     r10, [r12, #YlastA]     @ r10 := p->YlastA
+    add     r11, r14, #YDELAYA-12   @ r11 := &p->buf[YDELAYA-3]
+
+    ldmia   r11, {r2-r4}            @ r2 := p->buf[YDELAYA-3]
+                                    @ r3 := p->buf[YDELAYA-2]
+                                    @ r4 := p->buf[YDELAYA-1]
+
+    subs    r4, r10, r4             @ r4 := r10 - r4 (flags used below)
+
+    add     r1, r12, #YcoeffsA
+    ldmia   r1, {r6-r9}             @ r6 := p->YcoeffsA[0]
+                                    @ r7 := p->YcoeffsA[1]
+                                    @ r8 := p->YcoeffsA[2]
+                                    @ r9 := p->YcoeffsA[3]
+
+    mul     r0, r10, r6             @ r0 := p->buf[YDELAYA] * p->YcoeffsA[0]
+    mla     r0, r4, r7, r0          @ r0 += p->buf[YDELAYA-1] * p->YcoeffsA[1]
+    mla     r0, r3, r8, r0          @ r0 += p->buf[YDELAYA-2] * p->YcoeffsA[2]
+    mla     r0, r2, r9, r0          @ r0 += p->buf[YDELAYA-3] * p->YcoeffsA[3]
+
+    add     r11, r14, #YDELAYA-4
+    stmia   r11, {r4, r10}          @ p->buf[YDELAYA-1] = r4
+                                    @ p->buf[YDELAYA] = r10
+
+    @ Flags are still those of the subs above (mul/mla/add/stm leave
+    @ them alone).  SIGN(x) is -1 for x > 0, 1 for x < 0, 0 for x == 0.
+    mvngt   r4, #0
+    movlt   r4, #1                  @ r4 := SIGN(r4) (see .c for SIGN macro)
+
+    cmp     r10, #0
+    mvngt   r10, #0
+    movlt   r10, #1                 @ r10 := SIGN(r10) (see .c for SIGN macro)
+
+    add     r1, r14, #YADAPTCOEFFSA-4
+    stmia   r1, {r4, r10}           @ p->buf[YADAPTCOEFFSA-1] := r4
+                                    @ p->buf[YADAPTCOEFFSA] := r10
+
+    @ NOTE: r0 now contains predictionA - do not overwrite.
+
+@ Predictor Y, Filter B
+
+    add     r2, r12, #YfilterB
+    ldmia   r2, {r2, r11}           @ r2 := p->YfilterB
+                                    @ r11 := p->XfilterA
+
+    rsb     r2, r2, r2, lsl #5      @ r2 := r2 * 32 - r2 ( == r2*31)
+    sub     r10, r11, r2, asr #5    @ r10 (p->buf[YDELAYB]) := r11 - (r2 >> 5)
+
+    str     r11, [r12, #YfilterB]   @ p->YfilterB := r11 (p->XfilterA)
+
+    add     r11, r14, #YDELAYB-16   @ r11 := &p->buf[YDELAYB-4]
+
+    ldmia   r11, {r2-r5}            @ r2 := p->buf[YDELAYB-4]
+                                    @ r3 := p->buf[YDELAYB-3]
+                                    @ r4 := p->buf[YDELAYB-2]
+                                    @ r5 := p->buf[YDELAYB-1]
+
+    subs    r5, r10, r5             @ r5 := r10 - r5 (flags used below)
+
+    add     r1, r12, #YcoeffsB
+    ldmia   r1, {r6,r7,r8,r9,r11}   @ r6 := p->YcoeffsB[0]
+                                    @ r7 := p->YcoeffsB[1]
+                                    @ r8 := p->YcoeffsB[2]
+                                    @ r9 := p->YcoeffsB[3]
+                                    @ r11 := p->YcoeffsB[4]
+
+    mul     r1, r10, r6             @ r1 := p->buf[YDELAYB] * p->YcoeffsB[0]
+    mla     r1, r5, r7, r1          @ r1 += p->buf[YDELAYB-1] * p->YcoeffsB[1]
+    mla     r1, r4, r8, r1          @ r1 += p->buf[YDELAYB-2] * p->YcoeffsB[2]
+    mla     r1, r3, r9, r1          @ r1 += p->buf[YDELAYB-3] * p->YcoeffsB[3]
+    mla     r1, r2, r11, r1         @ r1 += p->buf[YDELAYB-4] * p->YcoeffsB[4]
+
+    add     r2, r14, #YDELAYB-4     @ r2 := &p->buf[YDELAYB-1]
+    stmia   r2, {r5, r10}           @ p->buf[YDELAYB-1] = r5
+                                    @ p->buf[YDELAYB] = r10
+
+    @ Flags are still those of the subs above
+    mvngt   r5, #0
+    movlt   r5, #1                  @ r5 := SIGN(r5) (see .c for SIGN macro)
+
+    cmp     r10, #0
+    mvngt   r10, #0
+    movlt   r10, #1                 @ r10 := SIGN(r10) (see .c for SIGN macro)
+
+    add     r2, r14, #YADAPTCOEFFSB-4
+    stmia   r2, {r5, r10}           @ p->buf[YADAPTCOEFFSB-1] := r5
+                                    @ p->buf[YADAPTCOEFFSB] := r10
+
+    @ r0 still contains predictionA
+    @ r1 contains predictionB
+
+    @ Finish Predictor Y
+
+    ldr     r2, [sp]                @ r2 := decoded0
+    add     r0, r0, r1, asr #1      @ r0 := r0 + (r1 >> 1)
+    ldr     r3, [r2]                @ r3 := *decoded0
+    add     r1, r3, r0, asr #10     @ r1 := r3 + (r0 >> 10)
+    str     r1, [r12, #YlastA]      @ p->YlastA := r1
+
+    ldr     r4, [r12, #YfilterA]    @ r4 := p->YfilterA
+    rsb     r4, r4, r4, lsl #5      @ r4 := r4 * 32 - r4 ( == r4*31)
+    add     r1, r1, r4, asr #5      @ r1 := r1 + (r4 >> 5)
+    str     r1, [r12, #YfilterA]    @ p->YfilterA := r1
+
+    @ r1 contains p->YfilterA
+    @ r2 contains decoded0
+    @ r3 contains *decoded0
+
+    @ r6, r7, r8, r9, r11 contain p->YcoeffsB[0..4]
+    @ r5, r10 contain p->buf[YADAPTCOEFFSB-1] and p->buf[YADAPTCOEFFSB]
+
+    @ The flags from this cmp drive both the beq and the blt below; the
+    @ stores, add and ldmia in between do not modify them.
+    cmp     r3, #0
+    stmia   r2!, {r1}               @ *(decoded0++) := r1 (p->YfilterA)
+    str     r2, [sp]                @ save decoded0
+    beq     2f                      @ *decoded0 == 0: no adaption
+
+    add     r1, r14, #YADAPTCOEFFSB-16
+    ldmia   r1, {r2, r3, r4}        @ r2 := p->buf[YADAPTCOEFFSB-4]
+                                    @ r3 := p->buf[YADAPTCOEFFSB-3]
+                                    @ r4 := p->buf[YADAPTCOEFFSB-2]
+    blt     1f
+
+    @ *decoded0 > 0
+
+    sub     r6, r6, r10             @ r6 := p->YcoeffsB[0] - p->buf[YADAPTCOEFFSB]
+    sub     r7, r7, r5              @ r7 := p->YcoeffsB[1] - p->buf[YADAPTCOEFFSB-1]
+    sub     r8, r8, r4              @ r8 := p->YcoeffsB[2] - p->buf[YADAPTCOEFFSB-2]
+    sub     r9, r9, r3              @ r9 := p->YcoeffsB[3] - p->buf[YADAPTCOEFFSB-3]
+    sub     r11, r11, r2            @ r11 := p->YcoeffsB[4] - p->buf[YADAPTCOEFFSB-4]
+
+    add     r0, r12, #YcoeffsB
+    stmia   r0, {r6,r7,r8,r9,r11}   @ Save p->YcoeffsB[]
+
+    add     r1, r12, #YcoeffsA
+    ldmia   r1, {r2-r5}             @ r2 := p->YcoeffsA[0]
+                                    @ r3 := p->YcoeffsA[1]
+                                    @ r4 := p->YcoeffsA[2]
+                                    @ r5 := p->YcoeffsA[3]
+
+    add     r0, r14, #YADAPTCOEFFSA-12
+    ldmia   r0, {r6-r9}             @ r6 := p->buf[YADAPTCOEFFSA-3]
+                                    @ r7 := p->buf[YADAPTCOEFFSA-2]
+                                    @ r8 := p->buf[YADAPTCOEFFSA-1]
+                                    @ r9 := p->buf[YADAPTCOEFFSA]
+
+    sub     r2, r2, r9              @ r2 := p->YcoeffsA[0] - p->buf[YADAPTCOEFFSA]
+    sub     r3, r3, r8              @ r3 := p->YcoeffsA[1] - p->buf[YADAPTCOEFFSA-1]
+    sub     r4, r4, r7              @ r4 := p->YcoeffsA[2] - p->buf[YADAPTCOEFFSA-2]
+    sub     r5, r5, r6              @ r5 := p->YcoeffsA[3] - p->buf[YADAPTCOEFFSA-3]
+
+    stmia   r1, {r2-r5}             @ Save p->YcoeffsA
+    b       2f
+
+
+1:  @ *decoded0 < 0
+
+    add     r6, r6, r10             @ r6 := p->YcoeffsB[0] + p->buf[YADAPTCOEFFSB]
+    add     r7, r7, r5              @ r7 := p->YcoeffsB[1] + p->buf[YADAPTCOEFFSB-1]
+    add     r8, r8, r4              @ r8 := p->YcoeffsB[2] + p->buf[YADAPTCOEFFSB-2]
+    add     r9, r9, r3              @ r9 := p->YcoeffsB[3] + p->buf[YADAPTCOEFFSB-3]
+    add     r11, r11, r2            @ r11 := p->YcoeffsB[4] + p->buf[YADAPTCOEFFSB-4]
+
+    add     r0, r12, #YcoeffsB
+    stmia   r0, {r6,r7,r8,r9,r11}   @ Save p->YcoeffsB[]
+
+    add     r1, r12, #YcoeffsA
+    ldmia   r1, {r2-r5}             @ r2 := p->YcoeffsA[0]
+                                    @ r3 := p->YcoeffsA[1]
+                                    @ r4 := p->YcoeffsA[2]
+                                    @ r5 := p->YcoeffsA[3]
+
+    add     r0, r14, #YADAPTCOEFFSA-12
+    ldmia   r0, {r6-r9}             @ r6 := p->buf[YADAPTCOEFFSA-3]
+                                    @ r7 := p->buf[YADAPTCOEFFSA-2]
+                                    @ r8 := p->buf[YADAPTCOEFFSA-1]
+                                    @ r9 := p->buf[YADAPTCOEFFSA]
+
+    add     r2, r2, r9              @ r2 := p->YcoeffsA[0] + p->buf[YADAPTCOEFFSA]
+    add     r3, r3, r8              @ r3 := p->YcoeffsA[1] + p->buf[YADAPTCOEFFSA-1]
+    add     r4, r4, r7              @ r4 := p->YcoeffsA[2] + p->buf[YADAPTCOEFFSA-2]
+    add     r5, r5, r6              @ r5 := p->YcoeffsA[3] + p->buf[YADAPTCOEFFSA-3]
+
+    stmia   r1, {r2-r5}             @ Save p->YcoeffsA
+
+2:
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@ PREDICTOR X
+
+@ Predictor X, Filter A
+
+    ldr     r10, [r12, #XlastA]     @ r10 := p->XlastA
+    add     r11, r14, #XDELAYA-12   @ r11 := &p->buf[XDELAYA-3]
+
+    ldmia   r11, {r2-r4}            @ r2 := p->buf[XDELAYA-3]
+                                    @ r3 := p->buf[XDELAYA-2]
+                                    @ r4 := p->buf[XDELAYA-1]
+
+    subs    r4, r10, r4             @ r4 := r10 - r4 (flags used below)
+
+    add     r1, r12, #XcoeffsA
+    ldmia   r1, {r6-r9}             @ r6 := p->XcoeffsA[0]
+                                    @ r7 := p->XcoeffsA[1]
+                                    @ r8 := p->XcoeffsA[2]
+                                    @ r9 := p->XcoeffsA[3]
+
+    mul     r0, r10, r6             @ r0 := p->buf[XDELAYA] * p->XcoeffsA[0]
+    mla     r0, r4, r7, r0          @ r0 += p->buf[XDELAYA-1] * p->XcoeffsA[1]
+    mla     r0, r3, r8, r0          @ r0 += p->buf[XDELAYA-2] * p->XcoeffsA[2]
+    mla     r0, r2, r9, r0          @ r0 += p->buf[XDELAYA-3] * p->XcoeffsA[3]
+
+    add     r11, r14, #XDELAYA-4
+    stmia   r11, {r4, r10}          @ p->buf[XDELAYA-1] = r4
+                                    @ p->buf[XDELAYA] = r10
+
+    @ Flags are still those of the subs above
+    mvngt   r4, #0
+    movlt   r4, #1                  @ r4 := SIGN(r4) (see .c for SIGN macro)
+
+    cmp     r10, #0
+    mvngt   r10, #0
+    movlt   r10, #1                 @ r10 := SIGN(r10) (see .c for SIGN macro)
+
+    add     r1, r14, #XADAPTCOEFFSA-4
+    stmia   r1, {r4, r10}           @ p->buf[XADAPTCOEFFSA-1] := r4
+                                    @ p->buf[XADAPTCOEFFSA] := r10
+
+    @ NOTE: r0 now contains predictionA - do not overwrite.
+
+@ Predictor X, Filter B
+
+    add     r2, r12, #XfilterB
+    ldmia   r2, {r2, r11}           @ r2 := p->XfilterB
+                                    @ r11 := p->YfilterA
+
+    rsb     r2, r2, r2, lsl #5      @ r2 := r2 * 32 - r2 ( == r2*31)
+    sub     r10, r11, r2, asr #5    @ r10 (p->buf[XDELAYB]) := r11 - (r2 >> 5)
+
+    str     r11, [r12, #XfilterB]   @ p->XfilterB := r11 (p->YfilterA)
+
+    add     r11, r14, #XDELAYB-16   @ r11 := &p->buf[XDELAYB-4]
+
+    ldmia   r11, {r2-r5}            @ r2 := p->buf[XDELAYB-4]
+                                    @ r3 := p->buf[XDELAYB-3]
+                                    @ r4 := p->buf[XDELAYB-2]
+                                    @ r5 := p->buf[XDELAYB-1]
+
+    subs    r5, r10, r5             @ r5 := r10 - r5 (flags used below)
+
+    add     r1, r12, #XcoeffsB
+    ldmia   r1, {r6,r7,r8,r9,r11}   @ r6 := p->XcoeffsB[0]
+                                    @ r7 := p->XcoeffsB[1]
+                                    @ r8 := p->XcoeffsB[2]
+                                    @ r9 := p->XcoeffsB[3]
+                                    @ r11 := p->XcoeffsB[4]
+
+    mul     r1, r10, r6             @ r1 := p->buf[XDELAYB] * p->XcoeffsB[0]
+    mla     r1, r5, r7, r1          @ r1 += p->buf[XDELAYB-1] * p->XcoeffsB[1]
+    mla     r1, r4, r8, r1          @ r1 += p->buf[XDELAYB-2] * p->XcoeffsB[2]
+    mla     r1, r3, r9, r1          @ r1 += p->buf[XDELAYB-3] * p->XcoeffsB[3]
+    mla     r1, r2, r11, r1         @ r1 += p->buf[XDELAYB-4] * p->XcoeffsB[4]
+
+    add     r2, r14, #XDELAYB-4     @ r2 := &p->buf[XDELAYB-1]
+    stmia   r2, {r5, r10}           @ p->buf[XDELAYB-1] = r5
+                                    @ p->buf[XDELAYB] = r10
+
+    @ Flags are still those of the subs above
+    mvngt   r5, #0
+    movlt   r5, #1                  @ r5 := SIGN(r5) (see .c for SIGN macro)
+
+    cmp     r10, #0
+    mvngt   r10, #0
+    movlt   r10, #1                 @ r10 := SIGN(r10) (see .c for SIGN macro)
+
+    add     r2, r14, #XADAPTCOEFFSB-4
+    stmia   r2, {r5, r10}           @ p->buf[XADAPTCOEFFSB-1] := r5
+                                    @ p->buf[XADAPTCOEFFSB] := r10
+
+    @ r0 still contains predictionA
+    @ r1 contains predictionB
+
+    @ Finish Predictor X
+
+    ldr     r2, [sp, #4]            @ r2 := decoded1
+    add     r0, r0, r1, asr #1      @ r0 := r0 + (r1 >> 1)
+    ldr     r3, [r2]                @ r3 := *decoded1
+    add     r1, r3, r0, asr #10     @ r1 := r3 + (r0 >> 10)
+    str     r1, [r12, #XlastA]      @ p->XlastA := r1
+
+    ldr     r4, [r12, #XfilterA]    @ r4 := p->XfilterA
+    rsb     r4, r4, r4, lsl #5      @ r4 := r4 * 32 - r4 ( == r4*31)
+    add     r1, r1, r4, asr #5      @ r1 := r1 + (r4 >> 5)
+    str     r1, [r12, #XfilterA]    @ p->XfilterA := r1
+
+    @ r1 contains p->XfilterA
+    @ r2 contains decoded1
+    @ r3 contains *decoded1
+
+    @ r6, r7, r8, r9, r11 contain p->XcoeffsB[0..4]
+    @ r5, r10 contain p->buf[XADAPTCOEFFSB-1] and p->buf[XADAPTCOEFFSB]
+
+    @ The flags from this cmp drive both the beq and the blt below
+    cmp     r3, #0
+    stmia   r2!, {r1}               @ *(decoded1++) := r1 (p->XfilterA)
+    str     r2, [sp, #4]            @ save decoded1
+    beq     2f                      @ *decoded1 == 0: no adaption
+
+    add     r1, r14, #XADAPTCOEFFSB-16
+    ldmia   r1, {r2, r3, r4}        @ r2 := p->buf[XADAPTCOEFFSB-4]
+                                    @ r3 := p->buf[XADAPTCOEFFSB-3]
+                                    @ r4 := p->buf[XADAPTCOEFFSB-2]
+    blt     1f
+
+    @ *decoded1 > 0
+
+    sub     r6, r6, r10             @ r6 := p->XcoeffsB[0] - p->buf[XADAPTCOEFFSB]
+    sub     r7, r7, r5              @ r7 := p->XcoeffsB[1] - p->buf[XADAPTCOEFFSB-1]
+    sub     r8, r8, r4              @ r8 := p->XcoeffsB[2] - p->buf[XADAPTCOEFFSB-2]
+    sub     r9, r9, r3              @ r9 := p->XcoeffsB[3] - p->buf[XADAPTCOEFFSB-3]
+    sub     r11, r11, r2            @ r11 := p->XcoeffsB[4] - p->buf[XADAPTCOEFFSB-4]
+
+    add     r0, r12, #XcoeffsB
+    stmia   r0, {r6,r7,r8,r9,r11}   @ Save p->XcoeffsB[]
+
+    add     r1, r12, #XcoeffsA
+    ldmia   r1, {r2-r5}             @ r2 := p->XcoeffsA[0]
+                                    @ r3 := p->XcoeffsA[1]
+                                    @ r4 := p->XcoeffsA[2]
+                                    @ r5 := p->XcoeffsA[3]
+
+    add     r0, r14, #XADAPTCOEFFSA-12
+    ldmia   r0, {r6-r9}             @ r6 := p->buf[XADAPTCOEFFSA-3]
+                                    @ r7 := p->buf[XADAPTCOEFFSA-2]
+                                    @ r8 := p->buf[XADAPTCOEFFSA-1]
+                                    @ r9 := p->buf[XADAPTCOEFFSA]
+
+    sub     r2, r2, r9              @ r2 := p->XcoeffsA[0] - p->buf[XADAPTCOEFFSA]
+    sub     r3, r3, r8              @ r3 := p->XcoeffsA[1] - p->buf[XADAPTCOEFFSA-1]
+    sub     r4, r4, r7              @ r4 := p->XcoeffsA[2] - p->buf[XADAPTCOEFFSA-2]
+    sub     r5, r5, r6              @ r5 := p->XcoeffsA[3] - p->buf[XADAPTCOEFFSA-3]
+
+    stmia   r1, {r2-r5}             @ Save p->XcoeffsA
+    b       2f
+
+
+1:  @ *decoded1 < 0
+
+    add     r6, r6, r10             @ r6 := p->XcoeffsB[0] + p->buf[XADAPTCOEFFSB]
+    add     r7, r7, r5              @ r7 := p->XcoeffsB[1] + p->buf[XADAPTCOEFFSB-1]
+    add     r8, r8, r4              @ r8 := p->XcoeffsB[2] + p->buf[XADAPTCOEFFSB-2]
+    add     r9, r9, r3              @ r9 := p->XcoeffsB[3] + p->buf[XADAPTCOEFFSB-3]
+    add     r11, r11, r2            @ r11 := p->XcoeffsB[4] + p->buf[XADAPTCOEFFSB-4]
+
+    add     r0, r12, #XcoeffsB
+    stmia   r0, {r6,r7,r8,r9,r11}   @ Save p->XcoeffsB[]
+
+    add     r1, r12, #XcoeffsA
+    ldmia   r1, {r2-r5}             @ r2 := p->XcoeffsA[0]
+                                    @ r3 := p->XcoeffsA[1]
+                                    @ r4 := p->XcoeffsA[2]
+                                    @ r5 := p->XcoeffsA[3]
+
+    add     r0, r14, #XADAPTCOEFFSA-12
+    ldmia   r0, {r6-r9}             @ r6 := p->buf[XADAPTCOEFFSA-3]
+                                    @ r7 := p->buf[XADAPTCOEFFSA-2]
+                                    @ r8 := p->buf[XADAPTCOEFFSA-1]
+                                    @ r9 := p->buf[XADAPTCOEFFSA]
+
+    add     r2, r2, r9              @ r2 := p->XcoeffsA[0] + p->buf[XADAPTCOEFFSA]
+    add     r3, r3, r8              @ r3 := p->XcoeffsA[1] + p->buf[XADAPTCOEFFSA-1]
+    add     r4, r4, r7              @ r4 := p->XcoeffsA[2] + p->buf[XADAPTCOEFFSA-2]
+    add     r5, r5, r6              @ r5 := p->XcoeffsA[3] + p->buf[XADAPTCOEFFSA-3]
+
+    stmia   r1, {r2-r5}             @ Save p->XcoeffsA
+
+2:
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@ COMMON
+
+    add     r14, r14, #4            @ p->buf++
+
+    add     r11, r12, #historybuffer    @ r11 := &p->historybuffer[0]
+
+    sub     r10, r14, #HISTORY_SIZE*4   @ r10 := p->buf - HISTORY_SIZE
+
+    cmp     r10, r11
+    bne     endofloop
+
+    @ The history buffer is full, we need to do a memmove:
+
+    @ dest = r11 (p->historybuffer)
+    @ src = r14 (p->buf)
+    @ n = 200 bytes, copied as five bursts of ten words
+
+    ldmia   r14!, {r0-r9}           @ 40 bytes
+    stmia   r11!, {r0-r9}
+    ldmia   r14!, {r0-r9}           @ 40 bytes
+    stmia   r11!, {r0-r9}
+    ldmia   r14!, {r0-r9}           @ 40 bytes
+    stmia   r11!, {r0-r9}
+    ldmia   r14!, {r0-r9}           @ 40 bytes
+    stmia   r11!, {r0-r9}
+    ldmia   r14!, {r0-r9}           @ 40 bytes
+    stmia   r11!, {r0-r9}
+
+    add     r14, r12, #historybuffer    @ p->buf = &p->historybuffer[0]
+
+
+endofloop:
+@ Check loop count
+    ldr     r0, [sp, #8]
+    subs    r0, r0, #1
+    strne   r0, [sp, #8]            @ only store back if we go round again
+    bne     loop
+
+done:
+    str     r14, [r12]              @ Save value of p->buf
+    mov     r0, #0                  @ Return 0, as the C implementation does
+    add     sp, sp, #12             @ Skip the saved r1-r3, no need to restore
+    ldmia   sp!, {r4-r11, pc}
diff --git a/apps/codecs/demac/libdemac/predictor.c b/apps/codecs/demac/libdemac/predictor.c
index a7210bf014..90f24e416b 100644
--- a/apps/codecs/demac/libdemac/predictor.c
+++ b/apps/codecs/demac/libdemac/predictor.c
@@ -74,6 +74,7 @@ void init_predictor_decoder(struct predictor_t* p)
int predictor_decode_stereo(struct predictor_t* p, int32_t* decoded0, int32_t* decoded1, int count) ICODE_ATTR;
#endif
+#ifndef CPU_ARM
int predictor_decode_stereo(struct predictor_t* p, int32_t* decoded0, int32_t* decoded1, int count)
{
int32_t predictionA, predictionB;
@@ -208,6 +209,7 @@ int predictor_decode_stereo(struct predictor_t* p, int32_t* decoded0, int32_t* d
return 0;
}
+#endif
int predictor_decode_mono(struct predictor_t* p, int32_t* decoded0, int count)
{