summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- apps/codecs/demac/libdemac/filter.c | 3
-rw-r--r-- apps/codecs/demac/libdemac/vector_math16_mmx.h | 219
-rwxr-xr-x tools/configure | 22
3 files changed, 238 insertions, 6 deletions
diff --git a/apps/codecs/demac/libdemac/filter.c b/apps/codecs/demac/libdemac/filter.c
index ed6f3c8dc6..275f12f6ae 100644
--- a/apps/codecs/demac/libdemac/filter.c
+++ b/apps/codecs/demac/libdemac/filter.c
@@ -46,6 +46,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
#elif defined(CPU_ARM) && (ARM_ARCH >= 5)
/* Assume all our ARMv5 targets are ARMv5te(j) */
#include "vector_math16_armv5te.h"
+#elif (defined(__i386__) || defined(__i486__)) && defined(__MMX__) \
+ || defined(__x86_64__)
+#include "vector_math16_mmx.h"
#else
#include "vector_math_generic.h"
#endif
diff --git a/apps/codecs/demac/libdemac/vector_math16_mmx.h b/apps/codecs/demac/libdemac/vector_math16_mmx.h
new file mode 100644
index 0000000000..a7f9c73af7
--- /dev/null
+++ b/apps/codecs/demac/libdemac/vector_math16_mmx.h
@@ -0,0 +1,219 @@
+/*
+
+libdemac - A Monkey's Audio decoder
+
+$Id$
+
+Copyright (C) Dave Chapman 2007
+
+MMX vector math copyright (C) 2010 Jens Arnold
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110, USA
+
+*/
+
+#define FUSED_VECTOR_MATH
+
+#define __E(__e) #__e
+#define __S(__e) __E(__e)
+
+/*
+ * vector_sp_add - fused scalar product and in-place vector add (MMX).
+ *
+ * Walks ORDER 16-bit elements, four per 64-bit MMX register, and in one pass
+ *   - accumulates the scalar product of v1[] and f2[] (pmaddwd + paddd),
+ *     using the values v1[] held before this call modified them, and
+ *   - replaces every v1[i] with v1[i] + s2[i] (paddw, wrapping 16-bit add).
+ *
+ * ORDER is a compile-time constant provided by the including file. The code
+ * processes ORDER>>2 quadwords for ORDER <= 256 and (ORDER>>8) * 64 quadwords
+ * otherwise, so any remainder of ORDER beyond those multiples is not
+ * processed.
+ *
+ * Returns the sum of the two 32-bit lanes of the MMX accumulator.
+ *
+ * NOTE(review): no emms is executed after the MMX code; confirm that no x87
+ * floating-point code runs afterwards without one.
+ */
+static inline int32_t vector_sp_add(int16_t* v1, int16_t* f2, int16_t *s2)
+{
+    int res, t;
+#if ORDER > 256
+    /* One pass of the assembly loop covers 256 elements (512 bytes). */
+    int cnt = ORDER>>8;
+#endif
+
+    asm volatile (
+#if ORDER > 256
+        /* Large orders: clear the accumulator, then loop over blocks of
+         * 64 unrolled quadword steps. */
+        "pxor %%mm2, %%mm2 \n"
+        ".set ofs, 0 \n"
+    "1: \n"
+        ".rept 64 \n"
+#else
+        /* Small orders: the first quadword initialises the accumulator
+         * directly, the remaining ones are fully unrolled below. */
+        "movq (%[v1]), %%mm2 \n"
+        "movq %%mm2, %%mm0 \n"
+        "pmaddwd (%[f2]), %%mm2 \n"
+        "paddw (%[s2]), %%mm0 \n"
+        "movq %%mm0, (%[v1]) \n"
+        ".set ofs, 8 \n"
+
+        /* The repeat count is stringified and evaluated by the assembler,
+         * not by C: in GNU as '>>' binds tighter than '-', so this is
+         * (ORDER>>2) - 1. */
+        ".rept " __S(ORDER>>2 - 1) "\n"
+#endif
+        /* One quadword step: mm1 = products for the sum, mm0 = updated v1. */
+        "movq ofs(%[v1]), %%mm1 \n"
+        "movq %%mm1, %%mm0 \n"
+        "pmaddwd ofs(%[f2]), %%mm1 \n"
+        "paddw ofs(%[s2]), %%mm0 \n"
+        "movq %%mm0, ofs(%[v1]) \n"
+        "paddd %%mm1, %%mm2 \n"
+        ".set ofs, ofs + 8 \n"
+        ".endr \n"
+#if ORDER > 256
+        /* Advance all three pointers past the block just processed. */
+        "add $512, %[v1] \n"
+        "add $512, %[s2] \n"
+        "add $512, %[f2] \n"
+        "dec %[cnt] \n"
+        "jne 1b \n"
+#endif
+
+        /* Horizontal add of the two 32-bit accumulator lanes. */
+        "movd %%mm2, %[t] \n"
+        "psrlq $32, %%mm2 \n"
+        "movd %%mm2, %[res] \n"
+        "add %[t], %[res] \n"
+        : /* outputs */
+#if ORDER > 256
+        [cnt]"+r"(cnt),
+        [s2] "+r"(s2),
+        [res]"=r"(res),
+        [t]  "=r"(t)
+        : /* inputs */
+        /* v1 and f2 share registers with res and t (operands 2 and 3).
+         * res and t are only written after the loop, so the pointers can
+         * be advanced in place without extra output operands. */
+        [v1]"2"(v1),
+        [f2]"3"(f2)
+#else
+        [res]"=r"(res),
+        [t]  "=r"(t)
+        : /* inputs */
+        [v1]"r"(v1),
+        [f2]"r"(f2),
+        [s2]"r"(s2)
+#endif
+        : /* clobbers */
+        /* "memory": the asm stores to v1[] and loads from f2[]/s2[] through
+         * plain register operands, so the compiler must not cache or
+         * reorder accesses to those arrays around it. */
+        "mm0", "mm1", "mm2", "memory"
+    );
+    return res;
+}
+
+/*
+ * vector_sp_sub - fused scalar product and in-place vector subtract (MMX).
+ *
+ * Walks ORDER 16-bit elements, four per 64-bit MMX register, and in one pass
+ *   - accumulates the scalar product of v1[] and f2[] (pmaddwd + paddd),
+ *     using the values v1[] held before this call modified them, and
+ *   - replaces every v1[i] with v1[i] - s2[i] (psubw, wrapping 16-bit
+ *     subtract).
+ *
+ * ORDER is a compile-time constant provided by the including file. The code
+ * processes ORDER>>2 quadwords for ORDER <= 256 and (ORDER>>8) * 64 quadwords
+ * otherwise, so any remainder of ORDER beyond those multiples is not
+ * processed.
+ *
+ * Returns the sum of the two 32-bit lanes of the MMX accumulator.
+ *
+ * NOTE(review): no emms is executed after the MMX code; confirm that no x87
+ * floating-point code runs afterwards without one.
+ */
+static inline int32_t vector_sp_sub(int16_t* v1, int16_t* f2, int16_t *s2)
+{
+    int res, t;
+#if ORDER > 256
+    /* One pass of the assembly loop covers 256 elements (512 bytes). */
+    int cnt = ORDER>>8;
+#endif
+
+    asm volatile (
+#if ORDER > 256
+        /* Large orders: clear the accumulator, then loop over blocks of
+         * 64 unrolled quadword steps. */
+        "pxor %%mm2, %%mm2 \n"
+        ".set ofs, 0 \n"
+    "1: \n"
+        ".rept 64 \n"
+#else
+        /* Small orders: the first quadword initialises the accumulator
+         * directly, the remaining ones are fully unrolled below. */
+        "movq (%[v1]), %%mm2 \n"
+        "movq %%mm2, %%mm0 \n"
+        "pmaddwd (%[f2]), %%mm2 \n"
+        "psubw (%[s2]), %%mm0 \n"
+        "movq %%mm0, (%[v1]) \n"
+        ".set ofs, 8 \n"
+
+        /* The repeat count is stringified and evaluated by the assembler,
+         * not by C: in GNU as '>>' binds tighter than '-', so this is
+         * (ORDER>>2) - 1. */
+        ".rept " __S(ORDER>>2 - 1) "\n"
+#endif
+        /* One quadword step: mm1 = products for the sum, mm0 = updated v1. */
+        "movq ofs(%[v1]), %%mm1 \n"
+        "movq %%mm1, %%mm0 \n"
+        "pmaddwd ofs(%[f2]), %%mm1 \n"
+        "psubw ofs(%[s2]), %%mm0 \n"
+        "movq %%mm0, ofs(%[v1]) \n"
+        "paddd %%mm1, %%mm2 \n"
+        ".set ofs, ofs + 8 \n"
+        ".endr \n"
+#if ORDER > 256
+        /* Advance all three pointers past the block just processed. */
+        "add $512, %[v1] \n"
+        "add $512, %[s2] \n"
+        "add $512, %[f2] \n"
+        "dec %[cnt] \n"
+        "jne 1b \n"
+#endif
+
+        /* Horizontal add of the two 32-bit accumulator lanes. */
+        "movd %%mm2, %[t] \n"
+        "psrlq $32, %%mm2 \n"
+        "movd %%mm2, %[res] \n"
+        "add %[t], %[res] \n"
+        : /* outputs */
+#if ORDER > 256
+        [cnt]"+r"(cnt),
+        [s2] "+r"(s2),
+        [res]"=r"(res),
+        [t]  "=r"(t)
+        : /* inputs */
+        /* v1 and f2 share registers with res and t (operands 2 and 3).
+         * res and t are only written after the loop, so the pointers can
+         * be advanced in place without extra output operands. */
+        [v1]"2"(v1),
+        [f2]"3"(f2)
+#else
+        [res]"=r"(res),
+        [t]  "=r"(t)
+        : /* inputs */
+        [v1]"r"(v1),
+        [f2]"r"(f2),
+        [s2]"r"(s2)
+#endif
+        : /* clobbers */
+        /* "memory": the asm stores to v1[] and loads from f2[]/s2[] through
+         * plain register operands, so the compiler must not cache or
+         * reorder accesses to those arrays around it. */
+        "mm0", "mm1", "mm2", "memory"
+    );
+    return res;
+}
+
+/*
+ * scalarproduct - scalar product of two 16-bit vectors (MMX).
+ *
+ * Multiplies v1[] and v2[] element-wise, four 16-bit elements per 64-bit
+ * MMX register (pmaddwd), and accumulates the products in two 32-bit lanes
+ * (paddd). Neither array is written.
+ *
+ * ORDER is a compile-time constant provided by the including file. The code
+ * processes ORDER>>2 quadwords for ORDER <= 256 and (ORDER>>8) * 64 quadwords
+ * otherwise, so any remainder of ORDER beyond those multiples is not
+ * processed.
+ *
+ * Returns the sum of the two 32-bit lanes of the MMX accumulator.
+ *
+ * NOTE(review): no emms is executed after the MMX code; confirm that no x87
+ * floating-point code runs afterwards without one.
+ */
+static inline int32_t scalarproduct(int16_t* v1, int16_t* v2)
+{
+    int res, t;
+#if ORDER > 256
+    /* One pass of the assembly loop covers 256 elements (512 bytes). */
+    int cnt = ORDER>>8;
+#endif
+
+    asm volatile (
+#if ORDER > 256
+        /* Large orders: clear the accumulator, then loop over blocks of
+         * 64 unrolled quadword steps. */
+        "pxor %%mm1, %%mm1 \n"
+        ".set ofs, 0 \n"
+    "1: \n"
+        ".rept 64 \n"
+#else
+        /* Small orders: the first quadword initialises the accumulator
+         * directly, the remaining ones are fully unrolled below. */
+        "movq (%[v1]), %%mm1 \n"
+        "pmaddwd (%[v2]), %%mm1 \n"
+        ".set ofs, 8 \n"
+
+        /* The repeat count is stringified and evaluated by the assembler,
+         * not by C: in GNU as '>>' binds tighter than '-', so this is
+         * (ORDER>>2) - 1. */
+        ".rept " __S(ORDER>>2 - 1) "\n"
+#endif
+        /* One quadword step: multiply-add four element pairs into mm1. */
+        "movq ofs(%[v1]), %%mm0 \n"
+        "pmaddwd ofs(%[v2]), %%mm0 \n"
+        "paddd %%mm0, %%mm1 \n"
+        ".set ofs, ofs + 8 \n"
+        ".endr \n"
+#if ORDER > 256
+        /* Advance both pointers past the block just processed. */
+        "add $512, %[v1] \n"
+        "add $512, %[v2] \n"
+        "dec %[cnt] \n"
+        "jne 1b \n"
+#endif
+
+        /* Horizontal add of the two 32-bit accumulator lanes. */
+        "movd %%mm1, %[t] \n"
+        "psrlq $32, %%mm1 \n"
+        "movd %%mm1, %[res] \n"
+        "add %[t], %[res] \n"
+        : /* outputs */
+#if ORDER > 256
+        [cnt]"+r"(cnt),
+        [res]"=r"(res),
+        [t]  "=r"(t)
+        : /* inputs */
+        /* v1 and v2 share registers with res and t (operands 1 and 2).
+         * res and t are only written after the loop, so the pointers can
+         * be advanced in place without extra output operands. */
+        [v1]"1"(v1),
+        [v2]"2"(v2)
+#else
+        [res]"=r"(res),
+        [t]  "=r"(t)
+        : /* inputs */
+        [v1]"r"(v1),
+        [v2]"r"(v2)
+#endif
+        : /* clobbers */
+        /* "memory": the asm loads from v1[]/v2[] through plain register
+         * operands, so earlier stores to those arrays must reach memory
+         * before it runs. */
+        "mm0", "mm1", "memory"
+    );
+    return res;
+}
diff --git a/tools/configure b/tools/configure
index 7a04cecc9c..4d61d8e903 100755
--- a/tools/configure
+++ b/tools/configure
@@ -171,12 +171,20 @@ simcc () {
GCCOPTS="$GCCOPTS -I\$(SIMDIR)"
if test "X$crosscompile" != "Xyes"; then
- if [ "`uname -m`" = "x86_64" ] || [ "`uname -m`" = "amd64" ]; then
- # fPIC is needed to make shared objects link
- # setting visibility to hidden is necessary to avoid strange crashes
- # due to symbol clashing
- GCCOPTS="$GCCOPTS -fPIC -fvisibility=hidden"
- fi
+ case `uname -m` in
+ x86_64|amd64)
+ # fPIC is needed to make shared objects link
+ # setting visibility to hidden is necessary to avoid strange crashes
+ # due to symbol clashing
+ GCCOPTS="$GCCOPTS -fPIC -fvisibility=hidden"
+ # x86_64 supports MMX by default
+ ;;
+
+ i686)
+ echo "Enabling MMX support"
+ GCCOPTS="$GCCOPTS -mmmx"
+ ;;
+ esac
id=$$
cat >$tmpdir/conftest-$id.c <<EOF
@@ -218,6 +226,8 @@ EOF
LDOPTS="-mconsole $sdl_libs"
output="rockboxui.exe" # use this as output binary name
endian="little" # windows is little endian
+ echo "Enabling MMX support"
+ GCCOPTS="$GCCOPTS -mmmx"
fi
}