From cf43e5083b9e0f87de262ea31fd8067225ebfcda Mon Sep 17 00:00:00 2001
From: Mohamed Tarek <mt@rockbox.org>
Date: Fri, 30 Apr 2010 11:11:56 +0000
Subject: Add libwmapro to apps/codecs. These files comprise a set of
 unmodified files needed from ffmpeg's libavcodec and libavutil to compile and
 use the wma pro decoder standalone. The files were taken from ffmpeg's svn
 r22886 dated 15 April 2010.

git-svn-id: svn://svn.rockbox.org/rockbox/trunk@25763 a1c6a512-1295-4272-9138-f99709370657
---
 apps/codecs/libwmapro/avcodec.h                | 3952 ++++++++++++++++++++
 apps/codecs/libwmapro/avfft.h                  |   99 +
 apps/codecs/libwmapro/bitstream.c              |  336 ++
 apps/codecs/libwmapro/dsputil.c                | 4565 ++++++++++++++++++++++++
 apps/codecs/libwmapro/dsputil.h                |  808 +++++
 apps/codecs/libwmapro/fft.c                    |  368 ++
 apps/codecs/libwmapro/fft.h                    |  240 ++
 apps/codecs/libwmapro/get_bits.h               |  691 ++++
 apps/codecs/libwmapro/internal.h               |   51 +
 apps/codecs/libwmapro/libavutil/attributes.h   |  113 +
 apps/codecs/libwmapro/libavutil/avutil.h       |   89 +
 apps/codecs/libwmapro/libavutil/bswap.h        |  101 +
 apps/codecs/libwmapro/libavutil/common.h       |  297 ++
 apps/codecs/libwmapro/libavutil/internal.h     |  207 ++
 apps/codecs/libwmapro/libavutil/intreadwrite.h |  516 +++
 apps/codecs/libwmapro/libavutil/log.c          |   92 +
 apps/codecs/libwmapro/libavutil/log.h          |  115 +
 apps/codecs/libwmapro/libavutil/mathematics.c  |  174 +
 apps/codecs/libwmapro/libavutil/mathematics.h  |   98 +
 apps/codecs/libwmapro/libavutil/mem.c          |  176 +
 apps/codecs/libwmapro/libavutil/mem.h          |  125 +
 apps/codecs/libwmapro/mathops.h                |  150 +
 apps/codecs/libwmapro/mdct.c                   |  232 ++
 apps/codecs/libwmapro/mdct_tablegen.h          |   60 +
 apps/codecs/libwmapro/put_bits.h               |  343 ++
 apps/codecs/libwmapro/wma.c                    |  523 +++
 apps/codecs/libwmapro/wma.h                    |  163 +
 apps/codecs/libwmapro/wmaprodata.h             |  604 ++++
 apps/codecs/libwmapro/wmaprodec.c              | 1578 ++++++++
 29 files changed, 16866 insertions(+)
 create mode 100644 apps/codecs/libwmapro/avcodec.h
 create mode 100644 apps/codecs/libwmapro/avfft.h
 create mode 100644 apps/codecs/libwmapro/bitstream.c
 create mode 100644 apps/codecs/libwmapro/dsputil.c
 create mode 100644 apps/codecs/libwmapro/dsputil.h
 create mode 100644 apps/codecs/libwmapro/fft.c
 create mode 100644 apps/codecs/libwmapro/fft.h
 create mode 100644 apps/codecs/libwmapro/get_bits.h
 create mode 100644 apps/codecs/libwmapro/internal.h
 create mode 100644 apps/codecs/libwmapro/libavutil/attributes.h
 create mode 100644 apps/codecs/libwmapro/libavutil/avutil.h
 create mode 100644 apps/codecs/libwmapro/libavutil/bswap.h
 create mode 100644 apps/codecs/libwmapro/libavutil/common.h
 create mode 100644 apps/codecs/libwmapro/libavutil/internal.h
 create mode 100644 apps/codecs/libwmapro/libavutil/intreadwrite.h
 create mode 100644 apps/codecs/libwmapro/libavutil/log.c
 create mode 100644 apps/codecs/libwmapro/libavutil/log.h
 create mode 100644 apps/codecs/libwmapro/libavutil/mathematics.c
 create mode 100644 apps/codecs/libwmapro/libavutil/mathematics.h
 create mode 100644 apps/codecs/libwmapro/libavutil/mem.c
 create mode 100644 apps/codecs/libwmapro/libavutil/mem.h
 create mode 100644 apps/codecs/libwmapro/mathops.h
 create mode 100644 apps/codecs/libwmapro/mdct.c
 create mode 100644 apps/codecs/libwmapro/mdct_tablegen.h
 create mode 100644 apps/codecs/libwmapro/put_bits.h
 create mode 100644 apps/codecs/libwmapro/wma.c
 create mode 100644 apps/codecs/libwmapro/wma.h
 create mode 100644 apps/codecs/libwmapro/wmaprodata.h
 create mode 100644 apps/codecs/libwmapro/wmaprodec.c

(limited to 'apps')

diff --git a/apps/codecs/libwmapro/avcodec.h b/apps/codecs/libwmapro/avcodec.h
new file mode 100644
index 0000000000..add4b100ae
--- /dev/null
+++ b/apps/codecs/libwmapro/avcodec.h
@@ -0,0 +1,3952 @@
+/*
+ * copyright (c) 2001 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AVCODEC_H
+#define AVCODEC_AVCODEC_H
+
+/**
+ * @file libavcodec/avcodec.h
+ * external API header
+ */
+
+#include <errno.h>
+#include "libavutil/avutil.h"
+
+#define LIBAVCODEC_VERSION_MAJOR 52
+#define LIBAVCODEC_VERSION_MINOR 66
+#define LIBAVCODEC_VERSION_MICRO  0
+
+#define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
+                                               LIBAVCODEC_VERSION_MINOR, \
+                                               LIBAVCODEC_VERSION_MICRO)
+#define LIBAVCODEC_VERSION      AV_VERSION(LIBAVCODEC_VERSION_MAJOR,    \
+                                           LIBAVCODEC_VERSION_MINOR,    \
+                                           LIBAVCODEC_VERSION_MICRO)
+#define LIBAVCODEC_BUILD        LIBAVCODEC_VERSION_INT
+
+#define LIBAVCODEC_IDENT        "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION)
+
+#define AV_NOPTS_VALUE          INT64_C(0x8000000000000000)
+#define AV_TIME_BASE            1000000
+#define AV_TIME_BASE_Q          (AVRational){1, AV_TIME_BASE}
+
+/**
+ * Identifies the syntax and semantics of the bitstream.
+ * The principle is roughly:
+ * Two decoders with the same ID can decode the same streams.
+ * Two encoders with the same ID can encode compatible streams.
+ * There may be slight deviations from the principle due to implementation
+ * details.
+ *
+ * If you add a codec ID to this list, add it so that
+ * 1. no value of a existing codec ID changes (that would break ABI),
+ * 2. it is as close as possible to similar codecs.
+ */
+enum CodecID {
+    CODEC_ID_NONE,
+
+    /* video codecs */
+    CODEC_ID_MPEG1VIDEO,
+    CODEC_ID_MPEG2VIDEO, ///< preferred ID for MPEG-1/2 video decoding
+    CODEC_ID_MPEG2VIDEO_XVMC,
+    CODEC_ID_H261,
+    CODEC_ID_H263,
+    CODEC_ID_RV10,
+    CODEC_ID_RV20,
+    CODEC_ID_MJPEG,
+    CODEC_ID_MJPEGB,
+    CODEC_ID_LJPEG,
+    CODEC_ID_SP5X,
+    CODEC_ID_JPEGLS,
+    CODEC_ID_MPEG4,
+    CODEC_ID_RAWVIDEO,
+    CODEC_ID_MSMPEG4V1,
+    CODEC_ID_MSMPEG4V2,
+    CODEC_ID_MSMPEG4V3,
+    CODEC_ID_WMV1,
+    CODEC_ID_WMV2,
+    CODEC_ID_H263P,
+    CODEC_ID_H263I,
+    CODEC_ID_FLV1,
+    CODEC_ID_SVQ1,
+    CODEC_ID_SVQ3,
+    CODEC_ID_DVVIDEO,
+    CODEC_ID_HUFFYUV,
+    CODEC_ID_CYUV,
+    CODEC_ID_H264,
+    CODEC_ID_INDEO3,
+    CODEC_ID_VP3,
+    CODEC_ID_THEORA,
+    CODEC_ID_ASV1,
+    CODEC_ID_ASV2,
+    CODEC_ID_FFV1,
+    CODEC_ID_4XM,
+    CODEC_ID_VCR1,
+    CODEC_ID_CLJR,
+    CODEC_ID_MDEC,
+    CODEC_ID_ROQ,
+    CODEC_ID_INTERPLAY_VIDEO,
+    CODEC_ID_XAN_WC3,
+    CODEC_ID_XAN_WC4,
+    CODEC_ID_RPZA,
+    CODEC_ID_CINEPAK,
+    CODEC_ID_WS_VQA,
+    CODEC_ID_MSRLE,
+    CODEC_ID_MSVIDEO1,
+    CODEC_ID_IDCIN,
+    CODEC_ID_8BPS,
+    CODEC_ID_SMC,
+    CODEC_ID_FLIC,
+    CODEC_ID_TRUEMOTION1,
+    CODEC_ID_VMDVIDEO,
+    CODEC_ID_MSZH,
+    CODEC_ID_ZLIB,
+    CODEC_ID_QTRLE,
+    CODEC_ID_SNOW,
+    CODEC_ID_TSCC,
+    CODEC_ID_ULTI,
+    CODEC_ID_QDRAW,
+    CODEC_ID_VIXL,
+    CODEC_ID_QPEG,
+#if LIBAVCODEC_VERSION_MAJOR < 53
+    CODEC_ID_XVID,
+#endif
+    CODEC_ID_PNG,
+    CODEC_ID_PPM,
+    CODEC_ID_PBM,
+    CODEC_ID_PGM,
+    CODEC_ID_PGMYUV,
+    CODEC_ID_PAM,
+    CODEC_ID_FFVHUFF,
+    CODEC_ID_RV30,
+    CODEC_ID_RV40,
+    CODEC_ID_VC1,
+    CODEC_ID_WMV3,
+    CODEC_ID_LOCO,
+    CODEC_ID_WNV1,
+    CODEC_ID_AASC,
+    CODEC_ID_INDEO2,
+    CODEC_ID_FRAPS,
+    CODEC_ID_TRUEMOTION2,
+    CODEC_ID_BMP,
+    CODEC_ID_CSCD,
+    CODEC_ID_MMVIDEO,
+    CODEC_ID_ZMBV,
+    CODEC_ID_AVS,
+    CODEC_ID_SMACKVIDEO,
+    CODEC_ID_NUV,
+    CODEC_ID_KMVC,
+    CODEC_ID_FLASHSV,
+    CODEC_ID_CAVS,
+    CODEC_ID_JPEG2000,
+    CODEC_ID_VMNC,
+    CODEC_ID_VP5,
+    CODEC_ID_VP6,
+    CODEC_ID_VP6F,
+    CODEC_ID_TARGA,
+    CODEC_ID_DSICINVIDEO,
+    CODEC_ID_TIERTEXSEQVIDEO,
+    CODEC_ID_TIFF,
+    CODEC_ID_GIF,
+    CODEC_ID_FFH264,
+    CODEC_ID_DXA,
+    CODEC_ID_DNXHD,
+    CODEC_ID_THP,
+    CODEC_ID_SGI,
+    CODEC_ID_C93,
+    CODEC_ID_BETHSOFTVID,
+    CODEC_ID_PTX,
+    CODEC_ID_TXD,
+    CODEC_ID_VP6A,
+    CODEC_ID_AMV,
+    CODEC_ID_VB,
+    CODEC_ID_PCX,
+    CODEC_ID_SUNRAST,
+    CODEC_ID_INDEO4,
+    CODEC_ID_INDEO5,
+    CODEC_ID_MIMIC,
+    CODEC_ID_RL2,
+    CODEC_ID_8SVX_EXP,
+    CODEC_ID_8SVX_FIB,
+    CODEC_ID_ESCAPE124,
+    CODEC_ID_DIRAC,
+    CODEC_ID_BFI,
+    CODEC_ID_CMV,
+    CODEC_ID_MOTIONPIXELS,
+    CODEC_ID_TGV,
+    CODEC_ID_TGQ,
+    CODEC_ID_TQI,
+    CODEC_ID_AURA,
+    CODEC_ID_AURA2,
+    CODEC_ID_V210X,
+    CODEC_ID_TMV,
+    CODEC_ID_V210,
+    CODEC_ID_DPX,
+    CODEC_ID_MAD,
+    CODEC_ID_FRWU,
+    CODEC_ID_FLASHSV2,
+    CODEC_ID_CDGRAPHICS,
+    CODEC_ID_R210,
+    CODEC_ID_ANM,
+    CODEC_ID_BINKVIDEO,
+    CODEC_ID_IFF_ILBM,
+    CODEC_ID_IFF_BYTERUN1,
+    CODEC_ID_KGV1,
+    CODEC_ID_YOP,
+
+    /* various PCM "codecs" */
+    CODEC_ID_PCM_S16LE= 0x10000,
+    CODEC_ID_PCM_S16BE,
+    CODEC_ID_PCM_U16LE,
+    CODEC_ID_PCM_U16BE,
+    CODEC_ID_PCM_S8,
+    CODEC_ID_PCM_U8,
+    CODEC_ID_PCM_MULAW,
+    CODEC_ID_PCM_ALAW,
+    CODEC_ID_PCM_S32LE,
+    CODEC_ID_PCM_S32BE,
+    CODEC_ID_PCM_U32LE,
+    CODEC_ID_PCM_U32BE,
+    CODEC_ID_PCM_S24LE,
+    CODEC_ID_PCM_S24BE,
+    CODEC_ID_PCM_U24LE,
+    CODEC_ID_PCM_U24BE,
+    CODEC_ID_PCM_S24DAUD,
+    CODEC_ID_PCM_ZORK,
+    CODEC_ID_PCM_S16LE_PLANAR,
+    CODEC_ID_PCM_DVD,
+    CODEC_ID_PCM_F32BE,
+    CODEC_ID_PCM_F32LE,
+    CODEC_ID_PCM_F64BE,
+    CODEC_ID_PCM_F64LE,
+    CODEC_ID_PCM_BLURAY,
+
+    /* various ADPCM codecs */
+    CODEC_ID_ADPCM_IMA_QT= 0x11000,
+    CODEC_ID_ADPCM_IMA_WAV,
+    CODEC_ID_ADPCM_IMA_DK3,
+    CODEC_ID_ADPCM_IMA_DK4,
+    CODEC_ID_ADPCM_IMA_WS,
+    CODEC_ID_ADPCM_IMA_SMJPEG,
+    CODEC_ID_ADPCM_MS,
+    CODEC_ID_ADPCM_4XM,
+    CODEC_ID_ADPCM_XA,
+    CODEC_ID_ADPCM_ADX,
+    CODEC_ID_ADPCM_EA,
+    CODEC_ID_ADPCM_G726,
+    CODEC_ID_ADPCM_CT,
+    CODEC_ID_ADPCM_SWF,
+    CODEC_ID_ADPCM_YAMAHA,
+    CODEC_ID_ADPCM_SBPRO_4,
+    CODEC_ID_ADPCM_SBPRO_3,
+    CODEC_ID_ADPCM_SBPRO_2,
+    CODEC_ID_ADPCM_THP,
+    CODEC_ID_ADPCM_IMA_AMV,
+    CODEC_ID_ADPCM_EA_R1,
+    CODEC_ID_ADPCM_EA_R3,
+    CODEC_ID_ADPCM_EA_R2,
+    CODEC_ID_ADPCM_IMA_EA_SEAD,
+    CODEC_ID_ADPCM_IMA_EA_EACS,
+    CODEC_ID_ADPCM_EA_XAS,
+    CODEC_ID_ADPCM_EA_MAXIS_XA,
+    CODEC_ID_ADPCM_IMA_ISS,
+
+    /* AMR */
+    CODEC_ID_AMR_NB= 0x12000,
+    CODEC_ID_AMR_WB,
+
+    /* RealAudio codecs*/
+    CODEC_ID_RA_144= 0x13000,
+    CODEC_ID_RA_288,
+
+    /* various DPCM codecs */
+    CODEC_ID_ROQ_DPCM= 0x14000,
+    CODEC_ID_INTERPLAY_DPCM,
+    CODEC_ID_XAN_DPCM,
+    CODEC_ID_SOL_DPCM,
+
+    /* audio codecs */
+    CODEC_ID_MP2= 0x15000,
+    CODEC_ID_MP3, ///< preferred ID for decoding MPEG audio layer 1, 2 or 3
+    CODEC_ID_AAC,
+    CODEC_ID_AC3,
+    CODEC_ID_DTS,
+    CODEC_ID_VORBIS,
+    CODEC_ID_DVAUDIO,
+    CODEC_ID_WMAV1,
+    CODEC_ID_WMAV2,
+    CODEC_ID_MACE3,
+    CODEC_ID_MACE6,
+    CODEC_ID_VMDAUDIO,
+    CODEC_ID_SONIC,
+    CODEC_ID_SONIC_LS,
+    CODEC_ID_FLAC,
+    CODEC_ID_MP3ADU,
+    CODEC_ID_MP3ON4,
+    CODEC_ID_SHORTEN,
+    CODEC_ID_ALAC,
+    CODEC_ID_WESTWOOD_SND1,
+    CODEC_ID_GSM, ///< as in Berlin toast format
+    CODEC_ID_QDM2,
+    CODEC_ID_COOK,
+    CODEC_ID_TRUESPEECH,
+    CODEC_ID_TTA,
+    CODEC_ID_SMACKAUDIO,
+    CODEC_ID_QCELP,
+    CODEC_ID_WAVPACK,
+    CODEC_ID_DSICINAUDIO,
+    CODEC_ID_IMC,
+    CODEC_ID_MUSEPACK7,
+    CODEC_ID_MLP,
+    CODEC_ID_GSM_MS, /* as found in WAV */
+    CODEC_ID_ATRAC3,
+    CODEC_ID_VOXWARE,
+    CODEC_ID_APE,
+    CODEC_ID_NELLYMOSER,
+    CODEC_ID_MUSEPACK8,
+    CODEC_ID_SPEEX,
+    CODEC_ID_WMAVOICE,
+    CODEC_ID_WMAPRO,
+    CODEC_ID_WMALOSSLESS,
+    CODEC_ID_ATRAC3P,
+    CODEC_ID_EAC3,
+    CODEC_ID_SIPR,
+    CODEC_ID_MP1,
+    CODEC_ID_TWINVQ,
+    CODEC_ID_TRUEHD,
+    CODEC_ID_MP4ALS,
+    CODEC_ID_ATRAC1,
+    CODEC_ID_BINKAUDIO_RDFT,
+    CODEC_ID_BINKAUDIO_DCT,
+
+    /* subtitle codecs */
+    CODEC_ID_DVD_SUBTITLE= 0x17000,
+    CODEC_ID_DVB_SUBTITLE,
+    CODEC_ID_TEXT,  ///< raw UTF-8 text
+    CODEC_ID_XSUB,
+    CODEC_ID_SSA,
+    CODEC_ID_MOV_TEXT,
+    CODEC_ID_HDMV_PGS_SUBTITLE,
+    CODEC_ID_DVB_TELETEXT,
+
+    /* other specific kind of codecs (generally used for attachments) */
+    CODEC_ID_TTF= 0x18000,
+
+    CODEC_ID_PROBE= 0x19000, ///< codec_id is not known (like CODEC_ID_NONE) but lavf should attempt to identify it
+
+    CODEC_ID_MPEG2TS= 0x20000, /**< _FAKE_ codec to indicate a raw MPEG-2 TS
+                                * stream (only used by libavformat) */
+};
+
+#if LIBAVCODEC_VERSION_MAJOR < 53
+#define CodecType AVMediaType
+
+#define CODEC_TYPE_UNKNOWN    AVMEDIA_TYPE_UNKNOWN
+#define CODEC_TYPE_VIDEO      AVMEDIA_TYPE_VIDEO
+#define CODEC_TYPE_AUDIO      AVMEDIA_TYPE_AUDIO
+#define CODEC_TYPE_DATA       AVMEDIA_TYPE_DATA
+#define CODEC_TYPE_SUBTITLE   AVMEDIA_TYPE_SUBTITLE
+#define CODEC_TYPE_ATTACHMENT AVMEDIA_TYPE_ATTACHMENT
+#define CODEC_TYPE_NB         AVMEDIA_TYPE_NB
+#endif
+
+/**
+ * all in native-endian format
+ */
+enum SampleFormat {
+    SAMPLE_FMT_NONE = -1,
+    SAMPLE_FMT_U8,              ///< unsigned 8 bits
+    SAMPLE_FMT_S16,             ///< signed 16 bits
+    SAMPLE_FMT_S32,             ///< signed 32 bits
+    SAMPLE_FMT_FLT,             ///< float
+    SAMPLE_FMT_DBL,             ///< double
+    SAMPLE_FMT_NB               ///< Number of sample formats. DO NOT USE if dynamically linking to libavcodec
+};
+
+/* Audio channel masks */
+#define CH_FRONT_LEFT             0x00000001
+#define CH_FRONT_RIGHT            0x00000002
+#define CH_FRONT_CENTER           0x00000004
+#define CH_LOW_FREQUENCY          0x00000008
+#define CH_BACK_LEFT              0x00000010
+#define CH_BACK_RIGHT             0x00000020
+#define CH_FRONT_LEFT_OF_CENTER   0x00000040
+#define CH_FRONT_RIGHT_OF_CENTER  0x00000080
+#define CH_BACK_CENTER            0x00000100
+#define CH_SIDE_LEFT              0x00000200
+#define CH_SIDE_RIGHT             0x00000400
+#define CH_TOP_CENTER             0x00000800
+#define CH_TOP_FRONT_LEFT         0x00001000
+#define CH_TOP_FRONT_CENTER       0x00002000
+#define CH_TOP_FRONT_RIGHT        0x00004000
+#define CH_TOP_BACK_LEFT          0x00008000
+#define CH_TOP_BACK_CENTER        0x00010000
+#define CH_TOP_BACK_RIGHT         0x00020000
+#define CH_STEREO_LEFT            0x20000000  ///< Stereo downmix.
+#define CH_STEREO_RIGHT           0x40000000  ///< See CH_STEREO_LEFT.
+
+/** Channel mask value used for AVCodecContext.request_channel_layout
+    to indicate that the user requests the channel order of the decoder output
+    to be the native codec channel order. */
+#define CH_LAYOUT_NATIVE          0x8000000000000000LL
+
+/* Audio channel convenience macros */
+#define CH_LAYOUT_MONO              (CH_FRONT_CENTER)
+#define CH_LAYOUT_STEREO            (CH_FRONT_LEFT|CH_FRONT_RIGHT)
+#define CH_LAYOUT_2_1               (CH_LAYOUT_STEREO|CH_BACK_CENTER)
+#define CH_LAYOUT_SURROUND          (CH_LAYOUT_STEREO|CH_FRONT_CENTER)
+#define CH_LAYOUT_4POINT0           (CH_LAYOUT_SURROUND|CH_BACK_CENTER)
+#define CH_LAYOUT_2_2               (CH_LAYOUT_STEREO|CH_SIDE_LEFT|CH_SIDE_RIGHT)
+#define CH_LAYOUT_QUAD              (CH_LAYOUT_STEREO|CH_BACK_LEFT|CH_BACK_RIGHT)
+#define CH_LAYOUT_5POINT0           (CH_LAYOUT_SURROUND|CH_SIDE_LEFT|CH_SIDE_RIGHT)
+#define CH_LAYOUT_5POINT1           (CH_LAYOUT_5POINT0|CH_LOW_FREQUENCY)
+#define CH_LAYOUT_5POINT0_BACK      (CH_LAYOUT_SURROUND|CH_BACK_LEFT|CH_BACK_RIGHT)
+#define CH_LAYOUT_5POINT1_BACK      (CH_LAYOUT_5POINT0_BACK|CH_LOW_FREQUENCY)
+#define CH_LAYOUT_7POINT0           (CH_LAYOUT_5POINT0|CH_BACK_LEFT|CH_BACK_RIGHT)
+#define CH_LAYOUT_7POINT1           (CH_LAYOUT_5POINT1|CH_BACK_LEFT|CH_BACK_RIGHT)
+#define CH_LAYOUT_7POINT1_WIDE      (CH_LAYOUT_5POINT1_BACK|\
+                                          CH_FRONT_LEFT_OF_CENTER|CH_FRONT_RIGHT_OF_CENTER)
+#define CH_LAYOUT_STEREO_DOWNMIX    (CH_STEREO_LEFT|CH_STEREO_RIGHT)
+
+/* in bytes */
+#define AVCODEC_MAX_AUDIO_FRAME_SIZE 192000 // 1 second of 48khz 32bit audio
+
+/**
+ * Required number of additionally allocated bytes at the end of the input bitstream for decoding.
+ * This is mainly needed because some optimized bitstream readers read
+ * 32 or 64 bit at once and could read over the end.<br>
+ * Note: If the first 23 bits of the additional bytes are not 0, then damaged
+ * MPEG bitstreams could cause overread and segfault.
+ */
+#define FF_INPUT_BUFFER_PADDING_SIZE 8
+
+/**
+ * minimum encoding buffer size
+ * Used to avoid some checks during header writing.
+ */
+#define FF_MIN_BUFFER_SIZE 16384
+
+
+/**
+ * motion estimation type.
+ */
+enum Motion_Est_ID {
+    ME_ZERO = 1,    ///< no search, that is use 0,0 vector whenever one is needed
+    ME_FULL,
+    ME_LOG,
+    ME_PHODS,
+    ME_EPZS,        ///< enhanced predictive zonal search
+    ME_X1,          ///< reserved for experiments
+    ME_HEX,         ///< hexagon based search
+    ME_UMH,         ///< uneven multi-hexagon search
+    ME_ITER,        ///< iterative search
+    ME_TESA,        ///< transformed exhaustive search algorithm
+};
+
+enum AVDiscard{
+    /* We leave some space between them for extensions (drop some
+     * keyframes for intra-only or drop just some bidir frames). */
+    AVDISCARD_NONE   =-16, ///< discard nothing
+    AVDISCARD_DEFAULT=  0, ///< discard useless packets like 0 size packets in avi
+    AVDISCARD_NONREF =  8, ///< discard all non reference
+    AVDISCARD_BIDIR  = 16, ///< discard all bidirectional frames
+    AVDISCARD_NONKEY = 32, ///< discard all frames except keyframes
+    AVDISCARD_ALL    = 48, ///< discard all
+};
+
+enum AVColorPrimaries{
+    AVCOL_PRI_BT709      =1, ///< also ITU-R BT1361 / IEC 61966-2-4 / SMPTE RP177 Annex B
+    AVCOL_PRI_UNSPECIFIED=2,
+    AVCOL_PRI_BT470M     =4,
+    AVCOL_PRI_BT470BG    =5, ///< also ITU-R BT601-6 625 / ITU-R BT1358 625 / ITU-R BT1700 625 PAL & SECAM
+    AVCOL_PRI_SMPTE170M  =6, ///< also ITU-R BT601-6 525 / ITU-R BT1358 525 / ITU-R BT1700 NTSC
+    AVCOL_PRI_SMPTE240M  =7, ///< functionally identical to above
+    AVCOL_PRI_FILM       =8,
+    AVCOL_PRI_NB           , ///< Not part of ABI
+};
+
+enum AVColorTransferCharacteristic{
+    AVCOL_TRC_BT709      =1, ///< also ITU-R BT1361
+    AVCOL_TRC_UNSPECIFIED=2,
+    AVCOL_TRC_GAMMA22    =4, ///< also ITU-R BT470M / ITU-R BT1700 625 PAL & SECAM
+    AVCOL_TRC_GAMMA28    =5, ///< also ITU-R BT470BG
+    AVCOL_TRC_NB           , ///< Not part of ABI
+};
+
+enum AVColorSpace{
+    AVCOL_SPC_RGB        =0,
+    AVCOL_SPC_BT709      =1, ///< also ITU-R BT1361 / IEC 61966-2-4 xvYCC709 / SMPTE RP177 Annex B
+    AVCOL_SPC_UNSPECIFIED=2,
+    AVCOL_SPC_FCC        =4,
+    AVCOL_SPC_BT470BG    =5, ///< also ITU-R BT601-6 625 / ITU-R BT1358 625 / ITU-R BT1700 625 PAL & SECAM / IEC 61966-2-4 xvYCC601
+    AVCOL_SPC_SMPTE170M  =6, ///< also ITU-R BT601-6 525 / ITU-R BT1358 525 / ITU-R BT1700 NTSC / functionally identical to above
+    AVCOL_SPC_SMPTE240M  =7,
+    AVCOL_SPC_NB           , ///< Not part of ABI
+};
+
+enum AVColorRange{
+    AVCOL_RANGE_UNSPECIFIED=0,
+    AVCOL_RANGE_MPEG       =1, ///< the normal 219*2^(n-8) "MPEG" YUV ranges
+    AVCOL_RANGE_JPEG       =2, ///< the normal     2^n-1   "JPEG" YUV ranges
+    AVCOL_RANGE_NB           , ///< Not part of ABI
+};
+
+/**
+ *  X   X      3 4 X      X are luma samples,
+ *             1 2        1-6 are possible chroma positions
+ *  X   X      5 6 X      0 is undefined/unknown position
+ */
+enum AVChromaLocation{
+    AVCHROMA_LOC_UNSPECIFIED=0,
+    AVCHROMA_LOC_LEFT       =1, ///< mpeg2/4, h264 default
+    AVCHROMA_LOC_CENTER     =2, ///< mpeg1, jpeg, h263
+    AVCHROMA_LOC_TOPLEFT    =3, ///< DV
+    AVCHROMA_LOC_TOP        =4,
+    AVCHROMA_LOC_BOTTOMLEFT =5,
+    AVCHROMA_LOC_BOTTOM     =6,
+    AVCHROMA_LOC_NB           , ///< Not part of ABI
+};
+
+typedef struct RcOverride{
+    int start_frame;
+    int end_frame;
+    int qscale; // If this is 0 then quality_factor will be used instead.
+    float quality_factor;
+} RcOverride;
+
+#define FF_MAX_B_FRAMES 16
+
+/* encoding support
+   These flags can be passed in AVCodecContext.flags before initialization.
+   Note: Not everything is supported yet.
+*/
+
+#define CODEC_FLAG_QSCALE 0x0002  ///< Use fixed qscale.
+#define CODEC_FLAG_4MV    0x0004  ///< 4 MV per MB allowed / advanced prediction for H.263.
+#define CODEC_FLAG_QPEL   0x0010  ///< Use qpel MC.
+#define CODEC_FLAG_GMC    0x0020  ///< Use GMC.
+#define CODEC_FLAG_MV0    0x0040  ///< Always try a MB with MV=<0,0>.
+#define CODEC_FLAG_PART   0x0080  ///< Use data partitioning.
+/**
+ * The parent program guarantees that the input for B-frames containing
+ * streams is not written to for at least s->max_b_frames+1 frames, if
+ * this is not set the input will be copied.
+ */
+#define CODEC_FLAG_INPUT_PRESERVED 0x0100
+#define CODEC_FLAG_PASS1           0x0200   ///< Use internal 2pass ratecontrol in first pass mode.
+#define CODEC_FLAG_PASS2           0x0400   ///< Use internal 2pass ratecontrol in second pass mode.
+#define CODEC_FLAG_EXTERN_HUFF     0x1000   ///< Use external Huffman table (for MJPEG).
+#define CODEC_FLAG_GRAY            0x2000   ///< Only decode/encode grayscale.
+#define CODEC_FLAG_EMU_EDGE        0x4000   ///< Don't draw edges.
+#define CODEC_FLAG_PSNR            0x8000   ///< error[?] variables will be set during encoding.
+#define CODEC_FLAG_TRUNCATED       0x00010000 /** Input bitstream might be truncated at a random
+                                                  location instead of only at frame boundaries. */
+#define CODEC_FLAG_NORMALIZE_AQP  0x00020000 ///< Normalize adaptive quantization.
+#define CODEC_FLAG_INTERLACED_DCT 0x00040000 ///< Use interlaced DCT.
+#define CODEC_FLAG_LOW_DELAY      0x00080000 ///< Force low delay.
+#define CODEC_FLAG_ALT_SCAN       0x00100000 ///< Use alternate scan.
+#define CODEC_FLAG_GLOBAL_HEADER  0x00400000 ///< Place global headers in extradata instead of every keyframe.
+#define CODEC_FLAG_BITEXACT       0x00800000 ///< Use only bitexact stuff (except (I)DCT).
+/* Fx : Flag for h263+ extra options */
+#define CODEC_FLAG_AC_PRED        0x01000000 ///< H.263 advanced intra coding / MPEG-4 AC prediction
+#define CODEC_FLAG_H263P_UMV      0x02000000 ///< unlimited motion vector
+#define CODEC_FLAG_CBP_RD         0x04000000 ///< Use rate distortion optimization for cbp.
+#define CODEC_FLAG_QP_RD          0x08000000 ///< Use rate distortion optimization for qp selectioon.
+#define CODEC_FLAG_H263P_AIV      0x00000008 ///< H.263 alternative inter VLC
+#define CODEC_FLAG_OBMC           0x00000001 ///< OBMC
+#define CODEC_FLAG_LOOP_FILTER    0x00000800 ///< loop filter
+#define CODEC_FLAG_H263P_SLICE_STRUCT 0x10000000
+#define CODEC_FLAG_INTERLACED_ME  0x20000000 ///< interlaced motion estimation
+#define CODEC_FLAG_SVCD_SCAN_OFFSET 0x40000000 ///< Will reserve space for SVCD scan offset user data.
+#define CODEC_FLAG_CLOSED_GOP     0x80000000
+#define CODEC_FLAG2_FAST          0x00000001 ///< Allow non spec compliant speedup tricks.
+#define CODEC_FLAG2_STRICT_GOP    0x00000002 ///< Strictly enforce GOP size.
+#define CODEC_FLAG2_NO_OUTPUT     0x00000004 ///< Skip bitstream encoding.
+#define CODEC_FLAG2_LOCAL_HEADER  0x00000008 ///< Place global headers at every keyframe instead of in extradata.
+#define CODEC_FLAG2_BPYRAMID      0x00000010 ///< H.264 allow B-frames to be used as references.
+#define CODEC_FLAG2_WPRED         0x00000020 ///< H.264 weighted biprediction for B-frames
+#define CODEC_FLAG2_MIXED_REFS    0x00000040 ///< H.264 one reference per partition, as opposed to one reference per macroblock
+#define CODEC_FLAG2_8X8DCT        0x00000080 ///< H.264 high profile 8x8 transform
+#define CODEC_FLAG2_FASTPSKIP     0x00000100 ///< H.264 fast pskip
+#define CODEC_FLAG2_AUD           0x00000200 ///< H.264 access unit delimiters
+#define CODEC_FLAG2_BRDO          0x00000400 ///< B-frame rate-distortion optimization
+#define CODEC_FLAG2_INTRA_VLC     0x00000800 ///< Use MPEG-2 intra VLC table.
+#define CODEC_FLAG2_MEMC_ONLY     0x00001000 ///< Only do ME/MC (I frames -> ref, P frame -> ME+MC).
+#define CODEC_FLAG2_DROP_FRAME_TIMECODE 0x00002000 ///< timecode is in drop frame format.
+#define CODEC_FLAG2_SKIP_RD       0x00004000 ///< RD optimal MB level residual skipping
+#define CODEC_FLAG2_CHUNKS        0x00008000 ///< Input bitstream might be truncated at a packet boundaries instead of only at frame boundaries.
+#define CODEC_FLAG2_NON_LINEAR_QUANT 0x00010000 ///< Use MPEG-2 nonlinear quantizer.
+#define CODEC_FLAG2_BIT_RESERVOIR 0x00020000 ///< Use a bit reservoir when encoding if possible
+#define CODEC_FLAG2_MBTREE        0x00040000 ///< Use macroblock tree ratecontrol (x264 only)
+#define CODEC_FLAG2_PSY           0x00080000 ///< Use psycho visual optimizations.
+#define CODEC_FLAG2_SSIM          0x00100000 ///< Compute SSIM during encoding, error[] values are undefined.
+
+/* Unsupported options :
+ *              Syntax Arithmetic coding (SAC)
+ *              Reference Picture Selection
+ *              Independent Segment Decoding */
+/* /Fx */
+/* codec capabilities */
+
+#define CODEC_CAP_DRAW_HORIZ_BAND 0x0001 ///< Decoder can use draw_horiz_band callback.
+/**
+ * Codec uses get_buffer() for allocating buffers and supports custom allocators.
+ * If not set, it might not use get_buffer() at all or use operations that
+ * assume the buffer was allocated by avcodec_default_get_buffer.
+ */
+#define CODEC_CAP_DR1             0x0002
+/* If 'parse_only' field is true, then avcodec_parse_frame() can be used. */
+#define CODEC_CAP_PARSE_ONLY      0x0004
+#define CODEC_CAP_TRUNCATED       0x0008
+/* Codec can export data for HW decoding (XvMC). */
+#define CODEC_CAP_HWACCEL         0x0010
+/**
+ * Codec has a nonzero delay and needs to be fed with NULL at the end to get the delayed data.
+ * If this is not set, the codec is guaranteed to never be fed with NULL data.
+ */
+#define CODEC_CAP_DELAY           0x0020
+/**
+ * Codec can be fed a final frame with a smaller size.
+ * This can be used to prevent truncation of the last audio samples.
+ */
+#define CODEC_CAP_SMALL_LAST_FRAME 0x0040
+/**
+ * Codec can export data for HW decoding (VDPAU).
+ */
+#define CODEC_CAP_HWACCEL_VDPAU    0x0080
+/**
+ * Codec can output multiple frames per AVPacket
+ * Normally demuxers return one frame at a time, demuxers which do not do
+ * are connected to a parser to split what they return into proper frames.
+ * This flag is reserved to the very rare category of codecs which have a
+ * bitstream that cannot be split into frames without timeconsuming
+ * operations like full decoding. Demuxers carring such bitstreams thus
+ * may return multiple frames in a packet. This has many disadvantages like
+ * prohibiting stream copy in many cases thus it should only be considered
+ * as a last resort.
+ */
+#define CODEC_CAP_SUBFRAMES        0x0100
+
+//The following defines may change, don't expect compatibility if you use them.
+#define MB_TYPE_INTRA4x4   0x0001
+#define MB_TYPE_INTRA16x16 0x0002 //FIXME H.264-specific
+#define MB_TYPE_INTRA_PCM  0x0004 //FIXME H.264-specific
+#define MB_TYPE_16x16      0x0008
+#define MB_TYPE_16x8       0x0010
+#define MB_TYPE_8x16       0x0020
+#define MB_TYPE_8x8        0x0040
+#define MB_TYPE_INTERLACED 0x0080
+#define MB_TYPE_DIRECT2    0x0100 //FIXME
+#define MB_TYPE_ACPRED     0x0200
+#define MB_TYPE_GMC        0x0400
+#define MB_TYPE_SKIP       0x0800
+#define MB_TYPE_P0L0       0x1000
+#define MB_TYPE_P1L0       0x2000
+#define MB_TYPE_P0L1       0x4000
+#define MB_TYPE_P1L1       0x8000
+#define MB_TYPE_L0         (MB_TYPE_P0L0 | MB_TYPE_P1L0)
+#define MB_TYPE_L1         (MB_TYPE_P0L1 | MB_TYPE_P1L1)
+#define MB_TYPE_L0L1       (MB_TYPE_L0   | MB_TYPE_L1)
+#define MB_TYPE_QUANT      0x00010000
+#define MB_TYPE_CBP        0x00020000
+//Note bits 24-31 are reserved for codec specific use (h264 ref0, mpeg1 0mv, ...)
+
+/**
+ * Pan Scan area.
+ * This specifies the area which should be displayed.
+ * Note there may be multiple such areas for one frame.
+ */
+typedef struct AVPanScan{
+    /**
+     * id
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+    int id;
+
+    /**
+     * width and height in 1/16 pel
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+    int width;
+    int height;
+
+    /**
+     * position of the top left corner in 1/16 pel for up to 3 fields/frames
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+    int16_t position[3][2];
+}AVPanScan;
+
+#define FF_COMMON_FRAME \
+    /**\
+     * pointer to the picture planes.\
+     * This might be different from the first allocated byte\
+     * - encoding: \
+     * - decoding: \
+     */\
+    uint8_t *data[4];\
+    int linesize[4];\
+    /**\
+     * pointer to the first allocated byte of the picture. Can be used in get_buffer/release_buffer.\
+     * This isn't used by libavcodec unless the default get/release_buffer() is used.\
+     * - encoding: \
+     * - decoding: \
+     */\
+    uint8_t *base[4];\
+    /**\
+     * 1 -> keyframe, 0-> not\
+     * - encoding: Set by libavcodec.\
+     * - decoding: Set by libavcodec.\
+     */\
+    int key_frame;\
+\
+    /**\
+     * Picture type of the frame, see ?_TYPE below.\
+     * - encoding: Set by libavcodec. for coded_picture (and set by user for input).\
+     * - decoding: Set by libavcodec.\
+     */\
+    int pict_type;\
+\
+    /**\
+     * presentation timestamp in time_base units (time when frame should be shown to user)\
+     * If AV_NOPTS_VALUE then frame_rate = 1/time_base will be assumed.\
+     * - encoding: MUST be set by user.\
+     * - decoding: Set by libavcodec.\
+     */\
+    int64_t pts;\
+\
+    /**\
+     * picture number in bitstream order\
+     * - encoding: set by\
+     * - decoding: Set by libavcodec.\
+     */\
+    int coded_picture_number;\
+    /**\
+     * picture number in display order\
+     * - encoding: set by\
+     * - decoding: Set by libavcodec.\
+     */\
+    int display_picture_number;\
+\
+    /**\
+     * quality (between 1 (good) and FF_LAMBDA_MAX (bad)) \
+     * - encoding: Set by libavcodec. for coded_picture (and set by user for input).\
+     * - decoding: Set by libavcodec.\
+     */\
+    int quality; \
+\
+    /**\
+     * buffer age (1->was last buffer and dint change, 2->..., ...).\
+     * Set to INT_MAX if the buffer has not been used yet.\
+     * - encoding: unused\
+     * - decoding: MUST be set by get_buffer().\
+     */\
+    int age;\
+\
+    /**\
+     * is this picture used as reference\
+     * The values for this are the same as the MpegEncContext.picture_structure\
+     * variable, that is 1->top field, 2->bottom field, 3->frame/both fields.\
+     * Set to 4 for delayed, non-reference frames.\
+     * - encoding: unused\
+     * - decoding: Set by libavcodec. (before get_buffer() call)).\
+     */\
+    int reference;\
+\
+    /**\
+     * QP table\
+     * - encoding: unused\
+     * - decoding: Set by libavcodec.\
+     */\
+    int8_t *qscale_table;\
+    /**\
+     * QP store stride\
+     * - encoding: unused\
+     * - decoding: Set by libavcodec.\
+     */\
+    int qstride;\
+\
+    /**\
+     * mbskip_table[mb]>=1 if MB didn't change\
+     * stride= mb_width = (width+15)>>4\
+     * - encoding: unused\
+     * - decoding: Set by libavcodec.\
+     */\
+    uint8_t *mbskip_table;\
+\
+    /**\
+     * motion vector table\
+     * @code\
+     * example:\
+     * int mv_sample_log2= 4 - motion_subsample_log2;\
+     * int mb_width= (width+15)>>4;\
+     * int mv_stride= (mb_width << mv_sample_log2) + 1;\
+     * motion_val[direction][x + y*mv_stride][0->mv_x, 1->mv_y];\
+     * @endcode\
+     * - encoding: Set by user.\
+     * - decoding: Set by libavcodec.\
+     */\
+    int16_t (*motion_val[2])[2];\
+\
+    /**\
+     * macroblock type table\
+     * mb_type_base + mb_width + 2\
+     * - encoding: Set by user.\
+     * - decoding: Set by libavcodec.\
+     */\
+    uint32_t *mb_type;\
+\
+    /**\
+     * log2 of the size of the block which a single vector in motion_val represents: \
+     * (4->16x16, 3->8x8, 2-> 4x4, 1-> 2x2)\
+     * - encoding: unused\
+     * - decoding: Set by libavcodec.\
+     */\
+    uint8_t motion_subsample_log2;\
+\
+    /**\
+     * for some private data of the user\
+     * - encoding: unused\
+     * - decoding: Set by user.\
+     */\
+    void *opaque;\
+\
+    /**\
+     * error\
+     * - encoding: Set by libavcodec. if flags&CODEC_FLAG_PSNR.\
+     * - decoding: unused\
+     */\
+    uint64_t error[4];\
+\
+    /**\
+     * type of the buffer (to keep track of who has to deallocate data[*])\
+     * - encoding: Set by the one who allocates it.\
+     * - decoding: Set by the one who allocates it.\
+     * Note: User allocated (direct rendering) & internal buffers cannot coexist currently.\
+     */\
+    int type;\
+    \
+    /**\
+     * When decoding, this signals how much the picture must be delayed.\
+     * extra_delay = repeat_pict / (2*fps)\
+     * - encoding: unused\
+     * - decoding: Set by libavcodec.\
+     */\
+    int repeat_pict;\
+    \
+    /**\
+     * \
+     */\
+    int qscale_type;\
+    \
+    /**\
+     * The content of the picture is interlaced.\
+     * - encoding: Set by user.\
+     * - decoding: Set by libavcodec. (default 0)\
+     */\
+    int interlaced_frame;\
+    \
+    /**\
+     * If the content is interlaced, is top field displayed first.\
+     * - encoding: Set by user.\
+     * - decoding: Set by libavcodec.\
+     */\
+    int top_field_first;\
+    \
+    /**\
+     * Pan scan.\
+     * - encoding: Set by user.\
+     * - decoding: Set by libavcodec.\
+     */\
+    AVPanScan *pan_scan;\
+    \
+    /**\
+     * Tell user application that palette has changed from previous frame.\
+     * - encoding: ??? (no palette-enabled encoder yet)\
+     * - decoding: Set by libavcodec. (default 0).\
+     */\
+    int palette_has_changed;\
+    \
+    /**\
+     * codec suggestion on buffer type if != 0\
+     * - encoding: unused\
+     * - decoding: Set by libavcodec. (before get_buffer() call)).\
+     */\
+    int buffer_hints;\
+\
+    /**\
+     * DCT coefficients\
+     * - encoding: unused\
+     * - decoding: Set by libavcodec.\
+     */\
+    short *dct_coeff;\
+\
+    /**\
+     * motion reference frame index\
+     * the order in which these are stored can depend on the codec.\
+     * - encoding: Set by user.\
+     * - decoding: Set by libavcodec.\
+     */\
+    int8_t *ref_index[2];\
+\
+    /**\
+     * reordered opaque 64bit number (generally a PTS) from AVCodecContext.reordered_opaque\
+     * output in AVFrame.reordered_opaque\
+     * - encoding: unused\
+     * - decoding: Read by user.\
+     */\
+    int64_t reordered_opaque;\
+\
+    /**\
+     * hardware accelerator private data (FFmpeg allocated)\
+     * - encoding: unused\
+     * - decoding: Set by libavcodec\
+     */\
+    void *hwaccel_picture_private;\
+
+
+#define FF_QSCALE_TYPE_MPEG1 0
+#define FF_QSCALE_TYPE_MPEG2 1
+#define FF_QSCALE_TYPE_H264  2
+#define FF_QSCALE_TYPE_VP56  3
+
+#define FF_BUFFER_TYPE_INTERNAL 1
+#define FF_BUFFER_TYPE_USER     2 ///< direct rendering buffers (image is (de)allocated by user)
+#define FF_BUFFER_TYPE_SHARED   4 ///< Buffer from somewhere else; don't deallocate image (data/base), all other tables are not shared.
+#define FF_BUFFER_TYPE_COPY     8 ///< Just a (modified) copy of some other buffer, don't deallocate anything.
+
+
+#define FF_I_TYPE  1 ///< Intra
+#define FF_P_TYPE  2 ///< Predicted
+#define FF_B_TYPE  3 ///< Bi-dir predicted
+#define FF_S_TYPE  4 ///< S(GMC)-VOP MPEG4
+#define FF_SI_TYPE 5 ///< Switching Intra
+#define FF_SP_TYPE 6 ///< Switching Predicted
+#define FF_BI_TYPE 7
+
+#define FF_BUFFER_HINTS_VALID    0x01 // Buffer hints value is meaningful (if 0 ignore).
+#define FF_BUFFER_HINTS_READABLE 0x02 // Codec will read from buffer.
+#define FF_BUFFER_HINTS_PRESERVE 0x04 // User must not alter buffer content.
+#define FF_BUFFER_HINTS_REUSABLE 0x08 // Codec will reuse the buffer (update).
+
+typedef struct AVPacket {
+    /**
+     * Presentation timestamp in AVStream->time_base units; the time at which
+     * the decompressed packet will be presented to the user.
+     * Can be AV_NOPTS_VALUE if it is not stored in the file.
+     * pts MUST be larger or equal to dts as presentation cannot happen before
+     * decompression, unless one wants to view hex dumps. Some formats misuse
+     * the terms dts and pts/cts to mean something different. Such timestamps
+     * must be converted to true pts/dts before they are stored in AVPacket.
+     */
+    int64_t pts;
+    /**
+     * Decompression timestamp in AVStream->time_base units; the time at which
+     * the packet is decompressed.
+     * Can be AV_NOPTS_VALUE if it is not stored in the file.
+     */
+    int64_t dts;
+    uint8_t *data;
+    int   size;
+    int   stream_index;
+    int   flags;
+    /**
+     * Duration of this packet in AVStream->time_base units, 0 if unknown.
+     * Equals next_pts - this_pts in presentation order.
+     */
+    int   duration;
+    void  (*destruct)(struct AVPacket *);
+    void  *priv;
+    int64_t pos;                            ///< byte position in stream, -1 if unknown
+
+    /**
+     * Time difference in AVStream->time_base units from the pts of this
+     * packet to the point at which the output from the decoder has converged
+     * independent from the availability of previous frames. That is, the
+     * frames are virtually identical no matter if decoding started from
+     * the very first frame or from this keyframe.
+     * Is AV_NOPTS_VALUE if unknown.
+     * This field is not the display duration of the current packet.
+     *
+     * The purpose of this field is to allow seeking in streams that have no
+     * keyframes in the conventional sense. It corresponds to the
+     * recovery point SEI in H.264 and match_time_delta in NUT. It is also
+     * essential for some types of subtitle streams to ensure that all
+     * subtitles are correctly displayed after seeking.
+     */
+    int64_t convergence_duration;
+} AVPacket;
+#define AV_PKT_FLAG_KEY   0x0001
+#if LIBAVCODEC_VERSION_MAJOR < 53
+#define PKT_FLAG_KEY AV_PKT_FLAG_KEY
+#endif
+
+/**
+ * Audio Video Frame.
+ * New fields can be added to the end of FF_COMMON_FRAME with minor version
+ * bumps.
+ * Removal, reordering and changes to existing fields require a major
+ * version bump. No fields should be added into AVFrame before or after
+ * FF_COMMON_FRAME!
+ * sizeof(AVFrame) must not be used outside libav*.
+ */
+typedef struct AVFrame {
+    FF_COMMON_FRAME
+} AVFrame;
+
+/**
+ * main external API structure.
+ * New fields can be added to the end with minor version bumps.
+ * Removal, reordering and changes to existing fields require a major
+ * version bump.
+ * sizeof(AVCodecContext) must not be used outside libav*.
+ */
+typedef struct AVCodecContext {
+    /**
+     * information on struct for av_log
+     * - set by avcodec_alloc_context
+     */
+    const AVClass *av_class;
+    /**
+     * the average bitrate
+     * - encoding: Set by user; unused for constant quantizer encoding.
+     * - decoding: Set by libavcodec. 0 or some bitrate if this info is available in the stream.
+     */
+    int bit_rate;
+
+    /**
+     * number of bits the bitstream is allowed to diverge from the reference.
+     *           the reference can be CBR (for CBR pass1) or VBR (for pass2)
+     * - encoding: Set by user; unused for constant quantizer encoding.
+     * - decoding: unused
+     */
+    int bit_rate_tolerance;
+
+    /**
+     * CODEC_FLAG_*.
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+    int flags;
+
+    /**
+     * Some codecs need additional format info. It is stored here.
+     * If any muxer uses this then ALL demuxers/parsers AND encoders for the
+     * specific codec MUST set it correctly otherwise stream copy breaks.
+     * In general use of this field by muxers is not recommanded.
+     * - encoding: Set by libavcodec.
+     * - decoding: Set by libavcodec. (FIXME: Is this OK?)
+     */
+    int sub_id;
+
+    /**
+     * Motion estimation algorithm used for video coding.
+     * 1 (zero), 2 (full), 3 (log), 4 (phods), 5 (epzs), 6 (x1), 7 (hex),
+     * 8 (umh), 9 (iter), 10 (tesa) [7, 8, 10 are x264 specific, 9 is snow specific]
+     * - encoding: MUST be set by user.
+     * - decoding: unused
+     */
+    int me_method;
+
+    /**
+     * some codecs need / can use extradata like Huffman tables.
+     * mjpeg: Huffman tables
+     * rv10: additional flags
+     * mpeg4: global headers (they can be in the bitstream or here)
+     * The allocated memory should be FF_INPUT_BUFFER_PADDING_SIZE bytes larger
+     * than extradata_size to avoid prolems if it is read with the bitstream reader.
+     * The bytewise contents of extradata must not depend on the architecture or CPU endianness.
+     * - encoding: Set/allocated/freed by libavcodec.
+     * - decoding: Set/allocated/freed by user.
+     */
+    uint8_t *extradata;
+    int extradata_size;
+
+    /**
+     * This is the fundamental unit of time (in seconds) in terms
+     * of which frame timestamps are represented. For fixed-fps content,
+     * timebase should be 1/framerate and timestamp increments should be
+     * identically 1.
+     * - encoding: MUST be set by user.
+     * - decoding: Set by libavcodec.
+     */
+    AVRational time_base;
+
+    /* video only */
+    /**
+     * picture width / height.
+     * - encoding: MUST be set by user.
+     * - decoding: Set by libavcodec.
+     * Note: For compatibility it is possible to set this instead of
+     * coded_width/height before decoding.
+     */
+    int width, height;
+
+#define FF_ASPECT_EXTENDED 15
+
+    /**
+     * the number of pictures in a group of pictures, or 0 for intra_only
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int gop_size;
+
+    /**
+     * Pixel format, see PIX_FMT_xxx.
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+    enum PixelFormat pix_fmt;
+
+    /**
+     * Frame rate emulation. If not zero, the lower layer (i.e. format handler)
+     * has to read frames at native frame rate.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int rate_emu;
+
+    /**
+     * If non NULL, 'draw_horiz_band' is called by the libavcodec
+     * decoder to draw a horizontal band. It improves cache usage. Not
+     * all codecs can do that. You must check the codec capabilities
+     * beforehand.
+     * The function is also used by hardware acceleration APIs.
+     * It is called at least once during frame decoding to pass
+     * the data needed for hardware render.
+     * In that mode instead of pixel data, AVFrame points to
+     * a structure specific to the acceleration API. The application
+     * reads the structure and can change some fields to indicate progress
+     * or mark state.
+     * - encoding: unused
+     * - decoding: Set by user.
+     * @param height the height of the slice
+     * @param y the y position of the slice
+     * @param type 1->top field, 2->bottom field, 3->frame
+     * @param offset offset into the AVFrame.data from which the slice should be read
+     */
+    void (*draw_horiz_band)(struct AVCodecContext *s,
+                            const AVFrame *src, int offset[4],
+                            int y, int type, int height);
+
+    /* audio only */
+    int sample_rate; ///< samples per second
+    int channels;    ///< number of audio channels
+
+    /**
+     * audio sample format
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+    enum SampleFormat sample_fmt;  ///< sample format
+
+    /* The following data should not be initialized. */
+    /**
+     * Samples per packet, initialized when calling 'init'.
+     */
+    int frame_size;
+    int frame_number;   ///< audio or video frame number
+#if LIBAVCODEC_VERSION_MAJOR < 53
+    int real_pict_num;  ///< Returns the real picture number of previous encoded frame.
+#endif
+
+    /**
+     * Number of frames the decoded output will be delayed relative to
+     * the encoded input.
+     * - encoding: Set by libavcodec.
+     * - decoding: unused
+     */
+    int delay;
+
+    /* - encoding parameters */
+    float qcompress;  ///< amount of qscale change between easy & hard scenes (0.0-1.0)
+    float qblur;      ///< amount of qscale smoothing over time (0.0-1.0)
+
+    /**
+     * minimum quantizer
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int qmin;
+
+    /**
+     * maximum quantizer
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int qmax;
+
+    /**
+     * maximum quantizer difference between frames
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int max_qdiff;
+
+    /**
+     * maximum number of B-frames between non-B-frames
+     * Note: The output will be delayed by max_b_frames+1 relative to the input.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int max_b_frames;
+
+    /**
+     * qscale factor between IP and B-frames
+     * If > 0 then the last P-frame quantizer will be used (q= lastp_q*factor+offset).
+     * If < 0 then normal ratecontrol will be done (q= -normal_q*factor+offset).
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    float b_quant_factor;
+
+    /** obsolete FIXME remove */
+    int rc_strategy;
+#define FF_RC_STRATEGY_XVID 1
+
+    int b_frame_strategy;
+
+    /**
+     * hurry up amount
+     * - encoding: unused
+     * - decoding: Set by user. 1-> Skip B-frames, 2-> Skip IDCT/dequant too, 5-> Skip everything except header
+     * @deprecated Deprecated in favor of skip_idct and skip_frame.
+     */
+    int hurry_up;
+
+    struct AVCodec *codec;
+
+    void *priv_data;
+
+    int rtp_payload_size;   /* The size of the RTP payload: the coder will  */
+                            /* do its best to deliver a chunk with size     */
+                            /* below rtp_payload_size, the chunk will start */
+                            /* with a start code on some codecs like H.263. */
+                            /* This doesn't take account of any particular  */
+                            /* headers inside the transmitted RTP payload.  */
+
+
+    /* The RTP callback: This function is called    */
+    /* every time the encoder has a packet to send. */
+    /* It depends on the encoder if the data starts */
+    /* with a Start Code (it should). H.263 does.   */
+    /* mb_nb contains the number of macroblocks     */
+    /* encoded in the RTP payload.                  */
+    void (*rtp_callback)(struct AVCodecContext *avctx, void *data, int size, int mb_nb);
+
+    /* statistics, used for 2-pass encoding */
+    int mv_bits;
+    int header_bits;
+    int i_tex_bits;
+    int p_tex_bits;
+    int i_count;
+    int p_count;
+    int skip_count;
+    int misc_bits;
+
+    /**
+     * number of bits used for the previously encoded frame
+     * - encoding: Set by libavcodec.
+     * - decoding: unused
+     */
+    int frame_bits;
+
+    /**
+     * Private data of the user, can be used to carry app specific stuff.
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+    void *opaque;
+
+    char codec_name[32];
+    enum AVMediaType codec_type; /* see AVMEDIA_TYPE_xxx */
+    enum CodecID codec_id; /* see CODEC_ID_xxx */
+
+    /**
+     * fourcc (LSB first, so "ABCD" -> ('D'<<24) + ('C'<<16) + ('B'<<8) + 'A').
+     * This is used to work around some encoder bugs.
+     * A demuxer should set this to what is stored in the field used to identify the codec.
+     * If there are multiple such fields in a container then the demuxer should choose the one
+     * which maximizes the information about the used codec.
+     * If the codec tag field in a container is larger then 32 bits then the demuxer should
+     * remap the longer ID to 32 bits with a table or other structure. Alternatively a new
+     * extra_codec_tag + size could be added but for this a clear advantage must be demonstrated
+     * first.
+     * - encoding: Set by user, if not then the default based on codec_id will be used.
+     * - decoding: Set by user, will be converted to uppercase by libavcodec during init.
+     */
+    unsigned int codec_tag;
+
+    /**
+     * Work around bugs in encoders which sometimes cannot be detected automatically.
+     * - encoding: Set by user
+     * - decoding: Set by user
+     */
+    int workaround_bugs;
+#define FF_BUG_AUTODETECT       1  ///< autodetection
+#define FF_BUG_OLD_MSMPEG4      2
+#define FF_BUG_XVID_ILACE       4
+#define FF_BUG_UMP4             8
+#define FF_BUG_NO_PADDING       16
+#define FF_BUG_AMV              32
+#define FF_BUG_AC_VLC           0  ///< Will be removed, libavcodec can now handle these non-compliant files by default.
+#define FF_BUG_QPEL_CHROMA      64
+#define FF_BUG_STD_QPEL         128
+#define FF_BUG_QPEL_CHROMA2     256
+#define FF_BUG_DIRECT_BLOCKSIZE 512
+#define FF_BUG_EDGE             1024
+#define FF_BUG_HPEL_CHROMA      2048
+#define FF_BUG_DC_CLIP          4096
+#define FF_BUG_MS               8192 ///< Work around various bugs in Microsoft's broken decoders.
+#define FF_BUG_TRUNCATED       16384
+//#define FF_BUG_FAKE_SCALABILITY 16 //Autodetection should work 100%.
+
+    /**
+     * luma single coefficient elimination threshold
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int luma_elim_threshold;
+
+    /**
+     * chroma single coeff elimination threshold
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int chroma_elim_threshold;
+
+    /**
+     * strictly follow the standard (MPEG4, ...).
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     * Setting this to STRICT or higher means the encoder and decoder will
+     * generally do stupid things. While setting it to inofficial or lower
+     * will mean the encoder might use things that are not supported by all
+     * spec compliant decoders. Decoders make no difference between normal,
+     * inofficial and experimental, that is they always try to decode things
+     * when they can unless they are explicitly asked to behave stupid
+     * (=strictly conform to the specs)
+     */
+    int strict_std_compliance;
+#define FF_COMPLIANCE_VERY_STRICT   2 ///< Strictly conform to a older more strict version of the spec or reference software.
+#define FF_COMPLIANCE_STRICT        1 ///< Strictly conform to all the things in the spec no matter what consequences.
+#define FF_COMPLIANCE_NORMAL        0
+#define FF_COMPLIANCE_INOFFICIAL   -1 ///< Allow inofficial extensions.
+#define FF_COMPLIANCE_EXPERIMENTAL -2 ///< Allow nonstandardized experimental things.
+
+    /**
+     * qscale offset between IP and B-frames
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    float b_quant_offset;
+
+    /**
+     * Error recognization; higher values will detect more errors but may
+     * misdetect some more or less valid parts as errors.
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+    int error_recognition;
+#define FF_ER_CAREFUL         1
+#define FF_ER_COMPLIANT       2
+#define FF_ER_AGGRESSIVE      3
+#define FF_ER_VERY_AGGRESSIVE 4
+
+    /**
+     * Called at the beginning of each frame to get a buffer for it.
+     * If pic.reference is set then the frame will be read later by libavcodec.
+     * avcodec_align_dimensions2() should be used to find the required width and
+     * height, as they normally need to be rounded up to the next multiple of 16.
+     * if CODEC_CAP_DR1 is not set then get_buffer() must call
+     * avcodec_default_get_buffer() instead of providing buffers allocated by
+     * some other means.
+     * - encoding: unused
+     * - decoding: Set by libavcodec., user can override.
+     */
+    int (*get_buffer)(struct AVCodecContext *c, AVFrame *pic);
+
+    /**
+     * Called to release buffers which were allocated with get_buffer.
+     * A released buffer can be reused in get_buffer().
+     * pic.data[*] must be set to NULL.
+     * - encoding: unused
+     * - decoding: Set by libavcodec., user can override.
+     */
+    void (*release_buffer)(struct AVCodecContext *c, AVFrame *pic);
+
+    /**
+     * Size of the frame reordering buffer in the decoder.
+     * For MPEG-2 it is 1 IPB or 0 low delay IP.
+     * - encoding: Set by libavcodec.
+     * - decoding: Set by libavcodec.
+     */
+    int has_b_frames;
+
+    /**
+     * number of bytes per packet if constant and known or 0
+     * Used by some WAV based audio codecs.
+     */
+    int block_align;
+
+    int parse_only; /* - decoding only: If true, only parsing is done
+                       (function avcodec_parse_frame()). The frame
+                       data is returned. Only MPEG codecs support this now. */
+
+    /**
+     * 0-> h263 quant 1-> mpeg quant
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int mpeg_quant;
+
+    /**
+     * pass1 encoding statistics output buffer
+     * - encoding: Set by libavcodec.
+     * - decoding: unused
+     */
+    char *stats_out;
+
+    /**
+     * pass2 encoding statistics input buffer
+     * Concatenated stuff from stats_out of pass1 should be placed here.
+     * - encoding: Allocated/set/freed by user.
+     * - decoding: unused
+     */
+    char *stats_in;
+
+    /**
+     * ratecontrol qmin qmax limiting method
+     * 0-> clipping, 1-> use a nice continous function to limit qscale wthin qmin/qmax.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    float rc_qsquish;
+
+    float rc_qmod_amp;
+    int rc_qmod_freq;
+
+    /**
+     * ratecontrol override, see RcOverride
+     * - encoding: Allocated/set/freed by user.
+     * - decoding: unused
+     */
+    RcOverride *rc_override;
+    int rc_override_count;
+
+    /**
+     * rate control equation
+     * - encoding: Set by user
+     * - decoding: unused
+     */
+    const char *rc_eq;
+
+    /**
+     * maximum bitrate
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int rc_max_rate;
+
+    /**
+     * minimum bitrate
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int rc_min_rate;
+
+    /**
+     * decoder bitstream buffer size
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int rc_buffer_size;
+    float rc_buffer_aggressivity;
+
+    /**
+     * qscale factor between P and I-frames
+     * If > 0 then the last p frame quantizer will be used (q= lastp_q*factor+offset).
+     * If < 0 then normal ratecontrol will be done (q= -normal_q*factor+offset).
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    float i_quant_factor;
+
+    /**
+     * qscale offset between P and I-frames
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    float i_quant_offset;
+
+    /**
+     * initial complexity for pass1 ratecontrol
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    float rc_initial_cplx;
+
+    /**
+     * DCT algorithm, see FF_DCT_* below
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int dct_algo;
+#define FF_DCT_AUTO    0
+#define FF_DCT_FASTINT 1
+#define FF_DCT_INT     2
+#define FF_DCT_MMX     3
+#define FF_DCT_MLIB    4
+#define FF_DCT_ALTIVEC 5
+#define FF_DCT_FAAN    6
+
+    /**
+     * luminance masking (0-> disabled)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    float lumi_masking;
+
+    /**
+     * temporary complexity masking (0-> disabled)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    float temporal_cplx_masking;
+
+    /**
+     * spatial complexity masking (0-> disabled)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    float spatial_cplx_masking;
+
+    /**
+     * p block masking (0-> disabled)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    float p_masking;
+
+    /**
+     * darkness masking (0-> disabled)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    float dark_masking;
+
+    /**
+     * IDCT algorithm, see FF_IDCT_* below.
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+    int idct_algo;
+#define FF_IDCT_AUTO          0
+#define FF_IDCT_INT           1
+#define FF_IDCT_SIMPLE        2
+#define FF_IDCT_SIMPLEMMX     3
+#define FF_IDCT_LIBMPEG2MMX   4
+#define FF_IDCT_PS2           5
+#define FF_IDCT_MLIB          6
+#define FF_IDCT_ARM           7
+#define FF_IDCT_ALTIVEC       8
+#define FF_IDCT_SH4           9
+#define FF_IDCT_SIMPLEARM     10
+#define FF_IDCT_H264          11
+#define FF_IDCT_VP3           12
+#define FF_IDCT_IPP           13
+#define FF_IDCT_XVIDMMX       14
+#define FF_IDCT_CAVS          15
+#define FF_IDCT_SIMPLEARMV5TE 16
+#define FF_IDCT_SIMPLEARMV6   17
+#define FF_IDCT_SIMPLEVIS     18
+#define FF_IDCT_WMV2          19
+#define FF_IDCT_FAAN          20
+#define FF_IDCT_EA            21
+#define FF_IDCT_SIMPLENEON    22
+#define FF_IDCT_SIMPLEALPHA   23
+#define FF_IDCT_BINK          24
+
+    /**
+     * slice count
+     * - encoding: Set by libavcodec.
+     * - decoding: Set by user (or 0).
+     */
+    int slice_count;
+    /**
+     * slice offsets in the frame in bytes
+     * - encoding: Set/allocated by libavcodec.
+     * - decoding: Set/allocated by user (or NULL).
+     */
+    int *slice_offset;
+
+    /**
+     * error concealment flags
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+    int error_concealment;
+#define FF_EC_GUESS_MVS   1
+#define FF_EC_DEBLOCK     2
+
+    /**
+     * dsp_mask could be add used to disable unwanted CPU features
+     * CPU features (i.e. MMX, SSE. ...)
+     *
+     * With the FORCE flag you may instead enable given CPU features.
+     * (Dangerous: Usable in case of misdetection, improper usage however will
+     * result into program crash.)
+     */
+    unsigned dsp_mask;
+#define FF_MM_FORCE    0x80000000 /* Force usage of selected flags (OR) */
+    /* lower 16 bits - CPU features */
+#define FF_MM_MMX      0x0001 ///< standard MMX
+#define FF_MM_3DNOW    0x0004 ///< AMD 3DNOW
+#if LIBAVCODEC_VERSION_MAJOR < 53
+#define FF_MM_MMXEXT   0x0002 ///< SSE integer functions or AMD MMX ext
+#endif
+#define FF_MM_MMX2     0x0002 ///< SSE integer functions or AMD MMX ext
+#define FF_MM_SSE      0x0008 ///< SSE functions
+#define FF_MM_SSE2     0x0010 ///< PIV SSE2 functions
+#define FF_MM_3DNOWEXT 0x0020 ///< AMD 3DNowExt
+#define FF_MM_SSE3     0x0040 ///< Prescott SSE3 functions
+#define FF_MM_SSSE3    0x0080 ///< Conroe SSSE3 functions
+#define FF_MM_SSE4     0x0100 ///< Penryn SSE4.1 functions
+#define FF_MM_SSE42    0x0200 ///< Nehalem SSE4.2 functions
+#define FF_MM_IWMMXT   0x0100 ///< XScale IWMMXT
+#define FF_MM_ALTIVEC  0x0001 ///< standard AltiVec
+
+    /**
+     * bits per sample/pixel from the demuxer (needed for huffyuv).
+     * - encoding: Set by libavcodec.
+     * - decoding: Set by user.
+     */
+     int bits_per_coded_sample;
+
+    /**
+     * prediction method (needed for huffyuv)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+     int prediction_method;
+#define FF_PRED_LEFT   0
+#define FF_PRED_PLANE  1
+#define FF_PRED_MEDIAN 2
+
+    /**
+     * sample aspect ratio (0 if unknown)
+     * That is the width of a pixel divided by the height of the pixel.
+     * Numerator and denominator must be relatively prime and smaller than 256 for some video standards.
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+    AVRational sample_aspect_ratio;
+
+    /**
+     * the picture in the bitstream
+     * - encoding: Set by libavcodec.
+     * - decoding: Set by libavcodec.
+     */
+    AVFrame *coded_frame;
+
+    /**
+     * debug
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+    int debug;
+#define FF_DEBUG_PICT_INFO   1
+#define FF_DEBUG_RC          2
+#define FF_DEBUG_BITSTREAM   4
+#define FF_DEBUG_MB_TYPE     8
+#define FF_DEBUG_QP          16
+#define FF_DEBUG_MV          32
+#define FF_DEBUG_DCT_COEFF   0x00000040
+#define FF_DEBUG_SKIP        0x00000080
+#define FF_DEBUG_STARTCODE   0x00000100
+#define FF_DEBUG_PTS         0x00000200
+#define FF_DEBUG_ER          0x00000400
+#define FF_DEBUG_MMCO        0x00000800
+#define FF_DEBUG_BUGS        0x00001000
+#define FF_DEBUG_VIS_QP      0x00002000
+#define FF_DEBUG_VIS_MB_TYPE 0x00004000
+#define FF_DEBUG_BUFFERS     0x00008000
+
+    /**
+     * debug
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+    int debug_mv;
+#define FF_DEBUG_VIS_MV_P_FOR  0x00000001 //visualize forward predicted MVs of P frames
+#define FF_DEBUG_VIS_MV_B_FOR  0x00000002 //visualize forward predicted MVs of B frames
+#define FF_DEBUG_VIS_MV_B_BACK 0x00000004 //visualize backward predicted MVs of B frames
+
+    /**
+     * error
+     * - encoding: Set by libavcodec if flags&CODEC_FLAG_PSNR.
+     * - decoding: unused
+     */
+    uint64_t error[4];
+
+    /**
+     * minimum MB quantizer
+     * - encoding: unused
+     * - decoding: unused
+     */
+    int mb_qmin;
+
+    /**
+     * maximum MB quantizer
+     * - encoding: unused
+     * - decoding: unused
+     */
+    int mb_qmax;
+
+    /**
+     * motion estimation comparison function
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int me_cmp;
+    /**
+     * subpixel motion estimation comparison function
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int me_sub_cmp;
+    /**
+     * macroblock comparison function (not supported yet)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int mb_cmp;
+    /**
+     * interlaced DCT comparison function
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int ildct_cmp;
+#define FF_CMP_SAD    0
+#define FF_CMP_SSE    1
+#define FF_CMP_SATD   2
+#define FF_CMP_DCT    3
+#define FF_CMP_PSNR   4
+#define FF_CMP_BIT    5
+#define FF_CMP_RD     6
+#define FF_CMP_ZERO   7
+#define FF_CMP_VSAD   8
+#define FF_CMP_VSSE   9
+#define FF_CMP_NSSE   10
+#define FF_CMP_W53    11
+#define FF_CMP_W97    12
+#define FF_CMP_DCTMAX 13
+#define FF_CMP_DCT264 14
+#define FF_CMP_CHROMA 256
+
+    /**
+     * ME diamond size & shape
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int dia_size;
+
+    /**
+     * amount of previous MV predictors (2a+1 x 2a+1 square)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int last_predictor_count;
+
+    /**
+     * prepass for motion estimation
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int pre_me;
+
+    /**
+     * motion estimation prepass comparison function
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int me_pre_cmp;
+
+    /**
+     * ME prepass diamond size & shape
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int pre_dia_size;
+
+    /**
+     * subpel ME quality
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int me_subpel_quality;
+
+    /**
+     * callback to negotiate the pixelFormat
+     * @param fmt is the list of formats which are supported by the codec,
+     * it is terminated by -1 as 0 is a valid format, the formats are ordered by quality.
+     * The first is always the native one.
+     * @return the chosen format
+     * - encoding: unused
+     * - decoding: Set by user, if not set the native format will be chosen.
+     */
+    enum PixelFormat (*get_format)(struct AVCodecContext *s, const enum PixelFormat * fmt);
+
+    /**
+     * DTG active format information (additional aspect ratio
+     * information only used in DVB MPEG-2 transport streams)
+     * 0 if not set.
+     *
+     * - encoding: unused
+     * - decoding: Set by decoder.
+     */
+    int dtg_active_format;
+#define FF_DTG_AFD_SAME         8
+#define FF_DTG_AFD_4_3          9
+#define FF_DTG_AFD_16_9         10
+#define FF_DTG_AFD_14_9         11
+#define FF_DTG_AFD_4_3_SP_14_9  13
+#define FF_DTG_AFD_16_9_SP_14_9 14
+#define FF_DTG_AFD_SP_4_3       15
+
+    /**
+     * maximum motion estimation search range in subpel units
+     * If 0 then no limit.
+     *
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int me_range;
+
+    /**
+     * intra quantizer bias
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int intra_quant_bias;
+#define FF_DEFAULT_QUANT_BIAS 999999
+
+    /**
+     * inter quantizer bias
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int inter_quant_bias;
+
+    /**
+     * color table ID
+     * - encoding: unused
+     * - decoding: Which clrtable should be used for 8bit RGB images.
+     *             Tables have to be stored somewhere. FIXME
+     */
+    int color_table_id;
+
+    /**
+     * internal_buffer count
+     * Don't touch, used by libavcodec default_get_buffer().
+     */
+    int internal_buffer_count;
+
+    /**
+     * internal_buffers
+     * Don't touch, used by libavcodec default_get_buffer().
+     */
+    void *internal_buffer;
+
+#define FF_LAMBDA_SHIFT 7
+#define FF_LAMBDA_SCALE (1<<FF_LAMBDA_SHIFT)
+#define FF_QP2LAMBDA 118 ///< factor to convert from H.263 QP to lambda
+#define FF_LAMBDA_MAX (256*128-1)
+
+#define FF_QUALITY_SCALE FF_LAMBDA_SCALE //FIXME maybe remove
+    /**
+     * Global quality for codecs which cannot change it per frame.
+     * This should be proportional to MPEG-1/2/4 qscale.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int global_quality;
+
+#define FF_CODER_TYPE_VLC       0
+#define FF_CODER_TYPE_AC        1
+#define FF_CODER_TYPE_RAW       2
+#define FF_CODER_TYPE_RLE       3
+#define FF_CODER_TYPE_DEFLATE   4
+    /**
+     * coder type
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int coder_type;
+
+    /**
+     * context model
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int context_model;
+#if 0
+    /**
+     *
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+    uint8_t * (*realloc)(struct AVCodecContext *s, uint8_t *buf, int buf_size);
+#endif
+
+    /**
+     * slice flags
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+    int slice_flags;
+#define SLICE_FLAG_CODED_ORDER    0x0001 ///< draw_horiz_band() is called in coded order instead of display
+#define SLICE_FLAG_ALLOW_FIELD    0x0002 ///< allow draw_horiz_band() with field slices (MPEG2 field pics)
+#define SLICE_FLAG_ALLOW_PLANE    0x0004 ///< allow draw_horiz_band() with 1 component at a time (SVQ1)
+
+    /**
+     * XVideo Motion Acceleration
+     * - encoding: forbidden
+     * - decoding: set by decoder
+     */
+    int xvmc_acceleration;
+
+    /**
+     * macroblock decision mode
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int mb_decision;
+#define FF_MB_DECISION_SIMPLE 0        ///< uses mb_cmp
+#define FF_MB_DECISION_BITS   1        ///< chooses the one which needs the fewest bits
+#define FF_MB_DECISION_RD     2        ///< rate distortion
+
+    /**
+     * custom intra quantization matrix
+     * - encoding: Set by user, can be NULL.
+     * - decoding: Set by libavcodec.
+     */
+    uint16_t *intra_matrix;
+
+    /**
+     * custom inter quantization matrix
+     * - encoding: Set by user, can be NULL.
+     * - decoding: Set by libavcodec.
+     */
+    uint16_t *inter_matrix;
+
+    /**
+     * fourcc from the AVI stream header (LSB first, so "ABCD" -> ('D'<<24) + ('C'<<16) + ('B'<<8) + 'A').
+     * This is used to work around some encoder bugs.
+     * - encoding: unused
+     * - decoding: Set by user, will be converted to uppercase by libavcodec during init.
+     */
+    unsigned int stream_codec_tag;
+
+    /**
+     * scene change detection threshold
+     * 0 is default, larger means fewer detected scene changes.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int scenechange_threshold;
+
+    /**
+     * minimum Lagrange multipler
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int lmin;
+
+    /**
+     * maximum Lagrange multipler
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int lmax;
+
+    /**
+     * palette control structure
+     * - encoding: ??? (no palette-enabled encoder yet)
+     * - decoding: Set by user.
+     */
+    struct AVPaletteControl *palctrl;
+
+    /**
+     * noise reduction strength
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int noise_reduction;
+
+    /**
+     * Called at the beginning of a frame to get cr buffer for it.
+     * Buffer type (size, hints) must be the same. libavcodec won't check it.
+     * libavcodec will pass previous buffer in pic, function should return
+     * same buffer or new buffer with old frame "painted" into it.
+     * If pic.data[0] == NULL must behave like get_buffer().
+     * if CODEC_CAP_DR1 is not set then reget_buffer() must call
+     * avcodec_default_reget_buffer() instead of providing buffers allocated by
+     * some other means.
+     * - encoding: unused
+     * - decoding: Set by libavcodec., user can override
+     */
+    int (*reget_buffer)(struct AVCodecContext *c, AVFrame *pic);
+
+    /**
+     * Number of bits which should be loaded into the rc buffer before decoding starts.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int rc_initial_buffer_occupancy;
+
+    /**
+     *
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int inter_threshold;
+
+    /**
+     * CODEC_FLAG2_*
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+    int flags2;
+
+    /**
+     * Simulates errors in the bitstream to test error concealment.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int error_rate;
+
+    /**
+     * MP3 antialias algorithm, see FF_AA_* below.
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+    int antialias_algo;
+#define FF_AA_AUTO    0
+#define FF_AA_FASTINT 1 //not implemented yet
+#define FF_AA_INT     2
+#define FF_AA_FLOAT   3
+    /**
+     * quantizer noise shaping
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int quantizer_noise_shaping;
+
+    /**
+     * thread count
+     * is used to decide how many independent tasks should be passed to execute()
+     * - encoding: Set by user.
+     * - decoding: Set by user.
+     */
+    int thread_count;
+
+    /**
+     * The codec may call this to execute several independent things.
+     * It will return only after finishing all tasks.
+     * The user may replace this with some multithreaded implementation,
+     * the default implementation will execute the parts serially.
+     * @param count the number of things to execute
+     * - encoding: Set by libavcodec, user can override.
+     * - decoding: Set by libavcodec, user can override.
+     */
+    int (*execute)(struct AVCodecContext *c, int (*func)(struct AVCodecContext *c2, void *arg), void *arg2, int *ret, int count, int size);
+
+    /**
+     * thread opaque
+     * Can be used by execute() to store some per AVCodecContext stuff.
+     * - encoding: set by execute()
+     * - decoding: set by execute()
+     */
+    void *thread_opaque;
+
+    /**
+     * Motion estimation threshold below which no motion estimation is
+     * performed, but instead the user specified motion vectors are used.
+     *
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+     int me_threshold;
+
+    /**
+     * Macroblock threshold below which the user specified macroblock types will be used.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+     int mb_threshold;
+
+    /**
+     * precision of the intra DC coefficient - 8
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+     int intra_dc_precision;
+
+    /**
+     * noise vs. sse weight for the nsse comparsion function
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+     int nsse_weight;
+
+    /**
+     * Number of macroblock rows at the top which are skipped.
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+     int skip_top;
+
+    /**
+     * Number of macroblock rows at the bottom which are skipped.
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+     int skip_bottom;
+
+    /**
+     * profile
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+     int profile;
+#define FF_PROFILE_UNKNOWN -99
+
+#define FF_PROFILE_AAC_MAIN 0
+#define FF_PROFILE_AAC_LOW  1
+#define FF_PROFILE_AAC_SSR  2
+#define FF_PROFILE_AAC_LTP  3
+
+#define FF_PROFILE_H264_BASELINE    66
+#define FF_PROFILE_H264_MAIN        77
+#define FF_PROFILE_H264_EXTENDED    88
+#define FF_PROFILE_H264_HIGH        100
+#define FF_PROFILE_H264_HIGH_10     110
+#define FF_PROFILE_H264_HIGH_422    122
+#define FF_PROFILE_H264_HIGH_444    244
+#define FF_PROFILE_H264_CAVLC_444   44
+
+    /**
+     * level
+     * - encoding: Set by user.
+     * - decoding: Set by libavcodec.
+     */
+     int level;
+#define FF_LEVEL_UNKNOWN -99
+
+    /**
+     * low resolution decoding, 1-> 1/2 size, 2->1/4 size
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+     int lowres;
+
+    /**
+     * Bitstream width / height, may be different from width/height if lowres
+     * or other things are used.
+     * - encoding: unused
+     * - decoding: Set by user before init if known. Codec should override / dynamically change if needed.
+     */
+    int coded_width, coded_height;
+
+    /**
+     * frame skip threshold
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int frame_skip_threshold;
+
+    /**
+     * frame skip factor
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int frame_skip_factor;
+
+    /**
+     * frame skip exponent
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int frame_skip_exp;
+
+    /**
+     * frame skip comparison function
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int frame_skip_cmp;
+
+    /**
+     * Border processing masking, raises the quantizer for mbs on the borders
+     * of the picture.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    float border_masking;
+
+    /**
+     * minimum MB lagrange multipler
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int mb_lmin;
+
+    /**
+     * maximum MB lagrange multipler
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int mb_lmax;
+
+    /**
+     *
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int me_penalty_compensation;
+
+    /**
+     *
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+    enum AVDiscard skip_loop_filter;
+
+    /**
+     *
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+    enum AVDiscard skip_idct;
+
+    /**
+     *
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+    enum AVDiscard skip_frame;
+
+    /**
+     *
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int bidir_refine;
+
+    /**
+     *
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int brd_scale;
+
+    /**
+     * constant rate factor - quality-based VBR - values ~correspond to qps
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    float crf;
+
+    /**
+     * constant quantization parameter rate control method
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int cqp;
+
+    /**
+     * minimum GOP size
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int keyint_min;
+
+    /**
+     * number of reference frames
+     * - encoding: Set by user.
+     * - decoding: Set by lavc.
+     */
+    int refs;
+
+    /**
+     * chroma qp offset from luma
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int chromaoffset;
+
+    /**
+     * Influences how often B-frames are used.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int bframebias;
+
+    /**
+     * trellis RD quantization
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int trellis;
+
+    /**
+     * Reduce fluctuations in qp (before curve compression).
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    float complexityblur;
+
+    /**
+     * in-loop deblocking filter alphac0 parameter
+     * alpha is in the range -6...6
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int deblockalpha;
+
+    /**
+     * in-loop deblocking filter beta parameter
+     * beta is in the range -6...6
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int deblockbeta;
+
+    /**
+     * macroblock subpartition sizes to consider - p8x8, p4x4, b8x8, i8x8, i4x4
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int partitions;
+#define X264_PART_I4X4 0x001  /* Analyze i4x4 */
+#define X264_PART_I8X8 0x002  /* Analyze i8x8 (requires 8x8 transform) */
+#define X264_PART_P8X8 0x010  /* Analyze p16x8, p8x16 and p8x8 */
+#define X264_PART_P4X4 0x020  /* Analyze p8x4, p4x8, p4x4 */
+#define X264_PART_B8X8 0x100  /* Analyze b16x8, b8x16 and b8x8 */
+
+    /**
+     * direct MV prediction mode - 0 (none), 1 (spatial), 2 (temporal), 3 (auto)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int directpred;
+
+    /**
+     * Audio cutoff bandwidth (0 means "automatic")
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int cutoff;
+
+    /**
+     * Multiplied by qscale for each frame and added to scene_change_score.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int scenechange_factor;
+
+    /**
+     *
+     * Note: Value depends upon the compare function used for fullpel ME.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int mv0_threshold;
+
+    /**
+     * Adjusts sensitivity of b_frame_strategy 1.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int b_sensitivity;
+
+    /**
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int compression_level;
+#define FF_COMPRESSION_DEFAULT -1
+
+    /**
+     * Sets whether to use LPC mode - used by FLAC encoder.
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int use_lpc;
+
+    /**
+     * LPC coefficient precision - used by FLAC encoder
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int lpc_coeff_precision;
+
+    /**
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int min_prediction_order;
+
+    /**
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int max_prediction_order;
+
+    /**
+     * search method for selecting prediction order
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int prediction_order_method;
+
+    /**
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int min_partition_order;
+
+    /**
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int max_partition_order;
+
+    /**
+     * GOP timecode frame start number, in non drop frame format
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int64_t timecode_frame_start;
+
+#if LIBAVCODEC_VERSION_MAJOR < 53
+    /**
+     * Decoder should decode to this many channels if it can (0 for default)
+     * - encoding: unused
+     * - decoding: Set by user.
+     * @deprecated Deprecated in favor of request_channel_layout.
+     */
+    int request_channels;
+#endif
+
+    /**
+     * Percentage of dynamic range compression to be applied by the decoder.
+     * The default value is 1.0, corresponding to full compression.
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+    float drc_scale;
+
+    /**
+     * opaque 64bit number (generally a PTS) that will be reordered and
+     * output in AVFrame.reordered_opaque
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+    int64_t reordered_opaque;
+
+    /**
+     * Bits per sample/pixel of internal libavcodec pixel/sample format.
+     * This field is applicable only when sample_fmt is SAMPLE_FMT_S32.
+     * - encoding: set by user.
+     * - decoding: set by libavcodec.
+     */
+    int bits_per_raw_sample;
+
+    /**
+     * Audio channel layout.
+     * - encoding: set by user.
+     * - decoding: set by libavcodec.
+     */
+    int64_t channel_layout;
+
+    /**
+     * Request decoder to use this channel layout if it can (0 for default)
+     * - encoding: unused
+     * - decoding: Set by user.
+     */
+    int64_t request_channel_layout;
+
+    /**
+     * Ratecontrol attempt to use, at maximum, <value> of what can be used without an underflow.
+     * - encoding: Set by user.
+     * - decoding: unused.
+     */
+    float rc_max_available_vbv_use;
+
+    /**
+     * Ratecontrol attempt to use, at least, <value> times the amount needed to prevent a vbv overflow.
+     * - encoding: Set by user.
+     * - decoding: unused.
+     */
+    float rc_min_vbv_overflow_use;
+
+    /**
+     * Hardware accelerator in use
+     * - encoding: unused.
+     * - decoding: Set by libavcodec
+     */
+    struct AVHWAccel *hwaccel;
+
+    /**
+     * For some codecs, the time base is closer to the field rate than the frame rate.
+     * Most notably, H.264 and MPEG-2 specify time_base as half of frame duration
+     * if no telecine is used ...
+     *
+     * Set to time_base ticks per frame. Default 1, e.g., H.264/MPEG-2 set it to 2.
+     */
+    int ticks_per_frame;
+
+    /**
+     * Hardware accelerator context.
+     * For some hardware accelerators, a global context needs to be
+     * provided by the user. In that case, this holds display-dependent
+     * data FFmpeg cannot instantiate itself. Please refer to the
+     * FFmpeg HW accelerator documentation to know how to fill this
+     * is. e.g. for VA API, this is a struct vaapi_context.
+     * - encoding: unused
+     * - decoding: Set by user
+     */
+    void *hwaccel_context;
+
+    /**
+     * Chromaticity coordinates of the source primaries.
+     * - encoding: Set by user
+     * - decoding: Set by libavcodec
+     */
+    enum AVColorPrimaries color_primaries;
+
+    /**
+     * Color Transfer Characteristic.
+     * - encoding: Set by user
+     * - decoding: Set by libavcodec
+     */
+    enum AVColorTransferCharacteristic color_trc;
+
+    /**
+     * YUV colorspace type.
+     * - encoding: Set by user
+     * - decoding: Set by libavcodec
+     */
+    enum AVColorSpace colorspace;
+
+    /**
+     * MPEG vs JPEG YUV range.
+     * - encoding: Set by user
+     * - decoding: Set by libavcodec
+     */
+    enum AVColorRange color_range;
+
+    /**
+     * This defines the location of chroma samples.
+     * - encoding: Set by user
+     * - decoding: Set by libavcodec
+     */
+    enum AVChromaLocation chroma_sample_location;
+
+    /**
+     * The codec may call this to execute several independent things.
+     * It will return only after finishing all tasks.
+     * The user may replace this with some multithreaded implementation,
+     * the default implementation will execute the parts serially.
+     * Also see avcodec_thread_init and e.g. the --enable-pthread configure option.
+     * @param c context passed also to func
+     * @param count the number of things to execute
+     * @param arg2 argument passed unchanged to func
+     * @param ret return values of executed functions, must have space for "count" values. May be NULL.
+     * @param func function that will be called count times, with jobnr from 0 to count-1.
+     *             threadnr will be in the range 0 to c->thread_count-1 < MAX_THREADS and so that no
+     *             two instances of func executing at the same time will have the same threadnr.
+     * @return always 0 currently, but code should handle a future improvement where when any call to func
+     *         returns < 0 no further calls to func may be done and < 0 is returned.
+     * - encoding: Set by libavcodec, user can override.
+     * - decoding: Set by libavcodec, user can override.
+     */
+    int (*execute2)(struct AVCodecContext *c, int (*func)(struct AVCodecContext *c2, void *arg, int jobnr, int threadnr), void *arg2, int *ret, int count);
+
+    /**
+     * explicit P-frame weighted prediction analysis method
+     * 0: off
+     * 1: fast blind weighting (one reference duplicate with -1 offset)
+     * 2: smart weighting (full fade detection analysis)
+     * - encoding: Set by user.
+     * - decoding: unused
+     */
+    int weighted_p_pred;
+
+    /**
+     * AQ mode
+     * 0: Disabled
+     * 1: Variance AQ (complexity mask)
+     * 2: Auto-variance AQ (experimental)
+     * - encoding: Set by user
+     * - decoding: unused
+     */
+    int aq_mode;
+
+    /**
+     * AQ strength
+     * Reduces blocking and blurring in flat and textured areas.
+     * - encoding: Set by user
+     * - decoding: unused
+     */
+    float aq_strength;
+
+    /**
+     * PSY RD
+     * Strength of psychovisual optimization
+     * - encoding: Set by user
+     * - decoding: unused
+     */
+    float psy_rd;
+
+    /**
+     * PSY trellis
+     * Strength of psychovisual optimization
+     * - encoding: Set by user
+     * - decoding: unused
+     */
+    float psy_trellis;
+
+    /**
+     * RC lookahead
+     * Number of frames for frametype and ratecontrol lookahead
+     * - encoding: Set by user
+     * - decoding: unused
+     */
+    int rc_lookahead;
+} AVCodecContext;
+
+/**
+ * AVCodec.
+ */
+typedef struct AVCodec {
+    /**
+     * Name of the codec implementation.
+     * The name is globally unique among encoders and among decoders (but an
+     * encoder and a decoder can share the same name).
+     * This is the primary way to find a codec from the user perspective.
+     */
+    const char *name;
+    enum AVMediaType type;
+    enum CodecID id;
+    int priv_data_size;
+    int (*init)(AVCodecContext *);
+    int (*encode)(AVCodecContext *, uint8_t *buf, int buf_size, void *data);
+    int (*close)(AVCodecContext *);
+    int (*decode)(AVCodecContext *, void *outdata, int *outdata_size, AVPacket *avpkt);
+    /**
+     * Codec capabilities.
+     * see CODEC_CAP_*
+     */
+    int capabilities;
+    struct AVCodec *next;
+    /**
+     * Flush buffers.
+     * Will be called when seeking
+     */
+    void (*flush)(AVCodecContext *);
+    const AVRational *supported_framerates; ///< array of supported framerates, or NULL if any, array is terminated by {0,0}
+    const enum PixelFormat *pix_fmts;       ///< array of supported pixel formats, or NULL if unknown, array is terminated by -1
+    /**
+     * Descriptive name for the codec, meant to be more human readable than name.
+     * You should use the NULL_IF_CONFIG_SMALL() macro to define it.
+     */
+    const char *long_name;
+    const int *supported_samplerates;       ///< array of supported audio samplerates, or NULL if unknown, array is terminated by 0
+    const enum SampleFormat *sample_fmts;   ///< array of supported sample formats, or NULL if unknown, array is terminated by -1
+    const int64_t *channel_layouts;         ///< array of support channel layouts, or NULL if unknown. array is terminated by 0
+} AVCodec;
+
+/**
+ * AVHWAccel.
+ */
+typedef struct AVHWAccel {
+    /**
+     * Name of the hardware accelerated codec.
+     * The name is globally unique among encoders and among decoders (but an
+     * encoder and a decoder can share the same name).
+     */
+    const char *name;
+
+    /**
+     * Type of codec implemented by the hardware accelerator.
+     *
+     * See AVMEDIA_TYPE_xxx
+     */
+    enum AVMediaType type;
+
+    /**
+     * Codec implemented by the hardware accelerator.
+     *
+     * See CODEC_ID_xxx
+     */
+    enum CodecID id;
+
+    /**
+     * Supported pixel format.
+     *
+     * Only hardware accelerated formats are supported here.
+     */
+    enum PixelFormat pix_fmt;
+
+    /**
+     * Hardware accelerated codec capabilities.
+     * see FF_HWACCEL_CODEC_CAP_*
+     */
+    int capabilities;
+
+    struct AVHWAccel *next;
+
+    /**
+     * Called at the beginning of each frame or field picture.
+     *
+     * Meaningful frame information (codec specific) is guaranteed to
+     * be parsed at this point. This function is mandatory.
+     *
+     * Note that buf can be NULL along with buf_size set to 0.
+     * Otherwise, this means the whole frame is available at this point.
+     *
+     * @param avctx the codec context
+     * @param buf the frame data buffer base
+     * @param buf_size the size of the frame in bytes
+     * @return zero if successful, a negative value otherwise
+     */
+    int (*start_frame)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size);
+
+    /**
+     * Callback for each slice.
+     *
+     * Meaningful slice information (codec specific) is guaranteed to
+     * be parsed at this point. This function is mandatory.
+     *
+     * @param avctx the codec context
+     * @param buf the slice data buffer base
+     * @param buf_size the size of the slice in bytes
+     * @return zero if successful, a negative value otherwise
+     */
+    int (*decode_slice)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size);
+
+    /**
+     * Called at the end of each frame or field picture.
+     *
+     * The whole picture is parsed at this point and can now be sent
+     * to the hardware accelerator. This function is mandatory.
+     *
+     * @param avctx the codec context
+     * @return zero if successful, a negative value otherwise
+     */
+    int (*end_frame)(AVCodecContext *avctx);
+
+    /**
+     * Size of HW accelerator private data.
+     *
+     * Private data is allocated with av_mallocz() before
+     * AVCodecContext.get_buffer() and deallocated after
+     * AVCodecContext.release_buffer().
+     */
+    int priv_data_size;
+} AVHWAccel;
+
+/**
+ * four components are given, that's all.
+ * the last component is alpha
+ */
+typedef struct AVPicture {
+    uint8_t *data[4];
+    int linesize[4];       ///< number of bytes per line
+} AVPicture;
+
+#if LIBAVCODEC_VERSION_MAJOR < 53
+/**
+ * AVPaletteControl
+ * This structure defines a method for communicating palette changes
+ * between and demuxer and a decoder.
+ *
+ * @deprecated Use AVPacket to send palette changes instead.
+ * This is totally broken.
+ */
+#define AVPALETTE_SIZE 1024
+#define AVPALETTE_COUNT 256
+typedef struct AVPaletteControl {
+
+    /* Demuxer sets this to 1 to indicate the palette has changed;
+     * decoder resets to 0. */
+    int palette_changed;
+
+    /* 4-byte ARGB palette entries, stored in native byte order; note that
+     * the individual palette components should be on a 8-bit scale; if
+     * the palette data comes from an IBM VGA native format, the component
+     * data is probably 6 bits in size and needs to be scaled. */
+    unsigned int palette[AVPALETTE_COUNT];
+
+} AVPaletteControl attribute_deprecated;
+#endif
+
+enum AVSubtitleType {
+    SUBTITLE_NONE,
+
+    SUBTITLE_BITMAP,                ///< A bitmap, pict will be set
+
+    /**
+     * Plain text, the text field must be set by the decoder and is
+     * authoritative. ass and pict fields may contain approximations.
+     */
+    SUBTITLE_TEXT,
+
+    /**
+     * Formatted text, the ass field must be set by the decoder and is
+     * authoritative. pict and text fields may contain approximations.
+     */
+    SUBTITLE_ASS,
+};
+
+typedef struct AVSubtitleRect {
+    int x;         ///< top left corner  of pict, undefined when pict is not set
+    int y;         ///< top left corner  of pict, undefined when pict is not set
+    int w;         ///< width            of pict, undefined when pict is not set
+    int h;         ///< height           of pict, undefined when pict is not set
+    int nb_colors; ///< number of colors in pict, undefined when pict is not set
+
+    /**
+     * data+linesize for the bitmap of this subtitle.
+     * can be set for text/ass as well once they where rendered
+     */
+    AVPicture pict;
+    enum AVSubtitleType type;
+
+    char *text;                     ///< 0 terminated plain UTF-8 text
+
+    /**
+     * 0 terminated ASS/SSA compatible event line.
+     * The pressentation of this is unaffected by the other values in this
+     * struct.
+     */
+    char *ass;
+} AVSubtitleRect;
+
+typedef struct AVSubtitle {
+    uint16_t format; /* 0 = graphics */
+    uint32_t start_display_time; /* relative to packet pts, in ms */
+    uint32_t end_display_time; /* relative to packet pts, in ms */
+    unsigned num_rects;
+    AVSubtitleRect **rects;
+    int64_t pts;    ///< Same as packet pts, in AV_TIME_BASE
+} AVSubtitle;
+
+/* packet functions */
+
+/**
+ * @deprecated use NULL instead
+ */
+attribute_deprecated void av_destruct_packet_nofree(AVPacket *pkt);
+
+/**
+ * Default packet destructor.
+ */
+void av_destruct_packet(AVPacket *pkt);
+
+/**
+ * Initialize optional fields of a packet with default values.
+ *
+ * @param pkt packet
+ */
+void av_init_packet(AVPacket *pkt);
+
+/**
+ * Allocate the payload of a packet and initialize its fields with
+ * default values.
+ *
+ * @param pkt packet
+ * @param size wanted payload size
+ * @return 0 if OK, AVERROR_xxx otherwise
+ */
+int av_new_packet(AVPacket *pkt, int size);
+
+/**
+ * Reduce packet size, correctly zeroing padding
+ *
+ * @param pkt packet
+ * @param size new size
+ */
+void av_shrink_packet(AVPacket *pkt, int size);
+
+/**
+ * @warning This is a hack - the packet memory allocation stuff is broken. The
+ * packet is allocated if it was not really allocated.
+ */
+int av_dup_packet(AVPacket *pkt);
+
+/**
+ * Free a packet.
+ *
+ * @param pkt packet to free
+ */
+void av_free_packet(AVPacket *pkt);
+
+/* resample.c */
+
+struct ReSampleContext;
+struct AVResampleContext;
+
+typedef struct ReSampleContext ReSampleContext;
+
+#if LIBAVCODEC_VERSION_MAJOR < 53
+/**
+ * @deprecated Use av_audio_resample_init() instead.
+ */
+attribute_deprecated ReSampleContext *audio_resample_init(int output_channels, int input_channels,
+                                                          int output_rate, int input_rate);
+#endif
+/**
+ *  Initializes audio resampling context
+ *
+ * @param output_channels  number of output channels
+ * @param input_channels   number of input channels
+ * @param output_rate      output sample rate
+ * @param input_rate       input sample rate
+ * @param sample_fmt_out   requested output sample format
+ * @param sample_fmt_in    input sample format
+ * @param filter_length    length of each FIR filter in the filterbank relative to the cutoff freq
+ * @param log2_phase_count log2 of the number of entries in the polyphase filterbank
+ * @param linear           If 1 then the used FIR filter will be linearly interpolated
+                           between the 2 closest, if 0 the closest will be used
+ * @param cutoff           cutoff frequency, 1.0 corresponds to half the output sampling rate
+ * @return allocated ReSampleContext, NULL if error occured
+ */
+ReSampleContext *av_audio_resample_init(int output_channels, int input_channels,
+                                        int output_rate, int input_rate,
+                                        enum SampleFormat sample_fmt_out,
+                                        enum SampleFormat sample_fmt_in,
+                                        int filter_length, int log2_phase_count,
+                                        int linear, double cutoff);
+
+int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples);
+void audio_resample_close(ReSampleContext *s);
+
+
+/**
+ * Initializes an audio resampler.
+ * Note, if either rate is not an integer then simply scale both rates up so they are.
+ * @param filter_length length of each FIR filter in the filterbank relative to the cutoff freq
+ * @param log2_phase_count log2 of the number of entries in the polyphase filterbank
+ * @param linear If 1 then the used FIR filter will be linearly interpolated
+                 between the 2 closest, if 0 the closest will be used
+ * @param cutoff cutoff frequency, 1.0 corresponds to half the output sampling rate
+ */
+struct AVResampleContext *av_resample_init(int out_rate, int in_rate, int filter_length, int log2_phase_count, int linear, double cutoff);
+
+/**
+ * resamples.
+ * @param src an array of unconsumed samples
+ * @param consumed the number of samples of src which have been consumed are returned here
+ * @param src_size the number of unconsumed samples available
+ * @param dst_size the amount of space in samples available in dst
+ * @param update_ctx If this is 0 then the context will not be modified, that way several channels can be resampled with the same context.
+ * @return the number of samples written in dst or -1 if an error occurred
+ */
+int av_resample(struct AVResampleContext *c, short *dst, short *src, int *consumed, int src_size, int dst_size, int update_ctx);
+
+
+/**
+ * Compensates samplerate/timestamp drift. The compensation is done by changing
+ * the resampler parameters, so no audible clicks or similar distortions occur
+ * @param compensation_distance distance in output samples over which the compensation should be performed
+ * @param sample_delta number of output samples which should be output less
+ *
+ * example: av_resample_compensate(c, 10, 500)
+ * here instead of 510 samples only 500 samples would be output
+ *
+ * note, due to rounding the actual compensation might be slightly different,
+ * especially if the compensation_distance is large and the in_rate used during init is small
+ */
+void av_resample_compensate(struct AVResampleContext *c, int sample_delta, int compensation_distance);
+void av_resample_close(struct AVResampleContext *c);
+
+/**
+ * Allocate memory for a picture.  Call avpicture_free to free it.
+ *
+ * @param picture the picture to be filled in
+ * @param pix_fmt the format of the picture
+ * @param width the width of the picture
+ * @param height the height of the picture
+ * @return zero if successful, a negative value if not
+ */
+int avpicture_alloc(AVPicture *picture, enum PixelFormat pix_fmt, int width, int height);
+
+/**
+ * Free a picture previously allocated by avpicture_alloc().
+ *
+ * @param picture the AVPicture to be freed
+ */
+void avpicture_free(AVPicture *picture);
+
+/**
+ * Fill in the AVPicture fields.
+ * The fields of the given AVPicture are filled in by using the 'ptr' address
+ * which points to the image data buffer. Depending on the specified picture
+ * format, one or multiple image data pointers and line sizes will be set.
+ * If a planar format is specified, several pointers will be set pointing to
+ * the different picture planes and the line sizes of the different planes
+ * will be stored in the lines_sizes array.
+ * Call with ptr == NULL to get the required size for the ptr buffer.
+ *
+ * @param picture AVPicture whose fields are to be filled in
+ * @param ptr Buffer which will contain or contains the actual image data
+ * @param pix_fmt The format in which the picture data is stored.
+ * @param width the width of the image in pixels
+ * @param height the height of the image in pixels
+ * @return size of the image data in bytes
+ */
+int avpicture_fill(AVPicture *picture, uint8_t *ptr,
+                   enum PixelFormat pix_fmt, int width, int height);
+int avpicture_layout(const AVPicture* src, enum PixelFormat pix_fmt, int width, int height,
+                     unsigned char *dest, int dest_size);
+
+/**
+ * Calculate the size in bytes that a picture of the given width and height
+ * would occupy if stored in the given picture format.
+ * Note that this returns the size of a compact representation as generated
+ * by avpicture_layout, which can be smaller than the size required for e.g.
+ * avpicture_fill.
+ *
+ * @param pix_fmt the given picture format
+ * @param width the width of the image
+ * @param height the height of the image
+ * @return Image data size in bytes or -1 on error (e.g. too large dimensions).
+ */
+int avpicture_get_size(enum PixelFormat pix_fmt, int width, int height);
+void avcodec_get_chroma_sub_sample(enum PixelFormat pix_fmt, int *h_shift, int *v_shift);
+const char *avcodec_get_pix_fmt_name(enum PixelFormat pix_fmt);
+void avcodec_set_dimensions(AVCodecContext *s, int width, int height);
+
+#if LIBAVCODEC_VERSION_MAJOR < 53
+/**
+ * Returns the pixel format corresponding to the name name.
+ *
+ * If there is no pixel format with name name, then looks for a
+ * pixel format with the name corresponding to the native endian
+ * format of name.
+ * For example in a little-endian system, first looks for "gray16",
+ * then for "gray16le".
+ *
+ * Finally if no pixel format has been found, returns PIX_FMT_NONE.
+ *
+ * @deprecated Deprecated in favor of av_get_pix_fmt().
+ */
+attribute_deprecated enum PixelFormat avcodec_get_pix_fmt(const char* name);
+#endif
+
+/**
+ * Returns a value representing the fourCC code associated to the
+ * pixel format pix_fmt, or 0 if no associated fourCC code can be
+ * found.
+ */
+unsigned int avcodec_pix_fmt_to_codec_tag(enum PixelFormat pix_fmt);
+
+#define FF_LOSS_RESOLUTION  0x0001 /**< loss due to resolution change */
+#define FF_LOSS_DEPTH       0x0002 /**< loss due to color depth change */
+#define FF_LOSS_COLORSPACE  0x0004 /**< loss due to color space conversion */
+#define FF_LOSS_ALPHA       0x0008 /**< loss of alpha bits */
+#define FF_LOSS_COLORQUANT  0x0010 /**< loss due to color quantization */
+#define FF_LOSS_CHROMA      0x0020 /**< loss of chroma (e.g. RGB to gray conversion) */
+
+/**
+ * Computes what kind of losses will occur when converting from one specific
+ * pixel format to another.
+ * When converting from one pixel format to another, information loss may occur.
+ * For example, when converting from RGB24 to GRAY, the color information will
+ * be lost. Similarly, other losses occur when converting from some formats to
+ * other formats. These losses can involve loss of chroma, but also loss of
+ * resolution, loss of color depth, loss due to the color space conversion, loss
+ * of the alpha bits or loss due to color quantization.
+ * avcodec_get_fix_fmt_loss() informs you about the various types of losses
+ * which will occur when converting from one pixel format to another.
+ *
+ * @param[in] dst_pix_fmt destination pixel format
+ * @param[in] src_pix_fmt source pixel format
+ * @param[in] has_alpha Whether the source pixel format alpha channel is used.
+ * @return Combination of flags informing you what kind of losses will occur.
+ */
+int avcodec_get_pix_fmt_loss(enum PixelFormat dst_pix_fmt, enum PixelFormat src_pix_fmt,
+                             int has_alpha);
+
+/**
+ * Finds the best pixel format to convert to given a certain source pixel
+ * format.  When converting from one pixel format to another, information loss
+ * may occur.  For example, when converting from RGB24 to GRAY, the color
+ * information will be lost. Similarly, other losses occur when converting from
+ * some formats to other formats. avcodec_find_best_pix_fmt() searches which of
+ * the given pixel formats should be used to suffer the least amount of loss.
+ * The pixel formats from which it chooses one, are determined by the
+ * pix_fmt_mask parameter.
+ *
+ * @code
+ * src_pix_fmt = PIX_FMT_YUV420P;
+ * pix_fmt_mask = (1 << PIX_FMT_YUV422P) || (1 << PIX_FMT_RGB24);
+ * dst_pix_fmt = avcodec_find_best_pix_fmt(pix_fmt_mask, src_pix_fmt, alpha, &loss);
+ * @endcode
+ *
+ * @param[in] pix_fmt_mask bitmask determining which pixel format to choose from
+ * @param[in] src_pix_fmt source pixel format
+ * @param[in] has_alpha Whether the source pixel format alpha channel is used.
+ * @param[out] loss_ptr Combination of flags informing you what kind of losses will occur.
+ * @return The best pixel format to convert to or -1 if none was found.
+ */
+enum PixelFormat avcodec_find_best_pix_fmt(int64_t pix_fmt_mask, enum PixelFormat src_pix_fmt,
+                              int has_alpha, int *loss_ptr);
+
+
+/**
+ * Print in buf the string corresponding to the pixel format with
+ * number pix_fmt, or an header if pix_fmt is negative.
+ *
+ * @param[in] buf the buffer where to write the string
+ * @param[in] buf_size the size of buf
+ * @param[in] pix_fmt the number of the pixel format to print the corresponding info string, or
+ * a negative value to print the corresponding header.
+ * Meaningful values for obtaining a pixel format info vary from 0 to PIX_FMT_NB -1.
+ */
+void avcodec_pix_fmt_string (char *buf, int buf_size, enum PixelFormat pix_fmt);
+
+#define FF_ALPHA_TRANSP       0x0001 /* image has some totally transparent pixels */
+#define FF_ALPHA_SEMI_TRANSP  0x0002 /* image has some transparent pixels */
+
+/**
+ * Tell if an image really has transparent alpha values.
+ * @return ored mask of FF_ALPHA_xxx constants
+ */
+int img_get_alpha_info(const AVPicture *src,
+                       enum PixelFormat pix_fmt, int width, int height);
+
+/* deinterlace a picture */
+/* deinterlace - if not supported return -1 */
+int avpicture_deinterlace(AVPicture *dst, const AVPicture *src,
+                          enum PixelFormat pix_fmt, int width, int height);
+
+/* external high level API */
+
+/**
+ * If c is NULL, returns the first registered codec,
+ * if c is non-NULL, returns the next registered codec after c,
+ * or NULL if c is the last one.
+ */
+AVCodec *av_codec_next(AVCodec *c);
+
+/**
+ * Returns the LIBAVCODEC_VERSION_INT constant.
+ */
+unsigned avcodec_version(void);
+
+/**
+ * Returns the libavcodec build-time configuration.
+ */
+const char *avcodec_configuration(void);
+
+/**
+ * Returns the libavcodec license.
+ */
+const char *avcodec_license(void);
+
+/**
+ * Initializes libavcodec.
+ *
+ * @warning This function must be called before any other libavcodec
+ * function.
+ */
+void avcodec_init(void);
+
+#if LIBAVCODEC_VERSION_MAJOR < 53
+/**
+ * @deprecated Deprecated in favor of avcodec_register().
+ */
+attribute_deprecated void register_avcodec(AVCodec *codec);
+#endif
+
+/**
+ * Register the codec codec and initialize libavcodec.
+ *
+ * @see avcodec_init()
+ */
+void avcodec_register(AVCodec *codec);
+
+/**
+ * Finds a registered encoder with a matching codec ID.
+ *
+ * @param id CodecID of the requested encoder
+ * @return An encoder if one was found, NULL otherwise.
+ */
+AVCodec *avcodec_find_encoder(enum CodecID id);
+
+/**
+ * Finds a registered encoder with the specified name.
+ *
+ * @param name name of the requested encoder
+ * @return An encoder if one was found, NULL otherwise.
+ */
+AVCodec *avcodec_find_encoder_by_name(const char *name);
+
+/**
+ * Finds a registered decoder with a matching codec ID.
+ *
+ * @param id CodecID of the requested decoder
+ * @return A decoder if one was found, NULL otherwise.
+ */
+AVCodec *avcodec_find_decoder(enum CodecID id);
+
+/**
+ * Finds a registered decoder with the specified name.
+ *
+ * @param name name of the requested decoder
+ * @return A decoder if one was found, NULL otherwise.
+ */
+AVCodec *avcodec_find_decoder_by_name(const char *name);
+void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode);
+
+/**
+ * Sets the fields of the given AVCodecContext to default values.
+ *
+ * @param s The AVCodecContext of which the fields should be set to default values.
+ */
+void avcodec_get_context_defaults(AVCodecContext *s);
+
+/** THIS FUNCTION IS NOT YET PART OF THE PUBLIC API!
+ *  we WILL change its arguments and name a few times! */
+void avcodec_get_context_defaults2(AVCodecContext *s, enum AVMediaType);
+
+/**
+ * Allocates an AVCodecContext and sets its fields to default values.  The
+ * resulting struct can be deallocated by simply calling av_free().
+ *
+ * @return An AVCodecContext filled with default values or NULL on failure.
+ * @see avcodec_get_context_defaults
+ */
+AVCodecContext *avcodec_alloc_context(void);
+
+/** THIS FUNCTION IS NOT YET PART OF THE PUBLIC API!
+ *  we WILL change its arguments and name a few times! */
+AVCodecContext *avcodec_alloc_context2(enum AVMediaType);
+
+/**
+ * Copy the settings of the source AVCodecContext into the destination
+ * AVCodecContext. The resulting destination codec context will be
+ * unopened, i.e. you are required to call avcodec_open() before you
+ * can use this AVCodecContext to decode/encode video/audio data.
+ *
+ * @param dest target codec context, should be initialized with
+ *             avcodec_alloc_context(), but otherwise uninitialized
+ * @param src source codec context
+ * @return AVERROR() on error (e.g. memory allocation error), 0 on success
+ */
+int avcodec_copy_context(AVCodecContext *dest, const AVCodecContext *src);
+
+/**
+ * Sets the fields of the given AVFrame to default values.
+ *
+ * @param pic The AVFrame of which the fields should be set to default values.
+ */
+void avcodec_get_frame_defaults(AVFrame *pic);
+
+/**
+ * Allocates an AVFrame and sets its fields to default values.  The resulting
+ * struct can be deallocated by simply calling av_free().
+ *
+ * @return An AVFrame filled with default values or NULL on failure.
+ * @see avcodec_get_frame_defaults
+ */
+AVFrame *avcodec_alloc_frame(void);
+
+int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic);
+void avcodec_default_release_buffer(AVCodecContext *s, AVFrame *pic);
+int avcodec_default_reget_buffer(AVCodecContext *s, AVFrame *pic);
+
+/**
+ * Returns the amount of padding in pixels which the get_buffer callback must
+ * provide around the edge of the image for codecs which do not have the
+ * CODEC_FLAG_EMU_EDGE flag.
+ *
+ * @return Required padding in pixels.
+ */
+unsigned avcodec_get_edge_width(void);
+/**
+ * Modifies width and height values so that they will result in a memory
+ * buffer that is acceptable for the codec if you do not use any horizontal
+ * padding.
+ */
+void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height);
+/**
+ * Modifies width and height values so that they will result in a memory
+ * buffer that is acceptable for the codec if you also ensure that all
+ * line sizes are a multiple of the respective linesize_align[i].
+ */
+void avcodec_align_dimensions2(AVCodecContext *s, int *width, int *height,
+                               int linesize_align[4]);
+
+/**
+ * Checks if the given dimension of a picture is valid, meaning that all
+ * bytes of the picture can be addressed with a signed int.
+ *
+ * @param[in] w Width of the picture.
+ * @param[in] h Height of the picture.
+ * @return Zero if valid, a negative value if invalid.
+ */
+int avcodec_check_dimensions(void *av_log_ctx, unsigned int w, unsigned int h);
+enum PixelFormat avcodec_default_get_format(struct AVCodecContext *s, const enum PixelFormat * fmt);
+
+int avcodec_thread_init(AVCodecContext *s, int thread_count);
+void avcodec_thread_free(AVCodecContext *s);
+int avcodec_default_execute(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg2),void *arg, int *ret, int count, int size);
+int avcodec_default_execute2(AVCodecContext *c, int (*func)(AVCodecContext *c2, void *arg2, int, int),void *arg, int *ret, int count);
+//FIXME func typedef
+
+/**
+ * Initializes the AVCodecContext to use the given AVCodec. Prior to using this
+ * function the context has to be allocated.
+ *
+ * The functions avcodec_find_decoder_by_name(), avcodec_find_encoder_by_name(),
+ * avcodec_find_decoder() and avcodec_find_encoder() provide an easy way for
+ * retrieving a codec.
+ *
+ * @warning This function is not thread safe!
+ *
+ * @code
+ * avcodec_register_all();
+ * codec = avcodec_find_decoder(CODEC_ID_H264);
+ * if (!codec)
+ *     exit(1);
+ *
+ * context = avcodec_alloc_context();
+ *
+ * if (avcodec_open(context, codec) < 0)
+ *     exit(1);
+ * @endcode
+ *
+ * @param avctx The context which will be set up to use the given codec.
+ * @param codec The codec to use within the context.
+ * @return zero on success, a negative value on error
+ * @see avcodec_alloc_context, avcodec_find_decoder, avcodec_find_encoder
+ */
+int avcodec_open(AVCodecContext *avctx, AVCodec *codec);
+
+#if LIBAVCODEC_VERSION_MAJOR < 53
+/**
+ * Decodes an audio frame from buf into samples.
+ * Wrapper function which calls avcodec_decode_audio3.
+ *
+ * @deprecated Use avcodec_decode_audio3 instead.
+ * @param avctx the codec context
+ * @param[out] samples the output buffer
+ * @param[in,out] frame_size_ptr the output buffer size in bytes
+ * @param[in] buf the input buffer
+ * @param[in] buf_size the input buffer size in bytes
+ * @return On error a negative value is returned, otherwise the number of bytes
+ * used or zero if no frame could be decompressed.
+ */
+attribute_deprecated int avcodec_decode_audio2(AVCodecContext *avctx, int16_t *samples,
+                         int *frame_size_ptr,
+                         const uint8_t *buf, int buf_size);
+#endif
+
+/**
+ * Decodes the audio frame of size avpkt->size from avpkt->data into samples.
+ * Some decoders may support multiple frames in a single AVPacket, such
+ * decoders would then just decode the first frame. In this case,
+ * avcodec_decode_audio3 has to be called again with an AVPacket that contains
+ * the remaining data in order to decode the second frame etc.
+ * If no frame
+ * could be outputted, frame_size_ptr is zero. Otherwise, it is the
+ * decompressed frame size in bytes.
+ *
+ * @warning You must set frame_size_ptr to the allocated size of the
+ * output buffer before calling avcodec_decode_audio3().
+ *
+ * @warning The input buffer must be FF_INPUT_BUFFER_PADDING_SIZE larger than
+ * the actual read bytes because some optimized bitstream readers read 32 or 64
+ * bits at once and could read over the end.
+ *
+ * @warning The end of the input buffer avpkt->data should be set to 0 to ensure that
+ * no overreading happens for damaged MPEG streams.
+ *
+ * @note You might have to align the input buffer avpkt->data and output buffer
+ * samples. The alignment requirements depend on the CPU: On some CPUs it isn't
+ * necessary at all, on others it won't work at all if not aligned and on others
+ * it will work but it will have an impact on performance.
+ *
+ * In practice, avpkt->data should have 4 byte alignment at minimum and
+ * samples should be 16 byte aligned unless the CPU doesn't need it
+ * (AltiVec and SSE do).
+ *
+ * @param avctx the codec context
+ * @param[out] samples the output buffer, sample type in avctx->sample_fmt
+ * @param[in,out] frame_size_ptr the output buffer size in bytes
+ * @param[in] avpkt The input AVPacket containing the input buffer.
+ *            You can create such packet with av_init_packet() and by then setting
+ *            data and size, some decoders might in addition need other fields.
+ *            All decoders are designed to use the least fields possible though.
+ * @return On error a negative value is returned, otherwise the number of bytes
+ * used or zero if no frame data was decompressed (used) from the input AVPacket.
+ */
+int avcodec_decode_audio3(AVCodecContext *avctx, int16_t *samples,
+                         int *frame_size_ptr,
+                         AVPacket *avpkt);
+
+#if LIBAVCODEC_VERSION_MAJOR < 53
+/**
+ * Decodes a video frame from buf into picture.
+ * Wrapper function which calls avcodec_decode_video2.
+ *
+ * @deprecated Use avcodec_decode_video2 instead.
+ * @param avctx the codec context
+ * @param[out] picture The AVFrame in which the decoded video frame will be stored.
+ * @param[in] buf the input buffer
+ * @param[in] buf_size the size of the input buffer in bytes
+ * @param[in,out] got_picture_ptr Zero if no frame could be decompressed, otherwise, it is nonzero.
+ * @return On error a negative value is returned, otherwise the number of bytes
+ * used or zero if no frame could be decompressed.
+ */
+attribute_deprecated int avcodec_decode_video(AVCodecContext *avctx, AVFrame *picture,
+                         int *got_picture_ptr,
+                         const uint8_t *buf, int buf_size);
+#endif
+
+/**
+ * Decodes the video frame of size avpkt->size from avpkt->data into picture.
+ * Some decoders may support multiple frames in a single AVPacket, such
+ * decoders would then just decode the first frame.
+ *
+ * @warning The input buffer must be FF_INPUT_BUFFER_PADDING_SIZE larger than
+ * the actual read bytes because some optimized bitstream readers read 32 or 64
+ * bits at once and could read over the end.
+ *
+ * @warning The end of the input buffer buf should be set to 0 to ensure that
+ * no overreading happens for damaged MPEG streams.
+ *
+ * @note You might have to align the input buffer avpkt->data.
+ * The alignment requirements depend on the CPU: on some CPUs it isn't
+ * necessary at all, on others it won't work at all if not aligned and on others
+ * it will work but it will have an impact on performance.
+ *
+ * In practice, avpkt->data should have 4 byte alignment at minimum.
+ *
+ * @note Some codecs have a delay between input and output, these need to be
+ * fed with avpkt->data=NULL, avpkt->size=0 at the end to return the remaining frames.
+ *
+ * @param avctx the codec context
+ * @param[out] picture The AVFrame in which the decoded video frame will be stored.
+ * @param[in] avpkt The input AVpacket containing the input buffer.
+ *            You can create such packet with av_init_packet() and by then setting
+ *            data and size, some decoders might in addition need other fields like
+ *            flags&AV_PKT_FLAG_KEY. All decoders are designed to use the least
+ *            fields possible.
+ * @param[in,out] got_picture_ptr Zero if no frame could be decompressed, otherwise, it is nonzero.
+ * @return On error a negative value is returned, otherwise the number of bytes
+ * used or zero if no frame could be decompressed.
+ */
+int avcodec_decode_video2(AVCodecContext *avctx, AVFrame *picture,
+                         int *got_picture_ptr,
+                         AVPacket *avpkt);
+
+#if LIBAVCODEC_VERSION_MAJOR < 53
+/* Decode a subtitle message. Return -1 if error, otherwise return the
+ * number of bytes used. If no subtitle could be decompressed,
+ * got_sub_ptr is zero. Otherwise, the subtitle is stored in *sub. */
+attribute_deprecated int avcodec_decode_subtitle(AVCodecContext *avctx, AVSubtitle *sub,
+                            int *got_sub_ptr,
+                            const uint8_t *buf, int buf_size);
+#endif
+
+/**
+ * Decodes a subtitle message.
+ * Returns a negative value on error, otherwise returns the number of bytes used.
+ * If no subtitle could be decompressed, got_sub_ptr is zero.
+ * Otherwise, the subtitle is stored in *sub.
+ *
+ * @param avctx the codec context
+ * @param[out] sub The AVSubtitle in which the decoded subtitle will be stored.
+ * @param[in,out] got_sub_ptr Zero if no subtitle could be decompressed, otherwise, it is nonzero.
+ * @param[in] avpkt The input AVPacket containing the input buffer.
+ */
+int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
+                            int *got_sub_ptr,
+                            AVPacket *avpkt);
+int avcodec_parse_frame(AVCodecContext *avctx, uint8_t **pdata,
+                        int *data_size_ptr,
+                        uint8_t *buf, int buf_size);
+
+/**
+ * Encodes an audio frame from samples into buf.
+ *
+ * @note The output buffer should be at least FF_MIN_BUFFER_SIZE bytes large.
+ * However, for PCM audio the user will know how much space is needed
+ * because it depends on the value passed in buf_size as described
+ * below. In that case a lower value can be used.
+ *
+ * @param avctx the codec context
+ * @param[out] buf the output buffer
+ * @param[in] buf_size the output buffer size
+ * @param[in] samples the input buffer containing the samples
+ * The number of samples read from this buffer is frame_size*channels,
+ * both of which are defined in avctx.
+ * For PCM audio the number of samples read from samples is equal to
+ * buf_size * input_sample_size / output_sample_size.
+ * @return On error a negative value is returned, on success zero or the number
+ * of bytes used to encode the data read from the input buffer.
+ */
+int avcodec_encode_audio(AVCodecContext *avctx, uint8_t *buf, int buf_size,
+                         const short *samples);
+
+/**
+ * Encodes a video frame from pict into buf.
+ * The input picture should be
+ * stored using a specific format, namely avctx.pix_fmt.
+ *
+ * @param avctx the codec context
+ * @param[out] buf the output buffer for the bitstream of encoded frame
+ * @param[in] buf_size the size of the output buffer in bytes
+ * @param[in] pict the input picture to encode
+ * @return On error a negative value is returned, on success zero or the number
+ * of bytes used from the output buffer.
+ */
+int avcodec_encode_video(AVCodecContext *avctx, uint8_t *buf, int buf_size,
+                         const AVFrame *pict);
+int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size,
+                            const AVSubtitle *sub);
+
+int avcodec_close(AVCodecContext *avctx);
+
+/**
+ * Register all the codecs, parsers and bitstream filters which were enabled at
+ * configuration time. If you do not call this function you can select exactly
+ * which formats you want to support, by using the individual registration
+ * functions.
+ *
+ * @see avcodec_register
+ * @see av_register_codec_parser
+ * @see av_register_bitstream_filter
+ */
+void avcodec_register_all(void);
+
+/**
+ * Flush buffers, should be called when seeking or when switching to a different stream.
+ */
+void avcodec_flush_buffers(AVCodecContext *avctx);
+
+void avcodec_default_free_buffers(AVCodecContext *s);
+
+/* misc useful functions */
+
+/**
+ * Returns a single letter to describe the given picture type pict_type.
+ *
+ * @param[in] pict_type the picture type
+ * @return A single character representing the picture type.
+ */
+char av_get_pict_type_char(int pict_type);
+
+/**
+ * Returns codec bits per sample.
+ *
+ * @param[in] codec_id the codec
+ * @return Number of bits per sample or zero if unknown for the given codec.
+ */
+int av_get_bits_per_sample(enum CodecID codec_id);
+
+/**
+ * Returns sample format bits per sample.
+ *
+ * @param[in] sample_fmt the sample format
+ * @return Number of bits per sample or zero if unknown for the given sample format.
+ */
+int av_get_bits_per_sample_format(enum SampleFormat sample_fmt);
+
+/* frame parsing */
+typedef struct AVCodecParserContext {
+    void *priv_data;
+    struct AVCodecParser *parser;
+    int64_t frame_offset; /* offset of the current frame */
+    int64_t cur_offset; /* current offset
+                           (incremented by each av_parser_parse()) */
+    int64_t next_frame_offset; /* offset of the next frame */
+    /* video info */
+    int pict_type; /* XXX: Put it back in AVCodecContext. */
+    /**
+     * This field is used for proper frame duration computation in lavf.
+     * It signals, how much longer the frame duration of the current frame
+     * is compared to normal frame duration.
+     *
+     * frame_duration = (1 + repeat_pict) * time_base
+     *
+     * It is used by codecs like H.264 to display telecined material.
+     */
+    int repeat_pict; /* XXX: Put it back in AVCodecContext. */
+    int64_t pts;     /* pts of the current frame */
+    int64_t dts;     /* dts of the current frame */
+
+    /* private data */
+    int64_t last_pts;
+    int64_t last_dts;
+    int fetch_timestamp;
+
+#define AV_PARSER_PTS_NB 4
+    int cur_frame_start_index;
+    int64_t cur_frame_offset[AV_PARSER_PTS_NB];
+    int64_t cur_frame_pts[AV_PARSER_PTS_NB];
+    int64_t cur_frame_dts[AV_PARSER_PTS_NB];
+
+    int flags;
+#define PARSER_FLAG_COMPLETE_FRAMES           0x0001
+
+    int64_t offset;      ///< byte offset from starting packet start
+    int64_t cur_frame_end[AV_PARSER_PTS_NB];
+
+    /*!
+     * Set by parser to 1 for key frames and 0 for non-key frames.
+     * It is initialized to -1, so if the parser doesn't set this flag,
+     * old-style fallback using FF_I_TYPE picture type as key frames
+     * will be used.
+     */
+    int key_frame;
+
+    /**
+     * Time difference in stream time base units from the pts of this
+     * packet to the point at which the output from the decoder has converged
+     * independent from the availability of previous frames. That is, the
+     * frames are virtually identical no matter if decoding started from
+     * the very first frame or from this keyframe.
+     * Is AV_NOPTS_VALUE if unknown.
+     * This field is not the display duration of the current frame.
+     *
+     * The purpose of this field is to allow seeking in streams that have no
+     * keyframes in the conventional sense. It corresponds to the
+     * recovery point SEI in H.264 and match_time_delta in NUT. It is also
+     * essential for some types of subtitle streams to ensure that all
+     * subtitles are correctly displayed after seeking.
+     */
+    int64_t convergence_duration;
+
+    // Timestamp generation support:
+    /**
+     * Synchronization point for start of timestamp generation.
+     *
+     * Set to >0 for sync point, 0 for no sync point and <0 for undefined
+     * (default).
+     *
+     * For example, this corresponds to presence of H.264 buffering period
+     * SEI message.
+     */
+    int dts_sync_point;
+
+    /**
+     * Offset of the current timestamp against last timestamp sync point in
+     * units of AVCodecContext.time_base.
+     *
+     * Set to INT_MIN when dts_sync_point unused. Otherwise, it must
+     * contain a valid timestamp offset.
+     *
+     * Note that the timestamp of sync point has usually a nonzero
+     * dts_ref_dts_delta, which refers to the previous sync point. Offset of
+     * the next frame after timestamp sync point will be usually 1.
+     *
+     * For example, this corresponds to H.264 cpb_removal_delay.
+     */
+    int dts_ref_dts_delta;
+
+    /**
+     * Presentation delay of current frame in units of AVCodecContext.time_base.
+     *
+     * Set to INT_MIN when dts_sync_point unused. Otherwise, it must
+     * contain valid non-negative timestamp delta (presentation time of a frame
+     * must not lie in the past).
+     *
+     * This delay represents the difference between decoding and presentation
+     * time of the frame.
+     *
+     * For example, this corresponds to H.264 dpb_output_delay.
+     */
+    int pts_dts_delta;
+
+    /**
+     * Position of the packet in file.
+     *
+     * Analogous to cur_frame_pts/dts
+     */
+    int64_t cur_frame_pos[AV_PARSER_PTS_NB];
+
+    /**
+     * Byte position of currently parsed frame in stream.
+     */
+    int64_t pos;
+
+    /**
+     * Previous frame byte position.
+     */
+    int64_t last_pos;
+} AVCodecParserContext;
+
+typedef struct AVCodecParser {
+    int codec_ids[5]; /* several codec IDs are permitted */
+    int priv_data_size;
+    int (*parser_init)(AVCodecParserContext *s);
+    int (*parser_parse)(AVCodecParserContext *s,
+                        AVCodecContext *avctx,
+                        const uint8_t **poutbuf, int *poutbuf_size,
+                        const uint8_t *buf, int buf_size);
+    void (*parser_close)(AVCodecParserContext *s);
+    int (*split)(AVCodecContext *avctx, const uint8_t *buf, int buf_size);
+    struct AVCodecParser *next;
+} AVCodecParser;
+
+AVCodecParser *av_parser_next(AVCodecParser *c);
+
+void av_register_codec_parser(AVCodecParser *parser);
+AVCodecParserContext *av_parser_init(int codec_id);
+
+#if LIBAVCODEC_VERSION_MAJOR < 53
+attribute_deprecated
+int av_parser_parse(AVCodecParserContext *s,
+                    AVCodecContext *avctx,
+                    uint8_t **poutbuf, int *poutbuf_size,
+                    const uint8_t *buf, int buf_size,
+                    int64_t pts, int64_t dts);
+#endif
+
+/**
+ * Parse a packet.
+ *
+ * @param s             parser context.
+ * @param avctx         codec context.
+ * @param poutbuf       set to pointer to parsed buffer or NULL if not yet finished.
+ * @param poutbuf_size  set to size of parsed buffer or zero if not yet finished.
+ * @param buf           input buffer.
+ * @param buf_size      input length, to signal EOF, this should be 0 (so that the last frame can be output).
+ * @param pts           input presentation timestamp.
+ * @param dts           input decoding timestamp.
+ * @param pos           input byte position in stream.
+ * @return the number of bytes of the input bitstream used.
+ *
+ * Example:
+ * @code
+ *   while(in_len){
+ *       len = av_parser_parse2(myparser, AVCodecContext, &data, &size,
+ *                                        in_data, in_len,
+ *                                        pts, dts, pos);
+ *       in_data += len;
+ *       in_len  -= len;
+ *
+ *       if(size)
+ *          decode_frame(data, size);
+ *   }
+ * @endcode
+ */
+int av_parser_parse2(AVCodecParserContext *s,
+                     AVCodecContext *avctx,
+                     uint8_t **poutbuf, int *poutbuf_size,
+                     const uint8_t *buf, int buf_size,
+                     int64_t pts, int64_t dts,
+                     int64_t pos);
+
+int av_parser_change(AVCodecParserContext *s,
+                     AVCodecContext *avctx,
+                     uint8_t **poutbuf, int *poutbuf_size,
+                     const uint8_t *buf, int buf_size, int keyframe);
+void av_parser_close(AVCodecParserContext *s);
+
+
+typedef struct AVBitStreamFilterContext {
+    void *priv_data;
+    struct AVBitStreamFilter *filter;
+    AVCodecParserContext *parser;
+    struct AVBitStreamFilterContext *next;
+} AVBitStreamFilterContext;
+
+
+typedef struct AVBitStreamFilter {
+    const char *name;
+    int priv_data_size;
+    int (*filter)(AVBitStreamFilterContext *bsfc,
+                  AVCodecContext *avctx, const char *args,
+                  uint8_t **poutbuf, int *poutbuf_size,
+                  const uint8_t *buf, int buf_size, int keyframe);
+    void (*close)(AVBitStreamFilterContext *bsfc);
+    struct AVBitStreamFilter *next;
+} AVBitStreamFilter;
+
+void av_register_bitstream_filter(AVBitStreamFilter *bsf);
+AVBitStreamFilterContext *av_bitstream_filter_init(const char *name);
+int av_bitstream_filter_filter(AVBitStreamFilterContext *bsfc,
+                               AVCodecContext *avctx, const char *args,
+                               uint8_t **poutbuf, int *poutbuf_size,
+                               const uint8_t *buf, int buf_size, int keyframe);
+void av_bitstream_filter_close(AVBitStreamFilterContext *bsf);
+
+AVBitStreamFilter *av_bitstream_filter_next(AVBitStreamFilter *f);
+
+/* memory */
+
+/**
+ * Reallocates the given block if it is not large enough, otherwise it
+ * does nothing.
+ *
+ * @see av_realloc
+ */
+void *av_fast_realloc(void *ptr, unsigned int *size, unsigned int min_size);
+
+/**
+ * Allocates a buffer, reusing the given one if large enough.
+ *
+ * Contrary to av_fast_realloc the current buffer contents might not be
+ * preserved and on error the old buffer is freed, thus no special
+ * handling to avoid memleaks is necessary.
+ *
+ * @param ptr pointer to pointer to already allocated buffer, overwritten with pointer to new buffer
+ * @param size size of the buffer *ptr points to
+ * @param min_size minimum size of *ptr buffer after returning, *ptr will be NULL and
+ *                 *size 0 if an error occurred.
+ */
+void av_fast_malloc(void *ptr, unsigned int *size, unsigned int min_size);
+
+/**
+ * Copy image 'src' to 'dst'.
+ */
+void av_picture_copy(AVPicture *dst, const AVPicture *src,
+                     enum PixelFormat pix_fmt, int width, int height);
+
+/**
+ * Crop image top and left side.
+ */
+int av_picture_crop(AVPicture *dst, const AVPicture *src,
+                    enum PixelFormat pix_fmt, int top_band, int left_band);
+
+/**
+ * Pad image.
+ */
+int av_picture_pad(AVPicture *dst, const AVPicture *src, int height, int width, enum PixelFormat pix_fmt,
+            int padtop, int padbottom, int padleft, int padright, int *color);
+
+/**
+ * Encodes extradata length to a buffer. Used by xiph codecs.
+ *
+ * @param s buffer to write to; must be at least (v/255+1) bytes long
+ * @param v size of extradata in bytes
+ * @return number of bytes written to the buffer.
+ */
+unsigned int av_xiphlacing(unsigned char *s, unsigned int v);
+
+/**
+ * Parses str and put in width_ptr and height_ptr the detected values.
+ *
+ * @return 0 in case of a successful parsing, a negative value otherwise
+ * @param[in] str the string to parse: it has to be a string in the format
+ * <width>x<height> or a valid video frame size abbreviation.
+ * @param[in,out] width_ptr pointer to the variable which will contain the detected
+ * frame width value
+ * @param[in,out] height_ptr pointer to the variable which will contain the detected
+ * frame height value
+ */
+int av_parse_video_frame_size(int *width_ptr, int *height_ptr, const char *str);
+
+/**
+ * Parses str and put in frame_rate the detected values.
+ *
+ * @return 0 in case of a successful parsing, a negative value otherwise
+ * @param[in] str the string to parse: it has to be a string in the format
+ * <frame_rate_num>/<frame_rate_den>, a float number or a valid video rate abbreviation
+ * @param[in,out] frame_rate pointer to the AVRational which will contain the detected
+ * frame rate
+ */
+int av_parse_video_frame_rate(AVRational *frame_rate, const char *str);
+
+/**
+ * Logs a generic warning message about a missing feature. This function is
+ * intended to be used internally by FFmpeg (libavcodec, libavformat, etc.)
+ * only, and would normally not be used by applications.
+ * @param[in] avc a pointer to an arbitrary struct of which the first field is
+ * a pointer to an AVClass struct
+ * @param[in] feature string containing the name of the missing feature
+ * @param[in] want_sample indicates if samples are wanted which exhibit this feature.
+ * If want_sample is non-zero, additional verbage will be added to the log
+ * message which tells the user how to report samples to the development
+ * mailing list.
+ */
+void av_log_missing_feature(void *avc, const char *feature, int want_sample);
+
+/**
+ * Logs a generic warning message asking for a sample. This function is
+ * intended to be used internally by FFmpeg (libavcodec, libavformat, etc.)
+ * only, and would normally not be used by applications.
+ * @param[in] avc a pointer to an arbitrary struct of which the first field is
+ * a pointer to an AVClass struct
+ * @param[in] msg string containing an optional message, or NULL if no message
+ */
+void av_log_ask_for_sample(void *avc, const char *msg);
+
+/**
+ * Registers the hardware accelerator hwaccel.
+ */
+void av_register_hwaccel(AVHWAccel *hwaccel);
+
+/**
+ * If hwaccel is NULL, returns the first registered hardware accelerator,
+ * if hwaccel is non-NULL, returns the next registered hardware accelerator
+ * after hwaccel, or NULL if hwaccel is the last one.
+ */
+AVHWAccel *av_hwaccel_next(AVHWAccel *hwaccel);
+
+
+/**
+ * Lock operation used by lockmgr
+ */
+enum AVLockOp {
+  AV_LOCK_CREATE,  ///< Create a mutex
+  AV_LOCK_OBTAIN,  ///< Lock the mutex
+  AV_LOCK_RELEASE, ///< Unlock the mutex
+  AV_LOCK_DESTROY, ///< Free mutex resources
+};
+
+/**
+ * Register a user provided lock manager supporting the operations
+ * specified by AVLockOp. mutex points to a (void *) where the
+ * lockmgr should store/get a pointer to a user allocated mutex. It's
+ * NULL upon AV_LOCK_CREATE and != NULL for all other ops.
+ *
+ * @param cb User defined callback. Note: FFmpeg may invoke calls to this
+ *           callback during the call to av_lockmgr_register().
+ *           Thus, the application must be prepared to handle that.
+ *           If cb is set to NULL the lockmgr will be unregistered.
+ *           Also note that during unregistration the previously registered
+ *           lockmgr callback may also be invoked.
+ */
+int av_lockmgr_register(int (*cb)(void **mutex, enum AVLockOp op));
+
+#endif /* AVCODEC_AVCODEC_H */
diff --git a/apps/codecs/libwmapro/avfft.h b/apps/codecs/libwmapro/avfft.h
new file mode 100644
index 0000000000..623f0a33b5
--- /dev/null
+++ b/apps/codecs/libwmapro/avfft.h
@@ -0,0 +1,99 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AVFFT_H
+#define AVCODEC_AVFFT_H
+
+typedef float FFTSample;
+
+typedef struct FFTComplex {
+    FFTSample re, im;
+} FFTComplex;
+
+typedef struct FFTContext FFTContext;
+
+/**
+ * Set up a complex FFT.
+ * @param nbits           log2 of the length of the input array
+ * @param inverse         if 0 perform the forward transform, if 1 perform the inverse
+ */
+FFTContext *av_fft_init(int nbits, int inverse);
+
+/**
+ * Do the permutation needed BEFORE calling ff_fft_calc().
+ */
+void av_fft_permute(FFTContext *s, FFTComplex *z);
+
+/**
+ * Do a complex FFT with the parameters defined in av_fft_init(). The
+ * input data must be permuted before. No 1.0/sqrt(n) normalization is done.
+ */
+void av_fft_calc(FFTContext *s, FFTComplex *z);
+
+void av_fft_end(FFTContext *s);
+
+FFTContext *av_mdct_init(int nbits, int inverse, double scale);
+void av_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input);
+void av_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input);
+void av_mdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input);
+void av_mdct_end(FFTContext *s);
+
+/* Real Discrete Fourier Transform */
+
+enum RDFTransformType {
+    DFT_R2C,
+    IDFT_C2R,
+    IDFT_R2C,
+    DFT_C2R,
+};
+
+typedef struct RDFTContext RDFTContext;
+
+/**
+ * Set up a real FFT.
+ * @param nbits           log2 of the length of the input array
+ * @param trans           the type of transform
+ */
+RDFTContext *av_rdft_init(int nbits, enum RDFTransformType trans);
+void av_rdft_calc(RDFTContext *s, FFTSample *data);
+void av_rdft_end(RDFTContext *s);
+
+/* Discrete Cosine Transform */
+
+typedef struct DCTContext DCTContext;
+
+enum DCTTransformType {
+    DCT_II = 0,
+    DCT_III,
+    DCT_I,
+    DST_I,
+};
+
+/**
+ * Sets up DCT.
+ * @param nbits           size of the input array:
+ *                        (1 << nbits)     for DCT-II, DCT-III and DST-I
+ *                        (1 << nbits) + 1 for DCT-I
+ *
+ * @note the first element of the input of DST-I is ignored
+ */
+DCTContext *av_dct_init(int nbits, enum DCTTransformType type);
+void av_dct_calc(DCTContext *s, FFTSample *data);
+void av_dct_end (DCTContext *s);
+
+#endif /* AVCODEC_AVFFT_H */
diff --git a/apps/codecs/libwmapro/bitstream.c b/apps/codecs/libwmapro/bitstream.c
new file mode 100644
index 0000000000..1e24099cc8
--- /dev/null
+++ b/apps/codecs/libwmapro/bitstream.c
@@ -0,0 +1,336 @@
+/*
+ * Common bit i/o utils
+ * Copyright (c) 2000, 2001 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2010 Loren Merritt
+ *
+ * alternative bitstream reader & writer by Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavcodec/bitstream.c
+ * bitstream api.
+ */
+
+#include "avcodec.h"
+#include "get_bits.h"
+#include "put_bits.h"
+
+const uint8_t ff_log2_run[32]={
+ 0, 0, 0, 0, 1, 1, 1, 1,
+ 2, 2, 2, 2, 3, 3, 3, 3,
+ 4, 4, 5, 5, 6, 6, 7, 7,
+ 8, 9,10,11,12,13,14,15
+};
+
+void align_put_bits(PutBitContext *s)
+{
+#ifdef ALT_BITSTREAM_WRITER
+    put_bits(s,(  - s->index) & 7,0);
+#else
+    put_bits(s,s->bit_left & 7,0);
+#endif
+}
+
+void ff_put_string(PutBitContext *pb, const char *string, int terminate_string)
+{
+    while(*string){
+        put_bits(pb, 8, *string);
+        string++;
+    }
+    if(terminate_string)
+        put_bits(pb, 8, 0);
+}
+
+void ff_copy_bits(PutBitContext *pb, const uint8_t *src, int length)
+{
+    int words= length>>4;
+    int bits= length&15;
+    int i;
+
+    if(length==0) return;
+
+    if(CONFIG_SMALL || words < 16 || put_bits_count(pb)&7){
+        for(i=0; i<words; i++) put_bits(pb, 16, AV_RB16(src + 2*i));
+    }else{
+        for(i=0; put_bits_count(pb)&31; i++)
+            put_bits(pb, 8, src[i]);
+        flush_put_bits(pb);
+        memcpy(put_bits_ptr(pb), src+i, 2*words-i);
+        skip_put_bytes(pb, 2*words-i);
+    }
+
+    put_bits(pb, bits, AV_RB16(src + 2*words)>>(16-bits));
+}
+
+/* VLC decoding */
+
+//#define DEBUG_VLC
+
+#define GET_DATA(v, table, i, wrap, size) \
+{\
+    const uint8_t *ptr = (const uint8_t *)table + i * wrap;\
+    switch(size) {\
+    case 1:\
+        v = *(const uint8_t *)ptr;\
+        break;\
+    case 2:\
+        v = *(const uint16_t *)ptr;\
+        break;\
+    default:\
+        v = *(const uint32_t *)ptr;\
+        break;\
+    }\
+}
+
+
+static int alloc_table(VLC *vlc, int size, int use_static)
+{
+    int index;
+    index = vlc->table_size;
+    vlc->table_size += size;
+    if (vlc->table_size > vlc->table_allocated) {
+        if(use_static)
+            abort(); //cant do anything, init_vlc() is used with too little memory
+        vlc->table_allocated += (1 << vlc->bits);
+        vlc->table = av_realloc(vlc->table,
+                                sizeof(VLC_TYPE) * 2 * vlc->table_allocated);
+        if (!vlc->table)
+            return -1;
+    }
+    return index;
+}
+
+static av_always_inline uint32_t bitswap_32(uint32_t x) {
+    return av_reverse[x&0xFF]<<24
+         | av_reverse[(x>>8)&0xFF]<<16
+         | av_reverse[(x>>16)&0xFF]<<8
+         | av_reverse[x>>24];
+}
+
+typedef struct {
+    uint8_t bits;
+    uint16_t symbol;
+    /** codeword, with the first bit-to-be-read in the msb
+     * (even if intended for a little-endian bitstream reader) */
+    uint32_t code;
+} VLCcode;
+
+static int compare_vlcspec(const void *a, const void *b)
+{
+    const VLCcode *sa=a, *sb=b;
+    return (sa->code >> 1) - (sb->code >> 1);
+}
+
+/**
+ * Build VLC decoding tables suitable for use with get_vlc().
+ *
+ * @param vlc            the context to be initted
+ *
+ * @param table_nb_bits  max length of vlc codes to store directly in this table
+ *                       (Longer codes are delegated to subtables.)
+ *
+ * @param nb_codes       number of elements in codes[]
+ *
+ * @param codes          descriptions of the vlc codes
+ *                       These must be ordered such that codes going into the same subtable are contiguous.
+ *                       Sorting by VLCcode.code is sufficient, though not necessary.
+ */
+static int build_table(VLC *vlc, int table_nb_bits, int nb_codes,
+                       VLCcode *codes, int flags)
+{
+    int table_size, table_index, index, code_prefix, symbol, subtable_bits;
+    int i, j, k, n, nb, inc;
+    uint32_t code;
+    VLC_TYPE (*table)[2];
+
+    table_size = 1 << table_nb_bits;
+    table_index = alloc_table(vlc, table_size, flags & INIT_VLC_USE_NEW_STATIC);
+#ifdef DEBUG_VLC
+    av_log(NULL,AV_LOG_DEBUG,"new table index=%d size=%d\n",
+           table_index, table_size);
+#endif
+    if (table_index < 0)
+        return -1;
+    table = &vlc->table[table_index];
+
+    for (i = 0; i < table_size; i++) {
+        table[i][1] = 0; //bits
+        table[i][0] = -1; //codes
+    }
+
+    /* first pass: map codes and compute auxillary table sizes */
+    for (i = 0; i < nb_codes; i++) {
+        n = codes[i].bits;
+        code = codes[i].code;
+        symbol = codes[i].symbol;
+#if defined(DEBUG_VLC) && 0
+        av_log(NULL,AV_LOG_DEBUG,"i=%d n=%d code=0x%x\n", i, n, code);
+#endif
+        if (n <= table_nb_bits) {
+            /* no need to add another table */
+            j = code >> (32 - table_nb_bits);
+            nb = 1 << (table_nb_bits - n);
+            inc = 1;
+            if (flags & INIT_VLC_LE) {
+                j = bitswap_32(code);
+                inc = 1 << n;
+            }
+            for (k = 0; k < nb; k++) {
+#ifdef DEBUG_VLC
+                av_log(NULL, AV_LOG_DEBUG, "%4x: code=%d n=%d\n",
+                       j, i, n);
+#endif
+                if (table[j][1] /*bits*/ != 0) {
+                    av_log(NULL, AV_LOG_ERROR, "incorrect codes\n");
+                    return -1;
+                }
+                table[j][1] = n; //bits
+                table[j][0] = symbol;
+                j += inc;
+            }
+        } else {
+            /* fill auxiliary table recursively */
+            n -= table_nb_bits;
+            code_prefix = code >> (32 - table_nb_bits);
+            subtable_bits = n;
+            codes[i].bits = n;
+            codes[i].code = code << table_nb_bits;
+            for (k = i+1; k < nb_codes; k++) {
+                n = codes[k].bits - table_nb_bits;
+                if (n <= 0)
+                    break;
+                code = codes[k].code;
+                if (code >> (32 - table_nb_bits) != code_prefix)
+                    break;
+                codes[k].bits = n;
+                codes[k].code = code << table_nb_bits;
+                subtable_bits = FFMAX(subtable_bits, n);
+            }
+            subtable_bits = FFMIN(subtable_bits, table_nb_bits);
+            j = (flags & INIT_VLC_LE) ? bitswap_32(code_prefix) >> (32 - table_nb_bits) : code_prefix;
+            table[j][1] = -subtable_bits;
+#ifdef DEBUG_VLC
+            av_log(NULL,AV_LOG_DEBUG,"%4x: n=%d (subtable)\n",
+                   j, codes[i].bits + table_nb_bits);
+#endif
+            index = build_table(vlc, subtable_bits, k-i, codes+i, flags);
+            if (index < 0)
+                return -1;
+            /* note: realloc has been done, so reload tables */
+            table = &vlc->table[table_index];
+            table[j][0] = index; //code
+            i = k-1;
+        }
+    }
+    return table_index;
+}
+
+
+/* Build VLC decoding tables suitable for use with get_vlc().
+
+   'nb_bits' set thee decoding table size (2^nb_bits) entries. The
+   bigger it is, the faster is the decoding. But it should not be too
+   big to save memory and L1 cache. '9' is a good compromise.
+
+   'nb_codes' : number of vlcs codes
+
+   'bits' : table which gives the size (in bits) of each vlc code.
+
+   'codes' : table which gives the bit pattern of of each vlc code.
+
+   'symbols' : table which gives the values to be returned from get_vlc().
+
+   'xxx_wrap' : give the number of bytes between each entry of the
+   'bits' or 'codes' tables.
+
+   'xxx_size' : gives the number of bytes of each entry of the 'bits'
+   or 'codes' tables.
+
+   'wrap' and 'size' allows to use any memory configuration and types
+   (byte/word/long) to store the 'bits', 'codes', and 'symbols' tables.
+
+   'use_static' should be set to 1 for tables, which should be freed
+   with av_free_static(), 0 if free_vlc() will be used.
+*/
+int init_vlc_sparse(VLC *vlc, int nb_bits, int nb_codes,
+             const void *bits, int bits_wrap, int bits_size,
+             const void *codes, int codes_wrap, int codes_size,
+             const void *symbols, int symbols_wrap, int symbols_size,
+             int flags)
+{
+    VLCcode buf[nb_codes];
+    int i, j;
+
+    vlc->bits = nb_bits;
+    if(flags & INIT_VLC_USE_NEW_STATIC){
+        if(vlc->table_size && vlc->table_size == vlc->table_allocated){
+            return 0;
+        }else if(vlc->table_size){
+            abort(); // fatal error, we are called on a partially initialized table
+        }
+    }else {
+        vlc->table = NULL;
+        vlc->table_allocated = 0;
+        vlc->table_size = 0;
+    }
+
+#ifdef DEBUG_VLC
+    av_log(NULL,AV_LOG_DEBUG,"build table nb_codes=%d\n", nb_codes);
+#endif
+
+    assert(symbols_size <= 2 || !symbols);
+    j = 0;
+#define COPY(condition)\
+    for (i = 0; i < nb_codes; i++) {\
+        GET_DATA(buf[j].bits, bits, i, bits_wrap, bits_size);\
+        if (!(condition))\
+            continue;\
+        GET_DATA(buf[j].code, codes, i, codes_wrap, codes_size);\
+        if (flags & INIT_VLC_LE)\
+            buf[j].code = bitswap_32(buf[j].code);\
+        else\
+            buf[j].code <<= 32 - buf[j].bits;\
+        if (symbols)\
+            GET_DATA(buf[j].symbol, symbols, i, symbols_wrap, symbols_size)\
+        else\
+            buf[j].symbol = i;\
+        j++;\
+    }
+    COPY(buf[j].bits > nb_bits);
+    // qsort is the slowest part of init_vlc, and could probably be improved or avoided
+    qsort(buf, j, sizeof(VLCcode), compare_vlcspec);
+    COPY(buf[j].bits && buf[j].bits <= nb_bits);
+    nb_codes = j;
+
+    if (build_table(vlc, nb_bits, nb_codes, buf, flags) < 0) {
+        av_freep(&vlc->table);
+        return -1;
+    }
+    if((flags & INIT_VLC_USE_NEW_STATIC) && vlc->table_size != vlc->table_allocated)
+        av_log(NULL, AV_LOG_ERROR, "needed %d had %d\n", vlc->table_size, vlc->table_allocated);
+    return 0;
+}
+
+
+void free_vlc(VLC *vlc)
+{
+    av_freep(&vlc->table);
+}
+
diff --git a/apps/codecs/libwmapro/dsputil.c b/apps/codecs/libwmapro/dsputil.c
new file mode 100644
index 0000000000..bbfdb6ae8d
--- /dev/null
+++ b/apps/codecs/libwmapro/dsputil.c
@@ -0,0 +1,4565 @@
+/*
+ * DSP utils
+ * Copyright (c) 2000, 2001 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavcodec/dsputil.c
+ * DSP utils
+ */
+
+#include "avcodec.h"
+#include "dsputil.h"
+#include "simple_idct.h"
+#include "faandct.h"
+#include "faanidct.h"
+#include "mathops.h"
+#include "mpegvideo.h"
+#include "config.h"
+#include "lpc.h"
+#include "ac3dec.h"
+#include "vorbis.h"
+#include "png.h"
+
+uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
+uint32_t ff_squareTbl[512] = {0, };
+
+// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
+#define pb_7f (~0UL/255 * 0x7f)
+#define pb_80 (~0UL/255 * 0x80)
+
+const uint8_t ff_zigzag_direct[64] = {
+    0,   1,  8, 16,  9,  2,  3, 10,
+    17, 24, 32, 25, 18, 11,  4,  5,
+    12, 19, 26, 33, 40, 48, 41, 34,
+    27, 20, 13,  6,  7, 14, 21, 28,
+    35, 42, 49, 56, 57, 50, 43, 36,
+    29, 22, 15, 23, 30, 37, 44, 51,
+    58, 59, 52, 45, 38, 31, 39, 46,
+    53, 60, 61, 54, 47, 55, 62, 63
+};
+
+/* Specific zigzag scan for 248 idct. NOTE that unlike the
+   specification, we interleave the fields */
+const uint8_t ff_zigzag248_direct[64] = {
+     0,  8,  1,  9, 16, 24,  2, 10,
+    17, 25, 32, 40, 48, 56, 33, 41,
+    18, 26,  3, 11,  4, 12, 19, 27,
+    34, 42, 49, 57, 50, 58, 35, 43,
+    20, 28,  5, 13,  6, 14, 21, 29,
+    36, 44, 51, 59, 52, 60, 37, 45,
+    22, 30,  7, 15, 23, 31, 38, 46,
+    53, 61, 54, 62, 39, 47, 55, 63,
+};
+
+/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
+DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
+
+const uint8_t ff_alternate_horizontal_scan[64] = {
+    0,  1,   2,  3,  8,  9, 16, 17,
+    10, 11,  4,  5,  6,  7, 15, 14,
+    13, 12, 19, 18, 24, 25, 32, 33,
+    26, 27, 20, 21, 22, 23, 28, 29,
+    30, 31, 34, 35, 40, 41, 48, 49,
+    42, 43, 36, 37, 38, 39, 44, 45,
+    46, 47, 50, 51, 56, 57, 58, 59,
+    52, 53, 54, 55, 60, 61, 62, 63,
+};
+
+const uint8_t ff_alternate_vertical_scan[64] = {
+    0,  8,  16, 24,  1,  9,  2, 10,
+    17, 25, 32, 40, 48, 56, 57, 49,
+    41, 33, 26, 18,  3, 11,  4, 12,
+    19, 27, 34, 42, 50, 58, 35, 43,
+    51, 59, 20, 28,  5, 13,  6, 14,
+    21, 29, 36, 44, 52, 60, 37, 45,
+    53, 61, 22, 30,  7, 15, 23, 31,
+    38, 46, 54, 62, 39, 47, 55, 63,
+};
+
+/* a*inverse[b]>>32 == a/b for all 0<=a<=16909558 && 2<=b<=256
+ * for a>16909558, is an overestimate by less than 1 part in 1<<24 */
+const uint32_t ff_inverse[257]={
+         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757,
+ 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
+ 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
+ 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
+ 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
+ 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
+  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
+  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
+  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
+  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
+  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
+  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
+  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
+  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
+  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
+  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
+  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
+  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
+  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
+  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
+  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
+  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
+  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
+  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
+  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
+  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
+  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
+  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
+  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
+  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
+  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
+  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
+  16777216
+};
+
+/* Input permutation for the simple_idct_mmx */
+static const uint8_t simple_mmx_permutation[64]={
+        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
+        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
+        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
+        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
+        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
+        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
+        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
+        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
+};
+
+static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
+
+void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
+    int i;
+    int end;
+
+    st->scantable= src_scantable;
+
+    for(i=0; i<64; i++){
+        int j;
+        j = src_scantable[i];
+        st->permutated[i] = permutation[j];
+#if ARCH_PPC
+        st->inverse[j] = i;
+#endif
+    }
+
+    end=-1;
+    for(i=0; i<64; i++){
+        int j;
+        j = st->permutated[i];
+        if(j>end) end=j;
+        st->raster_end[i]= end;
+    }
+}
+
+static int pix_sum_c(uint8_t * pix, int line_size)
+{
+    int s, i, j;
+
+    s = 0;
+    for (i = 0; i < 16; i++) {
+        for (j = 0; j < 16; j += 8) {
+            s += pix[0];
+            s += pix[1];
+            s += pix[2];
+            s += pix[3];
+            s += pix[4];
+            s += pix[5];
+            s += pix[6];
+            s += pix[7];
+            pix += 8;
+        }
+        pix += line_size - 16;
+    }
+    return s;
+}
+
+static int pix_norm1_c(uint8_t * pix, int line_size)
+{
+    int s, i, j;
+    uint32_t *sq = ff_squareTbl + 256;
+
+    s = 0;
+    for (i = 0; i < 16; i++) {
+        for (j = 0; j < 16; j += 8) {
+#if 0
+            s += sq[pix[0]];
+            s += sq[pix[1]];
+            s += sq[pix[2]];
+            s += sq[pix[3]];
+            s += sq[pix[4]];
+            s += sq[pix[5]];
+            s += sq[pix[6]];
+            s += sq[pix[7]];
+#else
+#if LONG_MAX > 2147483647
+            register uint64_t x=*(uint64_t*)pix;
+            s += sq[x&0xff];
+            s += sq[(x>>8)&0xff];
+            s += sq[(x>>16)&0xff];
+            s += sq[(x>>24)&0xff];
+            s += sq[(x>>32)&0xff];
+            s += sq[(x>>40)&0xff];
+            s += sq[(x>>48)&0xff];
+            s += sq[(x>>56)&0xff];
+#else
+            register uint32_t x=*(uint32_t*)pix;
+            s += sq[x&0xff];
+            s += sq[(x>>8)&0xff];
+            s += sq[(x>>16)&0xff];
+            s += sq[(x>>24)&0xff];
+            x=*(uint32_t*)(pix+4);
+            s += sq[x&0xff];
+            s += sq[(x>>8)&0xff];
+            s += sq[(x>>16)&0xff];
+            s += sq[(x>>24)&0xff];
+#endif
+#endif
+            pix += 8;
+        }
+        pix += line_size - 16;
+    }
+    return s;
+}
+
+static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
+    int i;
+
+    for(i=0; i+8<=w; i+=8){
+        dst[i+0]= bswap_32(src[i+0]);
+        dst[i+1]= bswap_32(src[i+1]);
+        dst[i+2]= bswap_32(src[i+2]);
+        dst[i+3]= bswap_32(src[i+3]);
+        dst[i+4]= bswap_32(src[i+4]);
+        dst[i+5]= bswap_32(src[i+5]);
+        dst[i+6]= bswap_32(src[i+6]);
+        dst[i+7]= bswap_32(src[i+7]);
+    }
+    for(;i<w; i++){
+        dst[i+0]= bswap_32(src[i+0]);
+    }
+}
+
+static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
+{
+    int s, i;
+    uint32_t *sq = ff_squareTbl + 256;
+
+    s = 0;
+    for (i = 0; i < h; i++) {
+        s += sq[pix1[0] - pix2[0]];
+        s += sq[pix1[1] - pix2[1]];
+        s += sq[pix1[2] - pix2[2]];
+        s += sq[pix1[3] - pix2[3]];
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    return s;
+}
+
+static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
+{
+    int s, i;
+    uint32_t *sq = ff_squareTbl + 256;
+
+    s = 0;
+    for (i = 0; i < h; i++) {
+        s += sq[pix1[0] - pix2[0]];
+        s += sq[pix1[1] - pix2[1]];
+        s += sq[pix1[2] - pix2[2]];
+        s += sq[pix1[3] - pix2[3]];
+        s += sq[pix1[4] - pix2[4]];
+        s += sq[pix1[5] - pix2[5]];
+        s += sq[pix1[6] - pix2[6]];
+        s += sq[pix1[7] - pix2[7]];
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    return s;
+}
+
+static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int s, i;
+    uint32_t *sq = ff_squareTbl + 256;
+
+    s = 0;
+    for (i = 0; i < h; i++) {
+        s += sq[pix1[ 0] - pix2[ 0]];
+        s += sq[pix1[ 1] - pix2[ 1]];
+        s += sq[pix1[ 2] - pix2[ 2]];
+        s += sq[pix1[ 3] - pix2[ 3]];
+        s += sq[pix1[ 4] - pix2[ 4]];
+        s += sq[pix1[ 5] - pix2[ 5]];
+        s += sq[pix1[ 6] - pix2[ 6]];
+        s += sq[pix1[ 7] - pix2[ 7]];
+        s += sq[pix1[ 8] - pix2[ 8]];
+        s += sq[pix1[ 9] - pix2[ 9]];
+        s += sq[pix1[10] - pix2[10]];
+        s += sq[pix1[11] - pix2[11]];
+        s += sq[pix1[12] - pix2[12]];
+        s += sq[pix1[13] - pix2[13]];
+        s += sq[pix1[14] - pix2[14]];
+        s += sq[pix1[15] - pix2[15]];
+
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    return s;
+}
+
+/* draw the edges of width 'w' of an image of size width, height */
+//FIXME check that this is ok for mpeg4 interlaced
+static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
+{
+    uint8_t *ptr, *last_line;
+    int i;
+
+    last_line = buf + (height - 1) * wrap;
+    for(i=0;i<w;i++) {
+        /* top and bottom */
+        memcpy(buf - (i + 1) * wrap, buf, width);
+        memcpy(last_line + (i + 1) * wrap, last_line, width);
+    }
+    /* left and right */
+    ptr = buf;
+    for(i=0;i<height;i++) {
+        memset(ptr - w, ptr[0], w);
+        memset(ptr + width, ptr[width-1], w);
+        ptr += wrap;
+    }
+    /* corners */
+    for(i=0;i<w;i++) {
+        memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
+        memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
+        memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* top left */
+        memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* top right */
+    }
+}
+
+/**
+ * Copies a rectangular area of samples to a temporary buffer and replicates the boarder samples.
+ * @param buf destination buffer
+ * @param src source buffer
+ * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
+ * @param block_w width of block
+ * @param block_h height of block
+ * @param src_x x coordinate of the top left sample of the block in the source buffer
+ * @param src_y y coordinate of the top left sample of the block in the source buffer
+ * @param w width of the source buffer
+ * @param h height of the source buffer
+ */
+void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
+                                    int src_x, int src_y, int w, int h){
+    int x, y;
+    int start_y, start_x, end_y, end_x;
+
+    if(src_y>= h){
+        src+= (h-1-src_y)*linesize;
+        src_y=h-1;
+    }else if(src_y<=-block_h){
+        src+= (1-block_h-src_y)*linesize;
+        src_y=1-block_h;
+    }
+    if(src_x>= w){
+        src+= (w-1-src_x);
+        src_x=w-1;
+    }else if(src_x<=-block_w){
+        src+= (1-block_w-src_x);
+        src_x=1-block_w;
+    }
+
+    start_y= FFMAX(0, -src_y);
+    start_x= FFMAX(0, -src_x);
+    end_y= FFMIN(block_h, h-src_y);
+    end_x= FFMIN(block_w, w-src_x);
+
+    // copy existing part
+    for(y=start_y; y<end_y; y++){
+        for(x=start_x; x<end_x; x++){
+            buf[x + y*linesize]= src[x + y*linesize];
+        }
+    }
+
+    //top
+    for(y=0; y<start_y; y++){
+        for(x=start_x; x<end_x; x++){
+            buf[x + y*linesize]= buf[x + start_y*linesize];
+        }
+    }
+
+    //bottom
+    for(y=end_y; y<block_h; y++){
+        for(x=start_x; x<end_x; x++){
+            buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
+        }
+    }
+
+    for(y=0; y<block_h; y++){
+       //left
+        for(x=0; x<start_x; x++){
+            buf[x + y*linesize]= buf[start_x + y*linesize];
+        }
+
+       //right
+        for(x=end_x; x<block_w; x++){
+            buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
+        }
+    }
+}
+
+static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
+{
+    int i;
+
+    /* read the pixels */
+    for(i=0;i<8;i++) {
+        block[0] = pixels[0];
+        block[1] = pixels[1];
+        block[2] = pixels[2];
+        block[3] = pixels[3];
+        block[4] = pixels[4];
+        block[5] = pixels[5];
+        block[6] = pixels[6];
+        block[7] = pixels[7];
+        pixels += line_size;
+        block += 8;
+    }
+}
+
+static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
+                          const uint8_t *s2, int stride){
+    int i;
+
+    /* read the pixels */
+    for(i=0;i<8;i++) {
+        block[0] = s1[0] - s2[0];
+        block[1] = s1[1] - s2[1];
+        block[2] = s1[2] - s2[2];
+        block[3] = s1[3] - s2[3];
+        block[4] = s1[4] - s2[4];
+        block[5] = s1[5] - s2[5];
+        block[6] = s1[6] - s2[6];
+        block[7] = s1[7] - s2[7];
+        s1 += stride;
+        s2 += stride;
+        block += 8;
+    }
+}
+
+
+static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
+                                 int line_size)
+{
+    int i;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    /* read the pixels */
+    for(i=0;i<8;i++) {
+        pixels[0] = cm[block[0]];
+        pixels[1] = cm[block[1]];
+        pixels[2] = cm[block[2]];
+        pixels[3] = cm[block[3]];
+        pixels[4] = cm[block[4]];
+        pixels[5] = cm[block[5]];
+        pixels[6] = cm[block[6]];
+        pixels[7] = cm[block[7]];
+
+        pixels += line_size;
+        block += 8;
+    }
+}
+
+static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
+                                 int line_size)
+{
+    int i;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    /* read the pixels */
+    for(i=0;i<4;i++) {
+        pixels[0] = cm[block[0]];
+        pixels[1] = cm[block[1]];
+        pixels[2] = cm[block[2]];
+        pixels[3] = cm[block[3]];
+
+        pixels += line_size;
+        block += 8;
+    }
+}
+
+static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
+                                 int line_size)
+{
+    int i;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    /* read the pixels */
+    for(i=0;i<2;i++) {
+        pixels[0] = cm[block[0]];
+        pixels[1] = cm[block[1]];
+
+        pixels += line_size;
+        block += 8;
+    }
+}
+
+static void put_signed_pixels_clamped_c(const DCTELEM *block,
+                                        uint8_t *restrict pixels,
+                                        int line_size)
+{
+    int i, j;
+
+    for (i = 0; i < 8; i++) {
+        for (j = 0; j < 8; j++) {
+            if (*block < -128)
+                *pixels = 0;
+            else if (*block > 127)
+                *pixels = 255;
+            else
+                *pixels = (uint8_t)(*block + 128);
+            block++;
+            pixels++;
+        }
+        pixels += (line_size - 8);
+    }
+}
+
+static void put_pixels_nonclamped_c(const DCTELEM *block, uint8_t *restrict pixels,
+                                    int line_size)
+{
+    int i;
+
+    /* read the pixels */
+    for(i=0;i<8;i++) {
+        pixels[0] = block[0];
+        pixels[1] = block[1];
+        pixels[2] = block[2];
+        pixels[3] = block[3];
+        pixels[4] = block[4];
+        pixels[5] = block[5];
+        pixels[6] = block[6];
+        pixels[7] = block[7];
+
+        pixels += line_size;
+        block += 8;
+    }
+}
+
+static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
+                          int line_size)
+{
+    int i;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    /* read the pixels */
+    for(i=0;i<8;i++) {
+        pixels[0] = cm[pixels[0] + block[0]];
+        pixels[1] = cm[pixels[1] + block[1]];
+        pixels[2] = cm[pixels[2] + block[2]];
+        pixels[3] = cm[pixels[3] + block[3]];
+        pixels[4] = cm[pixels[4] + block[4]];
+        pixels[5] = cm[pixels[5] + block[5]];
+        pixels[6] = cm[pixels[6] + block[6]];
+        pixels[7] = cm[pixels[7] + block[7]];
+        pixels += line_size;
+        block += 8;
+    }
+}
+
+static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
+                          int line_size)
+{
+    int i;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    /* read the pixels */
+    for(i=0;i<4;i++) {
+        pixels[0] = cm[pixels[0] + block[0]];
+        pixels[1] = cm[pixels[1] + block[1]];
+        pixels[2] = cm[pixels[2] + block[2]];
+        pixels[3] = cm[pixels[3] + block[3]];
+        pixels += line_size;
+        block += 8;
+    }
+}
+
+static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
+                          int line_size)
+{
+    int i;
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    /* read the pixels */
+    for(i=0;i<2;i++) {
+        pixels[0] = cm[pixels[0] + block[0]];
+        pixels[1] = cm[pixels[1] + block[1]];
+        pixels += line_size;
+        block += 8;
+    }
+}
+
+static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
+{
+    int i;
+    for(i=0;i<8;i++) {
+        pixels[0] += block[0];
+        pixels[1] += block[1];
+        pixels[2] += block[2];
+        pixels[3] += block[3];
+        pixels[4] += block[4];
+        pixels[5] += block[5];
+        pixels[6] += block[6];
+        pixels[7] += block[7];
+        pixels += line_size;
+        block += 8;
+    }
+}
+
+static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
+{
+    int i;
+    for(i=0;i<4;i++) {
+        pixels[0] += block[0];
+        pixels[1] += block[1];
+        pixels[2] += block[2];
+        pixels[3] += block[3];
+        pixels += line_size;
+        block += 4;
+    }
+}
+
+static int sum_abs_dctelem_c(DCTELEM *block)
+{
+    int sum=0, i;
+    for(i=0; i<64; i++)
+        sum+= FFABS(block[i]);
+    return sum;
+}
+
+static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
+{
+    int i;
+
+    for (i = 0; i < h; i++) {
+        memset(block, value, 16);
+        block += line_size;
+    }
+}
+
+static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
+{
+    int i;
+
+    for (i = 0; i < h; i++) {
+        memset(block, value, 8);
+        block += line_size;
+    }
+}
+
+static void scale_block_c(const uint8_t src[64]/*align 8*/, uint8_t *dst/*align 8*/, int linesize)
+{
+    int i, j;
+    uint16_t *dst1 = (uint16_t *) dst;
+    uint16_t *dst2 = (uint16_t *)(dst + linesize);
+
+    for (j = 0; j < 8; j++) {
+        for (i = 0; i < 8; i++) {
+            dst1[i] = dst2[i] = src[i] * 0x0101;
+        }
+        src  += 8;
+        dst1 += linesize;
+        dst2 += linesize;
+    }
+}
+
+#if 0
+
+#define PIXOP2(OPNAME, OP) \
+static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int i;\
+    for(i=0; i<h; i++){\
+        OP(*((uint64_t*)block), AV_RN64(pixels));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+\
+static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int i;\
+    for(i=0; i<h; i++){\
+        const uint64_t a= AV_RN64(pixels  );\
+        const uint64_t b= AV_RN64(pixels+1);\
+        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+\
+static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int i;\
+    for(i=0; i<h; i++){\
+        const uint64_t a= AV_RN64(pixels  );\
+        const uint64_t b= AV_RN64(pixels+1);\
+        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+\
+static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int i;\
+    for(i=0; i<h; i++){\
+        const uint64_t a= AV_RN64(pixels          );\
+        const uint64_t b= AV_RN64(pixels+line_size);\
+        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+\
+static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int i;\
+    for(i=0; i<h; i++){\
+        const uint64_t a= AV_RN64(pixels          );\
+        const uint64_t b= AV_RN64(pixels+line_size);\
+        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+\
+static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+        int i;\
+        const uint64_t a= AV_RN64(pixels  );\
+        const uint64_t b= AV_RN64(pixels+1);\
+        uint64_t l0=  (a&0x0303030303030303ULL)\
+                    + (b&0x0303030303030303ULL)\
+                    + 0x0202020202020202ULL;\
+        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+        uint64_t l1,h1;\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            uint64_t a= AV_RN64(pixels  );\
+            uint64_t b= AV_RN64(pixels+1);\
+            l1=  (a&0x0303030303030303ULL)\
+               + (b&0x0303030303030303ULL);\
+            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+            pixels+=line_size;\
+            block +=line_size;\
+            a= AV_RN64(pixels  );\
+            b= AV_RN64(pixels+1);\
+            l0=  (a&0x0303030303030303ULL)\
+               + (b&0x0303030303030303ULL)\
+               + 0x0202020202020202ULL;\
+            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+}\
+\
+static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+        int i;\
+        const uint64_t a= AV_RN64(pixels  );\
+        const uint64_t b= AV_RN64(pixels+1);\
+        uint64_t l0=  (a&0x0303030303030303ULL)\
+                    + (b&0x0303030303030303ULL)\
+                    + 0x0101010101010101ULL;\
+        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+        uint64_t l1,h1;\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            uint64_t a= AV_RN64(pixels  );\
+            uint64_t b= AV_RN64(pixels+1);\
+            l1=  (a&0x0303030303030303ULL)\
+               + (b&0x0303030303030303ULL);\
+            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+            pixels+=line_size;\
+            block +=line_size;\
+            a= AV_RN64(pixels  );\
+            b= AV_RN64(pixels+1);\
+            l0=  (a&0x0303030303030303ULL)\
+               + (b&0x0303030303030303ULL)\
+               + 0x0101010101010101ULL;\
+            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
+              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
+            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+}\
+\
+CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
+
+#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
+#else // 64 bit variant
+
+#define PIXOP2(OPNAME, OP) \
+static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        OP(*((uint16_t*)(block  )), AV_RN16(pixels  ));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
+        OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
+        pixels+=line_size;\
+        block +=line_size;\
+    }\
+}\
+static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels8_c(block, pixels, line_size, h);\
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        uint32_t a,b;\
+        a= AV_RN32(&src1[i*src_stride1  ]);\
+        b= AV_RN32(&src2[i*src_stride2  ]);\
+        OP(*((uint32_t*)&dst[i*dst_stride  ]), no_rnd_avg32(a, b));\
+        a= AV_RN32(&src1[i*src_stride1+4]);\
+        b= AV_RN32(&src2[i*src_stride2+4]);\
+        OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
+    }\
+}\
+\
+static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        uint32_t a,b;\
+        a= AV_RN32(&src1[i*src_stride1  ]);\
+        b= AV_RN32(&src2[i*src_stride2  ]);\
+        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
+        a= AV_RN32(&src1[i*src_stride1+4]);\
+        b= AV_RN32(&src2[i*src_stride2+4]);\
+        OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
+    }\
+}\
+\
+static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        uint32_t a,b;\
+        a= AV_RN32(&src1[i*src_stride1  ]);\
+        b= AV_RN32(&src2[i*src_stride2  ]);\
+        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
+    }\
+}\
+\
+static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        uint32_t a,b;\
+        a= AV_RN16(&src1[i*src_stride1  ]);\
+        b= AV_RN16(&src2[i*src_stride2  ]);\
+        OP(*((uint16_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
+    }\
+}\
+\
+static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    OPNAME ## _pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
+    OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
+                                                int src_stride1, int src_stride2, int h){\
+    OPNAME ## _no_rnd_pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
+    OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
+                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        uint32_t a, b, c, d, l0, l1, h0, h1;\
+        a= AV_RN32(&src1[i*src_stride1]);\
+        b= AV_RN32(&src2[i*src_stride2]);\
+        c= AV_RN32(&src3[i*src_stride3]);\
+        d= AV_RN32(&src4[i*src_stride4]);\
+        l0=  (a&0x03030303UL)\
+           + (b&0x03030303UL)\
+           + 0x02020202UL;\
+        h0= ((a&0xFCFCFCFCUL)>>2)\
+          + ((b&0xFCFCFCFCUL)>>2);\
+        l1=  (c&0x03030303UL)\
+           + (d&0x03030303UL);\
+        h1= ((c&0xFCFCFCFCUL)>>2)\
+          + ((d&0xFCFCFCFCUL)>>2);\
+        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+        a= AV_RN32(&src1[i*src_stride1+4]);\
+        b= AV_RN32(&src2[i*src_stride2+4]);\
+        c= AV_RN32(&src3[i*src_stride3+4]);\
+        d= AV_RN32(&src4[i*src_stride4+4]);\
+        l0=  (a&0x03030303UL)\
+           + (b&0x03030303UL)\
+           + 0x02020202UL;\
+        h0= ((a&0xFCFCFCFCUL)>>2)\
+          + ((b&0xFCFCFCFCUL)>>2);\
+        l1=  (c&0x03030303UL)\
+           + (d&0x03030303UL);\
+        h1= ((c&0xFCFCFCFCUL)>>2)\
+          + ((d&0xFCFCFCFCUL)>>2);\
+        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+    }\
+}\
+\
+static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
+                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+    int i;\
+    for(i=0; i<h; i++){\
+        uint32_t a, b, c, d, l0, l1, h0, h1;\
+        a= AV_RN32(&src1[i*src_stride1]);\
+        b= AV_RN32(&src2[i*src_stride2]);\
+        c= AV_RN32(&src3[i*src_stride3]);\
+        d= AV_RN32(&src4[i*src_stride4]);\
+        l0=  (a&0x03030303UL)\
+           + (b&0x03030303UL)\
+           + 0x01010101UL;\
+        h0= ((a&0xFCFCFCFCUL)>>2)\
+          + ((b&0xFCFCFCFCUL)>>2);\
+        l1=  (c&0x03030303UL)\
+           + (d&0x03030303UL);\
+        h1= ((c&0xFCFCFCFCUL)>>2)\
+          + ((d&0xFCFCFCFCUL)>>2);\
+        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+        a= AV_RN32(&src1[i*src_stride1+4]);\
+        b= AV_RN32(&src2[i*src_stride2+4]);\
+        c= AV_RN32(&src3[i*src_stride3+4]);\
+        d= AV_RN32(&src4[i*src_stride4+4]);\
+        l0=  (a&0x03030303UL)\
+           + (b&0x03030303UL)\
+           + 0x01010101UL;\
+        h0= ((a&0xFCFCFCFCUL)>>2)\
+          + ((b&0xFCFCFCFCUL)>>2);\
+        l1=  (c&0x03030303UL)\
+           + (d&0x03030303UL);\
+        h1= ((c&0xFCFCFCFCUL)>>2)\
+          + ((d&0xFCFCFCFCUL)>>2);\
+        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+    }\
+}\
+static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
+                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+    OPNAME ## _pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+    OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+}\
+static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
+                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
+    OPNAME ## _no_rnd_pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+    OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
+}\
+\
+static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+        int i, a0, b0, a1, b1;\
+        a0= pixels[0];\
+        b0= pixels[1] + 2;\
+        a0 += b0;\
+        b0 += pixels[2];\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            a1= pixels[0];\
+            b1= pixels[1];\
+            a1 += b1;\
+            b1 += pixels[2];\
+\
+            block[0]= (a1+a0)>>2; /* FIXME non put */\
+            block[1]= (b1+b0)>>2;\
+\
+            pixels+=line_size;\
+            block +=line_size;\
+\
+            a0= pixels[0];\
+            b0= pixels[1] + 2;\
+            a0 += b0;\
+            b0 += pixels[2];\
+\
+            block[0]= (a1+a0)>>2;\
+            block[1]= (b1+b0)>>2;\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+}\
+\
+static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+        int i;\
+        const uint32_t a= AV_RN32(pixels  );\
+        const uint32_t b= AV_RN32(pixels+1);\
+        uint32_t l0=  (a&0x03030303UL)\
+                    + (b&0x03030303UL)\
+                    + 0x02020202UL;\
+        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
+                   + ((b&0xFCFCFCFCUL)>>2);\
+        uint32_t l1,h1;\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            uint32_t a= AV_RN32(pixels  );\
+            uint32_t b= AV_RN32(pixels+1);\
+            l1=  (a&0x03030303UL)\
+               + (b&0x03030303UL);\
+            h1= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+            a= AV_RN32(pixels  );\
+            b= AV_RN32(pixels+1);\
+            l0=  (a&0x03030303UL)\
+               + (b&0x03030303UL)\
+               + 0x02020202UL;\
+            h0= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+}\
+\
+static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int j;\
+    for(j=0; j<2; j++){\
+        int i;\
+        const uint32_t a= AV_RN32(pixels  );\
+        const uint32_t b= AV_RN32(pixels+1);\
+        uint32_t l0=  (a&0x03030303UL)\
+                    + (b&0x03030303UL)\
+                    + 0x02020202UL;\
+        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
+                   + ((b&0xFCFCFCFCUL)>>2);\
+        uint32_t l1,h1;\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            uint32_t a= AV_RN32(pixels  );\
+            uint32_t b= AV_RN32(pixels+1);\
+            l1=  (a&0x03030303UL)\
+               + (b&0x03030303UL);\
+            h1= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+            a= AV_RN32(pixels  );\
+            b= AV_RN32(pixels+1);\
+            l0=  (a&0x03030303UL)\
+               + (b&0x03030303UL)\
+               + 0x02020202UL;\
+            h0= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+        pixels+=4-line_size*(h+1);\
+        block +=4-line_size*h;\
+    }\
+}\
+\
+static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
+{\
+    int j;\
+    for(j=0; j<2; j++){\
+        int i;\
+        const uint32_t a= AV_RN32(pixels  );\
+        const uint32_t b= AV_RN32(pixels+1);\
+        uint32_t l0=  (a&0x03030303UL)\
+                    + (b&0x03030303UL)\
+                    + 0x01010101UL;\
+        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
+                   + ((b&0xFCFCFCFCUL)>>2);\
+        uint32_t l1,h1;\
+\
+        pixels+=line_size;\
+        for(i=0; i<h; i+=2){\
+            uint32_t a= AV_RN32(pixels  );\
+            uint32_t b= AV_RN32(pixels+1);\
+            l1=  (a&0x03030303UL)\
+               + (b&0x03030303UL);\
+            h1= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+            a= AV_RN32(pixels  );\
+            b= AV_RN32(pixels+1);\
+            l0=  (a&0x03030303UL)\
+               + (b&0x03030303UL)\
+               + 0x01010101UL;\
+            h0= ((a&0xFCFCFCFCUL)>>2)\
+              + ((b&0xFCFCFCFCUL)>>2);\
+            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
+            pixels+=line_size;\
+            block +=line_size;\
+        }\
+        pixels+=4-line_size*(h+1);\
+        block +=4-line_size*h;\
+    }\
+}\
+\
+CALL_2X_PIXELS(OPNAME ## _pixels16_c  , OPNAME ## _pixels8_c  , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c  , OPNAME ## _pixels8_c         , 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
+CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
+
+#define op_avg(a, b) a = rnd_avg32(a, b)
+#endif
+#define op_put(a, b) a = b
+
+PIXOP2(avg, op_avg)
+PIXOP2(put, op_put)
+#undef op_avg
+#undef op_put
+
+#define avg2(a,b) ((a+b+1)>>1)
+#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
+
+static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
+    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
+}
+
+static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
+    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
+}
+
+static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
+{
+    const int A=(16-x16)*(16-y16);
+    const int B=(   x16)*(16-y16);
+    const int C=(16-x16)*(   y16);
+    const int D=(   x16)*(   y16);
+    int i;
+
+    for(i=0; i<h; i++)
+    {
+        dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
+        dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
+        dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
+        dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
+        dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
+        dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
+        dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
+        dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
+        dst+= stride;
+        src+= stride;
+    }
+}
+
+void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
+                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
+{
+    int y, vx, vy;
+    const int s= 1<<shift;
+
+    width--;
+    height--;
+
+    for(y=0; y<h; y++){
+        int x;
+
+        vx= ox;
+        vy= oy;
+        for(x=0; x<8; x++){ //XXX FIXME optimize
+            int src_x, src_y, frac_x, frac_y, index;
+
+            src_x= vx>>16;
+            src_y= vy>>16;
+            frac_x= src_x&(s-1);
+            frac_y= src_y&(s-1);
+            src_x>>=shift;
+            src_y>>=shift;
+
+            if((unsigned)src_x < width){
+                if((unsigned)src_y < height){
+                    index= src_x + src_y*stride;
+                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_x)
+                                           + src[index       +1]*   frac_x )*(s-frac_y)
+                                        + (  src[index+stride  ]*(s-frac_x)
+                                           + src[index+stride+1]*   frac_x )*   frac_y
+                                        + r)>>(shift*2);
+                }else{
+                    index= src_x + av_clip(src_y, 0, height)*stride;
+                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
+                                          + src[index       +1]*   frac_x )*s
+                                        + r)>>(shift*2);
+                }
+            }else{
+                if((unsigned)src_y < height){
+                    index= av_clip(src_x, 0, width) + src_y*stride;
+                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_y)
+                                           + src[index+stride  ]*   frac_y )*s
+                                        + r)>>(shift*2);
+                }else{
+                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
+                    dst[y*stride + x]=    src[index         ];
+                }
+            }
+
+            vx+= dxx;
+            vy+= dyx;
+        }
+        ox += dxy;
+        oy += dyy;
+    }
+}
+
+static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    switch(width){
+    case 2: put_pixels2_c (dst, src, stride, height); break;
+    case 4: put_pixels4_c (dst, src, stride, height); break;
+    case 8: put_pixels8_c (dst, src, stride, height); break;
+    case 16:put_pixels16_c(dst, src, stride, height); break;
+    }
+}
+
+static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    switch(width){
+    case 2: avg_pixels2_c (dst, src, stride, height); break;
+    case 4: avg_pixels4_c (dst, src, stride, height); break;
+    case 8: avg_pixels8_c (dst, src, stride, height); break;
+    case 16:avg_pixels16_c(dst, src, stride, height); break;
+    }
+}
+
+static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+
+static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
+    int i,j;
+    for (i=0; i < height; i++) {
+      for (j=0; j < width; j++) {
+        dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
+      }
+      src += stride;
+      dst += stride;
+    }
+}
+#if 0
+#define TPEL_WIDTH(width)\
+static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
+static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
+    void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
+#endif
+
+#define H264_CHROMA_MC(OPNAME, OP)\
+static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+    const int A=(8-x)*(8-y);\
+    const int B=(  x)*(8-y);\
+    const int C=(8-x)*(  y);\
+    const int D=(  x)*(  y);\
+    int i;\
+    \
+    assert(x<8 && y<8 && x>=0 && y>=0);\
+\
+    if(D){\
+        for(i=0; i<h; i++){\
+            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
+            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
+            dst+= stride;\
+            src+= stride;\
+        }\
+    }else{\
+        const int E= B+C;\
+        const int step= C ? stride : 1;\
+        for(i=0; i<h; i++){\
+            OP(dst[0], (A*src[0] + E*src[step+0]));\
+            OP(dst[1], (A*src[1] + E*src[step+1]));\
+            dst+= stride;\
+            src+= stride;\
+        }\
+    }\
+}\
+\
+static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+    const int A=(8-x)*(8-y);\
+    const int B=(  x)*(8-y);\
+    const int C=(8-x)*(  y);\
+    const int D=(  x)*(  y);\
+    int i;\
+    \
+    assert(x<8 && y<8 && x>=0 && y>=0);\
+\
+    if(D){\
+        for(i=0; i<h; i++){\
+            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
+            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
+            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
+            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
+            dst+= stride;\
+            src+= stride;\
+        }\
+    }else{\
+        const int E= B+C;\
+        const int step= C ? stride : 1;\
+        for(i=0; i<h; i++){\
+            OP(dst[0], (A*src[0] + E*src[step+0]));\
+            OP(dst[1], (A*src[1] + E*src[step+1]));\
+            OP(dst[2], (A*src[2] + E*src[step+2]));\
+            OP(dst[3], (A*src[3] + E*src[step+3]));\
+            dst+= stride;\
+            src+= stride;\
+        }\
+    }\
+}\
+\
+static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+    const int A=(8-x)*(8-y);\
+    const int B=(  x)*(8-y);\
+    const int C=(8-x)*(  y);\
+    const int D=(  x)*(  y);\
+    int i;\
+    \
+    assert(x<8 && y<8 && x>=0 && y>=0);\
+\
+    if(D){\
+        for(i=0; i<h; i++){\
+            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
+            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
+            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
+            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
+            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
+            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
+            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
+            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
+            dst+= stride;\
+            src+= stride;\
+        }\
+    }else{\
+        const int E= B+C;\
+        const int step= C ? stride : 1;\
+        for(i=0; i<h; i++){\
+            OP(dst[0], (A*src[0] + E*src[step+0]));\
+            OP(dst[1], (A*src[1] + E*src[step+1]));\
+            OP(dst[2], (A*src[2] + E*src[step+2]));\
+            OP(dst[3], (A*src[3] + E*src[step+3]));\
+            OP(dst[4], (A*src[4] + E*src[step+4]));\
+            OP(dst[5], (A*src[5] + E*src[step+5]));\
+            OP(dst[6], (A*src[6] + E*src[step+6]));\
+            OP(dst[7], (A*src[7] + E*src[step+7]));\
+            dst+= stride;\
+            src+= stride;\
+        }\
+    }\
+}
+
+#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
+#define op_put(a, b) a = (((b) + 32)>>6)
+
+H264_CHROMA_MC(put_       , op_put)
+H264_CHROMA_MC(avg_       , op_avg)
+#undef op_avg
+#undef op_put
+
+static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
+    const int A=(8-x)*(8-y);
+    const int B=(  x)*(8-y);
+    const int C=(8-x)*(  y);
+    const int D=(  x)*(  y);
+    int i;
+
+    assert(x<8 && y<8 && x>=0 && y>=0);
+
+    for(i=0; i<h; i++)
+    {
+        dst[0] = (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + 32 - 4) >> 6;
+        dst[1] = (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + 32 - 4) >> 6;
+        dst[2] = (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + 32 - 4) >> 6;
+        dst[3] = (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + 32 - 4) >> 6;
+        dst[4] = (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + 32 - 4) >> 6;
+        dst[5] = (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + 32 - 4) >> 6;
+        dst[6] = (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + 32 - 4) >> 6;
+        dst[7] = (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + 32 - 4) >> 6;
+        dst+= stride;
+        src+= stride;
+    }
+}
+
+static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
+    const int A=(8-x)*(8-y);
+    const int B=(  x)*(8-y);
+    const int C=(8-x)*(  y);
+    const int D=(  x)*(  y);
+    int i;
+
+    assert(x<8 && y<8 && x>=0 && y>=0);
+
+    for(i=0; i<h; i++)
+    {
+        dst[0] = avg2(dst[0], ((A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + 32 - 4) >> 6));
+        dst[1] = avg2(dst[1], ((A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + 32 - 4) >> 6));
+        dst[2] = avg2(dst[2], ((A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + 32 - 4) >> 6));
+        dst[3] = avg2(dst[3], ((A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + 32 - 4) >> 6));
+        dst[4] = avg2(dst[4], ((A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + 32 - 4) >> 6));
+        dst[5] = avg2(dst[5], ((A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + 32 - 4) >> 6));
+        dst[6] = avg2(dst[6], ((A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + 32 - 4) >> 6));
+        dst[7] = avg2(dst[7], ((A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + 32 - 4) >> 6));
+        dst+= stride;
+        src+= stride;
+    }
+}
+
+#define QPEL_MC(r, OPNAME, RND, OP) \
+static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
+        OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
+        OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
+        OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
+        OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
+        OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
+        OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
+        OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }\
+}\
+\
+static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int w=8;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<w; i++)\
+    {\
+        const int src0= src[0*srcStride];\
+        const int src1= src[1*srcStride];\
+        const int src2= src[2*srcStride];\
+        const int src3= src[3*srcStride];\
+        const int src4= src[4*srcStride];\
+        const int src5= src[5*srcStride];\
+        const int src6= src[6*srcStride];\
+        const int src7= src[7*srcStride];\
+        const int src8= src[8*srcStride];\
+        OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
+        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
+        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
+        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
+        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
+        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
+        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
+        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
+        dst++;\
+        src++;\
+    }\
+}\
+\
+static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    \
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
+        OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
+        OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
+        OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
+        OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
+        OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
+        OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
+        OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
+        OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
+        OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
+        OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
+        OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
+        OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
+        OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
+        OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
+        OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }\
+}\
+\
+static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    const int w=16;\
+    for(i=0; i<w; i++)\
+    {\
+        const int src0= src[0*srcStride];\
+        const int src1= src[1*srcStride];\
+        const int src2= src[2*srcStride];\
+        const int src3= src[3*srcStride];\
+        const int src4= src[4*srcStride];\
+        const int src5= src[5*srcStride];\
+        const int src6= src[6*srcStride];\
+        const int src7= src[7*srcStride];\
+        const int src8= src[8*srcStride];\
+        const int src9= src[9*srcStride];\
+        const int src10= src[10*srcStride];\
+        const int src11= src[11*srcStride];\
+        const int src12= src[12*srcStride];\
+        const int src13= src[13*srcStride];\
+        const int src14= src[14*srcStride];\
+        const int src15= src[15*srcStride];\
+        const int src16= src[16*srcStride];\
+        OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
+        OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
+        OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
+        OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
+        OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
+        OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
+        OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
+        OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
+        OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
+        OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
+        OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
+        OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
+        OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
+        OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
+        OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
+        OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
+        dst++;\
+        src++;\
+    }\
+}\
+\
+static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## pixels8_c(dst, src, stride, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[64];\
+    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
+    OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[64];\
+    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
+    OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t half[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
+    OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    copy_block9(full, src, 16, stride, 9);\
+    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
+}\
+\
+static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t half[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
+    OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
+}\
+void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
+}\
+void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
+}\
+void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
+}\
+void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[72];\
+    uint8_t halfHV[64];\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
+}\
+void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
+    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
+}\
+void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    uint8_t halfV[64];\
+    uint8_t halfHV[64];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
+    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
+    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[16*9];\
+    uint8_t halfH[72];\
+    copy_block9(full, src, 16, stride, 9);\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
+    put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
+    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
+}\
+static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[72];\
+    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
+    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
+}\
+static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## pixels16_c(dst, src, stride, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[256];\
+    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
+    OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[256];\
+    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
+    OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t half[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
+    OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    copy_block17(full, src, 24, stride, 17);\
+    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
+}\
+\
+static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t half[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
+    OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[272];\
+    uint8_t halfHV[256];\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
+    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
+}\
+void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    uint8_t halfV[256];\
+    uint8_t halfHV[256];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
+    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
+    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[24*17];\
+    uint8_t halfH[272];\
+    copy_block17(full, src, 24, stride, 17);\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
+    put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
+    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
+}\
+static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t halfH[272];\
+    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
+    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
+}
+
+#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
+#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
+#define op_put(a, b) a = cm[((b) + 16)>>5]
+#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
+
+QPEL_MC(0, put_       , _       , op_put)
+QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
+QPEL_MC(0, avg_       , _       , op_avg)
+//QPEL_MC(1, avg_no_rnd , _       , op_avg)
+#undef op_avg
+#undef op_avg_no_rnd
+#undef op_put
+#undef op_put_no_rnd
+
+#if 1
+#define H264_LOWPASS(OPNAME, OP, OP2) \
+static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int h=2;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
+        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }\
+}\
+\
+static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int w=2;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<w; i++)\
+    {\
+        const int srcB= src[-2*srcStride];\
+        const int srcA= src[-1*srcStride];\
+        const int src0= src[0 *srcStride];\
+        const int src1= src[1 *srcStride];\
+        const int src2= src[2 *srcStride];\
+        const int src3= src[3 *srcStride];\
+        const int src4= src[4 *srcStride];\
+        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
+        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
+        dst++;\
+        src++;\
+    }\
+}\
+\
+static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+    const int h=2;\
+    const int w=2;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    src -= 2*srcStride;\
+    for(i=0; i<h+5; i++)\
+    {\
+        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
+        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
+        tmp+=tmpStride;\
+        src+=srcStride;\
+    }\
+    tmp -= tmpStride*(h+5-2);\
+    for(i=0; i<w; i++)\
+    {\
+        const int tmpB= tmp[-2*tmpStride];\
+        const int tmpA= tmp[-1*tmpStride];\
+        const int tmp0= tmp[0 *tmpStride];\
+        const int tmp1= tmp[1 *tmpStride];\
+        const int tmp2= tmp[2 *tmpStride];\
+        const int tmp3= tmp[3 *tmpStride];\
+        const int tmp4= tmp[4 *tmpStride];\
+        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
+        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
+        dst++;\
+        tmp++;\
+    }\
+}\
+static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int h=4;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
+        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
+        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
+        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int w=4;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<w; i++)\
+    {\
+        const int srcB= src[-2*srcStride];\
+        const int srcA= src[-1*srcStride];\
+        const int src0= src[0 *srcStride];\
+        const int src1= src[1 *srcStride];\
+        const int src2= src[2 *srcStride];\
+        const int src3= src[3 *srcStride];\
+        const int src4= src[4 *srcStride];\
+        const int src5= src[5 *srcStride];\
+        const int src6= src[6 *srcStride];\
+        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
+        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
+        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
+        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
+        dst++;\
+        src++;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+    const int h=4;\
+    const int w=4;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    src -= 2*srcStride;\
+    for(i=0; i<h+5; i++)\
+    {\
+        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
+        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
+        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
+        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
+        tmp+=tmpStride;\
+        src+=srcStride;\
+    }\
+    tmp -= tmpStride*(h+5-2);\
+    for(i=0; i<w; i++)\
+    {\
+        const int tmpB= tmp[-2*tmpStride];\
+        const int tmpA= tmp[-1*tmpStride];\
+        const int tmp0= tmp[0 *tmpStride];\
+        const int tmp1= tmp[1 *tmpStride];\
+        const int tmp2= tmp[2 *tmpStride];\
+        const int tmp3= tmp[3 *tmpStride];\
+        const int tmp4= tmp[4 *tmpStride];\
+        const int tmp5= tmp[5 *tmpStride];\
+        const int tmp6= tmp[6 *tmpStride];\
+        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
+        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
+        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
+        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
+        dst++;\
+        tmp++;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int h=8;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
+        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
+        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
+        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
+        OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
+        OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
+        OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
+        OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    const int w=8;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    for(i=0; i<w; i++)\
+    {\
+        const int srcB= src[-2*srcStride];\
+        const int srcA= src[-1*srcStride];\
+        const int src0= src[0 *srcStride];\
+        const int src1= src[1 *srcStride];\
+        const int src2= src[2 *srcStride];\
+        const int src3= src[3 *srcStride];\
+        const int src4= src[4 *srcStride];\
+        const int src5= src[5 *srcStride];\
+        const int src6= src[6 *srcStride];\
+        const int src7= src[7 *srcStride];\
+        const int src8= src[8 *srcStride];\
+        const int src9= src[9 *srcStride];\
+        const int src10=src[10*srcStride];\
+        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
+        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
+        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
+        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
+        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
+        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
+        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
+        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
+        dst++;\
+        src++;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+    const int h=8;\
+    const int w=8;\
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+    int i;\
+    src -= 2*srcStride;\
+    for(i=0; i<h+5; i++)\
+    {\
+        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
+        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
+        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
+        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
+        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
+        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
+        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
+        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
+        tmp+=tmpStride;\
+        src+=srcStride;\
+    }\
+    tmp -= tmpStride*(h+5-2);\
+    for(i=0; i<w; i++)\
+    {\
+        const int tmpB= tmp[-2*tmpStride];\
+        const int tmpA= tmp[-1*tmpStride];\
+        const int tmp0= tmp[0 *tmpStride];\
+        const int tmp1= tmp[1 *tmpStride];\
+        const int tmp2= tmp[2 *tmpStride];\
+        const int tmp3= tmp[3 *tmpStride];\
+        const int tmp4= tmp[4 *tmpStride];\
+        const int tmp5= tmp[5 *tmpStride];\
+        const int tmp6= tmp[6 *tmpStride];\
+        const int tmp7= tmp[7 *tmpStride];\
+        const int tmp8= tmp[8 *tmpStride];\
+        const int tmp9= tmp[9 *tmpStride];\
+        const int tmp10=tmp[10*tmpStride];\
+        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
+        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
+        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
+        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
+        OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
+        OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
+        OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
+        OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
+        dst++;\
+        tmp++;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
+    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
+    src += 8*srcStride;\
+    dst += 8*dstStride;\
+    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
+    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
+}\
+\
+static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
+    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
+    src += 8*srcStride;\
+    dst += 8*dstStride;\
+    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
+    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
+}\
+\
+static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
+    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
+    src += 8*srcStride;\
+    dst += 8*dstStride;\
+    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
+    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
+}\
+
+#define H264_MC(OPNAME, SIZE) \
+static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
+    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t half[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t half[SIZE*SIZE];\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t half[SIZE*SIZE];\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
+    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
+    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfHV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
+    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    uint8_t halfH[SIZE*SIZE];\
+    uint8_t halfHV[SIZE*SIZE];\
+    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
+    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    uint8_t halfV[SIZE*SIZE];\
+    uint8_t halfHV[SIZE*SIZE];\
+    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
+    uint8_t full[SIZE*(SIZE+5)];\
+    uint8_t * const full_mid= full + SIZE*2;\
+    int16_t tmp[SIZE*(SIZE+5)];\
+    uint8_t halfV[SIZE*SIZE];\
+    uint8_t halfHV[SIZE*SIZE];\
+    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
+    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
+    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
+    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
+}\
+
+#define op_avg(a, b)  a = (((a)+cm[((b) + 16)>>5]+1)>>1)
+//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
+#define op_put(a, b)  a = cm[((b) + 16)>>5]
+#define op2_avg(a, b)  a = (((a)+cm[((b) + 512)>>10]+1)>>1)
+#define op2_put(a, b)  a = cm[((b) + 512)>>10]
+
+H264_LOWPASS(put_       , op_put, op2_put)
+H264_LOWPASS(avg_       , op_avg, op2_avg)
+H264_MC(put_, 2)
+H264_MC(put_, 4)
+H264_MC(put_, 8)
+H264_MC(put_, 16)
+H264_MC(avg_, 4)
+H264_MC(avg_, 8)
+H264_MC(avg_, 16)
+
+#undef op_avg
+#undef op_put
+#undef op2_avg
+#undef op2_put
+#endif
+
+static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    int i;
+
+    for(i=0; i<h; i++){
+        dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
+        dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
+        dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
+        dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
+        dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
+        dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
+        dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
+        dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
+
+#if CONFIG_CAVS_DECODER
+/* AVS specific */
+void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
+    put_pixels8_c(dst, src, stride, 8);
+}
+void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
+    avg_pixels8_c(dst, src, stride, 8);
+}
+void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
+    put_pixels16_c(dst, src, stride, 16);
+}
+void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride) {
+    avg_pixels16_c(dst, src, stride, 16);
+}
+#endif /* CONFIG_CAVS_DECODER */
+
+#if CONFIG_VC1_DECODER
+/* VC-1 specific */
+void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
+    put_pixels8_c(dst, src, stride, 8);
+}
+void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
+    avg_pixels8_c(dst, src, stride, 8);
+}
+#endif /* CONFIG_VC1_DECODER */
+
+#if CONFIG_RV40_DECODER
+static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
+    put_pixels16_xy2_c(dst, src, stride, 16);
+}
+static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
+    avg_pixels16_xy2_c(dst, src, stride, 16);
+}
+static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
+    put_pixels8_xy2_c(dst, src, stride, 8);
+}
+static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
+    avg_pixels8_xy2_c(dst, src, stride, 8);
+}
+#endif /* CONFIG_RV40_DECODER */
+
+static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+    int i;
+
+    for(i=0; i<w; i++){
+        const int src_1= src[ -srcStride];
+        const int src0 = src[0          ];
+        const int src1 = src[  srcStride];
+        const int src2 = src[2*srcStride];
+        const int src3 = src[3*srcStride];
+        const int src4 = src[4*srcStride];
+        const int src5 = src[5*srcStride];
+        const int src6 = src[6*srcStride];
+        const int src7 = src[7*srcStride];
+        const int src8 = src[8*srcStride];
+        const int src9 = src[9*srcStride];
+        dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
+        dst[1*dstStride]= cm[(9*(src1 + src2) - (src0  + src3) + 8)>>4];
+        dst[2*dstStride]= cm[(9*(src2 + src3) - (src1  + src4) + 8)>>4];
+        dst[3*dstStride]= cm[(9*(src3 + src4) - (src2  + src5) + 8)>>4];
+        dst[4*dstStride]= cm[(9*(src4 + src5) - (src3  + src6) + 8)>>4];
+        dst[5*dstStride]= cm[(9*(src5 + src6) - (src4  + src7) + 8)>>4];
+        dst[6*dstStride]= cm[(9*(src6 + src7) - (src5  + src8) + 8)>>4];
+        dst[7*dstStride]= cm[(9*(src7 + src8) - (src6  + src9) + 8)>>4];
+        src++;
+        dst++;
+    }
+}
+
+static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
+    put_pixels8_c(dst, src, stride, 8);
+}
+
+static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
+    uint8_t half[64];
+    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
+    put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
+}
+
+static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
+    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
+}
+
+static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
+    uint8_t half[64];
+    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
+    put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
+}
+
+static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
+    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
+}
+
+static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
+    uint8_t halfH[88];
+    uint8_t halfV[64];
+    uint8_t halfHV[64];
+    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
+    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
+    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
+    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
+}
+static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
+    uint8_t halfH[88];
+    uint8_t halfV[64];
+    uint8_t halfHV[64];
+    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
+    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
+    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
+    put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
+}
+static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
+    uint8_t halfH[88];
+    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
+    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
+}
+
+static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
+    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
+    int x;
+    const int strength= ff_h263_loop_filter_strength[qscale];
+
+    for(x=0; x<8; x++){
+        int d1, d2, ad1;
+        int p0= src[x-2*stride];
+        int p1= src[x-1*stride];
+        int p2= src[x+0*stride];
+        int p3= src[x+1*stride];
+        int d = (p0 - p3 + 4*(p2 - p1)) / 8;
+
+        if     (d<-2*strength) d1= 0;
+        else if(d<-  strength) d1=-2*strength - d;
+        else if(d<   strength) d1= d;
+        else if(d< 2*strength) d1= 2*strength - d;
+        else                   d1= 0;
+
+        p1 += d1;
+        p2 -= d1;
+        if(p1&256) p1= ~(p1>>31);
+        if(p2&256) p2= ~(p2>>31);
+
+        src[x-1*stride] = p1;
+        src[x+0*stride] = p2;
+
+        ad1= FFABS(d1)>>1;
+
+        d2= av_clip((p0-p3)/4, -ad1, ad1);
+
+        src[x-2*stride] = p0 - d2;
+        src[x+  stride] = p3 + d2;
+    }
+    }
+}
+
+static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
+    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
+    int y;
+    const int strength= ff_h263_loop_filter_strength[qscale];
+
+    for(y=0; y<8; y++){
+        int d1, d2, ad1;
+        int p0= src[y*stride-2];
+        int p1= src[y*stride-1];
+        int p2= src[y*stride+0];
+        int p3= src[y*stride+1];
+        int d = (p0 - p3 + 4*(p2 - p1)) / 8;
+
+        if     (d<-2*strength) d1= 0;
+        else if(d<-  strength) d1=-2*strength - d;
+        else if(d<   strength) d1= d;
+        else if(d< 2*strength) d1= 2*strength - d;
+        else                   d1= 0;
+
+        p1 += d1;
+        p2 -= d1;
+        if(p1&256) p1= ~(p1>>31);
+        if(p2&256) p2= ~(p2>>31);
+
+        src[y*stride-1] = p1;
+        src[y*stride+0] = p2;
+
+        ad1= FFABS(d1)>>1;
+
+        d2= av_clip((p0-p3)/4, -ad1, ad1);
+
+        src[y*stride-2] = p0 - d2;
+        src[y*stride+1] = p3 + d2;
+    }
+    }
+}
+
+static void h261_loop_filter_c(uint8_t *src, int stride){
+    int x,y,xy,yz;
+    int temp[64];
+
+    for(x=0; x<8; x++){
+        temp[x      ] = 4*src[x           ];
+        temp[x + 7*8] = 4*src[x + 7*stride];
+    }
+    for(y=1; y<7; y++){
+        for(x=0; x<8; x++){
+            xy = y * stride + x;
+            yz = y * 8 + x;
+            temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
+        }
+    }
+
+    for(y=0; y<8; y++){
+        src[  y*stride] = (temp[  y*8] + 2)>>2;
+        src[7+y*stride] = (temp[7+y*8] + 2)>>2;
+        for(x=1; x<7; x++){
+            xy = y * stride + x;
+            yz = y * 8 + x;
+            src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
+        }
+    }
+}
+
+static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int s, i;
+
+    s = 0;
+    for(i=0;i<h;i++) {
+        s += abs(pix1[0] - pix2[0]);
+        s += abs(pix1[1] - pix2[1]);
+        s += abs(pix1[2] - pix2[2]);
+        s += abs(pix1[3] - pix2[3]);
+        s += abs(pix1[4] - pix2[4]);
+        s += abs(pix1[5] - pix2[5]);
+        s += abs(pix1[6] - pix2[6]);
+        s += abs(pix1[7] - pix2[7]);
+        s += abs(pix1[8] - pix2[8]);
+        s += abs(pix1[9] - pix2[9]);
+        s += abs(pix1[10] - pix2[10]);
+        s += abs(pix1[11] - pix2[11]);
+        s += abs(pix1[12] - pix2[12]);
+        s += abs(pix1[13] - pix2[13]);
+        s += abs(pix1[14] - pix2[14]);
+        s += abs(pix1[15] - pix2[15]);
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    return s;
+}
+
+static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int s, i;
+
+    s = 0;
+    for(i=0;i<h;i++) {
+        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
+        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
+        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
+        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
+        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
+        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
+        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
+        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
+        s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
+        s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
+        s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
+        s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
+        s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
+        s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
+        s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
+        s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    return s;
+}
+
+static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int s, i;
+    uint8_t *pix3 = pix2 + line_size;
+
+    s = 0;
+    for(i=0;i<h;i++) {
+        s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
+        s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
+        s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
+        s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
+        s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
+        s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
+        s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
+        s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
+        s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
+        s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
+        s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
+        s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
+        s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
+        s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
+        s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
+        s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
+        pix1 += line_size;
+        pix2 += line_size;
+        pix3 += line_size;
+    }
+    return s;
+}
+
+static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int s, i;
+    uint8_t *pix3 = pix2 + line_size;
+
+    s = 0;
+    for(i=0;i<h;i++) {
+        s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
+        s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
+        s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
+        s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
+        s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
+        s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
+        s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
+        s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
+        s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
+        s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
+        s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
+        s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
+        s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
+        s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
+        s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
+        s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
+        pix1 += line_size;
+        pix2 += line_size;
+        pix3 += line_size;
+    }
+    return s;
+}
+
+static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int s, i;
+
+    s = 0;
+    for(i=0;i<h;i++) {
+        s += abs(pix1[0] - pix2[0]);
+        s += abs(pix1[1] - pix2[1]);
+        s += abs(pix1[2] - pix2[2]);
+        s += abs(pix1[3] - pix2[3]);
+        s += abs(pix1[4] - pix2[4]);
+        s += abs(pix1[5] - pix2[5]);
+        s += abs(pix1[6] - pix2[6]);
+        s += abs(pix1[7] - pix2[7]);
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    return s;
+}
+
+static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int s, i;
+
+    s = 0;
+    for(i=0;i<h;i++) {
+        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
+        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
+        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
+        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
+        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
+        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
+        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
+        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
+        pix1 += line_size;
+        pix2 += line_size;
+    }
+    return s;
+}
+
+static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int s, i;
+    uint8_t *pix3 = pix2 + line_size;
+
+    s = 0;
+    for(i=0;i<h;i++) {
+        s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
+        s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
+        s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
+        s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
+        s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
+        s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
+        s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
+        s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
+        pix1 += line_size;
+        pix2 += line_size;
+        pix3 += line_size;
+    }
+    return s;
+}
+
+static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
+{
+    int s, i;
+    uint8_t *pix3 = pix2 + line_size;
+
+    s = 0;
+    for(i=0;i<h;i++) {
+        s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
+        s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
+        s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
+        s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
+        s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
+        s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
+        s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
+        s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
+        pix1 += line_size;
+        pix2 += line_size;
+        pix3 += line_size;
+    }
+    return s;
+}
+
+static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
+    MpegEncContext *c = v;
+    int score1=0;
+    int score2=0;
+    int x,y;
+
+    for(y=0; y<h; y++){
+        for(x=0; x<16; x++){
+            score1+= (s1[x  ] - s2[x ])*(s1[x  ] - s2[x ]);
+        }
+        if(y+1<h){
+            for(x=0; x<15; x++){
+                score2+= FFABS(  s1[x  ] - s1[x  +stride]
+                             - s1[x+1] + s1[x+1+stride])
+                        -FFABS(  s2[x  ] - s2[x  +stride]
+                             - s2[x+1] + s2[x+1+stride]);
+            }
+        }
+        s1+= stride;
+        s2+= stride;
+    }
+
+    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
+    else  return score1 + FFABS(score2)*8;
+}
+
+static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
+    MpegEncContext *c = v;
+    int score1=0;
+    int score2=0;
+    int x,y;
+
+    for(y=0; y<h; y++){
+        for(x=0; x<8; x++){
+            score1+= (s1[x  ] - s2[x ])*(s1[x  ] - s2[x ]);
+        }
+        if(y+1<h){
+            for(x=0; x<7; x++){
+                score2+= FFABS(  s1[x  ] - s1[x  +stride]
+                             - s1[x+1] + s1[x+1+stride])
+                        -FFABS(  s2[x  ] - s2[x  +stride]
+                             - s2[x+1] + s2[x+1+stride]);
+            }
+        }
+        s1+= stride;
+        s2+= stride;
+    }
+
+    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
+    else  return score1 + FFABS(score2)*8;
+}
+
+static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
+    int i;
+    unsigned int sum=0;
+
+    for(i=0; i<8*8; i++){
+        int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
+        int w= weight[i];
+        b>>= RECON_SHIFT;
+        assert(-512<b && b<512);
+
+        sum += (w*b)*(w*b)>>4;
+    }
+    return sum>>2;
+}
+
+static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
+    int i;
+
+    for(i=0; i<8*8; i++){
+        rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
+    }
+}
+
+/**
+ * permutes an 8x8 block.
+ * @param block the block which will be permuted according to the given permutation vector
+ * @param permutation the permutation vector
+ * @param last the last non zero coefficient in scantable order, used to speed the permutation up
+ * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
+ *                  (inverse) permutated to scantable order!
+ */
+void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
+{
+    int i;
+    DCTELEM temp[64];
+
+    if(last<=0) return;
+    //if(permutation[1]==1) return; //FIXME it is ok but not clean and might fail for some permutations
+
+    for(i=0; i<=last; i++){
+        const int j= scantable[i];
+        temp[j]= block[j];
+        block[j]=0;
+    }
+
+    for(i=0; i<=last; i++){
+        const int j= scantable[i];
+        const int perm_j= permutation[j];
+        block[perm_j]= temp[j];
+    }
+}
+
+static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
+    return 0;
+}
+
+void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
+    int i;
+
+    memset(cmp, 0, sizeof(void*)*6);
+
+    for(i=0; i<6; i++){
+        switch(type&0xFF){
+        case FF_CMP_SAD:
+            cmp[i]= c->sad[i];
+            break;
+        case FF_CMP_SATD:
+            cmp[i]= c->hadamard8_diff[i];
+            break;
+        case FF_CMP_SSE:
+            cmp[i]= c->sse[i];
+            break;
+        case FF_CMP_DCT:
+            cmp[i]= c->dct_sad[i];
+            break;
+        case FF_CMP_DCT264:
+            cmp[i]= c->dct264_sad[i];
+            break;
+        case FF_CMP_DCTMAX:
+            cmp[i]= c->dct_max[i];
+            break;
+        case FF_CMP_PSNR:
+            cmp[i]= c->quant_psnr[i];
+            break;
+        case FF_CMP_BIT:
+            cmp[i]= c->bit[i];
+            break;
+        case FF_CMP_RD:
+            cmp[i]= c->rd[i];
+            break;
+        case FF_CMP_VSAD:
+            cmp[i]= c->vsad[i];
+            break;
+        case FF_CMP_VSSE:
+            cmp[i]= c->vsse[i];
+            break;
+        case FF_CMP_ZERO:
+            cmp[i]= zero_cmp;
+            break;
+        case FF_CMP_NSSE:
+            cmp[i]= c->nsse[i];
+            break;
+#if CONFIG_DWT
+        case FF_CMP_W53:
+            cmp[i]= c->w53[i];
+            break;
+        case FF_CMP_W97:
+            cmp[i]= c->w97[i];
+            break;
+#endif
+        default:
+            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
+        }
+    }
+}
+
+static void clear_block_c(DCTELEM *block)
+{
+    memset(block, 0, sizeof(DCTELEM)*64);
+}
+
+/**
+ * memset(blocks, 0, sizeof(DCTELEM)*6*64)
+ */
+static void clear_blocks_c(DCTELEM *blocks)
+{
+    memset(blocks, 0, sizeof(DCTELEM)*6*64);
+}
+
+static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
+    long i;
+    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
+        long a = *(long*)(src+i);
+        long b = *(long*)(dst+i);
+        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
+    }
+    for(; i<w; i++)
+        dst[i+0] += src[i+0];
+}
+
+static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
+    long i;
+    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
+        long a = *(long*)(src1+i);
+        long b = *(long*)(src2+i);
+        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
+    }
+    for(; i<w; i++)
+        dst[i] = src1[i]+src2[i];
+}
+
+static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
+    long i;
+#if !HAVE_FAST_UNALIGNED
+    if((long)src2 & (sizeof(long)-1)){
+        for(i=0; i+7<w; i+=8){
+            dst[i+0] = src1[i+0]-src2[i+0];
+            dst[i+1] = src1[i+1]-src2[i+1];
+            dst[i+2] = src1[i+2]-src2[i+2];
+            dst[i+3] = src1[i+3]-src2[i+3];
+            dst[i+4] = src1[i+4]-src2[i+4];
+            dst[i+5] = src1[i+5]-src2[i+5];
+            dst[i+6] = src1[i+6]-src2[i+6];
+            dst[i+7] = src1[i+7]-src2[i+7];
+        }
+    }else
+#endif
+    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
+        long a = *(long*)(src1+i);
+        long b = *(long*)(src2+i);
+        *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
+    }
+    for(; i<w; i++)
+        dst[i+0] = src1[i+0]-src2[i+0];
+}
+
+static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
+    int i;
+    uint8_t l, lt;
+
+    l= *left;
+    lt= *left_top;
+
+    for(i=0; i<w; i++){
+        l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
+        lt= src1[i];
+        dst[i]= l;
+    }
+
+    *left= l;
+    *left_top= lt;
+}
+
+static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
+    int i;
+    uint8_t l, lt;
+
+    l= *left;
+    lt= *left_top;
+
+    for(i=0; i<w; i++){
+        const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
+        lt= src1[i];
+        l= src2[i];
+        dst[i]= l - pred;
+    }
+
+    *left= l;
+    *left_top= lt;
+}
+
+static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
+    int i;
+
+    for(i=0; i<w-1; i++){
+        acc+= src[i];
+        dst[i]= acc;
+        i++;
+        acc+= src[i];
+        dst[i]= acc;
+    }
+
+    for(; i<w; i++){
+        acc+= src[i];
+        dst[i]= acc;
+    }
+
+    return acc;
+}
+
+#if HAVE_BIGENDIAN
+#define B 3
+#define G 2
+#define R 1
+#define A 0
+#else
+#define B 0
+#define G 1
+#define R 2
+#define A 3
+#endif
+static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
+    int i;
+    int r,g,b,a;
+    r= *red;
+    g= *green;
+    b= *blue;
+    a= *alpha;
+
+    for(i=0; i<w; i++){
+        b+= src[4*i+B];
+        g+= src[4*i+G];
+        r+= src[4*i+R];
+        a+= src[4*i+A];
+
+        dst[4*i+B]= b;
+        dst[4*i+G]= g;
+        dst[4*i+R]= r;
+        dst[4*i+A]= a;
+    }
+
+    *red= r;
+    *green= g;
+    *blue= b;
+    *alpha= a;
+}
+#undef B
+#undef G
+#undef R
+#undef A
+
+#define BUTTERFLY2(o1,o2,i1,i2) \
+o1= (i1)+(i2);\
+o2= (i1)-(i2);
+
+#define BUTTERFLY1(x,y) \
+{\
+    int a,b;\
+    a= x;\
+    b= y;\
+    x= a+b;\
+    y= a-b;\
+}
+
+#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
+
+static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
+    int i;
+    int temp[64];
+    int sum=0;
+
+    assert(h==8);
+
+    for(i=0; i<8; i++){
+        //FIXME try pointer walks
+        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
+        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
+        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
+        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
+
+        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
+        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
+        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
+        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
+
+        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
+        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
+        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
+        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
+    }
+
+    for(i=0; i<8; i++){
+        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
+        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
+        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
+        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
+
+        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
+        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
+        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
+        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
+
+        sum +=
+             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
+            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
+            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
+            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
+    }
+#if 0
+static int maxi=0;
+if(sum>maxi){
+    maxi=sum;
+    printf("MAX:%d\n", maxi);
+}
+#endif
+    return sum;
+}
+
+static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
+    int i;
+    int temp[64];
+    int sum=0;
+
+    assert(h==8);
+
+    for(i=0; i<8; i++){
+        //FIXME try pointer walks
+        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
+        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
+        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
+        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
+
+        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
+        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
+        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
+        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
+
+        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
+        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
+        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
+        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
+    }
+
+    for(i=0; i<8; i++){
+        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
+        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
+        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
+        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
+
+        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
+        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
+        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
+        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
+
+        sum +=
+             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
+            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
+            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
+            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
+    }
+
+    sum -= FFABS(temp[8*0] + temp[8*4]); // -mean
+
+    return sum;
+}
+
+static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
+    MpegEncContext * const s= (MpegEncContext *)c;
+    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
+
+    assert(h==8);
+
+    s->dsp.diff_pixels(temp, src1, src2, stride);
+    s->dsp.fdct(temp);
+    return s->dsp.sum_abs_dctelem(temp);
+}
+
+#if CONFIG_GPL
+#define DCT8_1D {\
+    const int s07 = SRC(0) + SRC(7);\
+    const int s16 = SRC(1) + SRC(6);\
+    const int s25 = SRC(2) + SRC(5);\
+    const int s34 = SRC(3) + SRC(4);\
+    const int a0 = s07 + s34;\
+    const int a1 = s16 + s25;\
+    const int a2 = s07 - s34;\
+    const int a3 = s16 - s25;\
+    const int d07 = SRC(0) - SRC(7);\
+    const int d16 = SRC(1) - SRC(6);\
+    const int d25 = SRC(2) - SRC(5);\
+    const int d34 = SRC(3) - SRC(4);\
+    const int a4 = d16 + d25 + (d07 + (d07>>1));\
+    const int a5 = d07 - d34 - (d25 + (d25>>1));\
+    const int a6 = d07 + d34 - (d16 + (d16>>1));\
+    const int a7 = d16 - d25 + (d34 + (d34>>1));\
+    DST(0,  a0 + a1     ) ;\
+    DST(1,  a4 + (a7>>2)) ;\
+    DST(2,  a2 + (a3>>1)) ;\
+    DST(3,  a5 + (a6>>2)) ;\
+    DST(4,  a0 - a1     ) ;\
+    DST(5,  a6 - (a5>>2)) ;\
+    DST(6, (a2>>1) - a3 ) ;\
+    DST(7, (a4>>2) - a7 ) ;\
+}
+
+static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
+    MpegEncContext * const s= (MpegEncContext *)c;
+    DCTELEM dct[8][8];
+    int i;
+    int sum=0;
+
+    s->dsp.diff_pixels(dct[0], src1, src2, stride);
+
+#define SRC(x) dct[i][x]
+#define DST(x,v) dct[i][x]= v
+    for( i = 0; i < 8; i++ )
+        DCT8_1D
+#undef SRC
+#undef DST
+
+#define SRC(x) dct[x][i]
+#define DST(x,v) sum += FFABS(v)
+    for( i = 0; i < 8; i++ )
+        DCT8_1D
+#undef SRC
+#undef DST
+    return sum;
+}
+#endif
+
+static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
+    MpegEncContext * const s= (MpegEncContext *)c;
+    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
+    int sum=0, i;
+
+    assert(h==8);
+
+    s->dsp.diff_pixels(temp, src1, src2, stride);
+    s->dsp.fdct(temp);
+
+    for(i=0; i<64; i++)
+        sum= FFMAX(sum, FFABS(temp[i]));
+
+    return sum;
+}
+
+static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
+    MpegEncContext * const s= (MpegEncContext *)c;
+    LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
+    DCTELEM * const bak = temp+64;
+    int sum=0, i;
+
+    assert(h==8);
+    s->mb_intra=0;
+
+    s->dsp.diff_pixels(temp, src1, src2, stride);
+
+    memcpy(bak, temp, 64*sizeof(DCTELEM));
+
+    s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
+    s->dct_unquantize_inter(s, temp, 0, s->qscale);
+    ff_simple_idct(temp); //FIXME
+
+    for(i=0; i<64; i++)
+        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
+
+    return sum;
+}
+
+static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
+    MpegEncContext * const s= (MpegEncContext *)c;
+    const uint8_t *scantable= s->intra_scantable.permutated;
+    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
+    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
+    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
+    int i, last, run, bits, level, distortion, start_i;
+    const int esc_length= s->ac_esc_length;
+    uint8_t * length;
+    uint8_t * last_length;
+
+    assert(h==8);
+
+    copy_block8(lsrc1, src1, 8, stride, 8);
+    copy_block8(lsrc2, src2, 8, stride, 8);
+
+    s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
+
+    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
+
+    bits=0;
+
+    if (s->mb_intra) {
+        start_i = 1;
+        length     = s->intra_ac_vlc_length;
+        last_length= s->intra_ac_vlc_last_length;
+        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
+    } else {
+        start_i = 0;
+        length     = s->inter_ac_vlc_length;
+        last_length= s->inter_ac_vlc_last_length;
+    }
+
+    if(last>=start_i){
+        run=0;
+        for(i=start_i; i<last; i++){
+            int j= scantable[i];
+            level= temp[j];
+
+            if(level){
+                level+=64;
+                if((level&(~127)) == 0){
+                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
+                }else
+                    bits+= esc_length;
+                run=0;
+            }else
+                run++;
+        }
+        i= scantable[last];
+
+        level= temp[i] + 64;
+
+        assert(level - 64);
+
+        if((level&(~127)) == 0){
+            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
+        }else
+            bits+= esc_length;
+
+    }
+
+    if(last>=0){
+        if(s->mb_intra)
+            s->dct_unquantize_intra(s, temp, 0, s->qscale);
+        else
+            s->dct_unquantize_inter(s, temp, 0, s->qscale);
+    }
+
+    s->dsp.idct_add(lsrc2, 8, temp);
+
+    distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
+
+    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
+}
+
+static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
+    MpegEncContext * const s= (MpegEncContext *)c;
+    const uint8_t *scantable= s->intra_scantable.permutated;
+    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
+    int i, last, run, bits, level, start_i;
+    const int esc_length= s->ac_esc_length;
+    uint8_t * length;
+    uint8_t * last_length;
+
+    assert(h==8);
+
+    s->dsp.diff_pixels(temp, src1, src2, stride);
+
+    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
+
+    bits=0;
+
+    if (s->mb_intra) {
+        start_i = 1;
+        length     = s->intra_ac_vlc_length;
+        last_length= s->intra_ac_vlc_last_length;
+        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
+    } else {
+        start_i = 0;
+        length     = s->inter_ac_vlc_length;
+        last_length= s->inter_ac_vlc_last_length;
+    }
+
+    if(last>=start_i){
+        run=0;
+        for(i=start_i; i<last; i++){
+            int j= scantable[i];
+            level= temp[j];
+
+            if(level){
+                level+=64;
+                if((level&(~127)) == 0){
+                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
+                }else
+                    bits+= esc_length;
+                run=0;
+            }else
+                run++;
+        }
+        i= scantable[last];
+
+        level= temp[i] + 64;
+
+        assert(level - 64);
+
+        if((level&(~127)) == 0){
+            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
+        }else
+            bits+= esc_length;
+    }
+
+    return bits;
+}
+
+#define VSAD_INTRA(size) \
+static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
+    int score=0;                                                                                            \
+    int x,y;                                                                                                \
+                                                                                                            \
+    for(y=1; y<h; y++){                                                                                     \
+        for(x=0; x<size; x+=4){                                                                             \
+            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride])                           \
+                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]);                          \
+        }                                                                                                   \
+        s+= stride;                                                                                         \
+    }                                                                                                       \
+                                                                                                            \
+    return score;                                                                                           \
+}
+VSAD_INTRA(8)
+VSAD_INTRA(16)
+
+static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
+    int score=0;
+    int x,y;
+
+    for(y=1; y<h; y++){
+        for(x=0; x<16; x++){
+            score+= FFABS(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
+        }
+        s1+= stride;
+        s2+= stride;
+    }
+
+    return score;
+}
+
+#define SQ(a) ((a)*(a))
+#define VSSE_INTRA(size) \
+static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
+    int score=0;                                                                                            \
+    int x,y;                                                                                                \
+                                                                                                            \
+    for(y=1; y<h; y++){                                                                                     \
+        for(x=0; x<size; x+=4){                                                                               \
+            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])                                 \
+                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);                                \
+        }                                                                                                   \
+        s+= stride;                                                                                         \
+    }                                                                                                       \
+                                                                                                            \
+    return score;                                                                                           \
+}
+VSSE_INTRA(8)
+VSSE_INTRA(16)
+
+static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
+    int score=0;
+    int x,y;
+
+    for(y=1; y<h; y++){
+        for(x=0; x<16; x++){
+            score+= SQ(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
+        }
+        s1+= stride;
+        s2+= stride;
+    }
+
+    return score;
+}
+
+static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
+                               int size){
+    int score=0;
+    int i;
+    for(i=0; i<size; i++)
+        score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
+    return score;
+}
+
+WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
+WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
+WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
+#if CONFIG_GPL
+WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
+#endif
+WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
+WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
+WRAPPER8_16_SQ(rd8x8_c, rd16_c)
+WRAPPER8_16_SQ(bit8x8_c, bit16_c)
+
+static void vector_fmul_c(float *dst, const float *src, int len){
+    int i;
+    for(i=0; i<len; i++)
+        dst[i] *= src[i];
+}
+
+static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
+    int i;
+    src1 += len-1;
+    for(i=0; i<len; i++)
+        dst[i] = src0[i] * src1[-i];
+}
+
+static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
+    int i;
+    for(i=0; i<len; i++)
+        dst[i] = src0[i] * src1[i] + src2[i];
+}
+
+void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len){
+    int i,j;
+    dst += len;
+    win += len;
+    src0+= len;
+    for(i=-len, j=len-1; i<0; i++, j--) {
+        float s0 = src0[i];
+        float s1 = src1[j];
+        float wi = win[i];
+        float wj = win[j];
+        dst[i] = s0*wj - s1*wi + add_bias;
+        dst[j] = s0*wi + s1*wj + add_bias;
+    }
+}
+
+static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
+                                 int len)
+{
+    int i;
+    for (i = 0; i < len; i++)
+        dst[i] = src[i] * mul;
+}
+
+static void vector_fmul_sv_scalar_2_c(float *dst, const float *src,
+                                      const float **sv, float mul, int len)
+{
+    int i;
+    for (i = 0; i < len; i += 2, sv++) {
+        dst[i  ] = src[i  ] * sv[0][0] * mul;
+        dst[i+1] = src[i+1] * sv[0][1] * mul;
+    }
+}
+
+static void vector_fmul_sv_scalar_4_c(float *dst, const float *src,
+                                      const float **sv, float mul, int len)
+{
+    int i;
+    for (i = 0; i < len; i += 4, sv++) {
+        dst[i  ] = src[i  ] * sv[0][0] * mul;
+        dst[i+1] = src[i+1] * sv[0][1] * mul;
+        dst[i+2] = src[i+2] * sv[0][2] * mul;
+        dst[i+3] = src[i+3] * sv[0][3] * mul;
+    }
+}
+
+static void sv_fmul_scalar_2_c(float *dst, const float **sv, float mul,
+                               int len)
+{
+    int i;
+    for (i = 0; i < len; i += 2, sv++) {
+        dst[i  ] = sv[0][0] * mul;
+        dst[i+1] = sv[0][1] * mul;
+    }
+}
+
+static void sv_fmul_scalar_4_c(float *dst, const float **sv, float mul,
+                               int len)
+{
+    int i;
+    for (i = 0; i < len; i += 4, sv++) {
+        dst[i  ] = sv[0][0] * mul;
+        dst[i+1] = sv[0][1] * mul;
+        dst[i+2] = sv[0][2] * mul;
+        dst[i+3] = sv[0][3] * mul;
+    }
+}
+
+static void butterflies_float_c(float *restrict v1, float *restrict v2,
+                                int len)
+{
+    int i;
+    for (i = 0; i < len; i++) {
+        float t = v1[i] - v2[i];
+        v1[i] += v2[i];
+        v2[i] = t;
+    }
+}
+
+static float scalarproduct_float_c(const float *v1, const float *v2, int len)
+{
+    float p = 0.0;
+    int i;
+
+    for (i = 0; i < len; i++)
+        p += v1[i] * v2[i];
+
+    return p;
+}
+
+static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul, int len){
+    int i;
+    for(i=0; i<len; i++)
+        dst[i] = src[i] * mul;
+}
+
+static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
+                   uint32_t maxi, uint32_t maxisign)
+{
+
+    if(a > mini) return mini;
+    else if((a^(1<<31)) > maxisign) return maxi;
+    else return a;
+}
+
+static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
+    int i;
+    uint32_t mini = *(uint32_t*)min;
+    uint32_t maxi = *(uint32_t*)max;
+    uint32_t maxisign = maxi ^ (1<<31);
+    uint32_t *dsti = (uint32_t*)dst;
+    const uint32_t *srci = (const uint32_t*)src;
+    for(i=0; i<len; i+=8) {
+        dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
+        dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
+        dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
+        dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
+        dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
+        dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
+        dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
+        dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
+    }
+}
+static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
+    int i;
+    if(min < 0 && max > 0) {
+        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
+    } else {
+        for(i=0; i < len; i+=8) {
+            dst[i    ] = av_clipf(src[i    ], min, max);
+            dst[i + 1] = av_clipf(src[i + 1], min, max);
+            dst[i + 2] = av_clipf(src[i + 2], min, max);
+            dst[i + 3] = av_clipf(src[i + 3], min, max);
+            dst[i + 4] = av_clipf(src[i + 4], min, max);
+            dst[i + 5] = av_clipf(src[i + 5], min, max);
+            dst[i + 6] = av_clipf(src[i + 6], min, max);
+            dst[i + 7] = av_clipf(src[i + 7], min, max);
+        }
+    }
+}
+
+static av_always_inline int float_to_int16_one(const float *src){
+    int_fast32_t tmp = *(const int32_t*)src;
+    if(tmp & 0xf0000){
+        tmp = (0x43c0ffff - tmp)>>31;
+        // is this faster on some gcc/cpu combinations?
+//      if(tmp > 0x43c0ffff) tmp = 0xFFFF;
+//      else                 tmp = 0;
+    }
+    return tmp - 0x8000;
+}
+
+void ff_float_to_int16_c(int16_t *dst, const float *src, long len){
+    int i;
+    for(i=0; i<len; i++)
+        dst[i] = float_to_int16_one(src+i);
+}
+
+void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels){
+    int i,j,c;
+    if(channels==2){
+        for(i=0; i<len; i++){
+            dst[2*i]   = float_to_int16_one(src[0]+i);
+            dst[2*i+1] = float_to_int16_one(src[1]+i);
+        }
+    }else{
+        for(c=0; c<channels; c++)
+            for(i=0, j=c; i<len; i++, j+=channels)
+                dst[j] = float_to_int16_one(src[c]+i);
+    }
+}
+
+static int32_t scalarproduct_int16_c(int16_t * v1, int16_t * v2, int order, int shift)
+{
+    int res = 0;
+
+    while (order--)
+        res += (*v1++ * *v2++) >> shift;
+
+    return res;
+}
+
+static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, int16_t *v2, int16_t *v3, int order, int mul)
+{
+    int res = 0;
+    while (order--) {
+        res   += *v1 * *v2++;
+        *v1++ += mul * *v3++;
+    }
+    return res;
+}
+
+#define W0 2048
+#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
+#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
+#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
+#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
+#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
+#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
+#define W7 565  /* 2048*sqrt (2)*cos (7*pi/16) */
+
+static void wmv2_idct_row(short * b)
+{
+    int s1,s2;
+    int a0,a1,a2,a3,a4,a5,a6,a7;
+    /*step 1*/
+    a1 = W1*b[1]+W7*b[7];
+    a7 = W7*b[1]-W1*b[7];
+    a5 = W5*b[5]+W3*b[3];
+    a3 = W3*b[5]-W5*b[3];
+    a2 = W2*b[2]+W6*b[6];
+    a6 = W6*b[2]-W2*b[6];
+    a0 = W0*b[0]+W0*b[4];
+    a4 = W0*b[0]-W0*b[4];
+    /*step 2*/
+    s1 = (181*(a1-a5+a7-a3)+128)>>8;//1,3,5,7,
+    s2 = (181*(a1-a5-a7+a3)+128)>>8;
+    /*step 3*/
+    b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
+    b[1] = (a4+a6 +s1   + (1<<7))>>8;
+    b[2] = (a4-a6 +s2   + (1<<7))>>8;
+    b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
+    b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
+    b[5] = (a4-a6 -s2   + (1<<7))>>8;
+    b[6] = (a4+a6 -s1   + (1<<7))>>8;
+    b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
+}
+static void wmv2_idct_col(short * b)
+{
+    int s1,s2;
+    int a0,a1,a2,a3,a4,a5,a6,a7;
+    /*step 1, with extended precision*/
+    a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
+    a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
+    a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
+    a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
+    a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
+    a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
+    a0 = (W0*b[8*0]+W0*b[8*4]    )>>3;
+    a4 = (W0*b[8*0]-W0*b[8*4]    )>>3;
+    /*step 2*/
+    s1 = (181*(a1-a5+a7-a3)+128)>>8;
+    s2 = (181*(a1-a5-a7+a3)+128)>>8;
+    /*step 3*/
+    b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
+    b[8*1] = (a4+a6 +s1   + (1<<13))>>14;
+    b[8*2] = (a4-a6 +s2   + (1<<13))>>14;
+    b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;
+
+    b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
+    b[8*5] = (a4-a6 -s2   + (1<<13))>>14;
+    b[8*6] = (a4+a6 -s1   + (1<<13))>>14;
+    b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
+}
+void ff_wmv2_idct_c(short * block){
+    int i;
+
+    for(i=0;i<64;i+=8){
+        wmv2_idct_row(block+i);
+    }
+    for(i=0;i<8;i++){
+        wmv2_idct_col(block+i);
+    }
+}
+/* XXX: those functions should be suppressed ASAP when all IDCTs are
+ converted */
+static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    ff_wmv2_idct_c(block);
+    put_pixels_clamped_c(block, dest, line_size);
+}
+static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    ff_wmv2_idct_c(block);
+    add_pixels_clamped_c(block, dest, line_size);
+}
+static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct (block);
+    put_pixels_clamped_c(block, dest, line_size);
+}
+static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct (block);
+    add_pixels_clamped_c(block, dest, line_size);
+}
+
+static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct4 (block);
+    put_pixels_clamped4_c(block, dest, line_size);
+}
+static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct4 (block);
+    add_pixels_clamped4_c(block, dest, line_size);
+}
+
+static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct2 (block);
+    put_pixels_clamped2_c(block, dest, line_size);
+}
+static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    j_rev_dct2 (block);
+    add_pixels_clamped2_c(block, dest, line_size);
+}
+
+static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    dest[0] = cm[(block[0] + 4)>>3];
+}
+static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+
+    dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
+}
+
+static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
+
+/* init static data */
+av_cold void dsputil_static_init(void)
+{
+    int i;
+
+    for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
+    for(i=0;i<MAX_NEG_CROP;i++) {
+        ff_cropTbl[i] = 0;
+        ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
+    }
+
+    for(i=0;i<512;i++) {
+        ff_squareTbl[i] = (i - 256) * (i - 256);
+    }
+
+    for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
+}
+
+int ff_check_alignment(void){
+    static int did_fail=0;
+    DECLARE_ALIGNED(16, int, aligned);
+
+    if((intptr_t)&aligned & 15){
+        if(!did_fail){
+#if HAVE_MMX || HAVE_ALTIVEC
+            av_log(NULL, AV_LOG_ERROR,
+                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
+                "and may be very slow or crash. This is not a bug in libavcodec,\n"
+                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
+                "Do not report crashes to FFmpeg developers.\n");
+#endif
+            did_fail=1;
+        }
+        return -1;
+    }
+    return 0;
+}
+
+av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
+{
+    int i;
+
+    ff_check_alignment();
+
+#if CONFIG_ENCODERS
+    if(avctx->dct_algo==FF_DCT_FASTINT) {
+        c->fdct = fdct_ifast;
+        c->fdct248 = fdct_ifast248;
+    }
+    else if(avctx->dct_algo==FF_DCT_FAAN) {
+        c->fdct = ff_faandct;
+        c->fdct248 = ff_faandct248;
+    }
+    else {
+        c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
+        c->fdct248 = ff_fdct248_islow;
+    }
+#endif //CONFIG_ENCODERS
+
+    if(avctx->lowres==1){
+        if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO || !CONFIG_H264_DECODER){
+            c->idct_put= ff_jref_idct4_put;
+            c->idct_add= ff_jref_idct4_add;
+        }else{
+            c->idct_put= ff_h264_lowres_idct_put_c;
+            c->idct_add= ff_h264_lowres_idct_add_c;
+        }
+        c->idct    = j_rev_dct4;
+        c->idct_permutation_type= FF_NO_IDCT_PERM;
+    }else if(avctx->lowres==2){
+        c->idct_put= ff_jref_idct2_put;
+        c->idct_add= ff_jref_idct2_add;
+        c->idct    = j_rev_dct2;
+        c->idct_permutation_type= FF_NO_IDCT_PERM;
+    }else if(avctx->lowres==3){
+        c->idct_put= ff_jref_idct1_put;
+        c->idct_add= ff_jref_idct1_add;
+        c->idct    = j_rev_dct1;
+        c->idct_permutation_type= FF_NO_IDCT_PERM;
+    }else{
+        if(avctx->idct_algo==FF_IDCT_INT){
+            c->idct_put= ff_jref_idct_put;
+            c->idct_add= ff_jref_idct_add;
+            c->idct    = j_rev_dct;
+            c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
+        }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
+                avctx->idct_algo==FF_IDCT_VP3){
+            c->idct_put= ff_vp3_idct_put_c;
+            c->idct_add= ff_vp3_idct_add_c;
+            c->idct    = ff_vp3_idct_c;
+            c->idct_permutation_type= FF_NO_IDCT_PERM;
+        }else if(avctx->idct_algo==FF_IDCT_WMV2){
+            c->idct_put= ff_wmv2_idct_put_c;
+            c->idct_add= ff_wmv2_idct_add_c;
+            c->idct    = ff_wmv2_idct_c;
+            c->idct_permutation_type= FF_NO_IDCT_PERM;
+        }else if(avctx->idct_algo==FF_IDCT_FAAN){
+            c->idct_put= ff_faanidct_put;
+            c->idct_add= ff_faanidct_add;
+            c->idct    = ff_faanidct;
+            c->idct_permutation_type= FF_NO_IDCT_PERM;
+        }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
+            c->idct_put= ff_ea_idct_put_c;
+            c->idct_permutation_type= FF_NO_IDCT_PERM;
+        }else if(CONFIG_BINK_DECODER && avctx->idct_algo==FF_IDCT_BINK) {
+            c->idct     = ff_bink_idct_c;
+            c->idct_add = ff_bink_idct_add_c;
+            c->idct_put = ff_bink_idct_put_c;
+            c->idct_permutation_type = FF_NO_IDCT_PERM;
+        }else{ //accurate/default
+            c->idct_put= ff_simple_idct_put;
+            c->idct_add= ff_simple_idct_add;
+            c->idct    = ff_simple_idct;
+            c->idct_permutation_type= FF_NO_IDCT_PERM;
+        }
+    }
+
+    c->get_pixels = get_pixels_c;
+    c->diff_pixels = diff_pixels_c;
+    c->put_pixels_clamped = put_pixels_clamped_c;
+    c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
+    c->put_pixels_nonclamped = put_pixels_nonclamped_c;
+    c->add_pixels_clamped = add_pixels_clamped_c;
+    c->add_pixels8 = add_pixels8_c;
+    c->add_pixels4 = add_pixels4_c;
+    c->sum_abs_dctelem = sum_abs_dctelem_c;
+    c->gmc1 = gmc1_c;
+    c->gmc = ff_gmc_c;
+    c->clear_block = clear_block_c;
+    c->clear_blocks = clear_blocks_c;
+    c->pix_sum = pix_sum_c;
+    c->pix_norm1 = pix_norm1_c;
+
+    c->fill_block_tab[0] = fill_block16_c;
+    c->fill_block_tab[1] = fill_block8_c;
+    c->scale_block = scale_block_c;
+
+    /* TODO [0] 16  [1] 8 */
+    c->pix_abs[0][0] = pix_abs16_c;
+    c->pix_abs[0][1] = pix_abs16_x2_c;
+    c->pix_abs[0][2] = pix_abs16_y2_c;
+    c->pix_abs[0][3] = pix_abs16_xy2_c;
+    c->pix_abs[1][0] = pix_abs8_c;
+    c->pix_abs[1][1] = pix_abs8_x2_c;
+    c->pix_abs[1][2] = pix_abs8_y2_c;
+    c->pix_abs[1][3] = pix_abs8_xy2_c;
+
+#define dspfunc(PFX, IDX, NUM) \
+    c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c;     \
+    c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c;  \
+    c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c;  \
+    c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c
+
+    dspfunc(put, 0, 16);
+    dspfunc(put_no_rnd, 0, 16);
+    dspfunc(put, 1, 8);
+    dspfunc(put_no_rnd, 1, 8);
+    dspfunc(put, 2, 4);
+    dspfunc(put, 3, 2);
+
+    dspfunc(avg, 0, 16);
+    dspfunc(avg_no_rnd, 0, 16);
+    dspfunc(avg, 1, 8);
+    dspfunc(avg_no_rnd, 1, 8);
+    dspfunc(avg, 2, 4);
+    dspfunc(avg, 3, 2);
+#undef dspfunc
+
+    c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
+    c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;
+
+    c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
+    c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
+    c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
+    c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
+    c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
+    c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
+    c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
+    c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
+    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
+
+    c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
+    c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
+    c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
+    c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
+    c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
+    c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
+    c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
+    c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
+    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
+
+#define dspfunc(PFX, IDX, NUM) \
+    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
+    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
+    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
+    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
+    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
+    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
+    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
+    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
+    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
+    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
+    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
+    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
+    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
+    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
+    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
+    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
+
+    dspfunc(put_qpel, 0, 16);
+    dspfunc(put_no_rnd_qpel, 0, 16);
+
+    dspfunc(avg_qpel, 0, 16);
+    /* dspfunc(avg_no_rnd_qpel, 0, 16); */
+
+    dspfunc(put_qpel, 1, 8);
+    dspfunc(put_no_rnd_qpel, 1, 8);
+
+    dspfunc(avg_qpel, 1, 8);
+    /* dspfunc(avg_no_rnd_qpel, 1, 8); */
+
+    dspfunc(put_h264_qpel, 0, 16);
+    dspfunc(put_h264_qpel, 1, 8);
+    dspfunc(put_h264_qpel, 2, 4);
+    dspfunc(put_h264_qpel, 3, 2);
+    dspfunc(avg_h264_qpel, 0, 16);
+    dspfunc(avg_h264_qpel, 1, 8);
+    dspfunc(avg_h264_qpel, 2, 4);
+
+#undef dspfunc
+    c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
+    c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
+    c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
+    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
+    c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
+    c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
+    c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_no_rnd_vc1_chroma_mc8_c;
+    c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_no_rnd_vc1_chroma_mc8_c;
+
+    c->draw_edges = draw_edges_c;
+
+#if CONFIG_CAVS_DECODER
+    ff_cavsdsp_init(c,avctx);
+#endif
+
+#if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
+    ff_mlp_init(c, avctx);
+#endif
+#if CONFIG_VC1_DECODER
+    ff_vc1dsp_init(c,avctx);
+#endif
+#if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
+    ff_intrax8dsp_init(c,avctx);
+#endif
+#if CONFIG_RV30_DECODER
+    ff_rv30dsp_init(c,avctx);
+#endif
+#if CONFIG_RV40_DECODER
+    ff_rv40dsp_init(c,avctx);
+    c->put_rv40_qpel_pixels_tab[0][15] = put_rv40_qpel16_mc33_c;
+    c->avg_rv40_qpel_pixels_tab[0][15] = avg_rv40_qpel16_mc33_c;
+    c->put_rv40_qpel_pixels_tab[1][15] = put_rv40_qpel8_mc33_c;
+    c->avg_rv40_qpel_pixels_tab[1][15] = avg_rv40_qpel8_mc33_c;
+#endif
+
+    c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
+    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
+    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
+    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
+    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
+    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
+    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
+    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
+
+#define SET_CMP_FUNC(name) \
+    c->name[0]= name ## 16_c;\
+    c->name[1]= name ## 8x8_c;
+
+    SET_CMP_FUNC(hadamard8_diff)
+    c->hadamard8_diff[4]= hadamard8_intra16_c;
+    c->hadamard8_diff[5]= hadamard8_intra8x8_c;
+    SET_CMP_FUNC(dct_sad)
+    SET_CMP_FUNC(dct_max)
+#if CONFIG_GPL
+    SET_CMP_FUNC(dct264_sad)
+#endif
+    c->sad[0]= pix_abs16_c;
+    c->sad[1]= pix_abs8_c;
+    c->sse[0]= sse16_c;
+    c->sse[1]= sse8_c;
+    c->sse[2]= sse4_c;
+    SET_CMP_FUNC(quant_psnr)
+    SET_CMP_FUNC(rd)
+    SET_CMP_FUNC(bit)
+    c->vsad[0]= vsad16_c;
+    c->vsad[4]= vsad_intra16_c;
+    c->vsad[5]= vsad_intra8_c;
+    c->vsse[0]= vsse16_c;
+    c->vsse[4]= vsse_intra16_c;
+    c->vsse[5]= vsse_intra8_c;
+    c->nsse[0]= nsse16_c;
+    c->nsse[1]= nsse8_c;
+#if CONFIG_DWT
+    ff_dsputil_init_dwt(c);
+#endif
+
+    c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
+
+    c->add_bytes= add_bytes_c;
+    c->add_bytes_l2= add_bytes_l2_c;
+    c->diff_bytes= diff_bytes_c;
+    c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
+    c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
+    c->add_hfyu_left_prediction  = add_hfyu_left_prediction_c;
+    c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
+    c->bswap_buf= bswap_buf;
+#if CONFIG_PNG_DECODER
+    c->add_png_paeth_prediction= ff_add_png_paeth_prediction;
+#endif
+
+    if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
+        c->h263_h_loop_filter= h263_h_loop_filter_c;
+        c->h263_v_loop_filter= h263_v_loop_filter_c;
+    }
+
+    if (CONFIG_VP3_DECODER) {
+        c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
+        c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
+    }
+    if (CONFIG_VP6_DECODER) {
+        c->vp6_filter_diag4= ff_vp6_filter_diag4_c;
+    }
+
+    c->h261_loop_filter= h261_loop_filter_c;
+
+    c->try_8x8basis= try_8x8basis_c;
+    c->add_8x8basis= add_8x8basis_c;
+
+#if CONFIG_VORBIS_DECODER
+    c->vorbis_inverse_coupling = vorbis_inverse_coupling;
+#endif
+#if CONFIG_AC3_DECODER
+    c->ac3_downmix = ff_ac3_downmix_c;
+#endif
+#if CONFIG_LPC
+    c->lpc_compute_autocorr = ff_lpc_compute_autocorr;
+#endif
+    c->vector_fmul = vector_fmul_c;
+    c->vector_fmul_reverse = vector_fmul_reverse_c;
+    c->vector_fmul_add = vector_fmul_add_c;
+    c->vector_fmul_window = ff_vector_fmul_window_c;
+    c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
+    c->vector_clipf = vector_clipf_c;
+    c->float_to_int16 = ff_float_to_int16_c;
+    c->float_to_int16_interleave = ff_float_to_int16_interleave_c;
+    c->scalarproduct_int16 = scalarproduct_int16_c;
+    c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
+    c->scalarproduct_float = scalarproduct_float_c;
+    c->butterflies_float = butterflies_float_c;
+    c->vector_fmul_scalar = vector_fmul_scalar_c;
+
+    c->vector_fmul_sv_scalar[0] = vector_fmul_sv_scalar_2_c;
+    c->vector_fmul_sv_scalar[1] = vector_fmul_sv_scalar_4_c;
+
+    c->sv_fmul_scalar[0] = sv_fmul_scalar_2_c;
+    c->sv_fmul_scalar[1] = sv_fmul_scalar_4_c;
+
+    c->shrink[0]= ff_img_copy_plane;
+    c->shrink[1]= ff_shrink22;
+    c->shrink[2]= ff_shrink44;
+    c->shrink[3]= ff_shrink88;
+
+    c->prefetch= just_return;
+
+    memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
+    memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
+
+    if (HAVE_MMX)        dsputil_init_mmx   (c, avctx);
+    if (ARCH_ARM)        dsputil_init_arm   (c, avctx);
+    if (CONFIG_MLIB)     dsputil_init_mlib  (c, avctx);
+    if (HAVE_VIS)        dsputil_init_vis   (c, avctx);
+    if (ARCH_ALPHA)      dsputil_init_alpha (c, avctx);
+    if (ARCH_PPC)        dsputil_init_ppc   (c, avctx);
+    if (HAVE_MMI)        dsputil_init_mmi   (c, avctx);
+    if (ARCH_SH4)        dsputil_init_sh4   (c, avctx);
+    if (ARCH_BFIN)       dsputil_init_bfin  (c, avctx);
+
+    for(i=0; i<64; i++){
+        if(!c->put_2tap_qpel_pixels_tab[0][i])
+            c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
+        if(!c->avg_2tap_qpel_pixels_tab[0][i])
+            c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
+    }
+
+    switch(c->idct_permutation_type){
+    case FF_NO_IDCT_PERM:
+        for(i=0; i<64; i++)
+            c->idct_permutation[i]= i;
+        break;
+    case FF_LIBMPEG2_IDCT_PERM:
+        for(i=0; i<64; i++)
+            c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
+        break;
+    case FF_SIMPLE_IDCT_PERM:
+        for(i=0; i<64; i++)
+            c->idct_permutation[i]= simple_mmx_permutation[i];
+        break;
+    case FF_TRANSPOSE_IDCT_PERM:
+        for(i=0; i<64; i++)
+            c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
+        break;
+    case FF_PARTTRANS_IDCT_PERM:
+        for(i=0; i<64; i++)
+            c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
+        break;
+    case FF_SSE2_IDCT_PERM:
+        for(i=0; i<64; i++)
+            c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
+        break;
+    default:
+        av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
+    }
+}
+
diff --git a/apps/codecs/libwmapro/dsputil.h b/apps/codecs/libwmapro/dsputil.h
new file mode 100644
index 0000000000..d1816e66ba
--- /dev/null
+++ b/apps/codecs/libwmapro/dsputil.h
@@ -0,0 +1,808 @@
+/*
+ * DSP utils
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavcodec/dsputil.h
+ * DSP utils.
+ * note, many functions in here may use MMX which trashes the FPU state, it is
+ * absolutely necessary to call emms_c() between dsp & float/double code
+ */
+
+#ifndef AVCODEC_DSPUTIL_H
+#define AVCODEC_DSPUTIL_H
+
+#include "libavutil/intreadwrite.h"
+#include "avcodec.h"
+
+
+//#define DEBUG
+/* dct code */
+typedef short DCTELEM;
+
+void fdct_ifast (DCTELEM *data);
+void fdct_ifast248 (DCTELEM *data);
+void ff_jpeg_fdct_islow (DCTELEM *data);
+void ff_fdct248_islow (DCTELEM *data);
+
+void j_rev_dct (DCTELEM *data);
+void j_rev_dct4 (DCTELEM *data);
+void j_rev_dct2 (DCTELEM *data);
+void j_rev_dct1 (DCTELEM *data);
+void ff_wmv2_idct_c(DCTELEM *data);
+
+void ff_fdct_mmx(DCTELEM *block);
+void ff_fdct_mmx2(DCTELEM *block);
+void ff_fdct_sse2(DCTELEM *block);
+
+void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride);
+void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride);
+void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
+void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
+void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block);
+void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block);
+void ff_h264_idct_add16_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
+void ff_h264_idct_add16intra_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
+void ff_h264_idct8_add4_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
+void ff_h264_idct_add8_c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
+
+void ff_vector_fmul_window_c(float *dst, const float *src0, const float *src1,
+                             const float *win, float add_bias, int len);
+void ff_float_to_int16_c(int16_t *dst, const float *src, long len);
+void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels);
+
+/* encoding scans */
+extern const uint8_t ff_alternate_horizontal_scan[64];
+extern const uint8_t ff_alternate_vertical_scan[64];
+extern const uint8_t ff_zigzag_direct[64];
+extern const uint8_t ff_zigzag248_direct[64];
+
+/* pixel operations */
+#define MAX_NEG_CROP 1024
+
+/* temporary */
+extern uint32_t ff_squareTbl[512];
+extern uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP];
+
+/* VP3 DSP functions */
+void ff_vp3_idct_c(DCTELEM *block/* align 16*/);
+void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
+void ff_vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
+
+void ff_vp3_v_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
+void ff_vp3_h_loop_filter_c(uint8_t *src, int stride, int *bounding_values);
+
+/* VP6 DSP functions */
+void ff_vp6_filter_diag4_c(uint8_t *dst, uint8_t *src, int stride,
+                           const int16_t *h_weights, const int16_t *v_weights);
+
+/* Bink functions */
+void ff_bink_idct_c    (DCTELEM *block);
+void ff_bink_idct_add_c(uint8_t *dest, int linesize, DCTELEM *block);
+void ff_bink_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);
+
+/* CAVS functions */
+void ff_put_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride);
+void ff_avg_cavs_qpel8_mc00_c(uint8_t *dst, uint8_t *src, int stride);
+void ff_put_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride);
+void ff_avg_cavs_qpel16_mc00_c(uint8_t *dst, uint8_t *src, int stride);
+
+/* VC1 functions */
+void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd);
+void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd);
+
+/* EA functions */
+void ff_ea_idct_put_c(uint8_t *dest, int linesize, DCTELEM *block);
+
+/* 1/2^n downscaling functions from imgconvert.c */
+void ff_img_copy_plane(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
+void ff_shrink22(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
+void ff_shrink44(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
+void ff_shrink88(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
+
+void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
+              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
+
+/* minimum alignment rules ;)
+If you notice errors in the align stuff, need more alignment for some ASM code
+for some CPU or need to use a function with less aligned data then send a mail
+to the ffmpeg-devel mailing list, ...
+
+!warning These alignments might not match reality, (missing attribute((align))
+stuff somewhere possible).
+I (Michael) did not check them, these are just the alignments which I think
+could be reached easily ...
+
+!future video codecs might need functions with less strict alignment
+*/
+
+/*
+void get_pixels_c(DCTELEM *block, const uint8_t *pixels, int line_size);
+void diff_pixels_c(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride);
+void put_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
+void add_pixels_clamped_c(const DCTELEM *block, uint8_t *pixels, int line_size);
+void clear_blocks_c(DCTELEM *blocks);
+*/
+
+/* add and put pixel (decoding) */
+// blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16
+//h for op_pixels_func is limited to {width/2, width} but never larger than 16 and never smaller then 4
+typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h);
+typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h);
+typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
+typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
+
+typedef void (*op_fill_func)(uint8_t *block/*align width (8 or 16)*/, uint8_t value, int line_size, int h);
+
+#define DEF_OLD_QPEL(name)\
+void ff_put_        ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
+void ff_put_no_rnd_ ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
+void ff_avg_        ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
+
+DEF_OLD_QPEL(qpel16_mc11_old_c)
+DEF_OLD_QPEL(qpel16_mc31_old_c)
+DEF_OLD_QPEL(qpel16_mc12_old_c)
+DEF_OLD_QPEL(qpel16_mc32_old_c)
+DEF_OLD_QPEL(qpel16_mc13_old_c)
+DEF_OLD_QPEL(qpel16_mc33_old_c)
+DEF_OLD_QPEL(qpel8_mc11_old_c)
+DEF_OLD_QPEL(qpel8_mc31_old_c)
+DEF_OLD_QPEL(qpel8_mc12_old_c)
+DEF_OLD_QPEL(qpel8_mc32_old_c)
+DEF_OLD_QPEL(qpel8_mc13_old_c)
+DEF_OLD_QPEL(qpel8_mc33_old_c)
+
+#define CALL_2X_PIXELS(a, b, n)\
+static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
+    b(block  , pixels  , line_size, h);\
+    b(block+n, pixels+n, line_size, h);\
+}
+
+/* motion estimation */
+// h is limited to {width/2, width, 2*width} but never larger than 16 and never smaller then 2
+// although currently h<4 is not used as functions with width <8 are neither used nor implemented
+typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/;
+
+/**
+ * Scantable.
+ */
+typedef struct ScanTable{
+    const uint8_t *scantable;
+    uint8_t permutated[64];
+    uint8_t raster_end[64];
+#if ARCH_PPC
+                /** Used by dct_quantize_altivec to find last-non-zero */
+    DECLARE_ALIGNED(16, uint8_t, inverse)[64];
+#endif
+} ScanTable;
+
+void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);
+
+void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize,
+                         int block_w, int block_h,
+                         int src_x, int src_y, int w, int h);
+
+/**
+ * DSPContext.
+ */
+typedef struct DSPContext {
+    /* pixel ops : interface with DCT */
+    void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
+    void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
+    void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
+    void (*put_signed_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
+    void (*put_pixels_nonclamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
+    void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
+    void (*add_pixels8)(uint8_t *pixels, DCTELEM *block, int line_size);
+    void (*add_pixels4)(uint8_t *pixels, DCTELEM *block, int line_size);
+    int (*sum_abs_dctelem)(DCTELEM *block/*align 16*/);
+    /**
+     * translational global motion compensation.
+     */
+    void (*gmc1)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder);
+    /**
+     * global motion compensation.
+     */
+    void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy,
+                    int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
+    void (*clear_block)(DCTELEM *block/*align 16*/);
+    void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
+    int (*pix_sum)(uint8_t * pix, int line_size);
+    int (*pix_norm1)(uint8_t * pix, int line_size);
+// 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4
+
+    me_cmp_func sad[6]; /* identical to pix_absAxA except additional void * */
+    me_cmp_func sse[6];
+    me_cmp_func hadamard8_diff[6];
+    me_cmp_func dct_sad[6];
+    me_cmp_func quant_psnr[6];
+    me_cmp_func bit[6];
+    me_cmp_func rd[6];
+    me_cmp_func vsad[6];
+    me_cmp_func vsse[6];
+    me_cmp_func nsse[6];
+    me_cmp_func w53[6];
+    me_cmp_func w97[6];
+    me_cmp_func dct_max[6];
+    me_cmp_func dct264_sad[6];
+
+    me_cmp_func me_pre_cmp[6];
+    me_cmp_func me_cmp[6];
+    me_cmp_func me_sub_cmp[6];
+    me_cmp_func mb_cmp[6];
+    me_cmp_func ildct_cmp[6]; //only width 16 used
+    me_cmp_func frame_skip_cmp[6]; //only width 8 used
+
+    int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2,
+                             int size);
+
+    /**
+     * Halfpel motion compensation with rounding (a+b+1)>>1.
+     * this is an array[4][4] of motion compensation functions for 4
+     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+     * @param block destination where the result is stored
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    op_pixels_func put_pixels_tab[4][4];
+
+    /**
+     * Halfpel motion compensation with rounding (a+b+1)>>1.
+     * This is an array[4][4] of motion compensation functions for 4
+     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+     * @param block destination into which the result is averaged (a+b+1)>>1
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    op_pixels_func avg_pixels_tab[4][4];
+
+    /**
+     * Halfpel motion compensation with no rounding (a+b)>>1.
+     * this is an array[2][4] of motion compensation functions for 2
+     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+     * @param block destination where the result is stored
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    op_pixels_func put_no_rnd_pixels_tab[4][4];
+
+    /**
+     * Halfpel motion compensation with no rounding (a+b)>>1.
+     * this is an array[2][4] of motion compensation functions for 2
+     * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
+     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
+     * @param block destination into which the result is averaged (a+b)>>1
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    op_pixels_func avg_no_rnd_pixels_tab[4][4];
+
+    void (*put_no_rnd_pixels_l2[2])(uint8_t *block/*align width (8 or 16)*/, const uint8_t *a/*align 1*/, const uint8_t *b/*align 1*/, int line_size, int h);
+
+    /**
+     * Thirdpel motion compensation with rounding (a+b+1)>>1.
+     * this is an array[12] of motion compensation functions for the 9 thirdpe
+     * positions<br>
+     * *pixels_tab[ xthirdpel + 4*ythirdpel ]
+     * @param block destination where the result is stored
+     * @param pixels source
+     * @param line_size number of bytes in a horizontal line of block
+     * @param h height
+     */
+    tpel_mc_func put_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
+    tpel_mc_func avg_tpel_pixels_tab[11]; //FIXME individual func ptr per width?
+
+    qpel_mc_func put_qpel_pixels_tab[2][16];
+    qpel_mc_func avg_qpel_pixels_tab[2][16];
+    qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
+    qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
+    qpel_mc_func put_mspel_pixels_tab[8];
+
+    /**
+     * h264 Chroma MC
+     */
+    h264_chroma_mc_func put_h264_chroma_pixels_tab[3];
+    h264_chroma_mc_func avg_h264_chroma_pixels_tab[3];
+    /* This is really one func used in VC-1 decoding */
+    h264_chroma_mc_func put_no_rnd_vc1_chroma_pixels_tab[3];
+    h264_chroma_mc_func avg_no_rnd_vc1_chroma_pixels_tab[3];
+
+    qpel_mc_func put_h264_qpel_pixels_tab[4][16];
+    qpel_mc_func avg_h264_qpel_pixels_tab[4][16];
+
+    qpel_mc_func put_2tap_qpel_pixels_tab[4][16];
+    qpel_mc_func avg_2tap_qpel_pixels_tab[4][16];
+
+    /* AVS specific */
+    qpel_mc_func put_cavs_qpel_pixels_tab[2][16];
+    qpel_mc_func avg_cavs_qpel_pixels_tab[2][16];
+    void (*cavs_filter_lv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
+    void (*cavs_filter_lh)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
+    void (*cavs_filter_cv)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
+    void (*cavs_filter_ch)(uint8_t *pix, int stride, int alpha, int beta, int tc, int bs1, int bs2);
+    void (*cavs_idct8_add)(uint8_t *dst, DCTELEM *block, int stride);
+
+    me_cmp_func pix_abs[2][4];
+
+    /* huffyuv specific */
+    void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
+    void (*add_bytes_l2)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 16*/, int w);
+    void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w);
+    /**
+     * subtract huffyuv's variant of median prediction
+     * note, this might read from src1[-1], src2[-1]
+     */
+    void (*sub_hfyu_median_prediction)(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top);
+    void (*add_hfyu_median_prediction)(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top);
+    int  (*add_hfyu_left_prediction)(uint8_t *dst, const uint8_t *src, int w, int left);
+    void (*add_hfyu_left_prediction_bgr32)(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha);
+    /* this might write to dst[w] */
+    void (*add_png_paeth_prediction)(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
+    void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w);
+
+    void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
+    void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
+
+    void (*h261_loop_filter)(uint8_t *src, int stride);
+
+    void (*x8_v_loop_filter)(uint8_t *src, int stride, int qscale);
+    void (*x8_h_loop_filter)(uint8_t *src, int stride, int qscale);
+
+    void (*vp3_v_loop_filter)(uint8_t *src, int stride, int *bounding_values);
+    void (*vp3_h_loop_filter)(uint8_t *src, int stride, int *bounding_values);
+
+    void (*vp6_filter_diag4)(uint8_t *dst, uint8_t *src, int stride,
+                             const int16_t *h_weights,const int16_t *v_weights);
+
+    /* assume len is a multiple of 4, and arrays are 16-byte aligned */
+    void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
+    void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
+    /* no alignment needed */
+    void (*lpc_compute_autocorr)(const int32_t *data, int len, int lag, double *autoc);
+    /* assume len is a multiple of 8, and arrays are 16-byte aligned */
+    void (*vector_fmul)(float *dst, const float *src, int len);
+    void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len);
+    /* assume len is a multiple of 8, and src arrays are 16-byte aligned */
+    void (*vector_fmul_add)(float *dst, const float *src0, const float *src1, const float *src2, int len);
+    /* assume len is a multiple of 4, and arrays are 16-byte aligned */
+    void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len);
+    /* assume len is a multiple of 8, and arrays are 16-byte aligned */
+    void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len);
+    void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
+    /**
+     * Multiply a vector of floats by a scalar float.  Source and
+     * destination vectors must overlap exactly or not at all.
+     * @param dst result vector, 16-byte aligned
+     * @param src input vector, 16-byte aligned
+     * @param mul scalar value
+     * @param len length of vector, multiple of 4
+     */
+    void (*vector_fmul_scalar)(float *dst, const float *src, float mul,
+                               int len);
+    /**
+     * Multiply a vector of floats by concatenated short vectors of
+     * floats and by a scalar float.  Source and destination vectors
+     * must overlap exactly or not at all.
+     * [0]: short vectors of length 2, 8-byte aligned
+     * [1]: short vectors of length 4, 16-byte aligned
+     * @param dst output vector, 16-byte aligned
+     * @param src input vector, 16-byte aligned
+     * @param sv  array of pointers to short vectors
+     * @param mul scalar value
+     * @param len number of elements in src and dst, multiple of 4
+     */
+    void (*vector_fmul_sv_scalar[2])(float *dst, const float *src,
+                                     const float **sv, float mul, int len);
+    /**
+     * Multiply short vectors of floats by a scalar float, store
+     * concatenated result.
+     * [0]: short vectors of length 2, 8-byte aligned
+     * [1]: short vectors of length 4, 16-byte aligned
+     * @param dst output vector, 16-byte aligned
+     * @param sv  array of pointers to short vectors
+     * @param mul scalar value
+     * @param len number of output elements, multiple of 4
+     */
+    void (*sv_fmul_scalar[2])(float *dst, const float **sv,
+                              float mul, int len);
+    /**
+     * Calculate the scalar product of two vectors of floats.
+     * @param v1  first vector, 16-byte aligned
+     * @param v2  second vector, 16-byte aligned
+     * @param len length of vectors, multiple of 4
+     */
+    float (*scalarproduct_float)(const float *v1, const float *v2, int len);
+    /**
+     * Calculate the sum and difference of two vectors of floats.
+     * @param v1  first input vector, sum output, 16-byte aligned
+     * @param v2  second input vector, difference output, 16-byte aligned
+     * @param len length of vectors, multiple of 4
+     */
+    void (*butterflies_float)(float *restrict v1, float *restrict v2, int len);
+
+    /* C version: convert floats from the range [384.0,386.0] to ints in [-32768,32767]
+     * simd versions: convert floats from [-32768.0,32767.0] without rescaling and arrays are 16byte aligned */
+    void (*float_to_int16)(int16_t *dst, const float *src, long len);
+    void (*float_to_int16_interleave)(int16_t *dst, const float **src, long len, int channels);
+
+    /* (I)DCT */
+    void (*fdct)(DCTELEM *block/* align 16*/);
+    void (*fdct248)(DCTELEM *block/* align 16*/);
+
+    /* IDCT really*/
+    void (*idct)(DCTELEM *block/* align 16*/);
+
+    /**
+     * block -> idct -> clip to unsigned 8 bit -> dest.
+     * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...)
+     * @param line_size size in bytes of a horizontal line of dest
+     */
+    void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
+
+    /**
+     * block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
+     * @param line_size size in bytes of a horizontal line of dest
+     */
+    void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
+
+    /**
+     * idct input permutation.
+     * several optimized IDCTs need a permutated input (relative to the normal order of the reference
+     * IDCT)
+     * this permutation must be performed before the idct_put/add, note, normally this can be merged
+     * with the zigzag/alternate scan<br>
+     * an example to avoid confusion:
+     * - (->decode coeffs -> zigzag reorder -> dequant -> reference idct ->...)
+     * - (x -> referece dct -> reference idct -> x)
+     * - (x -> referece dct -> simple_mmx_perm = idct_permutation -> simple_idct_mmx -> x)
+     * - (->decode coeffs -> zigzag reorder -> simple_mmx_perm -> dequant -> simple_idct_mmx ->...)
+     */
+    uint8_t idct_permutation[64];
+    int idct_permutation_type;
+#define FF_NO_IDCT_PERM 1
+#define FF_LIBMPEG2_IDCT_PERM 2
+#define FF_SIMPLE_IDCT_PERM 3
+#define FF_TRANSPOSE_IDCT_PERM 4
+#define FF_PARTTRANS_IDCT_PERM 5
+#define FF_SSE2_IDCT_PERM 6
+
+    int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale);
+    void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
+#define BASIS_SHIFT 16
+#define RECON_SHIFT 6
+
+    void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w);
+#define EDGE_WIDTH 16
+
+    void (*prefetch)(void *mem, int stride, int h);
+
+    void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, int src_wrap, int width, int height);
+
+    /* mlp/truehd functions */
+    void (*mlp_filter_channel)(int32_t *state, const int32_t *coeff,
+                               int firorder, int iirorder,
+                               unsigned int filter_shift, int32_t mask, int blocksize,
+                               int32_t *sample_buffer);
+
+    /* vc1 functions */
+    void (*vc1_inv_trans_8x8)(DCTELEM *b);
+    void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block);
+    void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block);
+    void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block);
+    void (*vc1_inv_trans_8x8_dc)(uint8_t *dest, int line_size, DCTELEM *block);
+    void (*vc1_inv_trans_8x4_dc)(uint8_t *dest, int line_size, DCTELEM *block);
+    void (*vc1_inv_trans_4x8_dc)(uint8_t *dest, int line_size, DCTELEM *block);
+    void (*vc1_inv_trans_4x4_dc)(uint8_t *dest, int line_size, DCTELEM *block);
+    void (*vc1_v_overlap)(uint8_t* src, int stride);
+    void (*vc1_h_overlap)(uint8_t* src, int stride);
+    void (*vc1_v_loop_filter4)(uint8_t *src, int stride, int pq);
+    void (*vc1_h_loop_filter4)(uint8_t *src, int stride, int pq);
+    void (*vc1_v_loop_filter8)(uint8_t *src, int stride, int pq);
+    void (*vc1_h_loop_filter8)(uint8_t *src, int stride, int pq);
+    void (*vc1_v_loop_filter16)(uint8_t *src, int stride, int pq);
+    void (*vc1_h_loop_filter16)(uint8_t *src, int stride, int pq);
+    /* put 8x8 block with bicubic interpolation and quarterpel precision
+     * last argument is actually round value instead of height
+     */
+    op_pixels_func put_vc1_mspel_pixels_tab[16];
+    op_pixels_func avg_vc1_mspel_pixels_tab[16];
+
+    /* intrax8 functions */
+    void (*x8_spatial_compensation[12])(uint8_t *src , uint8_t *dst, int linesize);
+    void (*x8_setup_spatial_compensation)(uint8_t *src, uint8_t *dst, int linesize,
+           int * range, int * sum,  int edges);
+
+    /**
+     * Calculate scalar product of two vectors.
+     * @param len length of vectors, should be multiple of 16
+     * @param shift number of bits to discard from product
+     */
+    int32_t (*scalarproduct_int16)(int16_t *v1, int16_t *v2/*align 16*/, int len, int shift);
+    /* ape functions */
+    /**
+     * Calculate scalar product of v1 and v2,
+     * and v1[i] += v3[i] * mul
+     * @param len length of vectors, should be multiple of 16
+     */
+    int32_t (*scalarproduct_and_madd_int16)(int16_t *v1/*align 16*/, int16_t *v2, int16_t *v3, int len, int mul);
+
+    /* rv30 functions */
+    qpel_mc_func put_rv30_tpel_pixels_tab[4][16];
+    qpel_mc_func avg_rv30_tpel_pixels_tab[4][16];
+
+    /* rv40 functions */
+    qpel_mc_func put_rv40_qpel_pixels_tab[4][16];
+    qpel_mc_func avg_rv40_qpel_pixels_tab[4][16];
+    h264_chroma_mc_func put_rv40_chroma_pixels_tab[3];
+    h264_chroma_mc_func avg_rv40_chroma_pixels_tab[3];
+
+    /* bink functions */
+    op_fill_func fill_block_tab[2];
+    void (*scale_block)(const uint8_t src[64]/*align 8*/, uint8_t *dst/*align 8*/, int linesize);
+} DSPContext;
+
+void dsputil_static_init(void);
+void dsputil_init(DSPContext* p, AVCodecContext *avctx);
+
+int ff_check_alignment(void);
+
+/**
+ * permute block according to permuatation.
+ * @param last last non zero element in scantable order
+ */
+void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last);
+
+void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);
+
+#define         BYTE_VEC32(c)   ((c)*0x01010101UL)
+
+static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
+{
+    return (a | b) - (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
+}
+
+static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
+{
+    return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
+}
+
+static inline int get_penalty_factor(int lambda, int lambda2, int type){
+    switch(type&0xFF){
+    default:
+    case FF_CMP_SAD:
+        return lambda>>FF_LAMBDA_SHIFT;
+    case FF_CMP_DCT:
+        return (3*lambda)>>(FF_LAMBDA_SHIFT+1);
+    case FF_CMP_W53:
+        return (4*lambda)>>(FF_LAMBDA_SHIFT);
+    case FF_CMP_W97:
+        return (2*lambda)>>(FF_LAMBDA_SHIFT);
+    case FF_CMP_SATD:
+    case FF_CMP_DCT264:
+        return (2*lambda)>>FF_LAMBDA_SHIFT;
+    case FF_CMP_RD:
+    case FF_CMP_PSNR:
+    case FF_CMP_SSE:
+    case FF_CMP_NSSE:
+        return lambda2>>FF_LAMBDA_SHIFT;
+    case FF_CMP_BIT:
+        return 1;
+    }
+}
+
+/**
+ * Empty mmx state.
+ * this must be called between any dsp function and float/double code.
+ * for example sin(); dsp->idct_put(); emms_c(); cos()
+ */
+#define emms_c()
+
+/* should be defined by architectures supporting
+   one or more MultiMedia extension */
+int mm_support(void);
+extern int mm_flags;
+
+void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx);
+
+void ff_dsputil_init_dwt(DSPContext *c);
+void ff_cavsdsp_init(DSPContext* c, AVCodecContext *avctx);
+void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx);
+void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx);
+void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx);
+void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
+void ff_mlp_init(DSPContext* c, AVCodecContext *avctx);
+void ff_mlp_init_x86(DSPContext* c, AVCodecContext *avctx);
+
+#if HAVE_MMX
+
+#undef emms_c
+
+static inline void emms(void)
+{
+    __asm__ volatile ("emms;":::"memory");
+}
+
+
+#define emms_c() \
+{\
+    if (mm_flags & FF_MM_MMX)\
+        emms();\
+}
+
+#elif ARCH_ARM
+
+#if HAVE_NEON
+#   define STRIDE_ALIGN 16
+#endif
+
+#elif ARCH_PPC
+
+#define STRIDE_ALIGN 16
+
+#elif HAVE_MMI
+
+#define STRIDE_ALIGN 16
+
+#else
+
+#define mm_flags 0
+#define mm_support() 0
+
+#endif
+
+#ifndef STRIDE_ALIGN
+#   define STRIDE_ALIGN 8
+#endif
+
+#define LOCAL_ALIGNED(a, t, v, s, ...)                          \
+    uint8_t la_##v[sizeof(t s __VA_ARGS__) + (a)];              \
+    t (*v) __VA_ARGS__ = (void *)FFALIGN((uintptr_t)la_##v, a)
+
+#if HAVE_LOCAL_ALIGNED_8
+#   define LOCAL_ALIGNED_8(t, v, s, ...) DECLARE_ALIGNED(8, t, v) s __VA_ARGS__
+#else
+#   define LOCAL_ALIGNED_8(t, v, s, ...) LOCAL_ALIGNED(8, t, v, s, __VA_ARGS__)
+#endif
+
+#if HAVE_LOCAL_ALIGNED_16
+#   define LOCAL_ALIGNED_16(t, v, s, ...) DECLARE_ALIGNED(16, t, v) s __VA_ARGS__
+#else
+#   define LOCAL_ALIGNED_16(t, v, s, ...) LOCAL_ALIGNED(16, t, v, s, __VA_ARGS__)
+#endif
+
+/* PSNR */
+void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3],
+              int orig_linesize[3], int coded_linesize,
+              AVCodecContext *avctx);
+
+#define WRAPPER8_16(name8, name16)\
+static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
+    return name8(s, dst           , src           , stride, h)\
+          +name8(s, dst+8         , src+8         , stride, h);\
+}
+
+#define WRAPPER8_16_SQ(name8, name16)\
+static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
+    int score=0;\
+    score +=name8(s, dst           , src           , stride, 8);\
+    score +=name8(s, dst+8         , src+8         , stride, 8);\
+    if(h==16){\
+        dst += 8*stride;\
+        src += 8*stride;\
+        score +=name8(s, dst           , src           , stride, 8);\
+        score +=name8(s, dst+8         , src+8         , stride, 8);\
+    }\
+    return score;\
+}
+
+
+static inline void copy_block2(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+{
+    int i;
+    for(i=0; i<h; i++)
+    {
+        AV_WN16(dst   , AV_RN16(src   ));
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
+
+static inline void copy_block4(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+{
+    int i;
+    for(i=0; i<h; i++)
+    {
+        AV_WN32(dst   , AV_RN32(src   ));
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
+
+static inline void copy_block8(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+{
+    int i;
+    for(i=0; i<h; i++)
+    {
+        AV_WN32(dst   , AV_RN32(src   ));
+        AV_WN32(dst+4 , AV_RN32(src+4 ));
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
+
+static inline void copy_block9(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+{
+    int i;
+    for(i=0; i<h; i++)
+    {
+        AV_WN32(dst   , AV_RN32(src   ));
+        AV_WN32(dst+4 , AV_RN32(src+4 ));
+        dst[8]= src[8];
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
+
+static inline void copy_block16(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+{
+    int i;
+    for(i=0; i<h; i++)
+    {
+        AV_WN32(dst   , AV_RN32(src   ));
+        AV_WN32(dst+4 , AV_RN32(src+4 ));
+        AV_WN32(dst+8 , AV_RN32(src+8 ));
+        AV_WN32(dst+12, AV_RN32(src+12));
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
+
+static inline void copy_block17(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
+{
+    int i;
+    for(i=0; i<h; i++)
+    {
+        AV_WN32(dst   , AV_RN32(src   ));
+        AV_WN32(dst+4 , AV_RN32(src+4 ));
+        AV_WN32(dst+8 , AV_RN32(src+8 ));
+        AV_WN32(dst+12, AV_RN32(src+12));
+        dst[16]= src[16];
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
+
+#endif /* AVCODEC_DSPUTIL_H */
diff --git a/apps/codecs/libwmapro/fft.c b/apps/codecs/libwmapro/fft.c
new file mode 100644
index 0000000000..7275d98e9f
--- /dev/null
+++ b/apps/codecs/libwmapro/fft.c
@@ -0,0 +1,368 @@
+/*
+ * FFT/IFFT transforms
+ * Copyright (c) 2008 Loren Merritt
+ * Copyright (c) 2002 Fabrice Bellard
+ * Partly based on libdjbfft by D. J. Bernstein
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavcodec/fft.c
+ * FFT/IFFT transforms.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "libavutil/mathematics.h"
+#include "fft.h"
+
+/* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */
+#if !CONFIG_HARDCODED_TABLES
+COSTABLE(16);
+COSTABLE(32);
+COSTABLE(64);
+COSTABLE(128);
+COSTABLE(256);
+COSTABLE(512);
+COSTABLE(1024);
+COSTABLE(2048);
+COSTABLE(4096);
+COSTABLE(8192);
+COSTABLE(16384);
+COSTABLE(32768);
+COSTABLE(65536);
+#endif
+COSTABLE_CONST FFTSample * const ff_cos_tabs[] = {
+    NULL, NULL, NULL, NULL,
+    ff_cos_16, ff_cos_32, ff_cos_64, ff_cos_128, ff_cos_256, ff_cos_512, ff_cos_1024,
+    ff_cos_2048, ff_cos_4096, ff_cos_8192, ff_cos_16384, ff_cos_32768, ff_cos_65536,
+};
+
+static int split_radix_permutation(int i, int n, int inverse)
+{
+    int m;
+    if(n <= 2) return i&1;
+    m = n >> 1;
+    if(!(i&m))            return split_radix_permutation(i, m, inverse)*2;
+    m >>= 1;
+    if(inverse == !(i&m)) return split_radix_permutation(i, m, inverse)*4 + 1;
+    else                  return split_radix_permutation(i, m, inverse)*4 - 1;
+}
+
+av_cold void ff_init_ff_cos_tabs(int index)
+{
+#if !CONFIG_HARDCODED_TABLES
+    int i;
+    int m = 1<<index;
+    double freq = 2*M_PI/m;
+    FFTSample *tab = ff_cos_tabs[index];
+    for(i=0; i<=m/4; i++)
+        tab[i] = cos(i*freq);
+    for(i=1; i<m/4; i++)
+        tab[m/2-i] = tab[i];
+#endif
+}
+
+av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse)
+{
+    int i, j, m, n;
+    float alpha, c1, s1, s2;
+    int av_unused has_vectors;
+
+    if (nbits < 2 || nbits > 16)
+        goto fail;
+    s->nbits = nbits;
+    n = 1 << nbits;
+
+    s->tmp_buf = NULL;
+    s->exptab  = av_malloc((n / 2) * sizeof(FFTComplex));
+    if (!s->exptab)
+        goto fail;
+    s->revtab = av_malloc(n * sizeof(uint16_t));
+    if (!s->revtab)
+        goto fail;
+    s->inverse = inverse;
+
+    s2 = inverse ? 1.0 : -1.0;
+
+    s->fft_permute = ff_fft_permute_c;
+    s->fft_calc    = ff_fft_calc_c;
+#if CONFIG_MDCT
+    s->imdct_calc  = ff_imdct_calc_c;
+    s->imdct_half  = ff_imdct_half_c;
+    s->mdct_calc   = ff_mdct_calc_c;
+#endif
+    s->exptab1     = NULL;
+    s->split_radix = 1;
+
+    if (ARCH_ARM)     ff_fft_init_arm(s);
+    if (HAVE_ALTIVEC) ff_fft_init_altivec(s);
+    if (HAVE_MMX)     ff_fft_init_mmx(s);
+
+    if (s->split_radix) {
+        for(j=4; j<=nbits; j++) {
+            ff_init_ff_cos_tabs(j);
+        }
+        for(i=0; i<n; i++)
+            s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = i;
+        s->tmp_buf = av_malloc(n * sizeof(FFTComplex));
+    } else {
+        int np, nblocks, np2, l;
+        FFTComplex *q;
+
+        for(i=0; i<(n/2); i++) {
+            alpha = 2 * M_PI * (float)i / (float)n;
+            c1 = cos(alpha);
+            s1 = sin(alpha) * s2;
+            s->exptab[i].re = c1;
+            s->exptab[i].im = s1;
+        }
+
+        np = 1 << nbits;
+        nblocks = np >> 3;
+        np2 = np >> 1;
+        s->exptab1 = av_malloc(np * 2 * sizeof(FFTComplex));
+        if (!s->exptab1)
+            goto fail;
+        q = s->exptab1;
+        do {
+            for(l = 0; l < np2; l += 2 * nblocks) {
+                *q++ = s->exptab[l];
+                *q++ = s->exptab[l + nblocks];
+
+                q->re = -s->exptab[l].im;
+                q->im = s->exptab[l].re;
+                q++;
+                q->re = -s->exptab[l + nblocks].im;
+                q->im = s->exptab[l + nblocks].re;
+                q++;
+            }
+            nblocks = nblocks >> 1;
+        } while (nblocks != 0);
+        av_freep(&s->exptab);
+
+        /* compute bit reverse table */
+        for(i=0;i<n;i++) {
+            m=0;
+            for(j=0;j<nbits;j++) {
+                m |= ((i >> j) & 1) << (nbits-j-1);
+            }
+            s->revtab[i]=m;
+        }
+    }
+
+    return 0;
+ fail:
+    av_freep(&s->revtab);
+    av_freep(&s->exptab);
+    av_freep(&s->exptab1);
+    av_freep(&s->tmp_buf);
+    return -1;
+}
+
+void ff_fft_permute_c(FFTContext *s, FFTComplex *z)
+{
+    int j, k, np;
+    FFTComplex tmp;
+    const uint16_t *revtab = s->revtab;
+    np = 1 << s->nbits;
+
+    if (s->tmp_buf) {
+        /* TODO: handle split-radix permute in a more optimal way, probably in-place */
+        for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j];
+        memcpy(z, s->tmp_buf, np * sizeof(FFTComplex));
+        return;
+    }
+
+    /* reverse */
+    for(j=0;j<np;j++) {
+        k = revtab[j];
+        if (k < j) {
+            tmp = z[k];
+            z[k] = z[j];
+            z[j] = tmp;
+        }
+    }
+}
+
+av_cold void ff_fft_end(FFTContext *s)
+{
+    av_freep(&s->revtab);
+    av_freep(&s->exptab);
+    av_freep(&s->exptab1);
+    av_freep(&s->tmp_buf);
+}
+
+#define sqrthalf (float)M_SQRT1_2
+
+#define BF(x,y,a,b) {\
+    x = a - b;\
+    y = a + b;\
+}
+
+#define BUTTERFLIES(a0,a1,a2,a3) {\
+    BF(t3, t5, t5, t1);\
+    BF(a2.re, a0.re, a0.re, t5);\
+    BF(a3.im, a1.im, a1.im, t3);\
+    BF(t4, t6, t2, t6);\
+    BF(a3.re, a1.re, a1.re, t4);\
+    BF(a2.im, a0.im, a0.im, t6);\
+}
+
+// force loading all the inputs before storing any.
+// this is slightly slower for small data, but avoids store->load aliasing
+// for addresses separated by large powers of 2.
+#define BUTTERFLIES_BIG(a0,a1,a2,a3) {\
+    FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\
+    BF(t3, t5, t5, t1);\
+    BF(a2.re, a0.re, r0, t5);\
+    BF(a3.im, a1.im, i1, t3);\
+    BF(t4, t6, t2, t6);\
+    BF(a3.re, a1.re, r1, t4);\
+    BF(a2.im, a0.im, i0, t6);\
+}
+
+#define TRANSFORM(a0,a1,a2,a3,wre,wim) {\
+    t1 = a2.re * wre + a2.im * wim;\
+    t2 = a2.im * wre - a2.re * wim;\
+    t5 = a3.re * wre - a3.im * wim;\
+    t6 = a3.im * wre + a3.re * wim;\
+    BUTTERFLIES(a0,a1,a2,a3)\
+}
+
+#define TRANSFORM_ZERO(a0,a1,a2,a3) {\
+    t1 = a2.re;\
+    t2 = a2.im;\
+    t5 = a3.re;\
+    t6 = a3.im;\
+    BUTTERFLIES(a0,a1,a2,a3)\
+}
+
+/* z[0...8n-1], w[1...2n-1] */
+#define PASS(name)\
+static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\
+{\
+    FFTSample t1, t2, t3, t4, t5, t6;\
+    int o1 = 2*n;\
+    int o2 = 4*n;\
+    int o3 = 6*n;\
+    const FFTSample *wim = wre+o1;\
+    n--;\
+\
+    TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\
+    TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
+    do {\
+        z += 2;\
+        wre += 2;\
+        wim -= 2;\
+        TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\
+        TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\
+    } while(--n);\
+}
+
+PASS(pass)
+#undef BUTTERFLIES
+#define BUTTERFLIES BUTTERFLIES_BIG
+PASS(pass_big)
+
+#define DECL_FFT(n,n2,n4)\
+static void fft##n(FFTComplex *z)\
+{\
+    fft##n2(z);\
+    fft##n4(z+n4*2);\
+    fft##n4(z+n4*3);\
+    pass(z,ff_cos_##n,n4/2);\
+}
+
+static void fft4(FFTComplex *z)
+{
+    FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
+
+    BF(t3, t1, z[0].re, z[1].re);
+    BF(t8, t6, z[3].re, z[2].re);
+    BF(z[2].re, z[0].re, t1, t6);
+    BF(t4, t2, z[0].im, z[1].im);
+    BF(t7, t5, z[2].im, z[3].im);
+    BF(z[3].im, z[1].im, t4, t8);
+    BF(z[3].re, z[1].re, t3, t7);
+    BF(z[2].im, z[0].im, t2, t5);
+}
+
+static void fft8(FFTComplex *z)
+{
+    FFTSample t1, t2, t3, t4, t5, t6, t7, t8;
+
+    fft4(z);
+
+    BF(t1, z[5].re, z[4].re, -z[5].re);
+    BF(t2, z[5].im, z[4].im, -z[5].im);
+    BF(t3, z[7].re, z[6].re, -z[7].re);
+    BF(t4, z[7].im, z[6].im, -z[7].im);
+    BF(t8, t1, t3, t1);
+    BF(t7, t2, t2, t4);
+    BF(z[4].re, z[0].re, z[0].re, t1);
+    BF(z[4].im, z[0].im, z[0].im, t2);
+    BF(z[6].re, z[2].re, z[2].re, t7);
+    BF(z[6].im, z[2].im, z[2].im, t8);
+
+    TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf);
+}
+
+#if !CONFIG_SMALL
+static void fft16(FFTComplex *z)
+{
+    FFTSample t1, t2, t3, t4, t5, t6;
+
+    fft8(z);
+    fft4(z+8);
+    fft4(z+12);
+
+    TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
+    TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf);
+    TRANSFORM(z[1],z[5],z[9],z[13],ff_cos_16[1],ff_cos_16[3]);
+    TRANSFORM(z[3],z[7],z[11],z[15],ff_cos_16[3],ff_cos_16[1]);
+}
+#else
+DECL_FFT(16,8,4)
+#endif
+DECL_FFT(32,16,8)
+DECL_FFT(64,32,16)
+DECL_FFT(128,64,32)
+DECL_FFT(256,128,64)
+DECL_FFT(512,256,128)
+#if !CONFIG_SMALL
+#define pass pass_big
+#endif
+DECL_FFT(1024,512,256)
+DECL_FFT(2048,1024,512)
+DECL_FFT(4096,2048,1024)
+DECL_FFT(8192,4096,2048)
+DECL_FFT(16384,8192,4096)
+DECL_FFT(32768,16384,8192)
+DECL_FFT(65536,32768,16384)
+
+static void (* const fft_dispatch[])(FFTComplex*) = {
+    fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024,
+    fft2048, fft4096, fft8192, fft16384, fft32768, fft65536,
+};
+
+void ff_fft_calc_c(FFTContext *s, FFTComplex *z)
+{
+    fft_dispatch[s->nbits-2](z);
+}
+
diff --git a/apps/codecs/libwmapro/fft.h b/apps/codecs/libwmapro/fft.h
new file mode 100644
index 0000000000..1f5b2e86da
--- /dev/null
+++ b/apps/codecs/libwmapro/fft.h
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_FFT_H
+#define AVCODEC_FFT_H
+
+#include <stdint.h>
+#include "config.h"
+#include "libavutil/mem.h"
+#include "avfft.h"
+
+/* FFT computation */
+
+struct FFTContext {
+    int nbits;
+    int inverse;
+    uint16_t *revtab;
+    FFTComplex *exptab;
+    FFTComplex *exptab1; /* only used by SSE code */
+    FFTComplex *tmp_buf;
+    int mdct_size; /* size of MDCT (i.e. number of input data * 2) */
+    int mdct_bits; /* n = 2^nbits */
+    /* pre/post rotation tables */
+    FFTSample *tcos;
+    FFTSample *tsin;
+    void (*fft_permute)(struct FFTContext *s, FFTComplex *z);
+    void (*fft_calc)(struct FFTContext *s, FFTComplex *z);
+    void (*imdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
+    void (*imdct_half)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
+    void (*mdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input);
+    int split_radix;
+    int permutation;
+#define FF_MDCT_PERM_NONE       0
+#define FF_MDCT_PERM_INTERLEAVE 1
+};
+
+#if CONFIG_HARDCODED_TABLES
+#define COSTABLE_CONST const
+#define SINTABLE_CONST const
+#define SINETABLE_CONST const
+#else
+#define COSTABLE_CONST
+#define SINTABLE_CONST
+#define SINETABLE_CONST
+#endif
+
+#define COSTABLE(size) \
+    COSTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_cos_##size)[size/2]
+#define SINTABLE(size) \
+    SINTABLE_CONST DECLARE_ALIGNED(16, FFTSample, ff_sin_##size)[size/2]
+#define SINETABLE(size) \
+    SINETABLE_CONST DECLARE_ALIGNED(16, float, ff_sine_##size)[size]
+extern COSTABLE(16);
+extern COSTABLE(32);
+extern COSTABLE(64);
+extern COSTABLE(128);
+extern COSTABLE(256);
+extern COSTABLE(512);
+extern COSTABLE(1024);
+extern COSTABLE(2048);
+extern COSTABLE(4096);
+extern COSTABLE(8192);
+extern COSTABLE(16384);
+extern COSTABLE(32768);
+extern COSTABLE(65536);
+extern COSTABLE_CONST FFTSample* const ff_cos_tabs[17];
+
+/**
+ * Initializes the cosine table in ff_cos_tabs[index]
+ * \param index index in ff_cos_tabs array of the table to initialize
+ */
+void ff_init_ff_cos_tabs(int index);
+
+extern SINTABLE(16);
+extern SINTABLE(32);
+extern SINTABLE(64);
+extern SINTABLE(128);
+extern SINTABLE(256);
+extern SINTABLE(512);
+extern SINTABLE(1024);
+extern SINTABLE(2048);
+extern SINTABLE(4096);
+extern SINTABLE(8192);
+extern SINTABLE(16384);
+extern SINTABLE(32768);
+extern SINTABLE(65536);
+
+/**
+ * Sets up a complex FFT.
+ * @param nbits           log2 of the length of the input array
+ * @param inverse         if 0 perform the forward transform, if 1 perform the inverse
+ */
+int ff_fft_init(FFTContext *s, int nbits, int inverse);
+void ff_fft_permute_c(FFTContext *s, FFTComplex *z);
+void ff_fft_calc_c(FFTContext *s, FFTComplex *z);
+
+void ff_fft_init_altivec(FFTContext *s);
+void ff_fft_init_mmx(FFTContext *s);
+void ff_fft_init_arm(FFTContext *s);
+
+/**
+ * Do the permutation needed BEFORE calling ff_fft_calc().
+ */
+static inline void ff_fft_permute(FFTContext *s, FFTComplex *z)
+{
+    s->fft_permute(s, z);
+}
+/**
+ * Do a complex FFT with the parameters defined in ff_fft_init(). The
+ * input data must be permuted before. No 1.0/sqrt(n) normalization is done.
+ */
+static inline void ff_fft_calc(FFTContext *s, FFTComplex *z)
+{
+    s->fft_calc(s, z);
+}
+void ff_fft_end(FFTContext *s);
+
+/* MDCT computation */
+
+static inline void ff_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    s->imdct_calc(s, output, input);
+}
+static inline void ff_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    s->imdct_half(s, output, input);
+}
+
+static inline void ff_mdct_calc(FFTContext *s, FFTSample *output,
+                                const FFTSample *input)
+{
+    s->mdct_calc(s, output, input);
+}
+
+/**
+ * Generate a Kaiser-Bessel Derived Window.
+ * @param   window  pointer to half window
+ * @param   alpha   determines window shape
+ * @param   n       size of half window
+ */
+void ff_kbd_window_init(float *window, float alpha, int n);
+
+/**
+ * Generate a sine window.
+ * @param   window  pointer to half window
+ * @param   n       size of half window
+ */
+void ff_sine_window_init(float *window, int n);
+
+/**
+ * initialize the specified entry of ff_sine_windows
+ */
+void ff_init_ff_sine_windows(int index);
+extern SINETABLE(  32);
+extern SINETABLE(  64);
+extern SINETABLE( 128);
+extern SINETABLE( 256);
+extern SINETABLE( 512);
+extern SINETABLE(1024);
+extern SINETABLE(2048);
+extern SINETABLE(4096);
+extern SINETABLE_CONST float * const ff_sine_windows[13];
+
+int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale);
+void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_mdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input);
+void ff_mdct_end(FFTContext *s);
+
+/* Real Discrete Fourier Transform */
+
+struct RDFTContext {
+    int nbits;
+    int inverse;
+    int sign_convention;
+
+    /* pre/post rotation tables */
+    const FFTSample *tcos;
+    SINTABLE_CONST FFTSample *tsin;
+    FFTContext fft;
+    void (*rdft_calc)(struct RDFTContext *s, FFTSample *z);
+};
+
+/**
+ * Sets up a real FFT.
+ * @param nbits           log2 of the length of the input array
+ * @param trans           the type of transform
+ */
+int ff_rdft_init(RDFTContext *s, int nbits, enum RDFTransformType trans);
+void ff_rdft_end(RDFTContext *s);
+
+void ff_rdft_init_arm(RDFTContext *s);
+
+static av_always_inline void ff_rdft_calc(RDFTContext *s, FFTSample *data)
+{
+    s->rdft_calc(s, data);
+}
+
+/* Discrete Cosine Transform */
+
+struct DCTContext {
+    int nbits;
+    int inverse;
+    RDFTContext rdft;
+    const float *costab;
+    FFTSample *csc2;
+    void (*dct_calc)(struct DCTContext *s, FFTSample *data);
+};
+
+/**
+ * Sets up DCT.
+ * @param nbits           size of the input array:
+ *                        (1 << nbits)     for DCT-II, DCT-III and DST-I
+ *                        (1 << nbits) + 1 for DCT-I
+ *
+ * @note the first element of the input of DST-I is ignored
+ */
+int  ff_dct_init(DCTContext *s, int nbits, enum DCTTransformType type);
+void ff_dct_calc(DCTContext *s, FFTSample *data);
+void ff_dct_end (DCTContext *s);
+
+#endif /* AVCODEC_FFT_H */
diff --git a/apps/codecs/libwmapro/get_bits.h b/apps/codecs/libwmapro/get_bits.h
new file mode 100644
index 0000000000..c325778d67
--- /dev/null
+++ b/apps/codecs/libwmapro/get_bits.h
@@ -0,0 +1,691 @@
+/*
+ * copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavcodec/get_bits.h
+ * bitstream reader API header.
+ */
+
+#ifndef AVCODEC_GET_BITS_H
+#define AVCODEC_GET_BITS_H
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "libavutil/bswap.h"
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/log.h"
+#include "mathops.h"
+
+#if defined(ALT_BITSTREAM_READER_LE) && !defined(ALT_BITSTREAM_READER)
+#   define ALT_BITSTREAM_READER
+#endif
+
+#if !defined(LIBMPEG2_BITSTREAM_READER) && !defined(A32_BITSTREAM_READER) && !defined(ALT_BITSTREAM_READER)
+#   if ARCH_ARM && !HAVE_FAST_UNALIGNED
+#       define A32_BITSTREAM_READER
+#   else
+#       define ALT_BITSTREAM_READER
+//#define LIBMPEG2_BITSTREAM_READER
+//#define A32_BITSTREAM_READER
+#   endif
+#endif
+
+/* bit input */
+/* buffer, buffer_end and size_in_bits must be present and used by every reader */
+typedef struct GetBitContext {
+    const uint8_t *buffer, *buffer_end;
+#ifdef ALT_BITSTREAM_READER
+    int index;
+#elif defined LIBMPEG2_BITSTREAM_READER
+    uint8_t *buffer_ptr;
+    uint32_t cache;
+    int bit_count;
+#elif defined A32_BITSTREAM_READER
+    uint32_t *buffer_ptr;
+    uint32_t cache0;
+    uint32_t cache1;
+    int bit_count;
+#endif
+    int size_in_bits;
+} GetBitContext;
+
+#define VLC_TYPE int16_t
+
+typedef struct VLC {
+    int bits;
+    VLC_TYPE (*table)[2]; ///< code, bits
+    int table_size, table_allocated;
+} VLC;
+
+typedef struct RL_VLC_ELEM {
+    int16_t level;
+    int8_t len;
+    uint8_t run;
+} RL_VLC_ELEM;
+
+/* Bitstream reader API docs:
+name
+    arbitrary name which is used as prefix for the internal variables
+
+gb
+    getbitcontext
+
+OPEN_READER(name, gb)
+    loads gb into local variables
+
+CLOSE_READER(name, gb)
+    stores local vars in gb
+
+UPDATE_CACHE(name, gb)
+    refills the internal cache from the bitstream
+    after this call at least MIN_CACHE_BITS will be available,
+
+GET_CACHE(name, gb)
+    will output the contents of the internal cache, next bit is MSB of 32 or 64 bit (FIXME 64bit)
+
+SHOW_UBITS(name, gb, num)
+    will return the next num bits
+
+SHOW_SBITS(name, gb, num)
+    will return the next num bits and do sign extension
+
+SKIP_BITS(name, gb, num)
+    will skip over the next num bits
+    note, this is equivalent to SKIP_CACHE; SKIP_COUNTER
+
+SKIP_CACHE(name, gb, num)
+    will remove the next num bits from the cache (note SKIP_COUNTER MUST be called before UPDATE_CACHE / CLOSE_READER)
+
+SKIP_COUNTER(name, gb, num)
+    will increment the internal bit counter (see SKIP_CACHE & SKIP_BITS)
+
+LAST_SKIP_CACHE(name, gb, num)
+    will remove the next num bits from the cache if it is needed for UPDATE_CACHE otherwise it will do nothing
+
+LAST_SKIP_BITS(name, gb, num)
+    is equivalent to LAST_SKIP_CACHE; SKIP_COUNTER
+
+for examples see get_bits, show_bits, skip_bits, get_vlc
+*/
+
+#ifdef ALT_BITSTREAM_READER
+#   define MIN_CACHE_BITS 25
+
+#   define OPEN_READER(name, gb)\
+        unsigned int name##_index= (gb)->index;\
+        int name##_cache= 0;\
+
+#   define CLOSE_READER(name, gb)\
+        (gb)->index= name##_index;\
+
+# ifdef ALT_BITSTREAM_READER_LE
+#   define UPDATE_CACHE(name, gb)\
+        name##_cache= AV_RL32( ((const uint8_t *)(gb)->buffer)+(name##_index>>3) ) >> (name##_index&0x07);\
+
+#   define SKIP_CACHE(name, gb, num)\
+        name##_cache >>= (num);
+# else
+#   define UPDATE_CACHE(name, gb)\
+        name##_cache= AV_RB32( ((const uint8_t *)(gb)->buffer)+(name##_index>>3) ) << (name##_index&0x07);\
+
+#   define SKIP_CACHE(name, gb, num)\
+        name##_cache <<= (num);
+# endif
+
+// FIXME name?
+#   define SKIP_COUNTER(name, gb, num)\
+        name##_index += (num);\
+
+#   define SKIP_BITS(name, gb, num)\
+        {\
+            SKIP_CACHE(name, gb, num)\
+            SKIP_COUNTER(name, gb, num)\
+        }\
+
+#   define LAST_SKIP_BITS(name, gb, num) SKIP_COUNTER(name, gb, num)
+#   define LAST_SKIP_CACHE(name, gb, num) ;
+
+# ifdef ALT_BITSTREAM_READER_LE
+#   define SHOW_UBITS(name, gb, num)\
+        zero_extend(name##_cache, num)
+
+#   define SHOW_SBITS(name, gb, num)\
+        sign_extend(name##_cache, num)
+# else
+#   define SHOW_UBITS(name, gb, num)\
+        NEG_USR32(name##_cache, num)
+
+#   define SHOW_SBITS(name, gb, num)\
+        NEG_SSR32(name##_cache, num)
+# endif
+
+#   define GET_CACHE(name, gb)\
+        ((uint32_t)name##_cache)
+
+static inline int get_bits_count(const GetBitContext *s){
+    return s->index;
+}
+
+static inline void skip_bits_long(GetBitContext *s, int n){
+    s->index += n;
+}
+
+#elif defined LIBMPEG2_BITSTREAM_READER
+//libmpeg2 like reader
+
+#   define MIN_CACHE_BITS 17
+
+#   define OPEN_READER(name, gb)\
+        int name##_bit_count=(gb)->bit_count;\
+        int name##_cache= (gb)->cache;\
+        uint8_t * name##_buffer_ptr=(gb)->buffer_ptr;\
+
+#   define CLOSE_READER(name, gb)\
+        (gb)->bit_count= name##_bit_count;\
+        (gb)->cache= name##_cache;\
+        (gb)->buffer_ptr= name##_buffer_ptr;\
+
+#   define UPDATE_CACHE(name, gb)\
+    if(name##_bit_count >= 0){\
+        name##_cache+= AV_RB16(name##_buffer_ptr) << name##_bit_count; \
+        name##_buffer_ptr+=2;\
+        name##_bit_count-= 16;\
+    }\
+
+#   define SKIP_CACHE(name, gb, num)\
+        name##_cache <<= (num);\
+
+#   define SKIP_COUNTER(name, gb, num)\
+        name##_bit_count += (num);\
+
+#   define SKIP_BITS(name, gb, num)\
+        {\
+            SKIP_CACHE(name, gb, num)\
+            SKIP_COUNTER(name, gb, num)\
+        }\
+
+#   define LAST_SKIP_BITS(name, gb, num) SKIP_BITS(name, gb, num)
+#   define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num)
+
+#   define SHOW_UBITS(name, gb, num)\
+        NEG_USR32(name##_cache, num)
+
+#   define SHOW_SBITS(name, gb, num)\
+        NEG_SSR32(name##_cache, num)
+
+#   define GET_CACHE(name, gb)\
+        ((uint32_t)name##_cache)
+
+static inline int get_bits_count(const GetBitContext *s){
+    return (s->buffer_ptr - s->buffer)*8 - 16 + s->bit_count;
+}
+
+static inline void skip_bits_long(GetBitContext *s, int n){
+    OPEN_READER(re, s)
+    re_bit_count += n;
+    re_buffer_ptr += 2*(re_bit_count>>4);
+    re_bit_count &= 15;
+    re_cache = ((re_buffer_ptr[-2]<<8) + re_buffer_ptr[-1]) << (16+re_bit_count);
+    UPDATE_CACHE(re, s)
+    CLOSE_READER(re, s)
+}
+
+#elif defined A32_BITSTREAM_READER
+
+#   define MIN_CACHE_BITS 32
+
+#   define OPEN_READER(name, gb)\
+        int name##_bit_count=(gb)->bit_count;\
+        uint32_t name##_cache0= (gb)->cache0;\
+        uint32_t name##_cache1= (gb)->cache1;\
+        uint32_t * name##_buffer_ptr=(gb)->buffer_ptr;\
+
+#   define CLOSE_READER(name, gb)\
+        (gb)->bit_count= name##_bit_count;\
+        (gb)->cache0= name##_cache0;\
+        (gb)->cache1= name##_cache1;\
+        (gb)->buffer_ptr= name##_buffer_ptr;\
+
+#   define UPDATE_CACHE(name, gb)\
+    if(name##_bit_count > 0){\
+        const uint32_t next= be2me_32( *name##_buffer_ptr );\
+        name##_cache0 |= NEG_USR32(next,name##_bit_count);\
+        name##_cache1 |= next<<name##_bit_count;\
+        name##_buffer_ptr++;\
+        name##_bit_count-= 32;\
+    }\
+
+#if ARCH_X86
+#   define SKIP_CACHE(name, gb, num)\
+        __asm__(\
+            "shldl %2, %1, %0          \n\t"\
+            "shll %2, %1               \n\t"\
+            : "+r" (name##_cache0), "+r" (name##_cache1)\
+            : "Ic" ((uint8_t)(num))\
+           );
+#else
+#   define SKIP_CACHE(name, gb, num)\
+        name##_cache0 <<= (num);\
+        name##_cache0 |= NEG_USR32(name##_cache1,num);\
+        name##_cache1 <<= (num);
+#endif
+
+#   define SKIP_COUNTER(name, gb, num)\
+        name##_bit_count += (num);\
+
+#   define SKIP_BITS(name, gb, num)\
+        {\
+            SKIP_CACHE(name, gb, num)\
+            SKIP_COUNTER(name, gb, num)\
+        }\
+
+#   define LAST_SKIP_BITS(name, gb, num) SKIP_BITS(name, gb, num)
+#   define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num)
+
+#   define SHOW_UBITS(name, gb, num)\
+        NEG_USR32(name##_cache0, num)
+
+#   define SHOW_SBITS(name, gb, num)\
+        NEG_SSR32(name##_cache0, num)
+
+#   define GET_CACHE(name, gb)\
+        (name##_cache0)
+
+static inline int get_bits_count(const GetBitContext *s){
+    return ((uint8_t*)s->buffer_ptr - s->buffer)*8 - 32 + s->bit_count;
+}
+
+static inline void skip_bits_long(GetBitContext *s, int n){
+    OPEN_READER(re, s)
+    re_bit_count += n;
+    re_buffer_ptr += re_bit_count>>5;
+    re_bit_count &= 31;
+    re_cache0 = be2me_32( re_buffer_ptr[-1] ) << re_bit_count;
+    re_cache1 = 0;
+    UPDATE_CACHE(re, s)
+    CLOSE_READER(re, s)
+}
+
+#endif
+
+/**
+ * read mpeg1 dc style vlc (sign bit + mantisse with no MSB).
+ * if MSB not set it is negative
+ * @param n length in bits
+ * @author BERO
+ */
+static inline int get_xbits(GetBitContext *s, int n){
+    register int sign;
+    register int32_t cache;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    cache = GET_CACHE(re,s);
+    sign=(~cache)>>31;
+    LAST_SKIP_BITS(re, s, n)
+    CLOSE_READER(re, s)
+    return (NEG_USR32(sign ^ cache, n) ^ sign) - sign;
+}
+
+static inline int get_sbits(GetBitContext *s, int n){
+    register int tmp;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    tmp= SHOW_SBITS(re, s, n);
+    LAST_SKIP_BITS(re, s, n)
+    CLOSE_READER(re, s)
+    return tmp;
+}
+
+/**
+ * reads 1-17 bits.
+ * Note, the alt bitstream reader can read up to 25 bits, but the libmpeg2 reader can't
+ */
+static inline unsigned int get_bits(GetBitContext *s, int n){
+    register int tmp;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    tmp= SHOW_UBITS(re, s, n);
+    LAST_SKIP_BITS(re, s, n)
+    CLOSE_READER(re, s)
+    return tmp;
+}
+
+/**
+ * shows 1-17 bits.
+ * Note, the alt bitstream reader can read up to 25 bits, but the libmpeg2 reader can't
+ */
+static inline unsigned int show_bits(GetBitContext *s, int n){
+    register int tmp;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    tmp= SHOW_UBITS(re, s, n);
+//    CLOSE_READER(re, s)
+    return tmp;
+}
+
+static inline void skip_bits(GetBitContext *s, int n){
+ //Note gcc seems to optimize this to s->index+=n for the ALT_READER :))
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    LAST_SKIP_BITS(re, s, n)
+    CLOSE_READER(re, s)
+}
+
+static inline unsigned int get_bits1(GetBitContext *s){
+#ifdef ALT_BITSTREAM_READER
+    unsigned int index= s->index;
+    uint8_t result= s->buffer[ index>>3 ];
+#ifdef ALT_BITSTREAM_READER_LE
+    result>>= (index&0x07);
+    result&= 1;
+#else
+    result<<= (index&0x07);
+    result>>= 8 - 1;
+#endif
+    index++;
+    s->index= index;
+
+    return result;
+#else
+    return get_bits(s, 1);
+#endif
+}
+
+static inline unsigned int show_bits1(GetBitContext *s){
+    return show_bits(s, 1);
+}
+
+static inline void skip_bits1(GetBitContext *s){
+    skip_bits(s, 1);
+}
+
+/**
+ * reads 0-32 bits.
+ */
+static inline unsigned int get_bits_long(GetBitContext *s, int n){
+    if(n<=MIN_CACHE_BITS) return get_bits(s, n);
+    else{
+#ifdef ALT_BITSTREAM_READER_LE
+        int ret= get_bits(s, 16);
+        return ret | (get_bits(s, n-16) << 16);
+#else
+        int ret= get_bits(s, 16) << (n-16);
+        return ret | get_bits(s, n-16);
+#endif
+    }
+}
+
+/**
+ * reads 0-32 bits as a signed integer.
+ */
+static inline int get_sbits_long(GetBitContext *s, int n) {
+    return sign_extend(get_bits_long(s, n), n);
+}
+
+/**
+ * shows 0-32 bits.
+ */
+static inline unsigned int show_bits_long(GetBitContext *s, int n){
+    if(n<=MIN_CACHE_BITS) return show_bits(s, n);
+    else{
+        GetBitContext gb= *s;
+        return get_bits_long(&gb, n);
+    }
+}
+
+static inline int check_marker(GetBitContext *s, const char *msg)
+{
+    int bit= get_bits1(s);
+    if(!bit)
+        av_log(NULL, AV_LOG_INFO, "Marker bit missing %s\n", msg);
+
+    return bit;
+}
+
+/**
+ * init GetBitContext.
+ * @param buffer bitstream buffer, must be FF_INPUT_BUFFER_PADDING_SIZE bytes larger then the actual read bits
+ * because some optimized bitstream readers read 32 or 64 bit at once and could read over the end
+ * @param bit_size the size of the buffer in bits
+ *
+ * While GetBitContext stores the buffer size, for performance reasons you are
+ * responsible for checking for the buffer end yourself (take advantage of the padding)!
+ */
+static inline void init_get_bits(GetBitContext *s,
+                   const uint8_t *buffer, int bit_size)
+{
+    int buffer_size= (bit_size+7)>>3;
+    if(buffer_size < 0 || bit_size < 0) {
+        buffer_size = bit_size = 0;
+        buffer = NULL;
+    }
+
+    s->buffer= buffer;
+    s->size_in_bits= bit_size;
+    s->buffer_end= buffer + buffer_size;
+#ifdef ALT_BITSTREAM_READER
+    s->index=0;
+#elif defined LIBMPEG2_BITSTREAM_READER
+    s->buffer_ptr = (uint8_t*)((intptr_t)buffer&(~1));
+    s->bit_count = 16 + 8*((intptr_t)buffer&1);
+    skip_bits_long(s, 0);
+#elif defined A32_BITSTREAM_READER
+    s->buffer_ptr = (uint32_t*)((intptr_t)buffer&(~3));
+    s->bit_count = 32 + 8*((intptr_t)buffer&3);
+    skip_bits_long(s, 0);
+#endif
+}
+
+static inline void align_get_bits(GetBitContext *s)
+{
+    int n= (-get_bits_count(s)) & 7;
+    if(n) skip_bits(s, n);
+}
+
+#define init_vlc(vlc, nb_bits, nb_codes,\
+                 bits, bits_wrap, bits_size,\
+                 codes, codes_wrap, codes_size,\
+                 flags)\
+        init_vlc_sparse(vlc, nb_bits, nb_codes,\
+                 bits, bits_wrap, bits_size,\
+                 codes, codes_wrap, codes_size,\
+                 NULL, 0, 0, flags)
+
+int init_vlc_sparse(VLC *vlc, int nb_bits, int nb_codes,
+             const void *bits, int bits_wrap, int bits_size,
+             const void *codes, int codes_wrap, int codes_size,
+             const void *symbols, int symbols_wrap, int symbols_size,
+             int flags);
+#define INIT_VLC_LE         2
+#define INIT_VLC_USE_NEW_STATIC 4
+void free_vlc(VLC *vlc);
+
+#define INIT_VLC_STATIC(vlc, bits, a,b,c,d,e,f,g, static_size)\
+{\
+    static VLC_TYPE table[static_size][2];\
+    (vlc)->table= table;\
+    (vlc)->table_allocated= static_size;\
+    init_vlc(vlc, bits, a,b,c,d,e,f,g, INIT_VLC_USE_NEW_STATIC);\
+}
+
+
+/**
+ *
+ * If the vlc code is invalid and max_depth=1, then no bits will be removed.
+ * If the vlc code is invalid and max_depth>1, then the number of bits removed
+ * is undefined.
+ */
+#define GET_VLC(code, name, gb, table, bits, max_depth)\
+{\
+    int n, nb_bits;\
+    unsigned int index;\
+\
+    index= SHOW_UBITS(name, gb, bits);\
+    code = table[index][0];\
+    n    = table[index][1];\
+\
+    if(max_depth > 1 && n < 0){\
+        LAST_SKIP_BITS(name, gb, bits)\
+        UPDATE_CACHE(name, gb)\
+\
+        nb_bits = -n;\
+\
+        index= SHOW_UBITS(name, gb, nb_bits) + code;\
+        code = table[index][0];\
+        n    = table[index][1];\
+        if(max_depth > 2 && n < 0){\
+            LAST_SKIP_BITS(name, gb, nb_bits)\
+            UPDATE_CACHE(name, gb)\
+\
+            nb_bits = -n;\
+\
+            index= SHOW_UBITS(name, gb, nb_bits) + code;\
+            code = table[index][0];\
+            n    = table[index][1];\
+        }\
+    }\
+    SKIP_BITS(name, gb, n)\
+}
+
+#define GET_RL_VLC(level, run, name, gb, table, bits, max_depth, need_update)\
+{\
+    int n, nb_bits;\
+    unsigned int index;\
+\
+    index= SHOW_UBITS(name, gb, bits);\
+    level = table[index].level;\
+    n     = table[index].len;\
+\
+    if(max_depth > 1 && n < 0){\
+        SKIP_BITS(name, gb, bits)\
+        if(need_update){\
+            UPDATE_CACHE(name, gb)\
+        }\
+\
+        nb_bits = -n;\
+\
+        index= SHOW_UBITS(name, gb, nb_bits) + level;\
+        level = table[index].level;\
+        n     = table[index].len;\
+    }\
+    run= table[index].run;\
+    SKIP_BITS(name, gb, n)\
+}
+
+
+/**
+ * parses a vlc code, faster then get_vlc()
+ * @param bits is the number of bits which will be read at once, must be
+ *             identical to nb_bits in init_vlc()
+ * @param max_depth is the number of times bits bits must be read to completely
+ *                  read the longest vlc code
+ *                  = (max_vlc_length + bits - 1) / bits
+ */
+static av_always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2],
+                                  int bits, int max_depth)
+{
+    int code;
+
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+
+    GET_VLC(code, re, s, table, bits, max_depth)
+
+    CLOSE_READER(re, s)
+    return code;
+}
+
+//#define TRACE
+
+#ifdef TRACE
+static inline void print_bin(int bits, int n){
+    int i;
+
+    for(i=n-1; i>=0; i--){
+        av_log(NULL, AV_LOG_DEBUG, "%d", (bits>>i)&1);
+    }
+    for(i=n; i<24; i++)
+        av_log(NULL, AV_LOG_DEBUG, " ");
+}
+
+static inline int get_bits_trace(GetBitContext *s, int n, char *file, const char *func, int line){
+    int r= get_bits(s, n);
+
+    print_bin(r, n);
+    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d bit @%5d in %s %s:%d\n", r, n, r, get_bits_count(s)-n, file, func, line);
+    return r;
+}
+static inline int get_vlc_trace(GetBitContext *s, VLC_TYPE (*table)[2], int bits, int max_depth, char *file, const char *func, int line){
+    int show= show_bits(s, 24);
+    int pos= get_bits_count(s);
+    int r= get_vlc2(s, table, bits, max_depth);
+    int len= get_bits_count(s) - pos;
+    int bits2= show>>(24-len);
+
+    print_bin(bits2, len);
+
+    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d vlc @%5d in %s %s:%d\n", bits2, len, r, pos, file, func, line);
+    return r;
+}
+static inline int get_xbits_trace(GetBitContext *s, int n, char *file, const char *func, int line){
+    int show= show_bits(s, n);
+    int r= get_xbits(s, n);
+
+    print_bin(show, n);
+    av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d xbt @%5d in %s %s:%d\n", show, n, r, get_bits_count(s)-n, file, func, line);
+    return r;
+}
+
+#define get_bits(s, n)  get_bits_trace(s, n, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_bits1(s)    get_bits_trace(s, 1, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_xbits(s, n) get_xbits_trace(s, n, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_vlc(s, vlc)            get_vlc_trace(s, (vlc)->table, (vlc)->bits, 3, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+#define get_vlc2(s, tab, bits, max) get_vlc_trace(s, tab, bits, max, __FILE__, __PRETTY_FUNCTION__, __LINE__)
+
+#define tprintf(p, ...) av_log(p, AV_LOG_DEBUG, __VA_ARGS__)
+
+#else //TRACE
+#define tprintf(p, ...) {}
+#endif
+
+static inline int decode012(GetBitContext *gb){
+    int n;
+    n = get_bits1(gb);
+    if (n == 0)
+        return 0;
+    else
+        return get_bits1(gb) + 1;
+}
+
+static inline int decode210(GetBitContext *gb){
+    if (get_bits1(gb))
+        return 0;
+    else
+        return 2 - get_bits1(gb);
+}
+
+static inline int get_bits_left(GetBitContext *gb)
+{
+    return gb->size_in_bits - get_bits_count(gb);
+}
+
+#endif /* AVCODEC_GET_BITS_H */
diff --git a/apps/codecs/libwmapro/internal.h b/apps/codecs/libwmapro/internal.h
new file mode 100644
index 0000000000..7ce019c12a
--- /dev/null
+++ b/apps/codecs/libwmapro/internal.h
@@ -0,0 +1,51 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavcodec/internal.h
+ * common internal api header.
+ */
+
+#ifndef AVCODEC_INTERNAL_H
+#define AVCODEC_INTERNAL_H
+
+#include <stdint.h>
+#include "avcodec.h"
+
+/**
+ * Determines whether pix_fmt is a hardware accelerated format.
+ */
+int ff_is_hwaccel_pix_fmt(enum PixelFormat pix_fmt);
+
+/**
+ * Returns the hardware accelerated codec for codec codec_id and
+ * pixel format pix_fmt.
+ *
+ * @param codec_id the codec to match
+ * @param pix_fmt the pixel format to match
+ * @return the hardware accelerated codec, or NULL if none was found.
+ */
+AVHWAccel *ff_find_hwaccel(enum CodecID codec_id, enum PixelFormat pix_fmt);
+
+/**
+ * Return the index into tab at which {a,b} match elements {[0],[1]} of tab.
+ * If there is no such matching pair then size is returned.
+ */
+int ff_match_2uint16(const uint16_t (*tab)[2], int size, int a, int b);
+
+#endif /* AVCODEC_INTERNAL_H */
diff --git a/apps/codecs/libwmapro/libavutil/attributes.h b/apps/codecs/libwmapro/libavutil/attributes.h
new file mode 100644
index 0000000000..1208bc0c72
--- /dev/null
+++ b/apps/codecs/libwmapro/libavutil/attributes.h
@@ -0,0 +1,113 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavutil/attributes.h
+ * Macro definitions for various function/variable attributes
+ */
+
+#ifndef AVUTIL_ATTRIBUTES_H
+#define AVUTIL_ATTRIBUTES_H
+
+#ifdef __GNUC__
+#    define AV_GCC_VERSION_AT_LEAST(x,y) (__GNUC__ > x || __GNUC__ == x && __GNUC_MINOR__ >= y)
+#else
+#    define AV_GCC_VERSION_AT_LEAST(x,y) 0
+#endif
+
+#ifndef av_always_inline
+#if AV_GCC_VERSION_AT_LEAST(3,1)
+#    define av_always_inline __attribute__((always_inline)) inline
+#else
+#    define av_always_inline inline
+#endif
+#endif
+
+#ifndef av_noinline
+#if AV_GCC_VERSION_AT_LEAST(3,1)
+#    define av_noinline __attribute__((noinline))
+#else
+#    define av_noinline
+#endif
+#endif
+
+#ifndef av_pure
+#if AV_GCC_VERSION_AT_LEAST(3,1)
+#    define av_pure __attribute__((pure))
+#else
+#    define av_pure
+#endif
+#endif
+
+#ifndef av_const
+#if AV_GCC_VERSION_AT_LEAST(2,6)
+#    define av_const __attribute__((const))
+#else
+#    define av_const
+#endif
+#endif
+
+#ifndef av_cold
+#if (!defined(__ICC) || __ICC > 1110) && AV_GCC_VERSION_AT_LEAST(4,3)
+#    define av_cold __attribute__((cold))
+#else
+#    define av_cold
+#endif
+#endif
+
+#ifndef av_flatten
+#if (!defined(__ICC) || __ICC > 1110) && AV_GCC_VERSION_AT_LEAST(4,1)
+#    define av_flatten __attribute__((flatten))
+#else
+#    define av_flatten
+#endif
+#endif
+
+#ifndef attribute_deprecated
+#if AV_GCC_VERSION_AT_LEAST(3,1)
+#    define attribute_deprecated __attribute__((deprecated))
+#else
+#    define attribute_deprecated
+#endif
+#endif
+
+#ifndef av_unused
+#if defined(__GNUC__)
+#    define av_unused __attribute__((unused))
+#else
+#    define av_unused
+#endif
+#endif
+
+#ifndef av_uninit
+#if defined(__GNUC__) && !defined(__ICC)
+#    define av_uninit(x) x=x
+#else
+#    define av_uninit(x) x
+#endif
+#endif
+
+#ifdef __GNUC__
+#    define av_builtin_constant_p __builtin_constant_p
+#else
+#    define av_builtin_constant_p(x) 0
+#endif
+
+#endif /* AVUTIL_ATTRIBUTES_H */
diff --git a/apps/codecs/libwmapro/libavutil/avutil.h b/apps/codecs/libwmapro/libavutil/avutil.h
new file mode 100644
index 0000000000..1523de6753
--- /dev/null
+++ b/apps/codecs/libwmapro/libavutil/avutil.h
@@ -0,0 +1,89 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_AVUTIL_H
+#define AVUTIL_AVUTIL_H
+
+/**
+ * @file libavutil/avutil.h
+ * external API header
+ */
+
+
+#define AV_STRINGIFY(s)         AV_TOSTRING(s)
+#define AV_TOSTRING(s) #s
+
+#define AV_GLUE(a, b) a ## b
+#define AV_JOIN(a, b) AV_GLUE(a, b)
+
+#define AV_PRAGMA(s) _Pragma(#s)
+
+#define AV_VERSION_INT(a, b, c) (a<<16 | b<<8 | c)
+#define AV_VERSION_DOT(a, b, c) a ##.## b ##.## c
+#define AV_VERSION(a, b, c) AV_VERSION_DOT(a, b, c)
+
+#define LIBAVUTIL_VERSION_MAJOR 50
+#define LIBAVUTIL_VERSION_MINOR 14
+#define LIBAVUTIL_VERSION_MICRO  0
+
+#define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
+                                               LIBAVUTIL_VERSION_MINOR, \
+                                               LIBAVUTIL_VERSION_MICRO)
+#define LIBAVUTIL_VERSION       AV_VERSION(LIBAVUTIL_VERSION_MAJOR,     \
+                                           LIBAVUTIL_VERSION_MINOR,     \
+                                           LIBAVUTIL_VERSION_MICRO)
+#define LIBAVUTIL_BUILD         LIBAVUTIL_VERSION_INT
+
+#define LIBAVUTIL_IDENT         "Lavu" AV_STRINGIFY(LIBAVUTIL_VERSION)
+
+/**
+ * Returns the LIBAVUTIL_VERSION_INT constant.
+ */
+unsigned avutil_version(void);
+
+/**
+ * Returns the libavutil build-time configuration.
+ */
+const char *avutil_configuration(void);
+
+/**
+ * Returns the libavutil license.
+ */
+const char *avutil_license(void);
+
+enum AVMediaType {
+    AVMEDIA_TYPE_UNKNOWN = -1,
+    AVMEDIA_TYPE_VIDEO,
+    AVMEDIA_TYPE_AUDIO,
+    AVMEDIA_TYPE_DATA,
+    AVMEDIA_TYPE_SUBTITLE,
+    AVMEDIA_TYPE_ATTACHMENT,
+    AVMEDIA_TYPE_NB
+};
+
+#include "common.h"
+#include "error.h"
+#include "mathematics.h"
+#include "rational.h"
+#include "intfloat_readwrite.h"
+#include "log.h"
+#include "pixfmt.h"
+
+#endif /* AVUTIL_AVUTIL_H */
diff --git a/apps/codecs/libwmapro/libavutil/bswap.h b/apps/codecs/libwmapro/libavutil/bswap.h
new file mode 100644
index 0000000000..74c7af3026
--- /dev/null
+++ b/apps/codecs/libwmapro/libavutil/bswap.h
@@ -0,0 +1,101 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavutil/bswap.h
+ * byte swapping routines
+ */
+
+#ifndef AVUTIL_BSWAP_H
+#define AVUTIL_BSWAP_H
+
+#include <stdint.h>
+#include "config.h"
+#include "attributes.h"
+
+#if   ARCH_ARM
+#   include "arm/bswap.h"
+#elif ARCH_AVR32
+#   include "avr32/bswap.h"
+#elif ARCH_BFIN
+#   include "bfin/bswap.h"
+#elif ARCH_SH4
+#   include "sh4/bswap.h"
+#elif ARCH_X86
+#   include "x86/bswap.h"
+#endif
+
+#ifndef bswap_16
+static av_always_inline av_const uint16_t bswap_16(uint16_t x)
+{
+    x= (x>>8) | (x<<8);
+    return x;
+}
+#endif
+
+#ifndef bswap_32
+static av_always_inline av_const uint32_t bswap_32(uint32_t x)
+{
+    x= ((x<<8)&0xFF00FF00) | ((x>>8)&0x00FF00FF);
+    x= (x>>16) | (x<<16);
+    return x;
+}
+#endif
+
+#ifndef bswap_64
+static inline uint64_t av_const bswap_64(uint64_t x)
+{
+#if 0
+    x= ((x<< 8)&0xFF00FF00FF00FF00ULL) | ((x>> 8)&0x00FF00FF00FF00FFULL);
+    x= ((x<<16)&0xFFFF0000FFFF0000ULL) | ((x>>16)&0x0000FFFF0000FFFFULL);
+    return (x>>32) | (x<<32);
+#else
+    union {
+        uint64_t ll;
+        uint32_t l[2];
+    } w, r;
+    w.ll = x;
+    r.l[0] = bswap_32 (w.l[1]);
+    r.l[1] = bswap_32 (w.l[0]);
+    return r.ll;
+#endif
+}
+#endif
+
+// be2me ... big-endian to machine-endian
+// le2me ... little-endian to machine-endian
+
+#if HAVE_BIGENDIAN
+#define be2me_16(x) (x)
+#define be2me_32(x) (x)
+#define be2me_64(x) (x)
+#define le2me_16(x) bswap_16(x)
+#define le2me_32(x) bswap_32(x)
+#define le2me_64(x) bswap_64(x)
+#else
+#define be2me_16(x) bswap_16(x)
+#define be2me_32(x) bswap_32(x)
+#define be2me_64(x) bswap_64(x)
+#define le2me_16(x) (x)
+#define le2me_32(x) (x)
+#define le2me_64(x) (x)
+#endif
+
+#endif /* AVUTIL_BSWAP_H */
diff --git a/apps/codecs/libwmapro/libavutil/common.h b/apps/codecs/libwmapro/libavutil/common.h
new file mode 100644
index 0000000000..fae0b5b09b
--- /dev/null
+++ b/apps/codecs/libwmapro/libavutil/common.h
@@ -0,0 +1,297 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavutil/common.h
+ * common internal and external API header
+ */
+
+#ifndef AVUTIL_COMMON_H
+#define AVUTIL_COMMON_H
+
+#include <ctype.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "attributes.h"
+
+//rounded division & shift
+#define RSHIFT(a,b) ((a) > 0 ? ((a) + ((1<<(b))>>1))>>(b) : ((a) + ((1<<(b))>>1)-1)>>(b))
+/* assume b>0 */
+#define ROUNDED_DIV(a,b) (((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1))/(b))
+#define FFABS(a) ((a) >= 0 ? (a) : (-(a)))
+#define FFSIGN(a) ((a) > 0 ? 1 : -1)
+
+#define FFMAX(a,b) ((a) > (b) ? (a) : (b))
+#define FFMAX3(a,b,c) FFMAX(FFMAX(a,b),c)
+#define FFMIN(a,b) ((a) > (b) ? (b) : (a))
+#define FFMIN3(a,b,c) FFMIN(FFMIN(a,b),c)
+
+#define FFSWAP(type,a,b) do{type SWAP_tmp= b; b= a; a= SWAP_tmp;}while(0)
+#define FF_ARRAY_ELEMS(a) (sizeof(a) / sizeof((a)[0]))
+#define FFALIGN(x, a) (((x)+(a)-1)&~((a)-1))
+
+/* misc math functions */
+extern const uint8_t ff_log2_tab[256];
+
+extern const uint8_t av_reverse[256];
+
+static inline av_const int av_log2_c(unsigned int v)
+{
+    int n = 0;
+    if (v & 0xffff0000) {
+        v >>= 16;
+        n += 16;
+    }
+    if (v & 0xff00) {
+        v >>= 8;
+        n += 8;
+    }
+    n += ff_log2_tab[v];
+
+    return n;
+}
+
+static inline av_const int av_log2_16bit_c(unsigned int v)
+{
+    int n = 0;
+    if (v & 0xff00) {
+        v >>= 8;
+        n += 8;
+    }
+    n += ff_log2_tab[v];
+
+    return n;
+}
+
+#ifdef HAVE_AV_CONFIG_H
+#   include "config.h"
+#   include "intmath.h"
+#endif
+
+#ifndef av_log2
+#   define av_log2       av_log2_c
+#endif
+#ifndef av_log2_16bit
+#   define av_log2_16bit av_log2_16bit_c
+#endif
+
+/**
+ * Clips a signed integer value into the amin-amax range.
+ * @param a value to clip
+ * @param amin minimum value of the clip range
+ * @param amax maximum value of the clip range
+ * @return clipped value
+ */
+static inline av_const int av_clip(int a, int amin, int amax)
+{
+    if      (a < amin) return amin;
+    else if (a > amax) return amax;
+    else               return a;
+}
+
+/**
+ * Clips a signed integer value into the 0-255 range.
+ * @param a value to clip
+ * @return clipped value
+ */
+static inline av_const uint8_t av_clip_uint8(int a)
+{
+    if (a&(~255)) return (-a)>>31;
+    else          return a;
+}
+
+/**
+ * Clips a signed integer value into the 0-65535 range.
+ * @param a value to clip
+ * @return clipped value
+ */
+static inline av_const uint16_t av_clip_uint16(int a)
+{
+    if (a&(~65535)) return (-a)>>31;
+    else            return a;
+}
+
+/**
+ * Clips a signed integer value into the -32768,32767 range.
+ * @param a value to clip
+ * @return clipped value
+ */
+static inline av_const int16_t av_clip_int16(int a)
+{
+    if ((a+32768) & ~65535) return (a>>31) ^ 32767;
+    else                    return a;
+}
+
+/**
+ * Clips a float value into the amin-amax range.
+ * @param a value to clip
+ * @param amin minimum value of the clip range
+ * @param amax maximum value of the clip range
+ * @return clipped value
+ */
+static inline av_const float av_clipf(float a, float amin, float amax)
+{
+    if      (a < amin) return amin;
+    else if (a > amax) return amax;
+    else               return a;
+}
+
+/** Computes ceil(log2(x)).
+ * @param x value used to compute ceil(log2(x))
+ * @return computed ceiling of log2(x)
+ */
+static inline av_const int av_ceil_log2(int x)
+{
+    return av_log2((x - 1) << 1);
+}
+
+#define MKTAG(a,b,c,d) (a | (b << 8) | (c << 16) | (d << 24))
+#define MKBETAG(a,b,c,d) (d | (c << 8) | (b << 16) | (a << 24))
+
+/*!
+ * \def GET_UTF8(val, GET_BYTE, ERROR)
+ * Converts a UTF-8 character (up to 4 bytes long) to its 32-bit UCS-4 encoded form
+ * \param val is the output and should be of type uint32_t. It holds the converted
+ * UCS-4 character and should be a left value.
+ * \param GET_BYTE gets UTF-8 encoded bytes from any proper source. It can be
+ * a function or a statement whose return value or evaluated value is of type
+ * uint8_t. It will be executed up to 4 times for values in the valid UTF-8 range,
+ * and up to 7 times in the general case.
+ * \param ERROR action that should be taken when an invalid UTF-8 byte is returned
+ * from GET_BYTE. It should be a statement that jumps out of the macro,
+ * like exit(), goto, return, break, or continue.
+ */
+#define GET_UTF8(val, GET_BYTE, ERROR)\
+    val= GET_BYTE;\
+    {\
+        int ones= 7 - av_log2(val ^ 255);\
+        if(ones==1)\
+            ERROR\
+        val&= 127>>ones;\
+        while(--ones > 0){\
+            int tmp= GET_BYTE - 128;\
+            if(tmp>>6)\
+                ERROR\
+            val= (val<<6) + tmp;\
+        }\
+    }
+
+/*!
+ * \def GET_UTF16(val, GET_16BIT, ERROR)
+ * Converts a UTF-16 character (2 or 4 bytes) to its 32-bit UCS-4 encoded form
+ * \param val is the output and should be of type uint32_t. It holds the converted
+ * UCS-4 character and should be a left value.
+ * \param GET_16BIT gets two bytes of UTF-16 encoded data converted to native endianness.
+ * It can be a function or a statement whose return value or evaluated value is of type
+ * uint16_t. It will be executed up to 2 times.
+ * \param ERROR action that should be taken when an invalid UTF-16 surrogate is
+ * returned from GET_BYTE. It should be a statement that jumps out of the macro,
+ * like exit(), goto, return, break, or continue.
+ */
+#define GET_UTF16(val, GET_16BIT, ERROR)\
+    val = GET_16BIT;\
+    {\
+        unsigned int hi = val - 0xD800;\
+        if (hi < 0x800) {\
+            val = GET_16BIT - 0xDC00;\
+            if (val > 0x3FFU || hi > 0x3FFU)\
+                ERROR\
+            val += (hi<<10) + 0x10000;\
+        }\
+    }\
+
+/*!
+ * \def PUT_UTF8(val, tmp, PUT_BYTE)
+ * Converts a 32-bit Unicode character to its UTF-8 encoded form (up to 4 bytes long).
+ * \param val is an input-only argument and should be of type uint32_t. It holds
+ * a UCS-4 encoded Unicode character that is to be converted to UTF-8. If
+ * val is given as a function it is executed only once.
+ * \param tmp is a temporary variable and should be of type uint8_t. It
+ * represents an intermediate value during conversion that is to be
+ * output by PUT_BYTE.
+ * \param PUT_BYTE writes the converted UTF-8 bytes to any proper destination.
+ * It could be a function or a statement, and uses tmp as the input byte.
+ * For example, PUT_BYTE could be "*output++ = tmp;" PUT_BYTE will be
+ * executed up to 4 times for values in the valid UTF-8 range and up to
+ * 7 times in the general case, depending on the length of the converted
+ * Unicode character.
+ */
+#define PUT_UTF8(val, tmp, PUT_BYTE)\
+    {\
+        int bytes, shift;\
+        uint32_t in = val;\
+        if (in < 0x80) {\
+            tmp = in;\
+            PUT_BYTE\
+        } else {\
+            bytes = (av_log2(in) + 4) / 5;\
+            shift = (bytes - 1) * 6;\
+            tmp = (256 - (256 >> bytes)) | (in >> shift);\
+            PUT_BYTE\
+            while (shift >= 6) {\
+                shift -= 6;\
+                tmp = 0x80 | ((in >> shift) & 0x3f);\
+                PUT_BYTE\
+            }\
+        }\
+    }
+
+/*!
+ * \def PUT_UTF16(val, tmp, PUT_16BIT)
+ * Converts a 32-bit Unicode character to its UTF-16 encoded form (2 or 4 bytes).
+ * \param val is an input-only argument and should be of type uint32_t. It holds
+ * a UCS-4 encoded Unicode character that is to be converted to UTF-16. If
+ * val is given as a function it is executed only once.
+ * \param tmp is a temporary variable and should be of type uint16_t. It
+ * represents an intermediate value during conversion that is to be
+ * output by PUT_16BIT.
+ * \param PUT_16BIT writes the converted UTF-16 data to any proper destination
+ * in desired endianness. It could be a function or a statement, and uses tmp
+ * as the input byte.  For example, PUT_BYTE could be "*output++ = tmp;"
+ * PUT_BYTE will be executed 1 or 2 times depending on input character.
+ */
+#define PUT_UTF16(val, tmp, PUT_16BIT)\
+    {\
+        uint32_t in = val;\
+        if (in < 0x10000) {\
+            tmp = in;\
+            PUT_16BIT\
+        } else {\
+            tmp = 0xD800 | ((in - 0x10000) >> 10);\
+            PUT_16BIT\
+            tmp = 0xDC00 | ((in - 0x10000) & 0x3FF);\
+            PUT_16BIT\
+        }\
+    }\
+
+
+
+#include "mem.h"
+
+#ifdef HAVE_AV_CONFIG_H
+#    include "internal.h"
+#endif /* HAVE_AV_CONFIG_H */
+
+#endif /* AVUTIL_COMMON_H */
diff --git a/apps/codecs/libwmapro/libavutil/internal.h b/apps/codecs/libwmapro/libavutil/internal.h
new file mode 100644
index 0000000000..b47db3d8c6
--- /dev/null
+++ b/apps/codecs/libwmapro/libavutil/internal.h
@@ -0,0 +1,207 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavutil/internal.h
+ * common internal API header
+ */
+
+#ifndef AVUTIL_INTERNAL_H
+#define AVUTIL_INTERNAL_H
+
+#if !defined(DEBUG) && !defined(NDEBUG)
+#    define NDEBUG
+#endif
+
+#include <limits.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <assert.h>
+#include "config.h"
+#include "attributes.h"
+#include "timer.h"
+
+#ifndef attribute_align_arg
+#if (!defined(__ICC) || __ICC > 1110) && AV_GCC_VERSION_AT_LEAST(4,2)
+#    define attribute_align_arg __attribute__((force_align_arg_pointer))
+#else
+#    define attribute_align_arg
+#endif
+#endif
+
+#ifndef attribute_used
+#if AV_GCC_VERSION_AT_LEAST(3,1)
+#    define attribute_used __attribute__((used))
+#else
+#    define attribute_used
+#endif
+#endif
+
+#ifndef av_alias
+#if HAVE_ATTRIBUTE_MAY_ALIAS && (!defined(__ICC) || __ICC > 1110) && AV_GCC_VERSION_AT_LEAST(3,3)
+#   define av_alias __attribute__((may_alias))
+#else
+#   define av_alias
+#endif
+#endif
+
+#ifndef INT16_MIN
+#define INT16_MIN       (-0x7fff - 1)
+#endif
+
+#ifndef INT16_MAX
+#define INT16_MAX       0x7fff
+#endif
+
+#ifndef INT32_MIN
+#define INT32_MIN       (-0x7fffffff - 1)
+#endif
+
+#ifndef INT32_MAX
+#define INT32_MAX       0x7fffffff
+#endif
+
+#ifndef UINT32_MAX
+#define UINT32_MAX      0xffffffff
+#endif
+
+#ifndef INT64_MIN
+#define INT64_MIN       (-0x7fffffffffffffffLL - 1)
+#endif
+
+#ifndef INT64_MAX
+#define INT64_MAX INT64_C(9223372036854775807)
+#endif
+
+#ifndef UINT64_MAX
+#define UINT64_MAX UINT64_C(0xFFFFFFFFFFFFFFFF)
+#endif
+
+#ifndef INT_BIT
+#    define INT_BIT (CHAR_BIT * sizeof(int))
+#endif
+
+#ifndef offsetof
+#    define offsetof(T, F) ((unsigned int)((char *)&((T *)0)->F))
+#endif
+
+/* Use to export labels from asm. */
+#define LABEL_MANGLE(a) EXTERN_PREFIX #a
+
+// Use rip-relative addressing if compiling PIC code on x86-64.
+#if ARCH_X86_64 && defined(PIC)
+#    define LOCAL_MANGLE(a) #a "(%%rip)"
+#else
+#    define LOCAL_MANGLE(a) #a
+#endif
+
+#define MANGLE(a) EXTERN_PREFIX LOCAL_MANGLE(a)
+
+/* debug stuff */
+
+/* dprintf macros */
+#ifdef DEBUG
+#    define dprintf(pctx, ...) av_log(pctx, AV_LOG_DEBUG, __VA_ARGS__)
+#else
+#    define dprintf(pctx, ...)
+#endif
+
+#define av_abort()      do { av_log(NULL, AV_LOG_ERROR, "Abort at %s:%d\n", __FILE__, __LINE__); abort(); } while (0)
+
+/* math */
+
+#if ARCH_X86
+#define MASK_ABS(mask, level)\
+            __asm__ volatile(\
+                "cltd                   \n\t"\
+                "xorl %1, %0            \n\t"\
+                "subl %1, %0            \n\t"\
+                : "+a" (level), "=&d" (mask)\
+            );
+#else
+#define MASK_ABS(mask, level)\
+            mask  = level >> 31;\
+            level = (level ^ mask) - mask;
+#endif
+
+/* avoid usage of dangerous/inappropriate system functions */
+#undef  malloc
+#define malloc please_use_av_malloc
+#undef  free
+#define free please_use_av_free
+#undef  realloc
+#define realloc please_use_av_realloc
+#undef  time
+#define time time_is_forbidden_due_to_security_issues
+#undef  rand
+#define rand rand_is_forbidden_due_to_state_trashing_use_av_lfg_get
+#undef  srand
+#define srand srand_is_forbidden_due_to_state_trashing_use_av_lfg_init
+#undef  random
+#define random random_is_forbidden_due_to_state_trashing_use_av_lfg_get
+#undef  sprintf
+#define sprintf sprintf_is_forbidden_due_to_security_issues_use_snprintf
+#undef  strcat
+#define strcat strcat_is_forbidden_due_to_security_issues_use_av_strlcat
+#undef  exit
+#define exit exit_is_forbidden
+#ifndef LIBAVFORMAT_BUILD
+#undef  printf
+#define printf please_use_av_log_instead_of_printf
+#undef  fprintf
+#define fprintf please_use_av_log_instead_of_fprintf
+#undef  puts
+#define puts please_use_av_log_instead_of_puts
+#undef  perror
+#define perror please_use_av_log_instead_of_perror
+#endif
+
+#define FF_ALLOC_OR_GOTO(ctx, p, size, label)\
+{\
+    p = av_malloc(size);\
+    if (p == NULL && (size) != 0) {\
+        av_log(ctx, AV_LOG_ERROR, "Cannot allocate memory.\n");\
+        goto label;\
+    }\
+}
+
+#define FF_ALLOCZ_OR_GOTO(ctx, p, size, label)\
+{\
+    p = av_mallocz(size);\
+    if (p == NULL && (size) != 0) {\
+        av_log(ctx, AV_LOG_ERROR, "Cannot allocate memory.\n");\
+        goto label;\
+    }\
+}
+
+#include "libm.h"
+
+/**
+ * Returns NULL if CONFIG_SMALL is true, otherwise the argument
+ * without modification. Used to disable the definition of strings
+ * (for example AVCodec long_names).
+ */
+#if CONFIG_SMALL
+#   define NULL_IF_CONFIG_SMALL(x) NULL
+#else
+#   define NULL_IF_CONFIG_SMALL(x) x
+#endif
+
+#endif /* AVUTIL_INTERNAL_H */
diff --git a/apps/codecs/libwmapro/libavutil/intreadwrite.h b/apps/codecs/libwmapro/libavutil/intreadwrite.h
new file mode 100644
index 0000000000..c8026f0872
--- /dev/null
+++ b/apps/codecs/libwmapro/libavutil/intreadwrite.h
@@ -0,0 +1,516 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_INTREADWRITE_H
+#define AVUTIL_INTREADWRITE_H
+
+#include <stdint.h>
+#include "config.h"
+#include "bswap.h"
+#include "common.h"
+
+typedef union {
+    uint64_t u64;
+    uint32_t u32[2];
+    uint16_t u16[4];
+    uint8_t  u8 [8];
+    double   f64;
+    float    f32[2];
+} av_alias av_alias64;
+
+typedef union {
+    uint32_t u32;
+    uint16_t u16[2];
+    uint8_t  u8 [4];
+    float    f32;
+} av_alias av_alias32;
+
+typedef union {
+    uint16_t u16;
+    uint8_t  u8 [2];
+} av_alias av_alias16;
+
+/*
+ * Arch-specific headers can provide any combination of
+ * AV_[RW][BLN](16|24|32|64) and AV_(COPY|SWAP|ZERO)(64|128) macros.
+ * Preprocessor symbols must be defined, even if these are implemented
+ * as inline functions.
+ */
+
+#if   ARCH_ARM
+#   include "arm/intreadwrite.h"
+#elif ARCH_AVR32
+#   include "avr32/intreadwrite.h"
+#elif ARCH_MIPS
+#   include "mips/intreadwrite.h"
+#elif ARCH_PPC
+#   include "ppc/intreadwrite.h"
+#elif ARCH_TOMI
+#   include "tomi/intreadwrite.h"
+#elif ARCH_X86
+#   include "x86/intreadwrite.h"
+#endif
+
+/*
+ * Map AV_RNXX <-> AV_R[BL]XX for all variants provided by per-arch headers.
+ */
+
+#if HAVE_BIGENDIAN
+
+#   if    defined(AV_RN16) && !defined(AV_RB16)
+#       define AV_RB16(p) AV_RN16(p)
+#   elif !defined(AV_RN16) &&  defined(AV_RB16)
+#       define AV_RN16(p) AV_RB16(p)
+#   endif
+
+#   if    defined(AV_WN16) && !defined(AV_WB16)
+#       define AV_WB16(p, v) AV_WN16(p, v)
+#   elif !defined(AV_WN16) &&  defined(AV_WB16)
+#       define AV_WN16(p, v) AV_WB16(p, v)
+#   endif
+
+#   if    defined(AV_RN24) && !defined(AV_RB24)
+#       define AV_RB24(p) AV_RN24(p)
+#   elif !defined(AV_RN24) &&  defined(AV_RB24)
+#       define AV_RN24(p) AV_RB24(p)
+#   endif
+
+#   if    defined(AV_WN24) && !defined(AV_WB24)
+#       define AV_WB24(p, v) AV_WN24(p, v)
+#   elif !defined(AV_WN24) &&  defined(AV_WB24)
+#       define AV_WN24(p, v) AV_WB24(p, v)
+#   endif
+
+#   if    defined(AV_RN32) && !defined(AV_RB32)
+#       define AV_RB32(p) AV_RN32(p)
+#   elif !defined(AV_RN32) &&  defined(AV_RB32)
+#       define AV_RN32(p) AV_RB32(p)
+#   endif
+
+#   if    defined(AV_WN32) && !defined(AV_WB32)
+#       define AV_WB32(p, v) AV_WN32(p, v)
+#   elif !defined(AV_WN32) &&  defined(AV_WB32)
+#       define AV_WN32(p, v) AV_WB32(p, v)
+#   endif
+
+#   if    defined(AV_RN64) && !defined(AV_RB64)
+#       define AV_RB64(p) AV_RN64(p)
+#   elif !defined(AV_RN64) &&  defined(AV_RB64)
+#       define AV_RN64(p) AV_RB64(p)
+#   endif
+
+#   if    defined(AV_WN64) && !defined(AV_WB64)
+#       define AV_WB64(p, v) AV_WN64(p, v)
+#   elif !defined(AV_WN64) &&  defined(AV_WB64)
+#       define AV_WN64(p, v) AV_WB64(p, v)
+#   endif
+
+#else /* HAVE_BIGENDIAN */
+
+#   if    defined(AV_RN16) && !defined(AV_RL16)
+#       define AV_RL16(p) AV_RN16(p)
+#   elif !defined(AV_RN16) &&  defined(AV_RL16)
+#       define AV_RN16(p) AV_RL16(p)
+#   endif
+
+#   if    defined(AV_WN16) && !defined(AV_WL16)
+#       define AV_WL16(p, v) AV_WN16(p, v)
+#   elif !defined(AV_WN16) &&  defined(AV_WL16)
+#       define AV_WN16(p, v) AV_WL16(p, v)
+#   endif
+
+#   if    defined(AV_RN24) && !defined(AV_RL24)
+#       define AV_RL24(p) AV_RN24(p)
+#   elif !defined(AV_RN24) &&  defined(AV_RL24)
+#       define AV_RN24(p) AV_RL24(p)
+#   endif
+
+#   if    defined(AV_WN24) && !defined(AV_WL24)
+#       define AV_WL24(p, v) AV_WN24(p, v)
+#   elif !defined(AV_WN24) &&  defined(AV_WL24)
+#       define AV_WN24(p, v) AV_WL24(p, v)
+#   endif
+
+#   if    defined(AV_RN32) && !defined(AV_RL32)
+#       define AV_RL32(p) AV_RN32(p)
+#   elif !defined(AV_RN32) &&  defined(AV_RL32)
+#       define AV_RN32(p) AV_RL32(p)
+#   endif
+
+#   if    defined(AV_WN32) && !defined(AV_WL32)
+#       define AV_WL32(p, v) AV_WN32(p, v)
+#   elif !defined(AV_WN32) &&  defined(AV_WL32)
+#       define AV_WN32(p, v) AV_WL32(p, v)
+#   endif
+
+#   if    defined(AV_RN64) && !defined(AV_RL64)
+#       define AV_RL64(p) AV_RN64(p)
+#   elif !defined(AV_RN64) &&  defined(AV_RL64)
+#       define AV_RN64(p) AV_RL64(p)
+#   endif
+
+#   if    defined(AV_WN64) && !defined(AV_WL64)
+#       define AV_WL64(p, v) AV_WN64(p, v)
+#   elif !defined(AV_WN64) &&  defined(AV_WL64)
+#       define AV_WN64(p, v) AV_WL64(p, v)
+#   endif
+
+#endif /* !HAVE_BIGENDIAN */
+
+/*
+ * Define AV_[RW]N helper macros to simplify definitions not provided
+ * by per-arch headers.
+ */
+
+#if   HAVE_ATTRIBUTE_PACKED
+
+union unaligned_64 { uint64_t l; } __attribute__((packed)) av_alias;
+union unaligned_32 { uint32_t l; } __attribute__((packed)) av_alias;
+union unaligned_16 { uint16_t l; } __attribute__((packed)) av_alias;
+
+#   define AV_RN(s, p) (((const union unaligned_##s *) (p))->l)
+#   define AV_WN(s, p, v) ((((union unaligned_##s *) (p))->l) = (v))
+
+#elif defined(__DECC)
+
+#   define AV_RN(s, p) (*((const __unaligned uint##s##_t*)(p)))
+#   define AV_WN(s, p, v) (*((__unaligned uint##s##_t*)(p)) = (v))
+
+#elif HAVE_FAST_UNALIGNED
+
+#   define AV_RN(s, p) (((const av_alias##s*)(p))->u##s)
+#   define AV_WN(s, p, v) (((av_alias##s*)(p))->u##s = (v))
+
+#else
+
+#ifndef AV_RB16
+#   define AV_RB16(x)                           \
+    ((((const uint8_t*)(x))[0] << 8) |          \
+      ((const uint8_t*)(x))[1])
+#endif
+#ifndef AV_WB16
+#   define AV_WB16(p, d) do {                   \
+        ((uint8_t*)(p))[1] = (d);               \
+        ((uint8_t*)(p))[0] = (d)>>8;            \
+    } while(0)
+#endif
+
+#ifndef AV_RL16
+#   define AV_RL16(x)                           \
+    ((((const uint8_t*)(x))[1] << 8) |          \
+      ((const uint8_t*)(x))[0])
+#endif
+#ifndef AV_WL16
+#   define AV_WL16(p, d) do {                   \
+        ((uint8_t*)(p))[0] = (d);               \
+        ((uint8_t*)(p))[1] = (d)>>8;            \
+    } while(0)
+#endif
+
+#ifndef AV_RB32
+#   define AV_RB32(x)                           \
+    ((((const uint8_t*)(x))[0] << 24) |         \
+     (((const uint8_t*)(x))[1] << 16) |         \
+     (((const uint8_t*)(x))[2] <<  8) |         \
+      ((const uint8_t*)(x))[3])
+#endif
+#ifndef AV_WB32
+#   define AV_WB32(p, d) do {                   \
+        ((uint8_t*)(p))[3] = (d);               \
+        ((uint8_t*)(p))[2] = (d)>>8;            \
+        ((uint8_t*)(p))[1] = (d)>>16;           \
+        ((uint8_t*)(p))[0] = (d)>>24;           \
+    } while(0)
+#endif
+
+#ifndef AV_RL32
+#   define AV_RL32(x)                           \
+    ((((const uint8_t*)(x))[3] << 24) |         \
+     (((const uint8_t*)(x))[2] << 16) |         \
+     (((const uint8_t*)(x))[1] <<  8) |         \
+      ((const uint8_t*)(x))[0])
+#endif
+#ifndef AV_WL32
+#   define AV_WL32(p, d) do {                   \
+        ((uint8_t*)(p))[0] = (d);               \
+        ((uint8_t*)(p))[1] = (d)>>8;            \
+        ((uint8_t*)(p))[2] = (d)>>16;           \
+        ((uint8_t*)(p))[3] = (d)>>24;           \
+    } while(0)
+#endif
+
+#ifndef AV_RB64
+#   define AV_RB64(x)                                   \
+    (((uint64_t)((const uint8_t*)(x))[0] << 56) |       \
+     ((uint64_t)((const uint8_t*)(x))[1] << 48) |       \
+     ((uint64_t)((const uint8_t*)(x))[2] << 40) |       \
+     ((uint64_t)((const uint8_t*)(x))[3] << 32) |       \
+     ((uint64_t)((const uint8_t*)(x))[4] << 24) |       \
+     ((uint64_t)((const uint8_t*)(x))[5] << 16) |       \
+     ((uint64_t)((const uint8_t*)(x))[6] <<  8) |       \
+      (uint64_t)((const uint8_t*)(x))[7])
+#endif
+#ifndef AV_WB64
+#   define AV_WB64(p, d) do {                   \
+        ((uint8_t*)(p))[7] = (d);               \
+        ((uint8_t*)(p))[6] = (d)>>8;            \
+        ((uint8_t*)(p))[5] = (d)>>16;           \
+        ((uint8_t*)(p))[4] = (d)>>24;           \
+        ((uint8_t*)(p))[3] = (d)>>32;           \
+        ((uint8_t*)(p))[2] = (d)>>40;           \
+        ((uint8_t*)(p))[1] = (d)>>48;           \
+        ((uint8_t*)(p))[0] = (d)>>56;           \
+    } while(0)
+#endif
+
+#ifndef AV_RL64
+#   define AV_RL64(x)                                   \
+    (((uint64_t)((const uint8_t*)(x))[7] << 56) |       \
+     ((uint64_t)((const uint8_t*)(x))[6] << 48) |       \
+     ((uint64_t)((const uint8_t*)(x))[5] << 40) |       \
+     ((uint64_t)((const uint8_t*)(x))[4] << 32) |       \
+     ((uint64_t)((const uint8_t*)(x))[3] << 24) |       \
+     ((uint64_t)((const uint8_t*)(x))[2] << 16) |       \
+     ((uint64_t)((const uint8_t*)(x))[1] <<  8) |       \
+      (uint64_t)((const uint8_t*)(x))[0])
+#endif
+#ifndef AV_WL64
+#   define AV_WL64(p, d) do {                   \
+        ((uint8_t*)(p))[0] = (d);               \
+        ((uint8_t*)(p))[1] = (d)>>8;            \
+        ((uint8_t*)(p))[2] = (d)>>16;           \
+        ((uint8_t*)(p))[3] = (d)>>24;           \
+        ((uint8_t*)(p))[4] = (d)>>32;           \
+        ((uint8_t*)(p))[5] = (d)>>40;           \
+        ((uint8_t*)(p))[6] = (d)>>48;           \
+        ((uint8_t*)(p))[7] = (d)>>56;           \
+    } while(0)
+#endif
+
+#if HAVE_BIGENDIAN
+#   define AV_RN(s, p)    AV_RB##s(p)
+#   define AV_WN(s, p, v) AV_WB##s(p, v)
+#else
+#   define AV_RN(s, p)    AV_RL##s(p)
+#   define AV_WN(s, p, v) AV_WL##s(p, v)
+#endif
+
+#endif /* HAVE_FAST_UNALIGNED */
+
+#ifndef AV_RN16
+#   define AV_RN16(p) AV_RN(16, p)
+#endif
+
+#ifndef AV_RN32
+#   define AV_RN32(p) AV_RN(32, p)
+#endif
+
+#ifndef AV_RN64
+#   define AV_RN64(p) AV_RN(64, p)
+#endif
+
+#ifndef AV_WN16
+#   define AV_WN16(p, v) AV_WN(16, p, v)
+#endif
+
+#ifndef AV_WN32
+#   define AV_WN32(p, v) AV_WN(32, p, v)
+#endif
+
+#ifndef AV_WN64
+#   define AV_WN64(p, v) AV_WN(64, p, v)
+#endif
+
+#if HAVE_BIGENDIAN
+#   define AV_RB(s, p)    AV_RN##s(p)
+#   define AV_WB(s, p, v) AV_WN##s(p, v)
+#   define AV_RL(s, p)    bswap_##s(AV_RN##s(p))
+#   define AV_WL(s, p, v) AV_WN##s(p, bswap_##s(v))
+#else
+#   define AV_RB(s, p)    bswap_##s(AV_RN##s(p))
+#   define AV_WB(s, p, v) AV_WN##s(p, bswap_##s(v))
+#   define AV_RL(s, p)    AV_RN##s(p)
+#   define AV_WL(s, p, v) AV_WN##s(p, v)
+#endif
+
+#define AV_RB8(x)     (((const uint8_t*)(x))[0])
+#define AV_WB8(p, d)  do { ((uint8_t*)(p))[0] = (d); } while(0)
+
+#define AV_RL8(x)     AV_RB8(x)
+#define AV_WL8(p, d)  AV_WB8(p, d)
+
+#ifndef AV_RB16
+#   define AV_RB16(p)    AV_RB(16, p)
+#endif
+#ifndef AV_WB16
+#   define AV_WB16(p, v) AV_WB(16, p, v)
+#endif
+
+#ifndef AV_RL16
+#   define AV_RL16(p)    AV_RL(16, p)
+#endif
+#ifndef AV_WL16
+#   define AV_WL16(p, v) AV_WL(16, p, v)
+#endif
+
+#ifndef AV_RB32
+#   define AV_RB32(p)    AV_RB(32, p)
+#endif
+#ifndef AV_WB32
+#   define AV_WB32(p, v) AV_WB(32, p, v)
+#endif
+
+#ifndef AV_RL32
+#   define AV_RL32(p)    AV_RL(32, p)
+#endif
+#ifndef AV_WL32
+#   define AV_WL32(p, v) AV_WL(32, p, v)
+#endif
+
+#ifndef AV_RB64
+#   define AV_RB64(p)    AV_RB(64, p)
+#endif
+#ifndef AV_WB64
+#   define AV_WB64(p, v) AV_WB(64, p, v)
+#endif
+
+#ifndef AV_RL64
+#   define AV_RL64(p)    AV_RL(64, p)
+#endif
+#ifndef AV_WL64
+#   define AV_WL64(p, v) AV_WL(64, p, v)
+#endif
+
+#ifndef AV_RB24
+#   define AV_RB24(x)                           \
+    ((((const uint8_t*)(x))[0] << 16) |         \
+     (((const uint8_t*)(x))[1] <<  8) |         \
+      ((const uint8_t*)(x))[2])
+#endif
+#ifndef AV_WB24
+#   define AV_WB24(p, d) do {                   \
+        ((uint8_t*)(p))[2] = (d);               \
+        ((uint8_t*)(p))[1] = (d)>>8;            \
+        ((uint8_t*)(p))[0] = (d)>>16;           \
+    } while(0)
+#endif
+
+#ifndef AV_RL24
+#   define AV_RL24(x)                           \
+    ((((const uint8_t*)(x))[2] << 16) |         \
+     (((const uint8_t*)(x))[1] <<  8) |         \
+      ((const uint8_t*)(x))[0])
+#endif
+#ifndef AV_WL24
+#   define AV_WL24(p, d) do {                   \
+        ((uint8_t*)(p))[0] = (d);               \
+        ((uint8_t*)(p))[1] = (d)>>8;            \
+        ((uint8_t*)(p))[2] = (d)>>16;           \
+    } while(0)
+#endif
+
+/*
+ * The AV_[RW]NA macros access naturally aligned data
+ * in a type-safe way.
+ */
+
+#define AV_RNA(s, p)    (((const av_alias##s*)(p))->u##s)
+#define AV_WNA(s, p, v) (((av_alias##s*)(p))->u##s = (v))
+
+#ifndef AV_RN16A
+#   define AV_RN16A(p) AV_RNA(16, p)
+#endif
+
+#ifndef AV_RN32A
+#   define AV_RN32A(p) AV_RNA(32, p)
+#endif
+
+#ifndef AV_RN64A
+#   define AV_RN64A(p) AV_RNA(64, p)
+#endif
+
+#ifndef AV_WN16A
+#   define AV_WN16A(p, v) AV_WNA(16, p, v)
+#endif
+
+#ifndef AV_WN32A
+#   define AV_WN32A(p, v) AV_WNA(32, p, v)
+#endif
+
+#ifndef AV_WN64A
+#   define AV_WN64A(p, v) AV_WNA(64, p, v)
+#endif
+
+/* Parameters for AV_COPY*, AV_SWAP*, AV_ZERO* must be
+ * naturally aligned. They may be implemented using MMX,
+ * so emms_c() must be called before using any float code
+ * afterwards.
+ */
+
+#define AV_COPY(n, d, s) \
+    (((av_alias##n*)(d))->u##n = ((const av_alias##n*)(s))->u##n)
+
+#ifndef AV_COPY16
+#   define AV_COPY16(d, s) AV_COPY(16, d, s)
+#endif
+
+#ifndef AV_COPY32
+#   define AV_COPY32(d, s) AV_COPY(32, d, s)
+#endif
+
+#ifndef AV_COPY64
+#   define AV_COPY64(d, s) AV_COPY(64, d, s)
+#endif
+
+#ifndef AV_COPY128
+#   define AV_COPY128(d, s)                    \
+    do {                                       \
+        AV_COPY64(d, s);                       \
+        AV_COPY64((char*)(d)+8, (char*)(s)+8); \
+    } while(0)
+#endif
+
+#define AV_SWAP(n, a, b) FFSWAP(av_alias##n, *(av_alias##n*)(a), *(av_alias##n*)(b))
+
+#ifndef AV_SWAP64
+#   define AV_SWAP64(a, b) AV_SWAP(64, a, b)
+#endif
+
+#define AV_ZERO(n, d) (((av_alias##n*)(d))->u##n = 0)
+
+#ifndef AV_ZERO16
+#   define AV_ZERO16(d) AV_ZERO(16, d)
+#endif
+
+#ifndef AV_ZERO32
+#   define AV_ZERO32(d) AV_ZERO(32, d)
+#endif
+
+#ifndef AV_ZERO64
+#   define AV_ZERO64(d) AV_ZERO(64, d)
+#endif
+
+#ifndef AV_ZERO128
+#   define AV_ZERO128(d)         \
+    do {                         \
+        AV_ZERO64(d);            \
+        AV_ZERO64((char*)(d)+8); \
+    } while(0)
+#endif
+
+#endif /* AVUTIL_INTREADWRITE_H */
diff --git a/apps/codecs/libwmapro/libavutil/log.c b/apps/codecs/libwmapro/libavutil/log.c
new file mode 100644
index 0000000000..166e724b07
--- /dev/null
+++ b/apps/codecs/libwmapro/libavutil/log.c
@@ -0,0 +1,92 @@
+/*
+ * log functions
+ * Copyright (c) 2003 Michel Bardiaux
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavutil/log.c
+ * logging functions
+ */
+
+#include "avutil.h"
+#include "log.h"
+
+#if LIBAVUTIL_VERSION_MAJOR > 50
+static
+#endif
+int av_log_level = AV_LOG_INFO;
+
+void av_log_default_callback(void* ptr, int level, const char* fmt, va_list vl)
+{
+    static int print_prefix=1;
+    static int count;
+    static char line[1024], prev[1024];
+    AVClass* avc= ptr ? *(AVClass**)ptr : NULL;
+    if(level>av_log_level)
+        return;
+#undef fprintf
+    if(print_prefix && avc) {
+        snprintf(line, sizeof(line), "[%s @ %p]", avc->item_name(ptr), ptr);
+    }else
+        line[0]=0;
+
+    vsnprintf(line + strlen(line), sizeof(line) - strlen(line), fmt, vl);
+
+    print_prefix= line[strlen(line)-1] == '\n';
+    if(print_prefix && !strcmp(line, prev)){
+        count++;
+        return;
+    }
+    if(count>0){
+        fprintf(stderr, "    Last message repeated %d times\n", count);
+        count=0;
+    }
+    fputs(line, stderr);
+    strcpy(prev, line);
+}
+
+static void (*av_log_callback)(void*, int, const char*, va_list) = av_log_default_callback;
+
+void av_log(void* avcl, int level, const char *fmt, ...)
+{
+    va_list vl;
+    va_start(vl, fmt);
+    av_vlog(avcl, level, fmt, vl);
+    va_end(vl);
+}
+
+void av_vlog(void* avcl, int level, const char *fmt, va_list vl)
+{
+    av_log_callback(avcl, level, fmt, vl);
+}
+
+int av_log_get_level(void)
+{
+    return av_log_level;
+}
+
+void av_log_set_level(int level)
+{
+    av_log_level = level;
+}
+
+void av_log_set_callback(void (*callback)(void*, int, const char*, va_list))
+{
+    av_log_callback = callback;
+}
diff --git a/apps/codecs/libwmapro/libavutil/log.h b/apps/codecs/libwmapro/libavutil/log.h
new file mode 100644
index 0000000000..b0a1493607
--- /dev/null
+++ b/apps/codecs/libwmapro/libavutil/log.h
@@ -0,0 +1,115 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_LOG_H
+#define AVUTIL_LOG_H
+
+#include <stdarg.h>
+#include "avutil.h"
+
+/**
+ * Describes the class of an AVClass context structure. That is an
+ * arbitrary struct of which the first field is a pointer to an
+ * AVClass struct (e.g. AVCodecContext, AVFormatContext etc.).
+ */
+typedef struct {
+    /**
+     * The name of the class; usually it is the same name as the
+     * context structure type to which the AVClass is associated.
+     */
+    const char* class_name;
+
+    /**
+     * A pointer to a function which returns the name of a context
+     * instance ctx associated with the class.
+     */
+    const char* (*item_name)(void* ctx);
+
+    /**
+     * a pointer to the first option specified in the class if any or NULL
+     *
+     * @see av_set_default_options()
+     */
+    const struct AVOption *option;
+} AVClass;
+
+/* av_log API */
+
+#define AV_LOG_QUIET    -8
+
+/**
+ * Something went really wrong and we will crash now.
+ */
+#define AV_LOG_PANIC     0
+
+/**
+ * Something went wrong and recovery is not possible.
+ * For example, no header was found for a format which depends
+ * on headers or an illegal combination of parameters is used.
+ */
+#define AV_LOG_FATAL     8
+
+/**
+ * Something went wrong and cannot losslessly be recovered.
+ * However, not all future data is affected.
+ */
+#define AV_LOG_ERROR    16
+
+/**
+ * Something somehow does not look correct. This may or may not
+ * lead to problems. An example would be the use of '-vstrict -2'.
+ */
+#define AV_LOG_WARNING  24
+
+#define AV_LOG_INFO     32
+#define AV_LOG_VERBOSE  40
+
+/**
+ * Stuff which is only useful for libav* developers.
+ */
+#define AV_LOG_DEBUG    48
+
+/**
+ * Sends the specified message to the log if the level is less than or equal
+ * to the current av_log_level. By default, all logging messages are sent to
+ * stderr. This behavior can be altered by setting a different av_vlog callback
+ * function.
+ *
+ * @param avcl A pointer to an arbitrary struct of which the first field is a
+ * pointer to an AVClass struct.
+ * @param level The importance level of the message, lower values signifying
+ * higher importance.
+ * @param fmt The format string (printf-compatible) that specifies how
+ * subsequent arguments are converted to output.
+ * @see av_vlog
+ */
+#ifdef __GNUC__
+void av_log(void*, int level, const char *fmt, ...) __attribute__ ((__format__ (__printf__, 3, 4)));
+#else
+void av_log(void*, int level, const char *fmt, ...);
+#endif
+
+void av_vlog(void*, int level, const char *fmt, va_list);
+int av_log_get_level(void);
+void av_log_set_level(int);
+void av_log_set_callback(void (*)(void*, int, const char*, va_list));
+void av_log_default_callback(void* ptr, int level, const char* fmt, va_list vl);
+
+#endif /* AVUTIL_LOG_H */
diff --git a/apps/codecs/libwmapro/libavutil/mathematics.c b/apps/codecs/libwmapro/libavutil/mathematics.c
new file mode 100644
index 0000000000..81d47c1bf1
--- /dev/null
+++ b/apps/codecs/libwmapro/libavutil/mathematics.c
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2005 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavutil/mathematics.c
+ * miscellaneous math routines and tables
+ */
+
+#include <assert.h>
+#include <stdint.h>
+#include <limits.h>
+#include "mathematics.h"
+
+const uint8_t ff_sqrt_tab[256]={
+  0, 16, 23, 28, 32, 36, 40, 43, 46, 48, 51, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 77, 79, 80, 82, 84, 85, 87, 88, 90,
+ 91, 92, 94, 95, 96, 98, 99,100,102,103,104,105,107,108,109,110,111,112,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
+128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,144,145,146,147,148,149,150,151,151,152,153,154,155,156,156,
+157,158,159,160,160,161,162,163,164,164,165,166,167,168,168,169,170,171,171,172,173,174,174,175,176,176,177,178,179,179,180,181,
+182,182,183,184,184,185,186,186,187,188,188,189,190,190,191,192,192,193,194,194,195,196,196,197,198,198,199,200,200,201,202,202,
+203,204,204,205,205,206,207,207,208,208,209,210,210,211,212,212,213,213,214,215,215,216,216,217,218,218,219,219,220,220,221,222,
+222,223,223,224,224,225,226,226,227,227,228,228,229,230,230,231,231,232,232,233,233,234,235,235,236,236,237,237,238,238,239,239,
+240,240,241,242,242,243,243,244,244,245,245,246,246,247,247,248,248,249,249,250,250,251,251,252,252,253,253,254,254,255,255,255
+};
+
+const uint8_t ff_log2_tab[256]={
+        0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+        5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+        6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+        6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+        7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+        7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+        7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+        7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
+};
+
+const uint8_t av_reverse[256]={
+0x00,0x80,0x40,0xC0,0x20,0xA0,0x60,0xE0,0x10,0x90,0x50,0xD0,0x30,0xB0,0x70,0xF0,
+0x08,0x88,0x48,0xC8,0x28,0xA8,0x68,0xE8,0x18,0x98,0x58,0xD8,0x38,0xB8,0x78,0xF8,
+0x04,0x84,0x44,0xC4,0x24,0xA4,0x64,0xE4,0x14,0x94,0x54,0xD4,0x34,0xB4,0x74,0xF4,
+0x0C,0x8C,0x4C,0xCC,0x2C,0xAC,0x6C,0xEC,0x1C,0x9C,0x5C,0xDC,0x3C,0xBC,0x7C,0xFC,
+0x02,0x82,0x42,0xC2,0x22,0xA2,0x62,0xE2,0x12,0x92,0x52,0xD2,0x32,0xB2,0x72,0xF2,
+0x0A,0x8A,0x4A,0xCA,0x2A,0xAA,0x6A,0xEA,0x1A,0x9A,0x5A,0xDA,0x3A,0xBA,0x7A,0xFA,
+0x06,0x86,0x46,0xC6,0x26,0xA6,0x66,0xE6,0x16,0x96,0x56,0xD6,0x36,0xB6,0x76,0xF6,
+0x0E,0x8E,0x4E,0xCE,0x2E,0xAE,0x6E,0xEE,0x1E,0x9E,0x5E,0xDE,0x3E,0xBE,0x7E,0xFE,
+0x01,0x81,0x41,0xC1,0x21,0xA1,0x61,0xE1,0x11,0x91,0x51,0xD1,0x31,0xB1,0x71,0xF1,
+0x09,0x89,0x49,0xC9,0x29,0xA9,0x69,0xE9,0x19,0x99,0x59,0xD9,0x39,0xB9,0x79,0xF9,
+0x05,0x85,0x45,0xC5,0x25,0xA5,0x65,0xE5,0x15,0x95,0x55,0xD5,0x35,0xB5,0x75,0xF5,
+0x0D,0x8D,0x4D,0xCD,0x2D,0xAD,0x6D,0xED,0x1D,0x9D,0x5D,0xDD,0x3D,0xBD,0x7D,0xFD,
+0x03,0x83,0x43,0xC3,0x23,0xA3,0x63,0xE3,0x13,0x93,0x53,0xD3,0x33,0xB3,0x73,0xF3,
+0x0B,0x8B,0x4B,0xCB,0x2B,0xAB,0x6B,0xEB,0x1B,0x9B,0x5B,0xDB,0x3B,0xBB,0x7B,0xFB,
+0x07,0x87,0x47,0xC7,0x27,0xA7,0x67,0xE7,0x17,0x97,0x57,0xD7,0x37,0xB7,0x77,0xF7,
+0x0F,0x8F,0x4F,0xCF,0x2F,0xAF,0x6F,0xEF,0x1F,0x9F,0x5F,0xDF,0x3F,0xBF,0x7F,0xFF,
+};
+
+int64_t av_gcd(int64_t a, int64_t b){
+    if(b) return av_gcd(b, a%b);
+    else  return a;
+}
+
+int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding rnd){
+    int64_t r=0;
+    assert(c > 0);
+    assert(b >=0);
+    assert(rnd >=0 && rnd<=5 && rnd!=4);
+
+    if(a<0 && a != INT64_MIN) return -av_rescale_rnd(-a, b, c, rnd ^ ((rnd>>1)&1));
+
+    if(rnd==AV_ROUND_NEAR_INF) r= c/2;
+    else if(rnd&1)             r= c-1;
+
+    if(b<=INT_MAX && c<=INT_MAX){
+        if(a<=INT_MAX)
+            return (a * b + r)/c;
+        else
+            return a/c*b + (a%c*b + r)/c;
+    }else{
+#if 1
+        uint64_t a0= a&0xFFFFFFFF;
+        uint64_t a1= a>>32;
+        uint64_t b0= b&0xFFFFFFFF;
+        uint64_t b1= b>>32;
+        uint64_t t1= a0*b1 + a1*b0;
+        uint64_t t1a= t1<<32;
+        int i;
+
+        a0 = a0*b0 + t1a;
+        a1 = a1*b1 + (t1>>32) + (a0<t1a);
+        a0 += r;
+        a1 += a0<r;
+
+        for(i=63; i>=0; i--){
+//            int o= a1 & 0x8000000000000000ULL;
+            a1+= a1 + ((a0>>i)&1);
+            t1+=t1;
+            if(/*o || */c <= a1){
+                a1 -= c;
+                t1++;
+            }
+        }
+        return t1;
+    }
+#else
+        AVInteger ai;
+        ai= av_mul_i(av_int2i(a), av_int2i(b));
+        ai= av_add_i(ai, av_int2i(r));
+
+        return av_i2int(av_div_i(ai, av_int2i(c)));
+    }
+#endif
+}
+
+int64_t av_rescale(int64_t a, int64_t b, int64_t c){
+    return av_rescale_rnd(a, b, c, AV_ROUND_NEAR_INF);
+}
+
+int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq){
+    int64_t b= bq.num * (int64_t)cq.den;
+    int64_t c= cq.num * (int64_t)bq.den;
+    return av_rescale_rnd(a, b, c, AV_ROUND_NEAR_INF);
+}
+
+int av_compare_ts(int64_t ts_a, AVRational tb_a, int64_t ts_b, AVRational tb_b){
+    int64_t a= tb_a.num * (int64_t)tb_b.den;
+    int64_t b= tb_b.num * (int64_t)tb_a.den;
+    if (av_rescale_rnd(ts_a, a, b, AV_ROUND_DOWN) < ts_b) return -1;
+    if (av_rescale_rnd(ts_b, b, a, AV_ROUND_DOWN) < ts_a) return  1;
+    return 0;
+}
+
+#ifdef TEST
+#include "integer.h"
+#undef printf
+int main(void){
+    int64_t a,b,c,d,e;
+
+    for(a=7; a<(1LL<<62); a+=a/3+1){
+        for(b=3; b<(1LL<<62); b+=b/4+1){
+            for(c=9; c<(1LL<<62); c+=(c*2)/5+3){
+                int64_t r= c/2;
+                AVInteger ai;
+                ai= av_mul_i(av_int2i(a), av_int2i(b));
+                ai= av_add_i(ai, av_int2i(r));
+
+                d= av_i2int(av_div_i(ai, av_int2i(c)));
+
+                e= av_rescale(a,b,c);
+
+                if((double)a * (double)b / (double)c > (1LL<<63))
+                    continue;
+
+                if(d!=e) printf("%"PRId64"*%"PRId64"/%"PRId64"= %"PRId64"=%"PRId64"\n", a, b, c, d, e);
+            }
+        }
+    }
+    return 0;
+}
+#endif
diff --git a/apps/codecs/libwmapro/libavutil/mathematics.h b/apps/codecs/libwmapro/libavutil/mathematics.h
new file mode 100644
index 0000000000..e198aef8cb
--- /dev/null
+++ b/apps/codecs/libwmapro/libavutil/mathematics.h
@@ -0,0 +1,98 @@
+/*
+ * copyright (c) 2005 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_MATHEMATICS_H
+#define AVUTIL_MATHEMATICS_H
+
+#include <stdint.h>
+#include <math.h>
+#include "attributes.h"
+#include "rational.h"
+
+#ifndef M_E
+#define M_E            2.7182818284590452354   /* e */
+#endif
+#ifndef M_LN2
+#define M_LN2          0.69314718055994530942  /* log_e 2 */
+#endif
+#ifndef M_LN10
+#define M_LN10         2.30258509299404568402  /* log_e 10 */
+#endif
+#ifndef M_LOG2_10
+#define M_LOG2_10      3.32192809488736234787  /* log_2 10 */
+#endif
+#ifndef M_PI
+#define M_PI           3.14159265358979323846  /* pi */
+#endif
+#ifndef M_SQRT1_2
+#define M_SQRT1_2      0.70710678118654752440  /* 1/sqrt(2) */
+#endif
+#ifndef M_SQRT2
+#define M_SQRT2        1.41421356237309504880  /* sqrt(2) */
+#endif
+#ifndef NAN
+#define NAN            (0.0/0.0)
+#endif
+#ifndef INFINITY
+#define INFINITY       (1.0/0.0)
+#endif
+
+enum AVRounding {
+    AV_ROUND_ZERO     = 0, ///< Round toward zero.
+    AV_ROUND_INF      = 1, ///< Round away from zero.
+    AV_ROUND_DOWN     = 2, ///< Round toward -infinity.
+    AV_ROUND_UP       = 3, ///< Round toward +infinity.
+    AV_ROUND_NEAR_INF = 5, ///< Round to nearest and halfway cases away from zero.
+};
+
+/**
+ * Returns the greatest common divisor of a and b.
+ * If both a and b are 0 or either or both are <0 then behavior is
+ * undefined.
+ */
+int64_t av_const av_gcd(int64_t a, int64_t b);
+
+/**
+ * Rescales a 64-bit integer with rounding to nearest.
+ * A simple a*b/c isn't possible as it can overflow.
+ */
+int64_t av_rescale(int64_t a, int64_t b, int64_t c) av_const;
+
+/**
+ * Rescales a 64-bit integer with specified rounding.
+ * A simple a*b/c isn't possible as it can overflow.
+ */
+int64_t av_rescale_rnd(int64_t a, int64_t b, int64_t c, enum AVRounding) av_const;
+
+/**
+ * Rescales a 64-bit integer by 2 rational numbers.
+ */
+int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq) av_const;
+
+/**
+ * Compares 2 timestamps each in its own timebases.
+ * The result of the function is undefined if one of the timestamps
+ * is outside the int64_t range when represented in the others timebase.
+ * @return -1 if ts_a is before ts_b, 1 if ts_a is after ts_b or 0 if they represent the same position
+ */
+int av_compare_ts(int64_t ts_a, AVRational tb_a, int64_t ts_b, AVRational tb_b);
+
+
+#endif /* AVUTIL_MATHEMATICS_H */
diff --git a/apps/codecs/libwmapro/libavutil/mem.c b/apps/codecs/libwmapro/libavutil/mem.c
new file mode 100644
index 0000000000..4d776d4450
--- /dev/null
+++ b/apps/codecs/libwmapro/libavutil/mem.c
@@ -0,0 +1,176 @@
+/*
+ * default memory allocator for libavutil
+ * Copyright (c) 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavutil/mem.c
+ * default memory allocator for libavutil
+ */
+
+#include "config.h"
+
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#if HAVE_MALLOC_H
+#include <malloc.h>
+#endif
+
+#include "avutil.h"
+#include "mem.h"
+
+/* here we can use OS-dependent allocation functions */
+#undef free
+#undef malloc
+#undef realloc
+
+#ifdef MALLOC_PREFIX
+
+#define malloc         AV_JOIN(MALLOC_PREFIX, malloc)
+#define memalign       AV_JOIN(MALLOC_PREFIX, memalign)
+#define posix_memalign AV_JOIN(MALLOC_PREFIX, posix_memalign)
+#define realloc        AV_JOIN(MALLOC_PREFIX, realloc)
+#define free           AV_JOIN(MALLOC_PREFIX, free)
+
+void *malloc(size_t size);
+void *memalign(size_t align, size_t size);
+int   posix_memalign(void **ptr, size_t align, size_t size);
+void *realloc(void *ptr, size_t size);
+void  free(void *ptr);
+
+#endif /* MALLOC_PREFIX */
+
+/* You can redefine av_malloc and av_free in your project to use your
+   memory allocator. You do not need to suppress this file because the
+   linker will do it automatically. */
+
+void *av_malloc(unsigned int size)
+{
+    void *ptr = NULL;
+#if CONFIG_MEMALIGN_HACK
+    long diff;
+#endif
+
+    /* let's disallow possible ambiguous cases */
+    if(size > (INT_MAX-16) )
+        return NULL;
+
+#if CONFIG_MEMALIGN_HACK
+    ptr = malloc(size+16);
+    if(!ptr)
+        return ptr;
+    diff= ((-(long)ptr - 1)&15) + 1;
+    ptr = (char*)ptr + diff;
+    ((char*)ptr)[-1]= diff;
+#elif HAVE_POSIX_MEMALIGN
+    if (posix_memalign(&ptr,16,size))
+        ptr = NULL;
+#elif HAVE_MEMALIGN
+    ptr = memalign(16,size);
+    /* Why 64?
+       Indeed, we should align it:
+         on 4 for 386
+         on 16 for 486
+         on 32 for 586, PPro - K6-III
+         on 64 for K7 (maybe for P3 too).
+       Because L1 and L2 caches are aligned on those values.
+       But I don't want to code such logic here!
+     */
+     /* Why 16?
+        Because some CPUs need alignment, for example SSE2 on P4, & most RISC CPUs
+        it will just trigger an exception and the unaligned load will be done in the
+        exception handler or it will just segfault (SSE2 on P4).
+        Why not larger? Because I did not see a difference in benchmarks ...
+     */
+     /* benchmarks with P3
+        memalign(64)+1          3071,3051,3032
+        memalign(64)+2          3051,3032,3041
+        memalign(64)+4          2911,2896,2915
+        memalign(64)+8          2545,2554,2550
+        memalign(64)+16         2543,2572,2563
+        memalign(64)+32         2546,2545,2571
+        memalign(64)+64         2570,2533,2558
+
+        BTW, malloc seems to do 8-byte alignment by default here.
+     */
+#else
+    ptr = malloc(size);
+#endif
+    return ptr;
+}
+
+void *av_realloc(void *ptr, unsigned int size)
+{
+#if CONFIG_MEMALIGN_HACK
+    int diff;
+#endif
+
+    /* let's disallow possible ambiguous cases */
+    if(size > (INT_MAX-16) )
+        return NULL;
+
+#if CONFIG_MEMALIGN_HACK
+    //FIXME this isn't aligned correctly, though it probably isn't needed
+    if(!ptr) return av_malloc(size);
+    diff= ((char*)ptr)[-1];
+    return (char*)realloc((char*)ptr - diff, size + diff) + diff;
+#else
+    return realloc(ptr, size);
+#endif
+}
+
+void av_free(void *ptr)
+{
+    /* XXX: this test should not be needed on most libcs */
+    if (ptr)
+#if CONFIG_MEMALIGN_HACK
+        free((char*)ptr - ((char*)ptr)[-1]);
+#else
+        free(ptr);
+#endif
+}
+
+void av_freep(void *arg)
+{
+    void **ptr= (void**)arg;
+    av_free(*ptr);
+    *ptr = NULL;
+}
+
+void *av_mallocz(unsigned int size)
+{
+    void *ptr = av_malloc(size);
+    if (ptr)
+        memset(ptr, 0, size);
+    return ptr;
+}
+
+char *av_strdup(const char *s)
+{
+    char *ptr= NULL;
+    if(s){
+        int len = strlen(s) + 1;
+        ptr = av_malloc(len);
+        if (ptr)
+            memcpy(ptr, s, len);
+    }
+    return ptr;
+}
+
diff --git a/apps/codecs/libwmapro/libavutil/mem.h b/apps/codecs/libwmapro/libavutil/mem.h
new file mode 100644
index 0000000000..fffbb872ae
--- /dev/null
+++ b/apps/codecs/libwmapro/libavutil/mem.h
@@ -0,0 +1,125 @@
+/*
+ * copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavutil/mem.h
+ * memory handling functions
+ */
+
+#ifndef AVUTIL_MEM_H
+#define AVUTIL_MEM_H
+
+#include "attributes.h"
+
+#if defined(__ICC) || defined(__SUNPRO_C)
+    #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (n))) v
+    #define DECLARE_ASM_CONST(n,t,v)    const t __attribute__ ((aligned (n))) v
+#elif defined(__TI_COMPILER_VERSION__)
+    #define DECLARE_ALIGNED(n,t,v)                      \
+        AV_PRAGMA(DATA_ALIGN(v,n))                      \
+        t __attribute__((aligned(n))) v
+    #define DECLARE_ASM_CONST(n,t,v)                    \
+        AV_PRAGMA(DATA_ALIGN(v,n))                      \
+        static const t __attribute__((aligned(n))) v
+#elif defined(__GNUC__)
+    #define DECLARE_ALIGNED(n,t,v)      t __attribute__ ((aligned (n))) v
+    #define DECLARE_ASM_CONST(n,t,v)    static const t attribute_used __attribute__ ((aligned (n))) v
+#elif defined(_MSC_VER)
+    #define DECLARE_ALIGNED(n,t,v)      __declspec(align(n)) t v
+    #define DECLARE_ASM_CONST(n,t,v)    __declspec(align(n)) static const t v
+#else
+    #define DECLARE_ALIGNED(n,t,v)      t v
+    #define DECLARE_ASM_CONST(n,t,v)    static const t v
+#endif
+
+#if AV_GCC_VERSION_AT_LEAST(3,1)
+    #define av_malloc_attrib __attribute__((__malloc__))
+#else
+    #define av_malloc_attrib
+#endif
+
+#if (!defined(__ICC) || __ICC > 1110) && AV_GCC_VERSION_AT_LEAST(4,3)
+    #define av_alloc_size(n) __attribute__((alloc_size(n)))
+#else
+    #define av_alloc_size(n)
+#endif
+
+/**
+ * Allocates a block of size bytes with alignment suitable for all
+ * memory accesses (including vectors if available on the CPU).
+ * @param size Size in bytes for the memory block to be allocated.
+ * @return Pointer to the allocated block, NULL if the block cannot
+ * be allocated.
+ * @see av_mallocz()
+ */
+void *av_malloc(unsigned int size) av_malloc_attrib av_alloc_size(1);
+
+/**
+ * Allocates or reallocates a block of memory.
+ * If ptr is NULL and size > 0, allocates a new block. If
+ * size is zero, frees the memory block pointed to by ptr.
+ * @param size Size in bytes for the memory block to be allocated or
+ * reallocated.
+ * @param ptr Pointer to a memory block already allocated with
+ * av_malloc(z)() or av_realloc() or NULL.
+ * @return Pointer to a newly reallocated block or NULL if the block
+ * cannot be reallocated or the function is used to free the memory block.
+ * @see av_fast_realloc()
+ */
+void *av_realloc(void *ptr, unsigned int size) av_alloc_size(2);
+
+/**
+ * Frees a memory block which has been allocated with av_malloc(z)() or
+ * av_realloc().
+ * @param ptr Pointer to the memory block which should be freed.
+ * @note ptr = NULL is explicitly allowed.
+ * @note It is recommended that you use av_freep() instead.
+ * @see av_freep()
+ */
+void av_free(void *ptr);
+
+/**
+ * Allocates a block of size bytes with alignment suitable for all
+ * memory accesses (including vectors if available on the CPU) and
+ * zeroes all the bytes of the block.
+ * @param size Size in bytes for the memory block to be allocated.
+ * @return Pointer to the allocated block, NULL if it cannot be allocated.
+ * @see av_malloc()
+ */
+void *av_mallocz(unsigned int size) av_malloc_attrib av_alloc_size(1);
+
+/**
+ * Duplicates the string s.
+ * @param s string to be duplicated
+ * @return Pointer to a newly allocated string containing a
+ * copy of s or NULL if the string cannot be allocated.
+ */
+char *av_strdup(const char *s) av_malloc_attrib;
+
+/**
+ * Frees a memory block which has been allocated with av_malloc(z)() or
+ * av_realloc() and set the pointer pointing to it to NULL.
+ * @param ptr Pointer to the pointer to the memory block which should
+ * be freed.
+ * @see av_free()
+ */
+void av_freep(void *ptr);
+
+#endif /* AVUTIL_MEM_H */
diff --git a/apps/codecs/libwmapro/mathops.h b/apps/codecs/libwmapro/mathops.h
new file mode 100644
index 0000000000..149910bb1d
--- /dev/null
+++ b/apps/codecs/libwmapro/mathops.h
@@ -0,0 +1,150 @@
+/*
+ * simple math operations
+ * Copyright (c) 2001, 2002 Fabrice Bellard
+ * Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVCODEC_MATHOPS_H
+#define AVCODEC_MATHOPS_H
+
+#include "libavutil/common.h"
+
+#if   ARCH_ARM
+#   include "arm/mathops.h"
+#elif ARCH_AVR32
+#   include "avr32/mathops.h"
+#elif ARCH_BFIN
+#   include "bfin/mathops.h"
+#elif ARCH_MIPS
+#   include "mips/mathops.h"
+#elif ARCH_PPC
+#   include "ppc/mathops.h"
+#elif ARCH_X86
+#   include "x86/mathops.h"
+#endif
+
+/* generic implementation */
+
+#ifndef MULL
+#   define MULL(a,b,s) (((int64_t)(a) * (int64_t)(b)) >> (s))
+#endif
+
+#ifndef MULH
+//gcc 3.4 creates an incredibly bloated mess out of this
+//#    define MULH(a,b) (((int64_t)(a) * (int64_t)(b))>>32)
+
+static av_always_inline int MULH(int a, int b){
+    return ((int64_t)(a) * (int64_t)(b))>>32;
+}
+#endif
+
+#ifndef UMULH
+static av_always_inline unsigned UMULH(unsigned a, unsigned b){
+    return ((uint64_t)(a) * (uint64_t)(b))>>32;
+}
+#endif
+
+#ifndef MUL64
+#   define MUL64(a,b) ((int64_t)(a) * (int64_t)(b))
+#endif
+
+#ifndef MAC64
+#   define MAC64(d, a, b) ((d) += MUL64(a, b))
+#endif
+
+#ifndef MLS64
+#   define MLS64(d, a, b) ((d) -= MUL64(a, b))
+#endif
+
+/* signed 16x16 -> 32 multiply add accumulate */
+#ifndef MAC16
+#   define MAC16(rt, ra, rb) rt += (ra) * (rb)
+#endif
+
+/* signed 16x16 -> 32 multiply */
+#ifndef MUL16
+#   define MUL16(ra, rb) ((ra) * (rb))
+#endif
+
+#ifndef MLS16
+#   define MLS16(rt, ra, rb) ((rt) -= (ra) * (rb))
+#endif
+
+/* median of 3 */
+#ifndef mid_pred
+#define mid_pred mid_pred
+static inline av_const int mid_pred(int a, int b, int c)
+{
+#if 0
+    int t= (a-b)&((a-b)>>31);
+    a-=t;
+    b+=t;
+    b-= (b-c)&((b-c)>>31);
+    b+= (a-b)&((a-b)>>31);
+
+    return b;
+#else
+    if(a>b){
+        if(c>b){
+            if(c>a) b=a;
+            else    b=c;
+        }
+    }else{
+        if(b>c){
+            if(c>a) b=c;
+            else    b=a;
+        }
+    }
+    return b;
+#endif
+}
+#endif
+
+#ifndef sign_extend
+static inline av_const int sign_extend(int val, unsigned bits)
+{
+    return (val << (INT_BIT - bits)) >> (INT_BIT - bits);
+}
+#endif
+
+#ifndef zero_extend
+static inline av_const unsigned zero_extend(unsigned val, unsigned bits)
+{
+    return (val << (INT_BIT - bits)) >> (INT_BIT - bits);
+}
+#endif
+
+#ifndef COPY3_IF_LT
+#define COPY3_IF_LT(x, y, a, b, c, d)\
+if ((y) < (x)) {\
+    (x) = (y);\
+    (a) = (b);\
+    (c) = (d);\
+}
+#endif
+
+#ifndef NEG_SSR32
+#   define NEG_SSR32(a,s) ((( int32_t)(a))>>(32-(s)))
+#endif
+
+#ifndef NEG_USR32
+#   define NEG_USR32(a,s) (((uint32_t)(a))>>(32-(s)))
+#endif
+
+#endif /* AVCODEC_MATHOPS_H */
+
diff --git a/apps/codecs/libwmapro/mdct.c b/apps/codecs/libwmapro/mdct.c
new file mode 100644
index 0000000000..9d0a59dc84
--- /dev/null
+++ b/apps/codecs/libwmapro/mdct.c
@@ -0,0 +1,232 @@
+/*
+ * MDCT/IMDCT transforms
+ * Copyright (c) 2002 Fabrice Bellard
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "libavutil/common.h"
+#include "libavutil/mathematics.h"
+#include "fft.h"
+
+/**
+ * @file libavcodec/mdct.c
+ * MDCT/IMDCT transforms.
+ */
+
+// Generate a Kaiser-Bessel Derived Window.
+#define BESSEL_I0_ITER 50 // default: 50 iterations of Bessel I0 approximation
+av_cold void ff_kbd_window_init(float *window, float alpha, int n)
+{
+   int i, j;
+   double sum = 0.0, bessel, tmp;
+   double local_window[n];
+   double alpha2 = (alpha * M_PI / n) * (alpha * M_PI / n);
+
+   for (i = 0; i < n; i++) {
+       tmp = i * (n - i) * alpha2;
+       bessel = 1.0;
+       for (j = BESSEL_I0_ITER; j > 0; j--)
+           bessel = bessel * tmp / (j * j) + 1;
+       sum += bessel;
+       local_window[i] = sum;
+   }
+
+   sum++;
+   for (i = 0; i < n; i++)
+       window[i] = sqrt(local_window[i] / sum);
+}
+
+#include "mdct_tablegen.h"
+
+/**
+ * init MDCT or IMDCT computation.
+ */
+av_cold int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale)
+{
+    int n, n4, i;
+    double alpha, theta;
+    int tstep;
+
+    memset(s, 0, sizeof(*s));
+    n = 1 << nbits;
+    s->mdct_bits = nbits;
+    s->mdct_size = n;
+    n4 = n >> 2;
+    s->permutation = FF_MDCT_PERM_NONE;
+
+    if (ff_fft_init(s, s->mdct_bits - 2, inverse) < 0)
+        goto fail;
+
+    s->tcos = av_malloc(n/2 * sizeof(FFTSample));
+    if (!s->tcos)
+        goto fail;
+
+    switch (s->permutation) {
+    case FF_MDCT_PERM_NONE:
+        s->tsin = s->tcos + n4;
+        tstep = 1;
+        break;
+    case FF_MDCT_PERM_INTERLEAVE:
+        s->tsin = s->tcos + 1;
+        tstep = 2;
+        break;
+    default:
+        goto fail;
+    }
+
+    theta = 1.0 / 8.0 + (scale < 0 ? n4 : 0);
+    scale = sqrt(fabs(scale));
+    for(i=0;i<n4;i++) {
+        alpha = 2 * M_PI * (i + theta) / n;
+        s->tcos[i*tstep] = -cos(alpha) * scale;
+        s->tsin[i*tstep] = -sin(alpha) * scale;
+    }
+    return 0;
+ fail:
+    ff_mdct_end(s);
+    return -1;
+}
+
+/* complex multiplication: p = a * b */
+#define CMUL(pre, pim, are, aim, bre, bim) \
+{\
+    FFTSample _are = (are);\
+    FFTSample _aim = (aim);\
+    FFTSample _bre = (bre);\
+    FFTSample _bim = (bim);\
+    (pre) = _are * _bre - _aim * _bim;\
+    (pim) = _are * _bim + _aim * _bre;\
+}
+
+/**
+ * Compute the middle half of the inverse MDCT of size N = 2^nbits,
+ * thus excluding the parts that can be derived by symmetry
+ * @param output N/2 samples
+ * @param input N/2 samples
+ */
+void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    int k, n8, n4, n2, n, j;
+    const uint16_t *revtab = s->revtab;
+    const FFTSample *tcos = s->tcos;
+    const FFTSample *tsin = s->tsin;
+    const FFTSample *in1, *in2;
+    FFTComplex *z = (FFTComplex *)output;
+
+    n = 1 << s->mdct_bits;
+    n2 = n >> 1;
+    n4 = n >> 2;
+    n8 = n >> 3;
+
+    /* pre rotation */
+    in1 = input;
+    in2 = input + n2 - 1;
+    for(k = 0; k < n4; k++) {
+        j=revtab[k];
+        CMUL(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]);
+        in1 += 2;
+        in2 -= 2;
+    }
+    ff_fft_calc(s, z);
+
+    /* post rotation + reordering */
+    for(k = 0; k < n8; k++) {
+        FFTSample r0, i0, r1, i1;
+        CMUL(r0, i1, z[n8-k-1].im, z[n8-k-1].re, tsin[n8-k-1], tcos[n8-k-1]);
+        CMUL(r1, i0, z[n8+k  ].im, z[n8+k  ].re, tsin[n8+k  ], tcos[n8+k  ]);
+        z[n8-k-1].re = r0;
+        z[n8-k-1].im = i0;
+        z[n8+k  ].re = r1;
+        z[n8+k  ].im = i1;
+    }
+}
+
+/**
+ * Compute inverse MDCT of size N = 2^nbits
+ * @param output N samples
+ * @param input N/2 samples
+ */
+void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input)
+{
+    int k;
+    int n = 1 << s->mdct_bits;
+    int n2 = n >> 1;
+    int n4 = n >> 2;
+
+    ff_imdct_half_c(s, output+n4, input);
+
+    for(k = 0; k < n4; k++) {
+        output[k] = -output[n2-k-1];
+        output[n-k-1] = output[n2+k];
+    }
+}
+
+/**
+ * Compute MDCT of size N = 2^nbits
+ * @param input N samples
+ * @param out N/2 samples
+ */
+void ff_mdct_calc_c(FFTContext *s, FFTSample *out, const FFTSample *input)
+{
+    int i, j, n, n8, n4, n2, n3;
+    FFTSample re, im;
+    const uint16_t *revtab = s->revtab;
+    const FFTSample *tcos = s->tcos;
+    const FFTSample *tsin = s->tsin;
+    FFTComplex *x = (FFTComplex *)out;
+
+    n = 1 << s->mdct_bits;
+    n2 = n >> 1;
+    n4 = n >> 2;
+    n8 = n >> 3;
+    n3 = 3 * n4;
+
+    /* pre rotation */
+    for(i=0;i<n8;i++) {
+        re = -input[2*i+3*n4] - input[n3-1-2*i];
+        im = -input[n4+2*i] + input[n4-1-2*i];
+        j = revtab[i];
+        CMUL(x[j].re, x[j].im, re, im, -tcos[i], tsin[i]);
+
+        re = input[2*i] - input[n2-1-2*i];
+        im = -(input[n2+2*i] + input[n-1-2*i]);
+        j = revtab[n8 + i];
+        CMUL(x[j].re, x[j].im, re, im, -tcos[n8 + i], tsin[n8 + i]);
+    }
+
+    ff_fft_calc(s, x);
+
+    /* post rotation */
+    for(i=0;i<n8;i++) {
+        FFTSample r0, i0, r1, i1;
+        CMUL(i1, r0, x[n8-i-1].re, x[n8-i-1].im, -tsin[n8-i-1], -tcos[n8-i-1]);
+        CMUL(i0, r1, x[n8+i  ].re, x[n8+i  ].im, -tsin[n8+i  ], -tcos[n8+i  ]);
+        x[n8-i-1].re = r0;
+        x[n8-i-1].im = i0;
+        x[n8+i  ].re = r1;
+        x[n8+i  ].im = i1;
+    }
+}
+
+av_cold void ff_mdct_end(FFTContext *s)
+{
+    av_freep(&s->tcos);
+    ff_fft_end(s);
+}
diff --git a/apps/codecs/libwmapro/mdct_tablegen.h b/apps/codecs/libwmapro/mdct_tablegen.h
new file mode 100644
index 0000000000..1722c3b4d0
--- /dev/null
+++ b/apps/codecs/libwmapro/mdct_tablegen.h
@@ -0,0 +1,60 @@
+/*
+ * Header file for hardcoded MDCT tables
+ *
+ * Copyright (c) 2009 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <assert.h>
+// do not use libavutil/mathematics.h since this is compiled both
+// for the host and the target and config.h is only valid for the target
+#include <math.h>
+#include "../libavutil/attributes.h"
+
+#if !CONFIG_HARDCODED_TABLES
+SINETABLE(  32);
+SINETABLE(  64);
+SINETABLE( 128);
+SINETABLE( 256);
+SINETABLE( 512);
+SINETABLE(1024);
+SINETABLE(2048);
+SINETABLE(4096);
+#else
+#include "libavcodec/mdct_tables.h"
+#endif
+
+SINETABLE_CONST float * const ff_sine_windows[] = {
+    NULL, NULL, NULL, NULL, NULL, // unused
+    ff_sine_32 , ff_sine_64 ,
+    ff_sine_128, ff_sine_256, ff_sine_512, ff_sine_1024, ff_sine_2048, ff_sine_4096
+};
+
+// Generate a sine window.
+av_cold void ff_sine_window_init(float *window, int n) {
+    int i;
+    for(i = 0; i < n; i++)
+        window[i] = sinf((i + 0.5) * (M_PI / (2.0 * n)));
+}
+
+av_cold void ff_init_ff_sine_windows(int index) {
+    assert(index >= 0 && index < FF_ARRAY_ELEMS(ff_sine_windows));
+#if !CONFIG_HARDCODED_TABLES
+    ff_sine_window_init(ff_sine_windows[index], 1 << index);
+#endif
+}
diff --git a/apps/codecs/libwmapro/put_bits.h b/apps/codecs/libwmapro/put_bits.h
new file mode 100644
index 0000000000..c0139661a5
--- /dev/null
+++ b/apps/codecs/libwmapro/put_bits.h
@@ -0,0 +1,343 @@
+/*
+ * copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file libavcodec/put_bits.h
+ * bitstream writer API
+ */
+
+#ifndef AVCODEC_PUT_BITS_H
+#define AVCODEC_PUT_BITS_H
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "libavutil/bswap.h"
+#include "libavutil/common.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/log.h"
+#include "mathops.h"
+
+//#define ALT_BITSTREAM_WRITER
+//#define ALIGNED_BITSTREAM_WRITER
+
+/* buf and buf_end must be present and used by every alternative writer. */
+typedef struct PutBitContext {
+#ifdef ALT_BITSTREAM_WRITER
+    uint8_t *buf, *buf_end;
+    int index;
+#else
+    uint32_t bit_buf;
+    int bit_left;
+    uint8_t *buf, *buf_ptr, *buf_end;
+#endif
+    int size_in_bits;
+} PutBitContext;
+
+/**
+ * Initializes the PutBitContext s.
+ *
+ * @param buffer the buffer where to put bits
+ * @param buffer_size the size in bytes of buffer
+ */
+static inline void init_put_bits(PutBitContext *s, uint8_t *buffer, int buffer_size)
+{
+    if(buffer_size < 0) {
+        buffer_size = 0;
+        buffer = NULL;
+    }
+
+    s->size_in_bits= 8*buffer_size;
+    s->buf = buffer;
+    s->buf_end = s->buf + buffer_size;
+#ifdef ALT_BITSTREAM_WRITER
+    s->index=0;
+    ((uint32_t*)(s->buf))[0]=0;
+//    memset(buffer, 0, buffer_size);
+#else
+    s->buf_ptr = s->buf;
+    s->bit_left=32;
+    s->bit_buf=0;
+#endif
+}
+
+/**
+ * Returns the total number of bits written to the bitstream.
+ */
+static inline int put_bits_count(PutBitContext *s)
+{
+#ifdef ALT_BITSTREAM_WRITER
+    return s->index;
+#else
+    return (s->buf_ptr - s->buf) * 8 + 32 - s->bit_left;
+#endif
+}
+
+/**
+ * Pads the end of the output stream with zeros.
+ */
+static inline void flush_put_bits(PutBitContext *s)
+{
+#ifdef ALT_BITSTREAM_WRITER
+    align_put_bits(s);
+#else
+#ifndef BITSTREAM_WRITER_LE
+    s->bit_buf<<= s->bit_left;
+#endif
+    while (s->bit_left < 32) {
+        /* XXX: should test end of buffer */
+#ifdef BITSTREAM_WRITER_LE
+        *s->buf_ptr++=s->bit_buf;
+        s->bit_buf>>=8;
+#else
+        *s->buf_ptr++=s->bit_buf >> 24;
+        s->bit_buf<<=8;
+#endif
+        s->bit_left+=8;
+    }
+    s->bit_left=32;
+    s->bit_buf=0;
+#endif
+}
+
+#if defined(ALT_BITSTREAM_WRITER) || defined(BITSTREAM_WRITER_LE)
+#define align_put_bits align_put_bits_unsupported_here
+#define ff_put_string ff_put_string_unsupported_here
+#define ff_copy_bits ff_copy_bits_unsupported_here
+#else
+/**
+ * Pads the bitstream with zeros up to the next byte boundary.
+ */
+void align_put_bits(PutBitContext *s);
+
+/**
+ * Puts the string string in the bitstream.
+ *
+ * @param terminate_string 0-terminates the written string if value is 1
+ */
+void ff_put_string(PutBitContext *pb, const char *string, int terminate_string);
+
+/**
+ * Copies the content of src to the bitstream.
+ *
+ * @param length the number of bits of src to copy
+ */
+void ff_copy_bits(PutBitContext *pb, const uint8_t *src, int length);
+#endif
+
+/**
+ * Writes up to 31 bits into a bitstream.
+ * Use put_bits32 to write 32 bits.
+ */
+static inline void put_bits(PutBitContext *s, int n, unsigned int value)
+#ifndef ALT_BITSTREAM_WRITER
+{
+    unsigned int bit_buf;
+    int bit_left;
+
+    //    printf("put_bits=%d %x\n", n, value);
+    assert(n <= 31 && value < (1U << n));
+
+    bit_buf = s->bit_buf;
+    bit_left = s->bit_left;
+
+    //    printf("n=%d value=%x cnt=%d buf=%x\n", n, value, bit_cnt, bit_buf);
+    /* XXX: optimize */
+#ifdef BITSTREAM_WRITER_LE
+    bit_buf |= value << (32 - bit_left);
+    if (n >= bit_left) {
+#if !HAVE_FAST_UNALIGNED
+        if (3 & (intptr_t) s->buf_ptr) {
+            AV_WL32(s->buf_ptr, bit_buf);
+        } else
+#endif
+        *(uint32_t *)s->buf_ptr = le2me_32(bit_buf);
+        s->buf_ptr+=4;
+        bit_buf = (bit_left==32)?0:value >> bit_left;
+        bit_left+=32;
+    }
+    bit_left-=n;
+#else
+    if (n < bit_left) {
+        bit_buf = (bit_buf<<n) | value;
+        bit_left-=n;
+    } else {
+        bit_buf<<=bit_left;
+        bit_buf |= value >> (n - bit_left);
+#if !HAVE_FAST_UNALIGNED
+        if (3 & (intptr_t) s->buf_ptr) {
+            AV_WB32(s->buf_ptr, bit_buf);
+        } else
+#endif
+        *(uint32_t *)s->buf_ptr = be2me_32(bit_buf);
+        //printf("bitbuf = %08x\n", bit_buf);
+        s->buf_ptr+=4;
+        bit_left+=32 - n;
+        bit_buf = value;
+    }
+#endif
+
+    s->bit_buf = bit_buf;
+    s->bit_left = bit_left;
+}
+#else  /* ALT_BITSTREAM_WRITER defined */
+{
+#    ifdef ALIGNED_BITSTREAM_WRITER
+#        if ARCH_X86
+    __asm__ volatile(
+        "movl %0, %%ecx                 \n\t"
+        "xorl %%eax, %%eax              \n\t"
+        "shrdl %%cl, %1, %%eax          \n\t"
+        "shrl %%cl, %1                  \n\t"
+        "movl %0, %%ecx                 \n\t"
+        "shrl $3, %%ecx                 \n\t"
+        "andl $0xFFFFFFFC, %%ecx        \n\t"
+        "bswapl %1                      \n\t"
+        "orl %1, (%2, %%ecx)            \n\t"
+        "bswapl %%eax                   \n\t"
+        "addl %3, %0                    \n\t"
+        "movl %%eax, 4(%2, %%ecx)       \n\t"
+        : "=&r" (s->index), "=&r" (value)
+        : "r" (s->buf), "r" (n), "0" (s->index), "1" (value<<(-n))
+        : "%eax", "%ecx"
+    );
+#        else
+    int index= s->index;
+    uint32_t *ptr= ((uint32_t *)s->buf)+(index>>5);
+
+    value<<= 32-n;
+
+    ptr[0] |= be2me_32(value>>(index&31));
+    ptr[1]  = be2me_32(value<<(32-(index&31)));
+//if(n>24) printf("%d %d\n", n, value);
+    index+= n;
+    s->index= index;
+#        endif
+#    else //ALIGNED_BITSTREAM_WRITER
+#        if ARCH_X86
+    __asm__ volatile(
+        "movl $7, %%ecx                 \n\t"
+        "andl %0, %%ecx                 \n\t"
+        "addl %3, %%ecx                 \n\t"
+        "negl %%ecx                     \n\t"
+        "shll %%cl, %1                  \n\t"
+        "bswapl %1                      \n\t"
+        "movl %0, %%ecx                 \n\t"
+        "shrl $3, %%ecx                 \n\t"
+        "orl %1, (%%ecx, %2)            \n\t"
+        "addl %3, %0                    \n\t"
+        "movl $0, 4(%%ecx, %2)          \n\t"
+        : "=&r" (s->index), "=&r" (value)
+        : "r" (s->buf), "r" (n), "0" (s->index), "1" (value)
+        : "%ecx"
+    );
+#        else
+    int index= s->index;
+    uint32_t *ptr= (uint32_t*)(((uint8_t *)s->buf)+(index>>3));
+
+    ptr[0] |= be2me_32(value<<(32-n-(index&7) ));
+    ptr[1] = 0;
+//if(n>24) printf("%d %d\n", n, value);
+    index+= n;
+    s->index= index;
+#        endif
+#    endif //!ALIGNED_BITSTREAM_WRITER
+}
+#endif
+
+static inline void put_sbits(PutBitContext *pb, int n, int32_t value)
+{
+    assert(n >= 0 && n <= 31);
+
+    put_bits(pb, n, value & ((1<<n)-1));
+}
+
+/**
+ * Writes exactly 32 bits into a bitstream.
+ */
+static void av_unused put_bits32(PutBitContext *s, uint32_t value)
+{
+    int lo = value & 0xffff;
+    int hi = value >> 16;
+#ifdef BITSTREAM_WRITER_LE
+    put_bits(s, 16, lo);
+    put_bits(s, 16, hi);
+#else
+    put_bits(s, 16, hi);
+    put_bits(s, 16, lo);
+#endif
+}
+
+/**
+ * Returns the pointer to the byte where the bitstream writer will put
+ * the next bit.
+ */
+static inline uint8_t* put_bits_ptr(PutBitContext *s)
+{
+#ifdef ALT_BITSTREAM_WRITER
+        return s->buf + (s->index>>3);
+#else
+        return s->buf_ptr;
+#endif
+}
+
+/**
+ * Skips the given number of bytes.
+ * PutBitContext must be flushed & aligned to a byte boundary before calling this.
+ */
+static inline void skip_put_bytes(PutBitContext *s, int n)
+{
+        assert((put_bits_count(s)&7)==0);
+#ifdef ALT_BITSTREAM_WRITER
+        FIXME may need some cleaning of the buffer
+        s->index += n<<3;
+#else
+        assert(s->bit_left==32);
+        s->buf_ptr += n;
+#endif
+}
+
+/**
+ * Skips the given number of bits.
+ * Must only be used if the actual values in the bitstream do not matter.
+ * If n is 0 the behavior is undefined.
+ */
+static inline void skip_put_bits(PutBitContext *s, int n)
+{
+#ifdef ALT_BITSTREAM_WRITER
+    s->index += n;
+#else
+    s->bit_left -= n;
+    s->buf_ptr-= 4*(s->bit_left>>5);
+    s->bit_left &= 31;
+#endif
+}
+
+/**
+ * Changes the end of the buffer.
+ *
+ * @param size the new size in bytes of the buffer where to put bits
+ */
+static inline void set_put_bits_buffer_size(PutBitContext *s, int size)
+{
+    s->buf_end= s->buf + size;
+}
+
+#endif /* AVCODEC_PUT_BITS_H */
diff --git a/apps/codecs/libwmapro/wma.c b/apps/codecs/libwmapro/wma.c
new file mode 100644
index 0000000000..65780459aa
--- /dev/null
+++ b/apps/codecs/libwmapro/wma.c
@@ -0,0 +1,523 @@
+/*
+ * WMA compatible codec
+ * Copyright (c) 2002-2007 The FFmpeg Project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "avcodec.h"
+#include "wma.h"
+#include "wmadata.h"
+
+#undef NDEBUG
+#include <assert.h>
+
+/* XXX: use same run/length optimization as mpeg decoders */
+//FIXME maybe split decode / encode or pass flag
+static void init_coef_vlc(VLC *vlc, uint16_t **prun_table,
+                          float **plevel_table, uint16_t **pint_table,
+                          const CoefVLCTable *vlc_table)
+{
+    int n = vlc_table->n;
+    const uint8_t  *table_bits   = vlc_table->huffbits;
+    const uint32_t *table_codes  = vlc_table->huffcodes;
+    const uint16_t *levels_table = vlc_table->levels;
+    uint16_t *run_table, *level_table, *int_table;
+    float *flevel_table;
+    int i, l, j, k, level;
+
+    init_vlc(vlc, VLCBITS, n, table_bits, 1, 1, table_codes, 4, 4, 0);
+
+    run_table   = av_malloc(n * sizeof(uint16_t));
+    level_table = av_malloc(n * sizeof(uint16_t));
+    flevel_table= av_malloc(n * sizeof(*flevel_table));
+    int_table   = av_malloc(n * sizeof(uint16_t));
+    i = 2;
+    level = 1;
+    k = 0;
+    while (i < n) {
+        int_table[k] = i;
+        l = levels_table[k++];
+        for (j = 0; j < l; j++) {
+            run_table[i]   = j;
+            level_table[i] = level;
+            flevel_table[i]= level;
+            i++;
+        }
+        level++;
+    }
+    *prun_table   = run_table;
+    *plevel_table = flevel_table;
+    *pint_table   = int_table;
+    av_free(level_table);
+}
+
+/**
+ *@brief Get the samples per frame for this stream.
+ *@param sample_rate output sample_rate
+ *@param version wma version
+ *@param decode_flags codec compression features
+ *@return log2 of the number of output samples per frame
+ */
+int av_cold ff_wma_get_frame_len_bits(int sample_rate, int version,
+                                      unsigned int decode_flags)
+{
+
+    int frame_len_bits;
+
+    if (sample_rate <= 16000) {
+        frame_len_bits = 9;
+    } else if (sample_rate <= 22050 ||
+             (sample_rate <= 32000 && version == 1)) {
+        frame_len_bits = 10;
+    } else if (sample_rate <= 48000) {
+        frame_len_bits = 11;
+    } else if (sample_rate <= 96000) {
+        frame_len_bits = 12;
+    } else {
+        frame_len_bits = 13;
+    }
+
+    if (version == 3) {
+        int tmp = decode_flags & 0x6;
+        if (tmp == 0x2) {
+            ++frame_len_bits;
+        } else if (tmp == 0x4) {
+            --frame_len_bits;
+        } else if (tmp == 0x6) {
+            frame_len_bits -= 2;
+        }
+    }
+
+    return frame_len_bits;
+}
+
+int ff_wma_init(AVCodecContext *avctx, int flags2)
+{
+    WMACodecContext *s = avctx->priv_data;
+    int i;
+    float bps1, high_freq;
+    volatile float bps;
+    int sample_rate1;
+    int coef_vlc_table;
+
+    if (   avctx->sample_rate <= 0 || avctx->sample_rate > 50000
+        || avctx->channels    <= 0 || avctx->channels    > 8
+        || avctx->bit_rate    <= 0)
+        return -1;
+
+    s->sample_rate = avctx->sample_rate;
+    s->nb_channels = avctx->channels;
+    s->bit_rate    = avctx->bit_rate;
+    s->block_align = avctx->block_align;
+
+    dsputil_init(&s->dsp, avctx);
+
+    if (avctx->codec->id == CODEC_ID_WMAV1) {
+        s->version = 1;
+    } else {
+        s->version = 2;
+    }
+
+    /* compute MDCT block size */
+    s->frame_len_bits = ff_wma_get_frame_len_bits(s->sample_rate, s->version, 0);
+
+    s->frame_len = 1 << s->frame_len_bits;
+    if (s->use_variable_block_len) {
+        int nb_max, nb;
+        nb = ((flags2 >> 3) & 3) + 1;
+        if ((s->bit_rate / s->nb_channels) >= 32000)
+            nb += 2;
+        nb_max = s->frame_len_bits - BLOCK_MIN_BITS;
+        if (nb > nb_max)
+            nb = nb_max;
+        s->nb_block_sizes = nb + 1;
+    } else {
+        s->nb_block_sizes = 1;
+    }
+
+    /* init rate dependent parameters */
+    s->use_noise_coding = 1;
+    high_freq = s->sample_rate * 0.5;
+
+    /* if version 2, then the rates are normalized */
+    sample_rate1 = s->sample_rate;
+    if (s->version == 2) {
+        if (sample_rate1 >= 44100) {
+            sample_rate1 = 44100;
+        } else if (sample_rate1 >= 22050) {
+            sample_rate1 = 22050;
+        } else if (sample_rate1 >= 16000) {
+            sample_rate1 = 16000;
+        } else if (sample_rate1 >= 11025) {
+            sample_rate1 = 11025;
+        } else if (sample_rate1 >= 8000) {
+            sample_rate1 = 8000;
+        }
+    }
+
+    bps = (float)s->bit_rate / (float)(s->nb_channels * s->sample_rate);
+    s->byte_offset_bits = av_log2((int)(bps * s->frame_len / 8.0 + 0.5)) + 2;
+
+    /* compute high frequency value and choose if noise coding should
+       be activated */
+    bps1 = bps;
+    if (s->nb_channels == 2)
+        bps1 = bps * 1.6;
+    if (sample_rate1 == 44100) {
+        if (bps1 >= 0.61) {
+            s->use_noise_coding = 0;
+        } else {
+            high_freq = high_freq * 0.4;
+        }
+    } else if (sample_rate1 == 22050) {
+        if (bps1 >= 1.16) {
+            s->use_noise_coding = 0;
+        } else if (bps1 >= 0.72) {
+            high_freq = high_freq * 0.7;
+        } else {
+            high_freq = high_freq * 0.6;
+        }
+    } else if (sample_rate1 == 16000) {
+        if (bps > 0.5) {
+            high_freq = high_freq * 0.5;
+        } else {
+            high_freq = high_freq * 0.3;
+        }
+    } else if (sample_rate1 == 11025) {
+        high_freq = high_freq * 0.7;
+    } else if (sample_rate1 == 8000) {
+        if (bps <= 0.625) {
+            high_freq = high_freq * 0.5;
+        } else if (bps > 0.75) {
+            s->use_noise_coding = 0;
+        } else {
+            high_freq = high_freq * 0.65;
+        }
+    } else {
+        if (bps >= 0.8) {
+            high_freq = high_freq * 0.75;
+        } else if (bps >= 0.6) {
+            high_freq = high_freq * 0.6;
+        } else {
+            high_freq = high_freq * 0.5;
+        }
+    }
+    dprintf(s->avctx, "flags2=0x%x\n", flags2);
+    dprintf(s->avctx, "version=%d channels=%d sample_rate=%d bitrate=%d block_align=%d\n",
+            s->version, s->nb_channels, s->sample_rate, s->bit_rate,
+            s->block_align);
+    dprintf(s->avctx, "bps=%f bps1=%f high_freq=%f bitoffset=%d\n",
+            bps, bps1, high_freq, s->byte_offset_bits);
+    dprintf(s->avctx, "use_noise_coding=%d use_exp_vlc=%d nb_block_sizes=%d\n",
+            s->use_noise_coding, s->use_exp_vlc, s->nb_block_sizes);
+
+    /* compute the scale factor band sizes for each MDCT block size */
+    {
+        int a, b, pos, lpos, k, block_len, i, j, n;
+        const uint8_t *table;
+
+        if (s->version == 1) {
+            s->coefs_start = 3;
+        } else {
+            s->coefs_start = 0;
+        }
+        for (k = 0; k < s->nb_block_sizes; k++) {
+            block_len = s->frame_len >> k;
+
+            if (s->version == 1) {
+                lpos = 0;
+                for (i = 0; i < 25; i++) {
+                    a = ff_wma_critical_freqs[i];
+                    b = s->sample_rate;
+                    pos = ((block_len * 2 * a) + (b >> 1)) / b;
+                    if (pos > block_len)
+                        pos = block_len;
+                    s->exponent_bands[0][i] = pos - lpos;
+                    if (pos >= block_len) {
+                        i++;
+                        break;
+                    }
+                    lpos = pos;
+                }
+                s->exponent_sizes[0] = i;
+            } else {
+                /* hardcoded tables */
+                table = NULL;
+                a = s->frame_len_bits - BLOCK_MIN_BITS - k;
+                if (a < 3) {
+                    if (s->sample_rate >= 44100) {
+                        table = exponent_band_44100[a];
+                    } else if (s->sample_rate >= 32000) {
+                        table = exponent_band_32000[a];
+                    } else if (s->sample_rate >= 22050) {
+                        table = exponent_band_22050[a];
+                    }
+                }
+                if (table) {
+                    n = *table++;
+                    for (i = 0; i < n; i++)
+                        s->exponent_bands[k][i] = table[i];
+                    s->exponent_sizes[k] = n;
+                } else {
+                    j = 0;
+                    lpos = 0;
+                    for (i = 0; i < 25; i++) {
+                        a = ff_wma_critical_freqs[i];
+                        b = s->sample_rate;
+                        pos = ((block_len * 2 * a) + (b << 1)) / (4 * b);
+                        pos <<= 2;
+                        if (pos > block_len)
+                            pos = block_len;
+                        if (pos > lpos)
+                            s->exponent_bands[k][j++] = pos - lpos;
+                        if (pos >= block_len)
+                            break;
+                        lpos = pos;
+                    }
+                    s->exponent_sizes[k] = j;
+                }
+            }
+
+            /* max number of coefs */
+            s->coefs_end[k] = (s->frame_len - ((s->frame_len * 9) / 100)) >> k;
+            /* high freq computation */
+            s->high_band_start[k] = (int)((block_len * 2 * high_freq) /
+                                          s->sample_rate + 0.5);
+            n = s->exponent_sizes[k];
+            j = 0;
+            pos = 0;
+            for (i = 0; i < n; i++) {
+                int start, end;
+                start = pos;
+                pos += s->exponent_bands[k][i];
+                end = pos;
+                if (start < s->high_band_start[k])
+                    start = s->high_band_start[k];
+                if (end > s->coefs_end[k])
+                    end = s->coefs_end[k];
+                if (end > start)
+                    s->exponent_high_bands[k][j++] = end - start;
+            }
+            s->exponent_high_sizes[k] = j;
+#if 0
+            tprintf(s->avctx, "%5d: coefs_end=%d high_band_start=%d nb_high_bands=%d: ",
+                    s->frame_len >> k,
+                    s->coefs_end[k],
+                    s->high_band_start[k],
+                    s->exponent_high_sizes[k]);
+            for (j = 0; j < s->exponent_high_sizes[k]; j++)
+                tprintf(s->avctx, " %d", s->exponent_high_bands[k][j]);
+            tprintf(s->avctx, "\n");
+#endif
+        }
+    }
+
+#ifdef TRACE
+    {
+        int i, j;
+        for (i = 0; i < s->nb_block_sizes; i++) {
+            tprintf(s->avctx, "%5d: n=%2d:",
+                    s->frame_len >> i,
+                    s->exponent_sizes[i]);
+            for (j = 0; j < s->exponent_sizes[i]; j++)
+                tprintf(s->avctx, " %d", s->exponent_bands[i][j]);
+            tprintf(s->avctx, "\n");
+        }
+    }
+#endif
+
+    /* init MDCT windows : simple sinus window */
+    for (i = 0; i < s->nb_block_sizes; i++) {
+        ff_init_ff_sine_windows(s->frame_len_bits - i);
+        s->windows[i] = ff_sine_windows[s->frame_len_bits - i];
+    }
+
+    s->reset_block_lengths = 1;
+
+    if (s->use_noise_coding) {
+
+        /* init the noise generator */
+        if (s->use_exp_vlc) {
+            s->noise_mult = 0.02;
+        } else {
+            s->noise_mult = 0.04;
+        }
+
+#ifdef TRACE
+        for (i = 0; i < NOISE_TAB_SIZE; i++)
+            s->noise_table[i] = 1.0 * s->noise_mult;
+#else
+        {
+            unsigned int seed;
+            float norm;
+            seed = 1;
+            norm = (1.0 / (float)(1LL << 31)) * sqrt(3) * s->noise_mult;
+            for (i = 0; i < NOISE_TAB_SIZE; i++) {
+                seed = seed * 314159 + 1;
+                s->noise_table[i] = (float)((int)seed) * norm;
+            }
+        }
+#endif
+    }
+
+    /* choose the VLC tables for the coefficients */
+    coef_vlc_table = 2;
+    if (s->sample_rate >= 32000) {
+        if (bps1 < 0.72) {
+            coef_vlc_table = 0;
+        } else if (bps1 < 1.16) {
+            coef_vlc_table = 1;
+        }
+    }
+    s->coef_vlcs[0]= &coef_vlcs[coef_vlc_table * 2    ];
+    s->coef_vlcs[1]= &coef_vlcs[coef_vlc_table * 2 + 1];
+    init_coef_vlc(&s->coef_vlc[0], &s->run_table[0], &s->level_table[0], &s->int_table[0],
+                  s->coef_vlcs[0]);
+    init_coef_vlc(&s->coef_vlc[1], &s->run_table[1], &s->level_table[1], &s->int_table[1],
+                  s->coef_vlcs[1]);
+
+    return 0;
+}
+
+int ff_wma_total_gain_to_bits(int total_gain)
+{
+         if (total_gain < 15) return 13;
+    else if (total_gain < 32) return 12;
+    else if (total_gain < 40) return 11;
+    else if (total_gain < 45) return 10;
+    else                      return  9;
+}
+
+int ff_wma_end(AVCodecContext *avctx)
+{
+    WMACodecContext *s = avctx->priv_data;
+    int i;
+
+    for (i = 0; i < s->nb_block_sizes; i++)
+        ff_mdct_end(&s->mdct_ctx[i]);
+
+    if (s->use_exp_vlc) {
+        free_vlc(&s->exp_vlc);
+    }
+    if (s->use_noise_coding) {
+        free_vlc(&s->hgain_vlc);
+    }
+    for (i = 0; i < 2; i++) {
+        free_vlc(&s->coef_vlc[i]);
+        av_free(s->run_table[i]);
+        av_free(s->level_table[i]);
+        av_free(s->int_table[i]);
+    }
+
+    return 0;
+}
+
+/**
+ * Decode an uncompressed coefficient.
+ * @param s codec context
+ * @return the decoded coefficient
+ */
+unsigned int ff_wma_get_large_val(GetBitContext* gb)
+{
+    /** consumes up to 34 bits */
+    int n_bits = 8;
+    /** decode length */
+    if (get_bits1(gb)) {
+        n_bits += 8;
+        if (get_bits1(gb)) {
+            n_bits += 8;
+            if (get_bits1(gb)) {
+                n_bits += 7;
+            }
+        }
+    }
+    return get_bits_long(gb, n_bits);
+}
+
+/**
+ * Decode run level compressed coefficients.
+ * @param avctx codec context
+ * @param gb bitstream reader context
+ * @param vlc vlc table for get_vlc2
+ * @param level_table level codes
+ * @param run_table run codes
+ * @param version 0 for wma1,2 1 for wmapro
+ * @param ptr output buffer
+ * @param offset offset in the output buffer
+ * @param num_coefs number of input coefficents
+ * @param block_len input buffer length (2^n)
+ * @param frame_len_bits number of bits for escaped run codes
+ * @param coef_nb_bits number of bits for escaped level codes
+ * @return 0 on success, -1 otherwise
+ */
+int ff_wma_run_level_decode(AVCodecContext* avctx, GetBitContext* gb,
+                            VLC *vlc,
+                            const float *level_table, const uint16_t *run_table,
+                            int version, WMACoef *ptr, int offset,
+                            int num_coefs, int block_len, int frame_len_bits,
+                            int coef_nb_bits)
+{
+    int code, level, sign;
+    const uint32_t *ilvl = (const uint32_t*)level_table;
+    uint32_t *iptr = (uint32_t*)ptr;
+    const unsigned int coef_mask = block_len - 1;
+    for (; offset < num_coefs; offset++) {
+        code = get_vlc2(gb, vlc->table, VLCBITS, VLCMAX);
+        if (code > 1) {
+            /** normal code */
+            offset += run_table[code];
+            sign = get_bits1(gb) - 1;
+            iptr[offset & coef_mask] = ilvl[code] ^ sign<<31;
+        } else if (code == 1) {
+            /** EOB */
+            break;
+        } else {
+            /** escape */
+            if (!version) {
+                level = get_bits(gb, coef_nb_bits);
+                /** NOTE: this is rather suboptimal. reading
+                    block_len_bits would be better */
+                offset += get_bits(gb, frame_len_bits);
+            } else {
+                level = ff_wma_get_large_val(gb);
+                /** escape decode */
+                if (get_bits1(gb)) {
+                    if (get_bits1(gb)) {
+                        if (get_bits1(gb)) {
+                            av_log(avctx,AV_LOG_ERROR,
+                                "broken escape sequence\n");
+                            return -1;
+                        } else
+                            offset += get_bits(gb, frame_len_bits) + 4;
+                    } else
+                        offset += get_bits(gb, 2) + 1;
+                }
+            }
+            sign = get_bits1(gb) - 1;
+            ptr[offset & coef_mask] = (level^sign) - sign;
+        }
+    }
+    /** NOTE: EOB can be omitted */
+    if (offset > num_coefs) {
+        av_log(avctx, AV_LOG_ERROR, "overflow in spectral RLE, ignoring\n");
+        return -1;
+    }
+
+    return 0;
+}
+
diff --git a/apps/codecs/libwmapro/wma.h b/apps/codecs/libwmapro/wma.h
new file mode 100644
index 0000000000..11274ad970
--- /dev/null
+++ b/apps/codecs/libwmapro/wma.h
@@ -0,0 +1,163 @@
+/*
+ * WMA compatible codec
+ * Copyright (c) 2002-2007 The FFmpeg Project
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_WMA_H
+#define AVCODEC_WMA_H
+
+#include "get_bits.h"
+#include "put_bits.h"
+#include "dsputil.h"
+#include "fft.h"
+
+/* size of blocks */
+#define BLOCK_MIN_BITS 7
+#define BLOCK_MAX_BITS 11
+#define BLOCK_MAX_SIZE (1 << BLOCK_MAX_BITS)
+
+#define BLOCK_NB_SIZES (BLOCK_MAX_BITS - BLOCK_MIN_BITS + 1)
+
+/* XXX: find exact max size */
+#define HIGH_BAND_MAX_SIZE 16
+
+#define NB_LSP_COEFS 10
+
+/* XXX: is it a suitable value ? */
+#define MAX_CODED_SUPERFRAME_SIZE 16384
+
+#define MAX_CHANNELS 2
+
+#define NOISE_TAB_SIZE 8192
+
+#define LSP_POW_BITS 7
+
+//FIXME should be in wmadec
+#define VLCBITS 9
+#define VLCMAX ((22+VLCBITS-1)/VLCBITS)
+
+typedef float WMACoef;          ///< type for decoded coefficients, int16_t would be enough for wma 1/2
+
+typedef struct CoefVLCTable {
+    int n;                      ///< total number of codes
+    int max_level;
+    const uint32_t *huffcodes;  ///< VLC bit values
+    const uint8_t *huffbits;    ///< VLC bit size
+    const uint16_t *levels;     ///< table to build run/level tables
+} CoefVLCTable;
+
+typedef struct WMACodecContext {
+    AVCodecContext* avctx;
+    GetBitContext gb;
+    PutBitContext pb;
+    int sample_rate;
+    int nb_channels;
+    int bit_rate;
+    int version;                            ///< 1 = 0x160 (WMAV1), 2 = 0x161 (WMAV2)
+    int block_align;
+    int use_bit_reservoir;
+    int use_variable_block_len;
+    int use_exp_vlc;                        ///< exponent coding: 0 = lsp, 1 = vlc + delta
+    int use_noise_coding;                   ///< true if perceptual noise is added
+    int byte_offset_bits;
+    VLC exp_vlc;
+    int exponent_sizes[BLOCK_NB_SIZES];
+    uint16_t exponent_bands[BLOCK_NB_SIZES][25];
+    int high_band_start[BLOCK_NB_SIZES];    ///< index of first coef in high band
+    int coefs_start;                        ///< first coded coef
+    int coefs_end[BLOCK_NB_SIZES];          ///< max number of coded coefficients
+    int exponent_high_sizes[BLOCK_NB_SIZES];
+    int exponent_high_bands[BLOCK_NB_SIZES][HIGH_BAND_MAX_SIZE];
+    VLC hgain_vlc;
+
+    /* coded values in high bands */
+    int high_band_coded[MAX_CHANNELS][HIGH_BAND_MAX_SIZE];
+    int high_band_values[MAX_CHANNELS][HIGH_BAND_MAX_SIZE];
+
+    /* there are two possible tables for spectral coefficients */
+//FIXME the following 3 tables should be shared between decoders
+    VLC coef_vlc[2];
+    uint16_t *run_table[2];
+    float *level_table[2];
+    uint16_t *int_table[2];
+    const CoefVLCTable *coef_vlcs[2];
+    /* frame info */
+    int frame_len;                          ///< frame length in samples
+    int frame_len_bits;                     ///< frame_len = 1 << frame_len_bits
+    int nb_block_sizes;                     ///< number of block sizes
+    /* block info */
+    int reset_block_lengths;
+    int block_len_bits;                     ///< log2 of current block length
+    int next_block_len_bits;                ///< log2 of next block length
+    int prev_block_len_bits;                ///< log2 of prev block length
+    int block_len;                          ///< block length in samples
+    int block_num;                          ///< block number in current frame
+    int block_pos;                          ///< current position in frame
+    uint8_t ms_stereo;                      ///< true if mid/side stereo mode
+    uint8_t channel_coded[MAX_CHANNELS];    ///< true if channel is coded
+    int exponents_bsize[MAX_CHANNELS];      ///< log2 ratio frame/exp. length
+    DECLARE_ALIGNED(16, float, exponents)[MAX_CHANNELS][BLOCK_MAX_SIZE];
+    float max_exponent[MAX_CHANNELS];
+    WMACoef coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE];
+    DECLARE_ALIGNED(16, float, coefs)[MAX_CHANNELS][BLOCK_MAX_SIZE];
+    DECLARE_ALIGNED(16, FFTSample, output)[BLOCK_MAX_SIZE * 2];
+    FFTContext mdct_ctx[BLOCK_NB_SIZES];
+    float *windows[BLOCK_NB_SIZES];
+    /* output buffer for one frame and the last for IMDCT windowing */
+    DECLARE_ALIGNED(16, float, frame_out)[MAX_CHANNELS][BLOCK_MAX_SIZE * 2];
+    /* last frame info */
+    uint8_t last_superframe[MAX_CODED_SUPERFRAME_SIZE + 4]; /* padding added */
+    int last_bitoffset;
+    int last_superframe_len;
+    float noise_table[NOISE_TAB_SIZE];
+    int noise_index;
+    float noise_mult; /* XXX: suppress that and integrate it in the noise array */
+    /* lsp_to_curve tables */
+    float lsp_cos_table[BLOCK_MAX_SIZE];
+    float lsp_pow_e_table[256];
+    float lsp_pow_m_table1[(1 << LSP_POW_BITS)];
+    float lsp_pow_m_table2[(1 << LSP_POW_BITS)];
+    DSPContext dsp;
+
+#ifdef TRACE
+    int frame_count;
+#endif
+} WMACodecContext;
+
+extern const uint16_t ff_wma_critical_freqs[25];
+extern const uint16_t ff_wma_hgain_huffcodes[37];
+extern const uint8_t ff_wma_hgain_huffbits[37];
+extern const float ff_wma_lsp_codebook[NB_LSP_COEFS][16];
+extern const uint32_t ff_aac_scalefactor_code[121];
+extern const uint8_t  ff_aac_scalefactor_bits[121];
+
+int av_cold ff_wma_get_frame_len_bits(int sample_rate, int version,
+                                      unsigned int decode_flags);
+int ff_wma_init(AVCodecContext * avctx, int flags2);
+int ff_wma_total_gain_to_bits(int total_gain);
+int ff_wma_end(AVCodecContext *avctx);
+unsigned int ff_wma_get_large_val(GetBitContext* gb);
+int ff_wma_run_level_decode(AVCodecContext* avctx, GetBitContext* gb,
+                            VLC *vlc,
+                            const float *level_table, const uint16_t *run_table,
+                            int version, WMACoef *ptr, int offset,
+                            int num_coefs, int block_len, int frame_len_bits,
+                            int coef_nb_bits);
+
+#endif /* AVCODEC_WMA_H */
diff --git a/apps/codecs/libwmapro/wmaprodata.h b/apps/codecs/libwmapro/wmaprodata.h
new file mode 100644
index 0000000000..a1d186e0c2
--- /dev/null
+++ b/apps/codecs/libwmapro/wmaprodata.h
@@ -0,0 +1,604 @@
+/*
+ * WMA 9/3/PRO compatible decoder
+ * Copyright (c) 2007 Baptiste Coudurier, Benjamin Larsson, Ulion
+ * Copyright (c) 2008 - 2009 Sascha Sommer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file  libavcodec/wmaprodata.h
+ * @brief tables for wmapro decoding
+ */
+
+#ifndef AVCODEC_WMAPRODATA_H
+#define AVCODEC_WMAPRODATA_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * @brief frequencies to divide the frequency spectrum into scale factor bands
+ */
+static const uint16_t critical_freq[] = {
+     100,   200,    300,    400,    510,    630,    770,
+     920,  1080,   1270,   1480,   1720,   2000,   2320,
+    2700,  3150,   3700,   4400,   5300,   6400,   7700,
+    9500, 12000,  15500,  20675,  28575,  41375,  63875,
+};
+
+
+/**
+ * @name Huffman tables for DPCM-coded scale factors
+ * @{
+ */
+#define HUFF_SCALE_SIZE    121
+#define HUFF_SCALE_MAXBITS  19
+static const uint16_t scale_huffcodes[HUFF_SCALE_SIZE] = {
+    0xE639, 0xE6C2, 0xE6C1, 0xE6C0, 0xE63F, 0xE63E, 0xE63D, 0xE63C,
+    0xE63B, 0xE63A, 0xE638, 0xE637, 0xE636, 0xE635, 0xE634, 0xE632,
+    0xE633, 0xE620, 0x737B, 0xE610, 0xE611, 0xE612, 0xE613, 0xE614,
+    0xE615, 0xE616, 0xE617, 0xE618, 0xE619, 0xE61A, 0xE61B, 0xE61C,
+    0xE61D, 0xE61E, 0xE61F, 0xE6C3, 0xE621, 0xE622, 0xE623, 0xE624,
+    0xE625, 0xE626, 0xE627, 0xE628, 0xE629, 0xE62A, 0xE62B, 0xE62C,
+    0xE62D, 0xE62E, 0xE62F, 0xE630, 0xE631, 0x1CDF, 0x0E60, 0x0399,
+    0x00E7, 0x001D, 0x0000, 0x0001, 0x0001, 0x0001, 0x0002, 0x0006,
+    0x0002, 0x0007, 0x0006, 0x000F, 0x0038, 0x0072, 0x039A, 0xE6C4,
+    0xE6C5, 0xE6C6, 0xE6C7, 0xE6C8, 0xE6C9, 0xE6CA, 0xE6CB, 0xE6CC,
+    0xE6CD, 0xE6CE, 0xE6CF, 0xE6D0, 0xE6D1, 0xE6D2, 0xE6D3, 0xE6D4,
+    0xE6D5, 0xE6D6, 0xE6D7, 0xE6D8, 0xE6D9, 0xE6DA, 0xE6DB, 0xE6DC,
+    0xE6DD, 0xE6DE, 0xE6DF, 0xE6E0, 0xE6E1, 0xE6E2, 0xE6E3, 0xE6E4,
+    0xE6E5, 0xE6E6, 0xE6E7, 0xE6E8, 0xE6E9, 0xE6EA, 0xE6EB, 0xE6EC,
+    0xE6ED, 0xE6EE, 0xE6EF, 0xE6F0, 0xE6F1, 0xE6F2, 0xE6F3, 0xE6F4,
+    0xE6F5,
+};
+
+static const uint8_t scale_huffbits[HUFF_SCALE_SIZE] = {
+    19, 19, 19, 19, 19, 19, 19, 19,
+    19, 19, 19, 19, 19, 19, 19, 19,
+    19, 19, 18, 19, 19, 19, 19, 19,
+    19, 19, 19, 19, 19, 19, 19, 19,
+    19, 19, 19, 19, 19, 19, 19, 19,
+    19, 19, 19, 19, 19, 19, 19, 19,
+    19, 19, 19, 19, 19, 16, 15, 13,
+    11,  8,  5,  2,  1,  3,  5,  6,
+     6,  7,  7,  7,  9, 10, 13, 19,
+    19, 19, 19, 19, 19, 19, 19, 19,
+    19, 19, 19, 19, 19, 19, 19, 19,
+    19, 19, 19, 19, 19, 19, 19, 19,
+    19, 19, 19, 19, 19, 19, 19, 19,
+    19, 19, 19, 19, 19, 19, 19, 19,
+    19, 19, 19, 19, 19, 19, 19, 19,
+    19,
+};
+/** @} */
+
+
+/**
+ * @name Huffman, run and level tables for runlevel-coded scale factors
+ * @{
+ */
+#define HUFF_SCALE_RL_SIZE    120
+#define HUFF_SCALE_RL_MAXBITS  21
+static const uint32_t scale_rl_huffcodes[HUFF_SCALE_RL_SIZE] = {
+    0x00010C, 0x000001, 0x10FE2A, 0x000003, 0x000003, 0x000001, 0x000013,
+    0x000020, 0x000029, 0x000014, 0x000016, 0x000045, 0x000049, 0x00002F,
+    0x000042, 0x00008E, 0x00008F, 0x000129, 0x000009, 0x00000D, 0x0004AC,
+    0x00002C, 0x000561, 0x0002E6, 0x00087C, 0x0002E2, 0x00095C, 0x000018,
+    0x000001, 0x000016, 0x000044, 0x00002A, 0x000007, 0x000159, 0x000143,
+    0x000128, 0x00015A, 0x00012D, 0x00002B, 0x0000A0, 0x000142, 0x00012A,
+    0x0002EF, 0x0004AF, 0x00087D, 0x004AE9, 0x0043F9, 0x000067, 0x000199,
+    0x002B05, 0x001583, 0x0021FE, 0x10FE2C, 0x000004, 0x00002E, 0x00010D,
+    0x00000A, 0x000244, 0x000017, 0x000245, 0x000011, 0x00010E, 0x00012C,
+    0x00002A, 0x00002F, 0x000121, 0x000046, 0x00087E, 0x0000BA, 0x000032,
+    0x0087F0, 0x0056DC, 0x0002EC, 0x0043FA, 0x002B6F, 0x004AE8, 0x0002B7,
+    0x10FE2B, 0x000001, 0x000051, 0x000010, 0x0002EE, 0x000B9C, 0x002576,
+    0x000198, 0x0056DD, 0x0000CD, 0x000AC0, 0x000170, 0x004AEF, 0x00002D,
+    0x0004AD, 0x0021FF, 0x0005CF, 0x002B04, 0x10FE29, 0x10FE28, 0x0002ED,
+    0x002E74, 0x021FC4, 0x004AEE, 0x010FE3, 0x087F17, 0x000000, 0x000097,
+    0x0002E3, 0x000ADA, 0x002575, 0x00173B, 0x0043FB, 0x002E75, 0x10FE2D,
+    0x0015B6, 0x00056C, 0x000057, 0x000123, 0x000120, 0x00021E, 0x000172,
+    0x0002B1,
+};
+
+static const uint8_t scale_rl_huffbits[HUFF_SCALE_RL_SIZE] = {
+     9,  2, 21,  2,  4,  5,  5,
+     6,  6,  7,  7,  7,  7,  6,
+     7,  8,  8,  9, 10, 10, 11,
+    12, 11, 12, 12, 12, 12, 11,
+     4,  5,  7,  8,  9,  9,  9,
+     9,  9,  9,  8,  8,  9,  9,
+    12, 11, 12, 15, 15, 13, 15,
+    14, 13, 14, 21,  5,  6,  9,
+    10, 10, 11, 10, 11,  9,  9,
+     6,  8,  9,  7, 12, 10, 12,
+    16, 15, 12, 15, 14, 15, 10,
+    21,  6,  7, 11, 12, 14, 14,
+    15, 15, 14, 12, 11, 15, 12,
+    11, 14, 13, 14, 21, 21, 12,
+    16, 18, 15, 17, 20,  7,  8,
+    12, 12, 14, 15, 15, 16, 21,
+    13, 11,  7,  9,  9, 10, 11,
+    10,
+};
+
+
+static const uint8_t scale_rl_run[HUFF_SCALE_RL_SIZE] = {
+     0,  0,  0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+    16, 17, 18, 19, 20, 21, 22, 23, 24,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
+    10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,  0,  1,  2,  3,
+     4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
+    23, 24,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
+    17, 18, 19, 20, 21, 22, 23, 24,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10,
+     0,  1,  0,  1,  0,  1,
+};
+
+static const uint8_t scale_rl_level[HUFF_SCALE_RL_SIZE] = {
+     0,  0,  1,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+     2,  2,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+     3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  4,  4,  4,  4,
+     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+     4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+     5,  5,  5,  5,  5,  5,  5,  5,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+     7,  7,  8,  8,  9,  9,
+};
+/** @} */
+
+
+/**
+ * @name Huffman, run and level codes for runlevel-coded coefficients
+ * @{
+ */
+#define HUFF_COEF0_SIZE    272
+#define HUFF_COEF0_MAXBITS  21
+static const uint32_t coef0_huffcodes[HUFF_COEF0_SIZE] = {
+    0x00004A, 0x00002B, 0x000000, 0x000003, 0x000006, 0x000009, 0x00000F,
+    0x000010, 0x000016, 0x000011, 0x000016, 0x000028, 0x00002F, 0x000026,
+    0x000029, 0x000045, 0x000055, 0x00005D, 0x000042, 0x00004E, 0x000051,
+    0x00005E, 0x00008D, 0x0000A8, 0x0000AD, 0x000080, 0x000096, 0x00009F,
+    0x0000AA, 0x0000BE, 0x00011C, 0x000153, 0x000158, 0x000170, 0x000104,
+    0x00010D, 0x000105, 0x000103, 0x00012F, 0x000177, 0x000175, 0x000157,
+    0x000174, 0x000225, 0x00023B, 0x00020D, 0x00021F, 0x000281, 0x00027B,
+    0x000282, 0x0002AC, 0x0002FD, 0x00044F, 0x000478, 0x00044D, 0x0002EC,
+    0x00044E, 0x000564, 0x000409, 0x00040B, 0x000501, 0x000545, 0x0004F3,
+    0x000541, 0x00043B, 0x0004F1, 0x0004F4, 0x0008FD, 0x000A94, 0x000811,
+    0x000B88, 0x000B91, 0x000B93, 0x0008EA, 0x000899, 0x000B8A, 0x000972,
+    0x0009E5, 0x000A8F, 0x000A84, 0x000A8E, 0x000A00, 0x000830, 0x0008E8,
+    0x000B95, 0x000871, 0x00083A, 0x000814, 0x000873, 0x000BFE, 0x001728,
+    0x001595, 0x001712, 0x00102A, 0x001021, 0x001729, 0x00152E, 0x0013C3,
+    0x001721, 0x001597, 0x00151B, 0x0010F2, 0x001403, 0x001703, 0x001503,
+    0x001708, 0x0013C1, 0x00170E, 0x00170C, 0x0010E1, 0x0011EA, 0x001020,
+    0x001500, 0x0017FA, 0x001704, 0x001705, 0x0017F0, 0x0017FB, 0x0021E6,
+    0x002B2D, 0x0020C6, 0x002B29, 0x002E4A, 0x0023AC, 0x001519, 0x0023F3,
+    0x002B2C, 0x0021C0, 0x0017FE, 0x0023D7, 0x0017F9, 0x0012E7, 0x0013C0,
+    0x002261, 0x0023D3, 0x002057, 0x002056, 0x0021D2, 0x0020C7, 0x0023D2,
+    0x0020EC, 0x0044C0, 0x002FE2, 0x00475B, 0x002A03, 0x002FE3, 0x0021E2,
+    0x0021D0, 0x002A31, 0x002E13, 0x002E05, 0x0047E5, 0x00000E, 0x000024,
+    0x000088, 0x0000B9, 0x00010C, 0x000224, 0x0002B3, 0x000283, 0x0002ED,
+    0x00047B, 0x00041E, 0x00043D, 0x0004F5, 0x0005FD, 0x000A92, 0x000B96,
+    0x000838, 0x000971, 0x000B83, 0x000B80, 0x000BF9, 0x0011D3, 0x0011E8,
+    0x0011D7, 0x001527, 0x0011F8, 0x001073, 0x0010F0, 0x0010E4, 0x0017F8,
+    0x001062, 0x001402, 0x0017E3, 0x00151A, 0x001077, 0x00152B, 0x00170D,
+    0x0021D3, 0x002E41, 0x0013C2, 0x000029, 0x0000A9, 0x00025D, 0x000419,
+    0x000544, 0x000B8B, 0x0009E4, 0x0011D2, 0x001526, 0x001724, 0x0012E6,
+    0x00150B, 0x0017FF, 0x002E26, 0x002E4B, 0x002B28, 0x0021E3, 0x002A14,
+    0x00475A, 0x002E12, 0x000057, 0x00023E, 0x000A90, 0x000BF0, 0x001072,
+    0x001502, 0x0023D6, 0x0020ED, 0x002A30, 0x0044C7, 0x00008C, 0x00047F,
+    0x00152A, 0x002262, 0x002E04, 0x0000A1, 0x0005F9, 0x000173, 0x000875,
+    0x000171, 0x00152D, 0x0002E3, 0x0017E2, 0x0002AD, 0x0021C1, 0x000479,
+    0x0021E7, 0x00041F, 0x005C4E, 0x000543, 0x005C4F, 0x000A91, 0x00898D,
+    0x000B97, 0x008746, 0x000970, 0x008745, 0x000B85, 0x00A856, 0x00152F,
+    0x010E8E, 0x0010E5, 0x00A857, 0x00170F, 0x021D11, 0x002A58, 0x010E8F,
+    0x002E40, 0x021D13, 0x002A59, 0x043A25, 0x002A02, 0x043A21, 0x0044C1,
+    0x087448, 0x0047E4, 0x043A20, 0x00542A, 0x087449, 0x00898C,
+};
+
+static const uint8_t coef0_huffbits[HUFF_COEF0_SIZE] = {
+     8,  7,  2,  3,  3,  4,  4,
+     5,  5,  6,  6,  6,  6,  7,
+     7,  7,  7,  7,  8,  8,  8,
+     8,  8,  8,  8,  9,  9,  9,
+     9,  9,  9,  9,  9,  9, 10,
+    10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 11, 11, 11, 11,
+    11, 11, 11, 11, 11, 11, 11,
+    11, 11, 12, 12, 12, 12, 12,
+    12, 12, 12, 12, 12, 12, 13,
+    12, 12, 12, 12, 12, 12, 13,
+    13, 13, 13, 13, 13, 13, 12,
+    12, 13, 13, 13, 13, 13, 13,
+    13, 13, 14, 14, 13, 13, 14,
+    13, 13, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 13, 14,
+    14, 14, 14, 14, 14, 14, 15,
+    14, 15, 14, 14, 14, 14, 14,
+    14, 15, 14, 14, 14, 14, 14,
+    14, 14, 15, 15, 15, 15, 14,
+    15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15,  4,  7,
+     8,  9, 10, 10, 10, 11, 11,
+    11, 12, 12, 12, 12, 12, 12,
+    13, 13, 13, 13, 13, 13, 13,
+    13, 13, 13, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 13, 14,
+    15, 14, 14,  6,  9, 11, 12,
+    12, 12, 13, 13, 13, 13, 14,
+    14, 14, 14, 14, 14, 15, 15,
+    15, 15,  7, 10, 12, 13, 14,
+    14, 14, 15, 15, 15,  8, 11,
+    13, 14, 15,  9, 12,  9, 13,
+    10, 13, 10, 14, 11, 15, 11,
+    15, 12, 15, 12, 15, 12, 16,
+    12, 17, 13, 17, 13, 17, 13,
+    18, 14, 17, 14, 19, 14, 18,
+    14, 19, 14, 20, 15, 20, 15,
+    21, 15, 20, 16, 21, 16,
+};
+
+
+#define HUFF_COEF1_SIZE    244
+#define HUFF_COEF1_MAXBITS  22
+static const uint32_t coef1_huffcodes[HUFF_COEF1_SIZE] = {
+    0x0001E2, 0x00007F, 0x000000, 0x000002, 0x000008, 0x00000E, 0x000019,
+    0x00002F, 0x000037, 0x000060, 0x00006C, 0x000095, 0x0000C6, 0x0000F0,
+    0x00012E, 0x000189, 0x0001A5, 0x0001F8, 0x000253, 0x00030A, 0x000344,
+    0x00034D, 0x0003F2, 0x0004BD, 0x0005D7, 0x00062A, 0x00068B, 0x000693,
+    0x000797, 0x00097D, 0x000BAB, 0x000C52, 0x000C5E, 0x000D21, 0x000D20,
+    0x000F1A, 0x000FCE, 0x000FD1, 0x0012F1, 0x001759, 0x0018AC, 0x0018A7,
+    0x0018BF, 0x001A2B, 0x001E52, 0x001E50, 0x001E31, 0x001FB8, 0x0025E6,
+    0x0025E7, 0x002EB4, 0x002EB7, 0x003169, 0x00315B, 0x00317C, 0x00316C,
+    0x0034CA, 0x00348D, 0x003F40, 0x003CA2, 0x003F76, 0x004BC3, 0x004BE5,
+    0x003F73, 0x004BF8, 0x004BF9, 0x006131, 0x00628B, 0x006289, 0x0062DA,
+    0x00628A, 0x0062D4, 0x006997, 0x0062B4, 0x006918, 0x00794D, 0x007E7B,
+    0x007E87, 0x007EEA, 0x00794E, 0x00699D, 0x007967, 0x00699F, 0x0062DB,
+    0x007E7A, 0x007EEB, 0x00BAC0, 0x0097C9, 0x00C537, 0x00C5AB, 0x00D233,
+    0x00D338, 0x00BAC1, 0x00D23D, 0x012F91, 0x00D339, 0x00FDC8, 0x00D23C,
+    0x00FDDC, 0x00FDC9, 0x00FDDD, 0x00D33C, 0x000003, 0x000016, 0x00003E,
+    0x0000C3, 0x0001A1, 0x000347, 0x00062E, 0x000BAA, 0x000F2D, 0x001A2A,
+    0x001E58, 0x00309B, 0x003CA3, 0x005D6A, 0x00629A, 0x006996, 0x00794F,
+    0x007EE5, 0x00BAD7, 0x00C5AA, 0x00C5F4, 0x00FDDF, 0x00FDDE, 0x018A20,
+    0x018A6D, 0x01A67B, 0x01A464, 0x025F21, 0x01F9E2, 0x01F9E3, 0x00000A,
+    0x00003D, 0x000128, 0x0003C7, 0x000C24, 0x0018A3, 0x002EB1, 0x003CB2,
+    0x00691F, 0x007E79, 0x000013, 0x0000BB, 0x00034E, 0x000D14, 0x0025FD,
+    0x004BE7, 0x000024, 0x000188, 0x0007EF, 0x000035, 0x000308, 0x0012F2,
+    0x00005C, 0x0003F6, 0x0025E0, 0x00006D, 0x000698, 0x000096, 0x000C25,
+    0x0000C7, 0x000F1B, 0x0000F3, 0x0012FF, 0x000174, 0x001A66, 0x0001A0,
+    0x003099, 0x0001E4, 0x00316B, 0x000252, 0x003F31, 0x00030B, 0x004BE6,
+    0x000346, 0x0062FB, 0x00034F, 0x007966, 0x0003F5, 0x007E86, 0x0005D4,
+    0x00C511, 0x00062C, 0x00C5F5, 0x000692, 0x00F299, 0x000795, 0x00F298,
+    0x0007E9, 0x018A21, 0x00097E, 0x0175AD, 0x000C27, 0x01A67A, 0x000C57,
+    0x02EB59, 0x000D22, 0x0314D9, 0x000F19, 0x03F3C2, 0x000FCD, 0x0348CB,
+    0x0012F8, 0x04BE41, 0x0018A0, 0x03F3C1, 0x0018A1, 0x04BE40, 0x0018B7,
+    0x0629B0, 0x001A64, 0x0D2329, 0x001E30, 0x03F3C3, 0x001F9F, 0x0BAD62,
+    0x001F99, 0x0FCF00, 0x00309A, 0x0629B1, 0x002EB6, 0x175AC3, 0x00314C,
+    0x069195, 0x003168, 0x0BAD63, 0x00348E, 0x175AC1, 0x003F30, 0x07E781,
+    0x003F41, 0x0D2328, 0x003F42, 0x1F9E03, 0x004BC2, 0x175AC2, 0x003F74,
+    0x175AC0, 0x005D61, 0x3F3C05, 0x006130, 0x3F3C04, 0x0062B5,
+};
+
+static const uint8_t coef1_huffbits[HUFF_COEF1_SIZE] = {
+     9,  7,  2,  3,  4,  4,  5,
+     6,  6,  7,  7,  8,  8,  8,
+     9,  9,  9,  9, 10, 10, 10,
+    10, 10, 11, 11, 11, 11, 11,
+    11, 12, 12, 12, 12, 12, 12,
+    12, 12, 12, 13, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 14,
+    14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 14, 15, 15,
+    14, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15,
+    15, 15, 15, 15, 15, 15, 15,
+    15, 15, 16, 16, 16, 16, 16,
+    16, 16, 16, 17, 16, 16, 16,
+    16, 16, 16, 16,  3,  5,  6,
+     8,  9, 10, 11, 12, 12, 13,
+    13, 14, 14, 15, 15, 15, 15,
+    15, 16, 16, 16, 16, 16, 17,
+    17, 17, 17, 18, 17, 17,  4,
+     6,  9, 10, 12, 13, 14, 14,
+    15, 15,  5,  8, 10, 12, 14,
+    15,  6,  9, 11,  6, 10, 13,
+     7, 10, 14,  7, 11,  8, 12,
+     8, 12,  8, 13,  9, 13,  9,
+    14,  9, 14, 10, 14, 10, 15,
+    10, 15, 10, 15, 10, 15, 11,
+    16, 11, 16, 11, 16, 11, 16,
+    11, 17, 12, 17, 12, 17, 12,
+    18, 12, 18, 12, 18, 12, 18,
+    13, 19, 13, 18, 13, 19, 13,
+    19, 13, 20, 13, 18, 13, 20,
+    13, 20, 14, 19, 14, 21, 14,
+    19, 14, 20, 14, 21, 14, 19,
+    14, 20, 14, 21, 15, 21, 14,
+    21, 15, 22, 15, 22, 15,
+};
+
+
+static const uint16_t coef0_run[HUFF_COEF0_SIZE] = {
+      0,   0,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,
+     12,  13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
+     26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
+     40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,
+     54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,
+     68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,  81,
+     82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
+     96,  97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
+    110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123,
+    124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137,
+    138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,   0,   1,
+      2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,
+     16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
+     30,  31,  32,  33,  34,  35,  36,  37,  38,  39,   0,   1,   2,   3,
+      4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,
+     18,  19,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,   0,   1,
+      2,   3,   4,   0,   1,   0,   1,   0,   1,   0,   1,   0,   1,   0,
+      1,   0,   1,   0,   1,   0,   1,   0,   1,   0,   1,   0,   1,   0,
+      1,   0,   1,   0,   1,   0,   1,   0,   1,   0,   1,   0,   1,   0,
+      1,   0,   1,   0,   1,   0,
+};
+
+static const float coef0_level[HUFF_COEF0_SIZE] = {
+      0,   0,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
+      1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
+      1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
+      1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
+      1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
+      1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
+      1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
+      1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
+      1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
+      1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,
+      1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   1,   2,   2,
+      2,   2,   2,   2,   2,   2,   2,   2,   2,   2,   2,   2,   2,   2,
+      2,   2,   2,   2,   2,   2,   2,   2,   2,   2,   2,   2,   2,   2,
+      2,   2,   2,   2,   2,   2,   2,   2,   2,   2,   3,   3,   3,   3,
+      3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,   3,
+      3,   3,   4,   4,   4,   4,   4,   4,   4,   4,   4,   4,   5,   5,
+      5,   5,   5,   6,   6,   7,   7,   8,   8,   9,   9,  10,  10,  11,
+     11,  12,  12,  13,  13,  14,  14,  15,  15,  16,  16,  17,  17,  18,
+     18,  19,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  24,  25,
+     25,  26,  26,  27,  27,  28,
+};
+
+
+static const uint16_t coef1_run[HUFF_COEF1_SIZE] = {
+     0,  0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
+    34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
+    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
+    70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87,
+    88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,  0,  1,  2,  3,  4,  5,
+     6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
+    24, 25, 26, 27, 28, 29,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  0,  1,
+     2,  3,  4,  5,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  0,  1,  0,
+     1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,
+     1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,
+     1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,
+     1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,
+     1,  0,  1,  0,  1,  0,  1,  0,  0,  0,
+};
+
+static const float coef1_level[HUFF_COEF1_SIZE] = {
+     0,  0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  2,
+     2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+     2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  4,  4,
+     4,  4,  4,  4,  5,  5,  5,  6,  6,  6,  7,  7,  7,  8,  8,  9,  9, 10,
+    10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19,
+    19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28,
+    28, 29, 29, 30, 30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37,
+    37, 38, 38, 39, 39, 40, 40, 41, 41, 42, 42, 43, 43, 44, 44, 45, 45, 46,
+    46, 47, 47, 48, 48, 49, 49, 50, 51, 52,
+};
+/** @} */
+
+
+/**
+ * @name Huffman and vector lookup tables for vector-coded coefficients
+ * @{
+ */
+#define HUFF_VEC4_SIZE    127
+#define HUFF_VEC4_MAXBITS  14
+static const uint16_t vec4_huffcodes[HUFF_VEC4_SIZE] = {
+    0x0019, 0x0027, 0x00F2, 0x03BA, 0x0930, 0x1267, 0x0031, 0x0030,
+    0x0097, 0x0221, 0x058B, 0x0124, 0x00EB, 0x01D4, 0x03D8, 0x0584,
+    0x0364, 0x045F, 0x0F66, 0x0931, 0x24CD, 0x002F, 0x0039, 0x00E8,
+    0x02C3, 0x078A, 0x0037, 0x0029, 0x0084, 0x01B1, 0x00ED, 0x0086,
+    0x00F9, 0x03AB, 0x01EB, 0x08BC, 0x011E, 0x00F3, 0x0220, 0x058A,
+    0x00EC, 0x008E, 0x012B, 0x01EA, 0x0119, 0x04B0, 0x04B1, 0x03B8,
+    0x0691, 0x0365, 0x01ED, 0x049A, 0x0EA9, 0x0EA8, 0x08BD, 0x24CC,
+    0x0026, 0x0035, 0x00DB, 0x02C4, 0x07B2, 0x0038, 0x002B, 0x007F,
+    0x01B3, 0x00F4, 0x0091, 0x0116, 0x03BB, 0x0215, 0x0932, 0x002D,
+    0x002A, 0x008A, 0x01DE, 0x0028, 0x0020, 0x005C, 0x0090, 0x0068,
+    0x01EE, 0x00E9, 0x008D, 0x012A, 0x0087, 0x005D, 0x0118, 0x0349,
+    0x01EF, 0x01E3, 0x08B9, 0x00F0, 0x00D3, 0x0214, 0x049B, 0x00DA,
+    0x0089, 0x0125, 0x0217, 0x012D, 0x0690, 0x0094, 0x007D, 0x011F,
+    0x007E, 0x0059, 0x0127, 0x01A5, 0x0111, 0x00F8, 0x045D, 0x03B9,
+    0x0259, 0x0580, 0x02C1, 0x01DF, 0x0585, 0x0216, 0x0163, 0x01B0,
+    0x03C4, 0x08B8, 0x078B, 0x0755, 0x0581, 0x0F67, 0x0000,
+};
+
+static const uint8_t vec4_huffbits[HUFF_VEC4_SIZE] = {
+     5,  6,  8, 10, 12, 13,  6,  6,
+     8, 10, 11,  9,  8,  9, 10, 11,
+    10, 11, 12, 12, 14,  6,  6,  8,
+    10, 11,  6,  6,  8,  9,  8,  8,
+     8, 10,  9, 12,  9,  8, 10, 11,
+     8,  8,  9,  9,  9, 11, 11, 10,
+    11, 10,  9, 11, 12, 12, 12, 14,
+     6,  6,  8, 10, 11,  6,  6,  7,
+     9,  8,  8,  9, 10, 10, 12,  6,
+     6,  8,  9,  6,  6,  7,  8,  7,
+     9,  8,  8,  9,  8,  7,  9, 10,
+     9,  9, 12,  8,  8, 10, 11,  8,
+     8,  9, 10,  9, 11,  8,  7,  9,
+     7,  7,  9,  9,  9,  8, 11, 10,
+    10, 11, 10,  9, 11, 10,  9,  9,
+    10, 12, 11, 11, 11, 12,  1,
+};
+
+
+#define HUFF_VEC2_SIZE    137
+#define HUFF_VEC2_MAXBITS  12
+static const uint16_t vec2_huffcodes[HUFF_VEC2_SIZE] = {
+    0x055, 0x01C, 0x01A, 0x02B, 0x028, 0x067, 0x08B, 0x039,
+    0x170, 0x10D, 0x2A5, 0x047, 0x464, 0x697, 0x523, 0x8CB,
+    0x01B, 0x00E, 0x000, 0x010, 0x012, 0x036, 0x048, 0x04C,
+    0x0C2, 0x09B, 0x171, 0x03B, 0x224, 0x34A, 0x2D6, 0x019,
+    0x00F, 0x002, 0x014, 0x017, 0x006, 0x05D, 0x054, 0x0C7,
+    0x0B4, 0x192, 0x10E, 0x233, 0x043, 0x02C, 0x00F, 0x013,
+    0x006, 0x02F, 0x02C, 0x068, 0x077, 0x0DF, 0x111, 0x1A4,
+    0x16A, 0x2A4, 0x027, 0x011, 0x018, 0x02D, 0x00F, 0x04A,
+    0x040, 0x097, 0x01F, 0x11B, 0x022, 0x16D, 0x066, 0x035,
+    0x005, 0x02B, 0x049, 0x009, 0x075, 0x0CB, 0x0AA, 0x187,
+    0x106, 0x08A, 0x047, 0x060, 0x06E, 0x01D, 0x074, 0x0C4,
+    0x01E, 0x118, 0x1A7, 0x038, 0x042, 0x053, 0x076, 0x0A8,
+    0x0CA, 0x082, 0x110, 0x18D, 0x12D, 0x0B9, 0x0C8, 0x0DE,
+    0x01C, 0x0AB, 0x113, 0x18C, 0x10F, 0x09A, 0x0A5, 0x0B7,
+    0x11A, 0x186, 0x1A6, 0x259, 0x153, 0x18A, 0x193, 0x020,
+    0x10C, 0x046, 0x03A, 0x107, 0x149, 0x16C, 0x2D7, 0x225,
+    0x258, 0x316, 0x696, 0x317, 0x042, 0x522, 0x290, 0x8CA,
+    0x001,
+};
+
+static const uint8_t vec2_huffbits[HUFF_VEC2_SIZE] = {
+     7,  6,  6,  6,  7,  7,  8,  9,
+     9, 10, 10, 11, 11, 11, 12, 12,
+     6,  4,  5,  5,  6,  6,  7,  8,
+     8,  9,  9, 10, 10, 10, 11,  6,
+     4,  5,  5,  6,  7,  7,  8,  8,
+     9,  9, 10, 10, 11,  6,  5,  5,
+     6,  6,  7,  7,  8,  8,  9,  9,
+    10, 10,  7,  6,  6,  6,  7,  7,
+     8,  8,  9,  9, 10, 10,  7,  6,
+     7,  7,  7,  8,  8,  8,  9,  9,
+    10,  8,  7,  7,  7,  8,  8,  8,
+     9,  9,  9,  9,  8,  8,  8,  8,
+     8,  9,  9,  9,  9,  8,  8,  8,
+     9,  9,  9,  9, 10,  9,  9,  9,
+     9,  9,  9, 10,  9,  9,  9, 10,
+    10, 11, 10, 10, 10, 10, 11, 10,
+    10, 10, 11, 10, 11, 12, 11, 12,
+     3,
+};
+
+
+#define HUFF_VEC1_SIZE    101
+#define HUFF_VEC1_MAXBITS  11
+static const uint16_t vec1_huffcodes[HUFF_VEC1_SIZE] = {
+    0x01A, 0x003, 0x017, 0x010, 0x00C, 0x009, 0x005, 0x000,
+    0x00D, 0x00A, 0x009, 0x00C, 0x00F, 0x002, 0x004, 0x007,
+    0x00B, 0x00F, 0x01C, 0x006, 0x010, 0x015, 0x01C, 0x022,
+    0x03B, 0x00E, 0x019, 0x023, 0x034, 0x036, 0x03A, 0x047,
+    0x008, 0x00A, 0x01E, 0x031, 0x037, 0x050, 0x053, 0x06B,
+    0x06F, 0x08C, 0x0E8, 0x0EA, 0x0EB, 0x016, 0x03E, 0x03F,
+    0x06C, 0x089, 0x08A, 0x0A3, 0x0A4, 0x0D4, 0x0DD, 0x0EC,
+    0x0EE, 0x11A, 0x1D2, 0x024, 0x025, 0x02E, 0x027, 0x0C2,
+    0x0C0, 0x0DA, 0x0DB, 0x111, 0x144, 0x116, 0x14A, 0x145,
+    0x1B8, 0x1AB, 0x1DA, 0x1DE, 0x1DB, 0x1DF, 0x236, 0x237,
+    0x3A6, 0x3A7, 0x04D, 0x04C, 0x05E, 0x05F, 0x183, 0x182,
+    0x186, 0x221, 0x187, 0x220, 0x22E, 0x22F, 0x296, 0x354,
+    0x297, 0x355, 0x372, 0x373, 0x016,
+};
+
+static const uint8_t vec1_huffbits[HUFF_VEC1_SIZE] = {
+     7,  6,  5,  5,  5,  5,  5,  5,
+     4,  4,  4,  4,  4,  5,  5,  5,
+     5,  5,  5,  6,  6,  6,  6,  6,
+     6,  7,  7,  7,  7,  7,  7,  7,
+     8,  8,  8,  8,  8,  8,  8,  8,
+     8,  8,  8,  8,  8,  9,  9,  9,
+     9,  9,  9,  9,  9,  9,  9,  9,
+     9,  9,  9, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 11, 11, 11, 11, 11, 11,
+    11, 11, 11, 11, 11, 11, 11, 11,
+    11, 11, 11, 11,  5,
+};
+
+
+static const uint16_t symbol_to_vec4[HUFF_VEC4_SIZE] = {
+        0,    1,      2,     3,     4,     5,    16,    17,    18,    19,
+       20,   32,     33,    34,    35,    48,    49,    50,    64,    65,
+       80,   256,   257,   258,   259,   260,   272,   273,   274,   275,
+      288,   289,   290,   304,   305,   320,   512,   513,   514,   515,
+      528,   529,   530,   544,   545,   560,   768,   769,   770,   784,
+      785,   800,  1024,  1025,  1040,  1280,  4096,  4097,  4098,  4099,
+     4100,  4112,  4113,  4114,  4115,  4128,  4129,  4130,  4144,  4145,
+     4160,  4352,  4353,  4354,  4355,  4368,  4369,  4370,  4384,  4385,
+     4400,  4608,  4609,  4610,  4624,  4625,  4640,  4864,  4865,  4880,
+     5120,  8192,  8193,  8194,  8195,  8208,  8209,  8210,  8224,  8225,
+     8240,  8448,  8449,  8450,  8464,  8465,  8480,  8704,  8705,  8720,
+     8960, 12288, 12289, 12290, 12304, 12305, 12320, 12544, 12545, 12560,
+    12800, 16384, 16385, 16400, 16640, 20480,     0,
+};
+
+
+static const uint8_t symbol_to_vec2[HUFF_VEC2_SIZE] = {
+      0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
+     15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
+     30,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,
+     48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  64,  65,
+     66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  80,  81,  82,  83,  84,
+     85,  86,  87,  88,  89,  90,  96,  97,  98,  99, 100, 101, 102, 103, 104,
+    105, 112, 113, 114, 115, 116, 117, 118, 119, 120, 128, 129, 130, 131, 132,
+    133, 134, 135, 144, 145, 146, 147, 148, 149, 150, 160, 161, 162, 163, 164,
+    165, 176, 177, 178, 179, 180, 192, 193, 194, 195, 208, 209, 210, 224, 225,
+    240,   0,
+};
+/** @} */
+
+
+/**
+ * @brief decorrelation matrix for multichannel streams
+ **/
+static const float default_decorrelation_matrices[] = {
+    1.000000,  0.707031, -0.707031,  0.707031,  0.707031,  0.578125,  0.707031,
+    0.410156,  0.578125, -0.707031,  0.410156,  0.578125,  0.000000, -0.816406,
+    0.500000,  0.652344,  0.500000,  0.269531,  0.500000,  0.269531, -0.500000,
+   -0.652344,  0.500000, -0.269531, -0.500000,  0.652344,  0.500000, -0.652344,
+    0.500000, -0.269531,  0.445312,  0.601562,  0.511719,  0.371094,  0.195312,
+    0.445312,  0.371094, -0.195312, -0.601562, -0.511719,  0.445312,  0.000000,
+   -0.632812,  0.000000,  0.632812,  0.445312, -0.371094, -0.195312,  0.601562,
+   -0.511719,  0.445312, -0.601562,  0.511719, -0.371094,  0.195312,  0.410156,
+    0.558594,  0.500000,  0.410156,  0.289062,  0.148438,  0.410156,  0.410156,
+    0.000000, -0.410156, -0.578125, -0.410156,  0.410156,  0.148438, -0.500000,
+   -0.410156,  0.289062,  0.558594,  0.410156, -0.148438, -0.500000,  0.410156,
+    0.289062, -0.558594,  0.410156, -0.410156,  0.000000,  0.410156, -0.578125,
+    0.410156,  0.410156, -0.558594,  0.500000, -0.410156,  0.289062, -0.148438,
+};
+
+/**
+ * @brief default decorrelation matrix offsets
+ */
+static const float * const default_decorrelation[] = {
+    NULL,
+    &default_decorrelation_matrices[0],
+    &default_decorrelation_matrices[1],
+    &default_decorrelation_matrices[5],
+    &default_decorrelation_matrices[14],
+    &default_decorrelation_matrices[30],
+    &default_decorrelation_matrices[55]
+};
+
+#endif /* AVCODEC_WMAPRODATA_H */
diff --git a/apps/codecs/libwmapro/wmaprodec.c b/apps/codecs/libwmapro/wmaprodec.c
new file mode 100644
index 0000000000..82f1b3623b
--- /dev/null
+++ b/apps/codecs/libwmapro/wmaprodec.c
@@ -0,0 +1,1578 @@
+/*
+ * Wmapro compatible decoder
+ * Copyright (c) 2007 Baptiste Coudurier, Benjamin Larsson, Ulion
+ * Copyright (c) 2008 - 2009 Sascha Sommer, Benjamin Larsson
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file  libavcodec/wmaprodec.c
+ * @brief wmapro decoder implementation
+ * Wmapro is an MDCT based codec comparable to wma standard or AAC.
+ * The decoding therefore consists of the following steps:
+ * - bitstream decoding
+ * - reconstruction of per-channel data
+ * - rescaling and inverse quantization
+ * - IMDCT
+ * - windowing and overlapp-add
+ *
+ * The compressed wmapro bitstream is split into individual packets.
+ * Every such packet contains one or more wma frames.
+ * The compressed frames may have a variable length and frames may
+ * cross packet boundaries.
+ * Common to all wmapro frames is the number of samples that are stored in
+ * a frame.
+ * The number of samples and a few other decode flags are stored
+ * as extradata that has to be passed to the decoder.
+ *
+ * The wmapro frames themselves are again split into a variable number of
+ * subframes. Every subframe contains the data for 2^N time domain samples
+ * where N varies between 7 and 12.
+ *
+ * Example wmapro bitstream (in samples):
+ *
+ * ||   packet 0           || packet 1 || packet 2      packets
+ * ---------------------------------------------------
+ * || frame 0      || frame 1       || frame 2    ||    frames
+ * ---------------------------------------------------
+ * ||   |      |   ||   |   |   |   ||            ||    subframes of channel 0
+ * ---------------------------------------------------
+ * ||      |   |   ||   |   |   |   ||            ||    subframes of channel 1
+ * ---------------------------------------------------
+ *
+ * The frame layouts for the individual channels of a wma frame does not need
+ * to be the same.
+ *
+ * However, if the offsets and lengths of several subframes of a frame are the
+ * same, the subframes of the channels can be grouped.
+ * Every group may then use special coding techniques like M/S stereo coding
+ * to improve the compression ratio. These channel transformations do not
+ * need to be applied to a whole subframe. Instead, they can also work on
+ * individual scale factor bands (see below).
+ * The coefficients that carry the audio signal in the frequency domain
+ * are transmitted as huffman-coded vectors with 4, 2 and 1 elements.
+ * In addition to that, the encoder can switch to a runlevel coding scheme
+ * by transmitting subframe_length / 128 zero coefficients.
+ *
+ * Before the audio signal can be converted to the time domain, the
+ * coefficients have to be rescaled and inverse quantized.
+ * A subframe is therefore split into several scale factor bands that get
+ * scaled individually.
+ * Scale factors are submitted for every frame but they might be shared
+ * between the subframes of a channel. Scale factors are initially DPCM-coded.
+ * Once scale factors are shared, the differences are transmitted as runlevel
+ * codes.
+ * Every subframe length and offset combination in the frame layout shares a
+ * common quantization factor that can be adjusted for every channel by a
+ * modifier.
+ * After the inverse quantization, the coefficients get processed by an IMDCT.
+ * The resulting values are then windowed with a sine window and the first half
+ * of the values are added to the second half of the output from the previous
+ * subframe in order to reconstruct the output samples.
+ */
+
+#include "avcodec.h"
+#include "internal.h"
+#include "get_bits.h"
+#include "put_bits.h"
+#include "wmaprodata.h"
+#include "dsputil.h"
+#include "wma.h"
+
+/** current decoder limitations */
+#define WMAPRO_MAX_CHANNELS    8                             ///< max number of handled channels
+#define MAX_SUBFRAMES  32                                    ///< max number of subframes per channel
+#define MAX_BANDS      29                                    ///< max number of scale factor bands
+#define MAX_FRAMESIZE  32768                                 ///< maximum compressed frame size
+
+#define WMAPRO_BLOCK_MAX_BITS 12                                           ///< log2 of max block size
+#define WMAPRO_BLOCK_MAX_SIZE (1 << WMAPRO_BLOCK_MAX_BITS)                 ///< maximum block size
+#define WMAPRO_BLOCK_SIZES    (WMAPRO_BLOCK_MAX_BITS - BLOCK_MIN_BITS + 1) ///< possible block sizes
+
+
+#define VLCBITS            9
+#define SCALEVLCBITS       8
+#define VEC4MAXDEPTH    ((HUFF_VEC4_MAXBITS+VLCBITS-1)/VLCBITS)
+#define VEC2MAXDEPTH    ((HUFF_VEC2_MAXBITS+VLCBITS-1)/VLCBITS)
+#define VEC1MAXDEPTH    ((HUFF_VEC1_MAXBITS+VLCBITS-1)/VLCBITS)
+#define SCALEMAXDEPTH   ((HUFF_SCALE_MAXBITS+SCALEVLCBITS-1)/SCALEVLCBITS)
+#define SCALERLMAXDEPTH ((HUFF_SCALE_RL_MAXBITS+VLCBITS-1)/VLCBITS)
+
+static VLC              sf_vlc;           ///< scale factor DPCM vlc
+static VLC              sf_rl_vlc;        ///< scale factor run length vlc
+static VLC              vec4_vlc;         ///< 4 coefficients per symbol
+static VLC              vec2_vlc;         ///< 2 coefficients per symbol
+static VLC              vec1_vlc;         ///< 1 coefficient per symbol
+static VLC              coef_vlc[2];      ///< coefficient run length vlc codes
+static float            sin64[33];        ///< sinus table for decorrelation
+
+/**
+ * @brief frame specific decoder context for a single channel
+ */
+typedef struct {
+    int16_t  prev_block_len;                          ///< length of the previous block
+    uint8_t  transmit_coefs;
+    uint8_t  num_subframes;
+    uint16_t subframe_len[MAX_SUBFRAMES];             ///< subframe length in samples
+    uint16_t subframe_offset[MAX_SUBFRAMES];          ///< subframe positions in the current frame
+    uint8_t  cur_subframe;                            ///< current subframe number
+    uint16_t decoded_samples;                         ///< number of already processed samples
+    uint8_t  grouped;                                 ///< channel is part of a group
+    int      quant_step;                              ///< quantization step for the current subframe
+    int8_t   reuse_sf;                                ///< share scale factors between subframes
+    int8_t   scale_factor_step;                       ///< scaling step for the current subframe
+    int      max_scale_factor;                        ///< maximum scale factor for the current subframe
+    int      saved_scale_factors[2][MAX_BANDS];       ///< resampled and (previously) transmitted scale factor values
+    int8_t   scale_factor_idx;                        ///< index for the transmitted scale factor values (used for resampling)
+    int*     scale_factors;                           ///< pointer to the scale factor values used for decoding
+    uint8_t  table_idx;                               ///< index in sf_offsets for the scale factor reference block
+    float*   coeffs;                                  ///< pointer to the subframe decode buffer
+    DECLARE_ALIGNED(16, float, out)[WMAPRO_BLOCK_MAX_SIZE + WMAPRO_BLOCK_MAX_SIZE / 2]; ///< output buffer
+} WMAProChannelCtx;
+
+/**
+ * @brief channel group for channel transformations
+ */
+typedef struct {
+    uint8_t num_channels;                                     ///< number of channels in the group
+    int8_t  transform;                                        ///< transform on / off
+    int8_t  transform_band[MAX_BANDS];                        ///< controls if the transform is enabled for a certain band
+    float   decorrelation_matrix[WMAPRO_MAX_CHANNELS*WMAPRO_MAX_CHANNELS];
+    float*  channel_data[WMAPRO_MAX_CHANNELS];                ///< transformation coefficients
+} WMAProChannelGrp;
+
+/**
+ * @brief main decoder context
+ */
+typedef struct WMAProDecodeCtx {
+    /* generic decoder variables */
+    AVCodecContext*  avctx;                         ///< codec context for av_log
+    DSPContext       dsp;                           ///< accelerated DSP functions
+    uint8_t          frame_data[MAX_FRAMESIZE +
+                      FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data
+    PutBitContext    pb;                            ///< context for filling the frame_data buffer
+    FFTContext       mdct_ctx[WMAPRO_BLOCK_SIZES];  ///< MDCT context per block size
+    DECLARE_ALIGNED(16, float, tmp)[WMAPRO_BLOCK_MAX_SIZE]; ///< IMDCT output buffer
+    float*           windows[WMAPRO_BLOCK_SIZES];   ///< windows for the different block sizes
+
+    /* frame size dependent frame information (set during initialization) */
+    uint32_t         decode_flags;                  ///< used compression features
+    uint8_t          len_prefix;                    ///< frame is prefixed with its length
+    uint8_t          dynamic_range_compression;     ///< frame contains DRC data
+    uint8_t          bits_per_sample;               ///< integer audio sample size for the unscaled IMDCT output (used to scale to [-1.0, 1.0])
+    uint16_t         samples_per_frame;             ///< number of samples to output
+    uint16_t         log2_frame_size;
+    int8_t           num_channels;                  ///< number of channels in the stream (same as AVCodecContext.num_channels)
+    int8_t           lfe_channel;                   ///< lfe channel index
+    uint8_t          max_num_subframes;
+    uint8_t          subframe_len_bits;             ///< number of bits used for the subframe length
+    uint8_t          max_subframe_len_bit;          ///< flag indicating that the subframe is of maximum size when the first subframe length bit is 1
+    uint16_t         min_samples_per_subframe;
+    int8_t           num_sfb[WMAPRO_BLOCK_SIZES];   ///< scale factor bands per block size
+    int16_t          sfb_offsets[WMAPRO_BLOCK_SIZES][MAX_BANDS];                    ///< scale factor band offsets (multiples of 4)
+    int8_t           sf_offsets[WMAPRO_BLOCK_SIZES][WMAPRO_BLOCK_SIZES][MAX_BANDS]; ///< scale factor resample matrix
+    int16_t          subwoofer_cutoffs[WMAPRO_BLOCK_SIZES]; ///< subwoofer cutoff values
+
+    /* packet decode state */
+    GetBitContext    pgb;                           ///< bitstream reader context for the packet
+    uint8_t          packet_offset;                 ///< frame offset in the packet
+    uint8_t          packet_sequence_number;        ///< current packet number
+    int              num_saved_bits;                ///< saved number of bits
+    int              frame_offset;                  ///< frame offset in the bit reservoir
+    int              subframe_offset;               ///< subframe offset in the bit reservoir
+    uint8_t          packet_loss;                   ///< set in case of bitstream error
+    uint8_t          packet_done;                   ///< set when a packet is fully decoded
+
+    /* frame decode state */
+    uint32_t         frame_num;                     ///< current frame number (not used for decoding)
+    GetBitContext    gb;                            ///< bitstream reader context
+    int              buf_bit_size;                  ///< buffer size in bits
+    float*           samples;                       ///< current samplebuffer pointer
+    float*           samples_end;                   ///< maximum samplebuffer pointer
+    uint8_t          drc_gain;                      ///< gain for the DRC tool
+    int8_t           skip_frame;                    ///< skip output step
+    int8_t           parsed_all_subframes;          ///< all subframes decoded?
+
+    /* subframe/block decode state */
+    int16_t          subframe_len;                  ///< current subframe length
+    int8_t           channels_for_cur_subframe;     ///< number of channels that contain the subframe
+    int8_t           channel_indexes_for_cur_subframe[WMAPRO_MAX_CHANNELS];
+    int8_t           num_bands;                     ///< number of scale factor bands
+    int16_t*         cur_sfb_offsets;               ///< sfb offsets for the current block
+    uint8_t          table_idx;                     ///< index for the num_sfb, sfb_offsets, sf_offsets and subwoofer_cutoffs tables
+    int8_t           esc_len;                       ///< length of escaped coefficients
+
+    uint8_t          num_chgroups;                  ///< number of channel groups
+    WMAProChannelGrp chgroup[WMAPRO_MAX_CHANNELS];  ///< channel group information
+
+    WMAProChannelCtx channel[WMAPRO_MAX_CHANNELS];  ///< per channel data
+} WMAProDecodeCtx;
+
+
+/**
+ *@brief helper function to print the most important members of the context
+ *@param s context
+ */
+static void av_cold dump_context(WMAProDecodeCtx *s)
+{
+#define PRINT(a, b)     av_log(s->avctx, AV_LOG_DEBUG, " %s = %d\n", a, b);
+#define PRINT_HEX(a, b) av_log(s->avctx, AV_LOG_DEBUG, " %s = %x\n", a, b);
+
+    PRINT("ed sample bit depth", s->bits_per_sample);
+    PRINT_HEX("ed decode flags", s->decode_flags);
+    PRINT("samples per frame",   s->samples_per_frame);
+    PRINT("log2 frame size",     s->log2_frame_size);
+    PRINT("max num subframes",   s->max_num_subframes);
+    PRINT("len prefix",          s->len_prefix);
+    PRINT("num channels",        s->num_channels);
+}
+
+/**
+ *@brief Uninitialize the decoder and free all resources.
+ *@param avctx codec context
+ *@return 0 on success, < 0 otherwise
+ */
+static av_cold int decode_end(AVCodecContext *avctx)
+{
+    WMAProDecodeCtx *s = avctx->priv_data;
+    int i;
+
+    for (i = 0; i < WMAPRO_BLOCK_SIZES; i++)
+        ff_mdct_end(&s->mdct_ctx[i]);
+
+    return 0;
+}
+
+/**
+ *@brief Initialize the decoder.
+ *@param avctx codec context
+ *@return 0 on success, -1 otherwise
+ */
+static av_cold int decode_init(AVCodecContext *avctx)
+{
+    WMAProDecodeCtx *s = avctx->priv_data;
+    uint8_t *edata_ptr = avctx->extradata;
+    unsigned int channel_mask;
+    int i;
+    int log2_max_num_subframes;
+    int num_possible_block_sizes;
+
+    s->avctx = avctx;
+    dsputil_init(&s->dsp, avctx);
+    init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
+
+    avctx->sample_fmt = SAMPLE_FMT_FLT;
+
+    if (avctx->extradata_size >= 18) {
+        s->decode_flags    = AV_RL16(edata_ptr+14);
+        channel_mask       = AV_RL32(edata_ptr+2);
+        s->bits_per_sample = AV_RL16(edata_ptr);
+        /** dump the extradata */
+        for (i = 0; i < avctx->extradata_size; i++)
+            dprintf(avctx, "[%x] ", avctx->extradata[i]);
+        dprintf(avctx, "\n");
+
+    } else {
+        av_log_ask_for_sample(avctx, "Unknown extradata size\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /** generic init */
+    s->log2_frame_size = av_log2(avctx->block_align) + 4;
+
+    /** frame info */
+    s->skip_frame  = 1; /** skip first frame */
+    s->packet_loss = 1;
+    s->len_prefix  = (s->decode_flags & 0x40);
+
+    if (!s->len_prefix) {
+        av_log_ask_for_sample(avctx, "no length prefix\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /** get frame len */
+    s->samples_per_frame = 1 << ff_wma_get_frame_len_bits(avctx->sample_rate,
+                                                          3, s->decode_flags);
+
+    /** init previous block len */
+    for (i = 0; i < avctx->channels; i++)
+        s->channel[i].prev_block_len = s->samples_per_frame;
+
+    /** subframe info */
+    log2_max_num_subframes       = ((s->decode_flags & 0x38) >> 3);
+    s->max_num_subframes         = 1 << log2_max_num_subframes;
+    if (s->max_num_subframes == 16)
+        s->max_subframe_len_bit = 1;
+    s->subframe_len_bits = av_log2(log2_max_num_subframes) + 1;
+
+    num_possible_block_sizes     = log2_max_num_subframes + 1;
+    s->min_samples_per_subframe  = s->samples_per_frame / s->max_num_subframes;
+    s->dynamic_range_compression = (s->decode_flags & 0x80);
+
+    if (s->max_num_subframes > MAX_SUBFRAMES) {
+        av_log(avctx, AV_LOG_ERROR, "invalid number of subframes %i\n",
+               s->max_num_subframes);
+        return AVERROR_INVALIDDATA;
+    }
+
+    s->num_channels = avctx->channels;
+
+    /** extract lfe channel position */
+    s->lfe_channel = -1;
+
+    if (channel_mask & 8) {
+        unsigned int mask;
+        for (mask = 1; mask < 16; mask <<= 1) {
+            if (channel_mask & mask)
+                ++s->lfe_channel;
+        }
+    }
+
+    if (s->num_channels < 0) {
+        av_log(avctx, AV_LOG_ERROR, "invalid number of channels %d\n", s->num_channels);
+        return AVERROR_INVALIDDATA;
+    } else if (s->num_channels > WMAPRO_MAX_CHANNELS) {
+        av_log_ask_for_sample(avctx, "unsupported number of channels\n");
+        return AVERROR_PATCHWELCOME;
+    }
+
+    INIT_VLC_STATIC(&sf_vlc, SCALEVLCBITS, HUFF_SCALE_SIZE,
+                    scale_huffbits, 1, 1,
+                    scale_huffcodes, 2, 2, 616);
+
+    INIT_VLC_STATIC(&sf_rl_vlc, VLCBITS, HUFF_SCALE_RL_SIZE,
+                    scale_rl_huffbits, 1, 1,
+                    scale_rl_huffcodes, 4, 4, 1406);
+
+    INIT_VLC_STATIC(&coef_vlc[0], VLCBITS, HUFF_COEF0_SIZE,
+                    coef0_huffbits, 1, 1,
+                    coef0_huffcodes, 4, 4, 2108);
+
+    INIT_VLC_STATIC(&coef_vlc[1], VLCBITS, HUFF_COEF1_SIZE,
+                    coef1_huffbits, 1, 1,
+                    coef1_huffcodes, 4, 4, 3912);
+
+    INIT_VLC_STATIC(&vec4_vlc, VLCBITS, HUFF_VEC4_SIZE,
+                    vec4_huffbits, 1, 1,
+                    vec4_huffcodes, 2, 2, 604);
+
+    INIT_VLC_STATIC(&vec2_vlc, VLCBITS, HUFF_VEC2_SIZE,
+                    vec2_huffbits, 1, 1,
+                    vec2_huffcodes, 2, 2, 562);
+
+    INIT_VLC_STATIC(&vec1_vlc, VLCBITS, HUFF_VEC1_SIZE,
+                    vec1_huffbits, 1, 1,
+                    vec1_huffcodes, 2, 2, 562);
+
+    /** calculate number of scale factor bands and their offsets
+        for every possible block size */
+    for (i = 0; i < num_possible_block_sizes; i++) {
+        int subframe_len = s->samples_per_frame >> i;
+        int x;
+        int band = 1;
+
+        s->sfb_offsets[i][0] = 0;
+
+        for (x = 0; x < MAX_BANDS-1 && s->sfb_offsets[i][band - 1] < subframe_len; x++) {
+            int offset = (subframe_len * 2 * critical_freq[x])
+                          / s->avctx->sample_rate + 2;
+            offset &= ~3;
+            if (offset > s->sfb_offsets[i][band - 1])
+                s->sfb_offsets[i][band++] = offset;
+        }
+        s->sfb_offsets[i][band - 1] = subframe_len;
+        s->num_sfb[i]               = band - 1;
+    }
+
+
+    /** Scale factors can be shared between blocks of different size
+        as every block has a different scale factor band layout.
+        The matrix sf_offsets is needed to find the correct scale factor.
+     */
+
+    for (i = 0; i < num_possible_block_sizes; i++) {
+        int b;
+        for (b = 0; b < s->num_sfb[i]; b++) {
+            int x;
+            int offset = ((s->sfb_offsets[i][b]
+                           + s->sfb_offsets[i][b + 1] - 1) << i) >> 1;
+            for (x = 0; x < num_possible_block_sizes; x++) {
+                int v = 0;
+                while (s->sfb_offsets[x][v + 1] << x < offset)
+                    ++v;
+                s->sf_offsets[i][x][b] = v;
+            }
+        }
+    }
+
+    /** init MDCT, FIXME: only init needed sizes */
+    for (i = 0; i < WMAPRO_BLOCK_SIZES; i++)
+        ff_mdct_init(&s->mdct_ctx[i], BLOCK_MIN_BITS+1+i, 1,
+                     1.0 / (1 << (BLOCK_MIN_BITS + i - 1))
+                     / (1 << (s->bits_per_sample - 1)));
+
+    /** init MDCT windows: simple sinus window */
+    for (i = 0; i < WMAPRO_BLOCK_SIZES; i++) {
+        const int win_idx = WMAPRO_BLOCK_MAX_BITS - i;
+        ff_init_ff_sine_windows(win_idx);
+        s->windows[WMAPRO_BLOCK_SIZES - i - 1] = ff_sine_windows[win_idx];
+    }
+
+    /** calculate subwoofer cutoff values */
+    for (i = 0; i < num_possible_block_sizes; i++) {
+        int block_size = s->samples_per_frame >> i;
+        int cutoff = (440*block_size + 3 * (s->avctx->sample_rate >> 1) - 1)
+                     / s->avctx->sample_rate;
+        s->subwoofer_cutoffs[i] = av_clip(cutoff, 4, block_size);
+    }
+
+    /** calculate sine values for the decorrelation matrix */
+    for (i = 0; i < 33; i++)
+        sin64[i] = sin(i*M_PI / 64.0);
+
+    if (avctx->debug & FF_DEBUG_BITSTREAM)
+        dump_context(s);
+
+    avctx->channel_layout = channel_mask;
+    return 0;
+}
+
+/**
+ *@brief Decode the subframe length.
+ *@param s context
+ *@param offset sample offset in the frame
+ *@return decoded subframe length on success, < 0 in case of an error
+ */
+static int decode_subframe_length(WMAProDecodeCtx *s, int offset)
+{
+    int frame_len_shift = 0;
+    int subframe_len;
+
+    /** no need to read from the bitstream when only one length is possible */
+    if (offset == s->samples_per_frame - s->min_samples_per_subframe)
+        return s->min_samples_per_subframe;
+
+    /** 1 bit indicates if the subframe is of maximum length */
+    if (s->max_subframe_len_bit) {
+        if (get_bits1(&s->gb))
+            frame_len_shift = 1 + get_bits(&s->gb, s->subframe_len_bits-1);
+    } else
+        frame_len_shift = get_bits(&s->gb, s->subframe_len_bits);
+
+    subframe_len = s->samples_per_frame >> frame_len_shift;
+
+    /** sanity check the length */
+    if (subframe_len < s->min_samples_per_subframe ||
+        subframe_len > s->samples_per_frame) {
+        av_log(s->avctx, AV_LOG_ERROR, "broken frame: subframe_len %i\n",
+               subframe_len);
+        return AVERROR_INVALIDDATA;
+    }
+    return subframe_len;
+}
+
+/**
+ *@brief Decode how the data in the frame is split into subframes.
+ *       Every WMA frame contains the encoded data for a fixed number of
+ *       samples per channel. The data for every channel might be split
+ *       into several subframes. This function will reconstruct the list of
+ *       subframes for every channel.
+ *
+ *       If the subframes are not evenly split, the algorithm estimates the
+ *       channels with the lowest number of total samples.
+ *       Afterwards, for each of these channels a bit is read from the
+ *       bitstream that indicates if the channel contains a subframe with the
+ *       next subframe size that is going to be read from the bitstream or not.
+ *       If a channel contains such a subframe, the subframe size gets added to
+ *       the channel's subframe list.
+ *       The algorithm repeats these steps until the frame is properly divided
+ *       between the individual channels.
+ *
+ *@param s context
+ *@return 0 on success, < 0 in case of an error
+ */
+static int decode_tilehdr(WMAProDecodeCtx *s)
+{
+    uint16_t num_samples[WMAPRO_MAX_CHANNELS];        /** sum of samples for all currently known subframes of a channel */
+    uint8_t  contains_subframe[WMAPRO_MAX_CHANNELS];  /** flag indicating if a channel contains the current subframe */
+    int channels_for_cur_subframe = s->num_channels;  /** number of channels that contain the current subframe */
+    int fixed_channel_layout = 0;                     /** flag indicating that all channels use the same subframe offsets and sizes */
+    int min_channel_len = 0;                          /** smallest sum of samples (channels with this length will be processed first) */
+    int c;
+
+    /* Should never consume more than 3073 bits (256 iterations for the
+     * while loop when always the minimum amount of 128 samples is substracted
+     * from missing samples in the 8 channel case).
+     * 1 + BLOCK_MAX_SIZE * MAX_CHANNELS / BLOCK_MIN_SIZE * (MAX_CHANNELS  + 4)
+     */
+
+    /** reset tiling information */
+    for (c = 0; c < s->num_channels; c++)
+        s->channel[c].num_subframes = 0;
+
+    memset(num_samples, 0, sizeof(num_samples));
+
+    if (s->max_num_subframes == 1 || get_bits1(&s->gb))
+        fixed_channel_layout = 1;
+
+    /** loop until the frame data is split between the subframes */
+    do {
+        int subframe_len;
+
+        /** check which channels contain the subframe */
+        for (c = 0; c < s->num_channels; c++) {
+            if (num_samples[c] == min_channel_len) {
+                if (fixed_channel_layout || channels_for_cur_subframe == 1 ||
+                   (min_channel_len == s->samples_per_frame - s->min_samples_per_subframe))
+                    contains_subframe[c] = 1;
+                else
+                    contains_subframe[c] = get_bits1(&s->gb);
+            } else
+                contains_subframe[c] = 0;
+        }
+
+        /** get subframe length, subframe_len == 0 is not allowed */
+        if ((subframe_len = decode_subframe_length(s, min_channel_len)) <= 0)
+            return AVERROR_INVALIDDATA;
+
+        /** add subframes to the individual channels and find new min_channel_len */
+        min_channel_len += subframe_len;
+        for (c = 0; c < s->num_channels; c++) {
+            WMAProChannelCtx* chan = &s->channel[c];
+
+            if (contains_subframe[c]) {
+                if (chan->num_subframes >= MAX_SUBFRAMES) {
+                    av_log(s->avctx, AV_LOG_ERROR,
+                           "broken frame: num subframes > 31\n");
+                    return AVERROR_INVALIDDATA;
+                }
+                chan->subframe_len[chan->num_subframes] = subframe_len;
+                num_samples[c] += subframe_len;
+                ++chan->num_subframes;
+                if (num_samples[c] > s->samples_per_frame) {
+                    av_log(s->avctx, AV_LOG_ERROR, "broken frame: "
+                           "channel len > samples_per_frame\n");
+                    return AVERROR_INVALIDDATA;
+                }
+            } else if (num_samples[c] <= min_channel_len) {
+                if (num_samples[c] < min_channel_len) {
+                    channels_for_cur_subframe = 0;
+                    min_channel_len = num_samples[c];
+                }
+                ++channels_for_cur_subframe;
+            }
+        }
+    } while (min_channel_len < s->samples_per_frame);
+
+    for (c = 0; c < s->num_channels; c++) {
+        int i;
+        int offset = 0;
+        for (i = 0; i < s->channel[c].num_subframes; i++) {
+            dprintf(s->avctx, "frame[%i] channel[%i] subframe[%i]"
+                    " len %i\n", s->frame_num, c, i,
+                    s->channel[c].subframe_len[i]);
+            s->channel[c].subframe_offset[i] = offset;
+            offset += s->channel[c].subframe_len[i];
+        }
+    }
+
+    return 0;
+}
+
+/**
+ *@brief Calculate a decorrelation matrix from the bitstream parameters.
+ *@param s codec context
+ *@param chgroup channel group for which the matrix needs to be calculated
+ */
+static void decode_decorrelation_matrix(WMAProDecodeCtx *s,
+                                        WMAProChannelGrp *chgroup)
+{
+    int i;
+    int offset = 0;
+    int8_t rotation_offset[WMAPRO_MAX_CHANNELS * WMAPRO_MAX_CHANNELS];
+    memset(chgroup->decorrelation_matrix, 0, s->num_channels *
+           s->num_channels * sizeof(*chgroup->decorrelation_matrix));
+
+    for (i = 0; i < chgroup->num_channels * (chgroup->num_channels - 1) >> 1; i++)
+        rotation_offset[i] = get_bits(&s->gb, 6);
+
+    for (i = 0; i < chgroup->num_channels; i++)
+        chgroup->decorrelation_matrix[chgroup->num_channels * i + i] =
+            get_bits1(&s->gb) ? 1.0 : -1.0;
+
+    for (i = 1; i < chgroup->num_channels; i++) {
+        int x;
+        for (x = 0; x < i; x++) {
+            int y;
+            for (y = 0; y < i + 1; y++) {
+                float v1 = chgroup->decorrelation_matrix[x * chgroup->num_channels + y];
+                float v2 = chgroup->decorrelation_matrix[i * chgroup->num_channels + y];
+                int n = rotation_offset[offset + x];
+                float sinv;
+                float cosv;
+
+                if (n < 32) {
+                    sinv = sin64[n];
+                    cosv = sin64[32 - n];
+                } else {
+                    sinv =  sin64[64 -  n];
+                    cosv = -sin64[n  - 32];
+                }
+
+                chgroup->decorrelation_matrix[y + x * chgroup->num_channels] =
+                                               (v1 * sinv) - (v2 * cosv);
+                chgroup->decorrelation_matrix[y + i * chgroup->num_channels] =
+                                               (v1 * cosv) + (v2 * sinv);
+            }
+        }
+        offset += i;
+    }
+}
+
+/**
+ *@brief Decode channel transformation parameters
+ *@param s codec context
+ *@return 0 in case of success, < 0 in case of bitstream errors
+ */
+static int decode_channel_transform(WMAProDecodeCtx* s)
+{
+    int i;
+    /* should never consume more than 1921 bits for the 8 channel case
+     * 1 + MAX_CHANNELS * (MAX_CHANNELS + 2 + 3 * MAX_CHANNELS * MAX_CHANNELS
+     * + MAX_CHANNELS + MAX_BANDS + 1)
+     */
+
+    /** in the one channel case channel transforms are pointless */
+    s->num_chgroups = 0;
+    if (s->num_channels > 1) {
+        int remaining_channels = s->channels_for_cur_subframe;
+
+        if (get_bits1(&s->gb)) {
+            av_log_ask_for_sample(s->avctx,
+                                  "unsupported channel transform bit\n");
+            return AVERROR_INVALIDDATA;
+        }
+
+        for (s->num_chgroups = 0; remaining_channels &&
+             s->num_chgroups < s->channels_for_cur_subframe; s->num_chgroups++) {
+            WMAProChannelGrp* chgroup = &s->chgroup[s->num_chgroups];
+            float** channel_data = chgroup->channel_data;
+            chgroup->num_channels = 0;
+            chgroup->transform = 0;
+
+            /** decode channel mask */
+            if (remaining_channels > 2) {
+                for (i = 0; i < s->channels_for_cur_subframe; i++) {
+                    int channel_idx = s->channel_indexes_for_cur_subframe[i];
+                    if (!s->channel[channel_idx].grouped
+                        && get_bits1(&s->gb)) {
+                        ++chgroup->num_channels;
+                        s->channel[channel_idx].grouped = 1;
+                        *channel_data++ = s->channel[channel_idx].coeffs;
+                    }
+                }
+            } else {
+                chgroup->num_channels = remaining_channels;
+                for (i = 0; i < s->channels_for_cur_subframe; i++) {
+                    int channel_idx = s->channel_indexes_for_cur_subframe[i];
+                    if (!s->channel[channel_idx].grouped)
+                        *channel_data++ = s->channel[channel_idx].coeffs;
+                    s->channel[channel_idx].grouped = 1;
+                }
+            }
+
+            /** decode transform type */
+            if (chgroup->num_channels == 2) {
+                if (get_bits1(&s->gb)) {
+                    if (get_bits1(&s->gb)) {
+                        av_log_ask_for_sample(s->avctx,
+                                              "unsupported channel transform type\n");
+                    }
+                } else {
+                    chgroup->transform = 1;
+                    if (s->num_channels == 2) {
+                        chgroup->decorrelation_matrix[0] =  1.0;
+                        chgroup->decorrelation_matrix[1] = -1.0;
+                        chgroup->decorrelation_matrix[2] =  1.0;
+                        chgroup->decorrelation_matrix[3] =  1.0;
+                    } else {
+                        /** cos(pi/4) */
+                        chgroup->decorrelation_matrix[0] =  0.70703125;
+                        chgroup->decorrelation_matrix[1] = -0.70703125;
+                        chgroup->decorrelation_matrix[2] =  0.70703125;
+                        chgroup->decorrelation_matrix[3] =  0.70703125;
+                    }
+                }
+            } else if (chgroup->num_channels > 2) {
+                if (get_bits1(&s->gb)) {
+                    chgroup->transform = 1;
+                    if (get_bits1(&s->gb)) {
+                        decode_decorrelation_matrix(s, chgroup);
+                    } else {
+                        /** FIXME: more than 6 coupled channels not supported */
+                        if (chgroup->num_channels > 6) {
+                            av_log_ask_for_sample(s->avctx,
+                                                  "coupled channels > 6\n");
+                        } else {
+                            memcpy(chgroup->decorrelation_matrix,
+                                   default_decorrelation[chgroup->num_channels],
+                                   chgroup->num_channels * chgroup->num_channels *
+                                   sizeof(*chgroup->decorrelation_matrix));
+                        }
+                    }
+                }
+            }
+
+            /** decode transform on / off */
+            if (chgroup->transform) {
+                if (!get_bits1(&s->gb)) {
+                    int i;
+                    /** transform can be enabled for individual bands */
+                    for (i = 0; i < s->num_bands; i++) {
+                        chgroup->transform_band[i] = get_bits1(&s->gb);
+                    }
+                } else {
+                    memset(chgroup->transform_band, 1, s->num_bands);
+                }
+            }
+            remaining_channels -= chgroup->num_channels;
+        }
+    }
+    return 0;
+}
+
+/**
+ *@brief Extract the coefficients from the bitstream.
+ *@param s codec context
+ *@param c current channel number
+ *@return 0 on success, < 0 in case of bitstream errors
+ */
+static int decode_coeffs(WMAProDecodeCtx *s, int c)
+{
+    /* Integers 0..15 as single-precision floats.  The table saves a
+       costly int to float conversion, and storing the values as
+       integers allows fast sign-flipping. */
+    static const int fval_tab[16] = {
+        0x00000000, 0x3f800000, 0x40000000, 0x40400000,
+        0x40800000, 0x40a00000, 0x40c00000, 0x40e00000,
+        0x41000000, 0x41100000, 0x41200000, 0x41300000,
+        0x41400000, 0x41500000, 0x41600000, 0x41700000,
+    };
+    int vlctable;
+    VLC* vlc;
+    WMAProChannelCtx* ci = &s->channel[c];
+    int rl_mode = 0;
+    int cur_coeff = 0;
+    int num_zeros = 0;
+    const uint16_t* run;
+    const float* level;
+
+    dprintf(s->avctx, "decode coefficients for channel %i\n", c);
+
+    vlctable = get_bits1(&s->gb);
+    vlc = &coef_vlc[vlctable];
+
+    if (vlctable) {
+        run = coef1_run;
+        level = coef1_level;
+    } else {
+        run = coef0_run;
+        level = coef0_level;
+    }
+
+    /** decode vector coefficients (consumes up to 167 bits per iteration for
+      4 vector coded large values) */
+    while (!rl_mode && cur_coeff + 3 < s->subframe_len) {
+        int vals[4];
+        int i;
+        unsigned int idx;
+
+        idx = get_vlc2(&s->gb, vec4_vlc.table, VLCBITS, VEC4MAXDEPTH);
+
+        if (idx == HUFF_VEC4_SIZE - 1) {
+            for (i = 0; i < 4; i += 2) {
+                idx = get_vlc2(&s->gb, vec2_vlc.table, VLCBITS, VEC2MAXDEPTH);
+                if (idx == HUFF_VEC2_SIZE - 1) {
+                    int v0, v1;
+                    v0 = get_vlc2(&s->gb, vec1_vlc.table, VLCBITS, VEC1MAXDEPTH);
+                    if (v0 == HUFF_VEC1_SIZE - 1)
+                        v0 += ff_wma_get_large_val(&s->gb);
+                    v1 = get_vlc2(&s->gb, vec1_vlc.table, VLCBITS, VEC1MAXDEPTH);
+                    if (v1 == HUFF_VEC1_SIZE - 1)
+                        v1 += ff_wma_get_large_val(&s->gb);
+                    ((float*)vals)[i  ] = v0;
+                    ((float*)vals)[i+1] = v1;
+                } else {
+                    vals[i]   = fval_tab[symbol_to_vec2[idx] >> 4 ];
+                    vals[i+1] = fval_tab[symbol_to_vec2[idx] & 0xF];
+                }
+            }
+        } else {
+            vals[0] = fval_tab[ symbol_to_vec4[idx] >> 12      ];
+            vals[1] = fval_tab[(symbol_to_vec4[idx] >> 8) & 0xF];
+            vals[2] = fval_tab[(symbol_to_vec4[idx] >> 4) & 0xF];
+            vals[3] = fval_tab[ symbol_to_vec4[idx]       & 0xF];
+        }
+
+        /** decode sign */
+        for (i = 0; i < 4; i++) {
+            if (vals[i]) {
+                int sign = get_bits1(&s->gb) - 1;
+                *(uint32_t*)&ci->coeffs[cur_coeff] = vals[i] ^ sign<<31;
+                num_zeros = 0;
+            } else {
+                ci->coeffs[cur_coeff] = 0;
+                /** switch to run level mode when subframe_len / 128 zeros
+                    were found in a row */
+                rl_mode |= (++num_zeros > s->subframe_len >> 8);
+            }
+            ++cur_coeff;
+        }
+    }
+
+    /** decode run level coded coefficients */
+    if (rl_mode) {
+        memset(&ci->coeffs[cur_coeff], 0,
+               sizeof(*ci->coeffs) * (s->subframe_len - cur_coeff));
+        if (ff_wma_run_level_decode(s->avctx, &s->gb, vlc,
+                                    level, run, 1, ci->coeffs,
+                                    cur_coeff, s->subframe_len,
+                                    s->subframe_len, s->esc_len, 0))
+            return AVERROR_INVALIDDATA;
+    }
+
+    return 0;
+}
+
+/**
+ *@brief Extract scale factors from the bitstream.
+ *@param s codec context
+ *@return 0 on success, < 0 in case of bitstream errors
+ */
+static int decode_scale_factors(WMAProDecodeCtx* s)
+{
+    int i;
+
+    /** should never consume more than 5344 bits
+     *  MAX_CHANNELS * (1 +  MAX_BANDS * 23)
+     */
+
+    for (i = 0; i < s->channels_for_cur_subframe; i++) {
+        int c = s->channel_indexes_for_cur_subframe[i];
+        int* sf;
+        int* sf_end;
+        s->channel[c].scale_factors = s->channel[c].saved_scale_factors[!s->channel[c].scale_factor_idx];
+        sf_end = s->channel[c].scale_factors + s->num_bands;
+
+        /** resample scale factors for the new block size
+         *  as the scale factors might need to be resampled several times
+         *  before some  new values are transmitted, a backup of the last
+         *  transmitted scale factors is kept in saved_scale_factors
+         */
+        if (s->channel[c].reuse_sf) {
+            const int8_t* sf_offsets = s->sf_offsets[s->table_idx][s->channel[c].table_idx];
+            int b;
+            for (b = 0; b < s->num_bands; b++)
+                s->channel[c].scale_factors[b] =
+                    s->channel[c].saved_scale_factors[s->channel[c].scale_factor_idx][*sf_offsets++];
+        }
+
+        if (!s->channel[c].cur_subframe || get_bits1(&s->gb)) {
+
+            if (!s->channel[c].reuse_sf) {
+                int val;
+                /** decode DPCM coded scale factors */
+                s->channel[c].scale_factor_step = get_bits(&s->gb, 2) + 1;
+                val = 45 / s->channel[c].scale_factor_step;
+                for (sf = s->channel[c].scale_factors; sf < sf_end; sf++) {
+                    val += get_vlc2(&s->gb, sf_vlc.table, SCALEVLCBITS, SCALEMAXDEPTH) - 60;
+                    *sf = val;
+                }
+            } else {
+                int i;
+                /** run level decode differences to the resampled factors */
+                for (i = 0; i < s->num_bands; i++) {
+                    int idx;
+                    int skip;
+                    int val;
+                    int sign;
+
+                    idx = get_vlc2(&s->gb, sf_rl_vlc.table, VLCBITS, SCALERLMAXDEPTH);
+
+                    if (!idx) {
+                        uint32_t code = get_bits(&s->gb, 14);
+                        val  =  code >> 6;
+                        sign = (code & 1) - 1;
+                        skip = (code & 0x3f) >> 1;
+                    } else if (idx == 1) {
+                        break;
+                    } else {
+                        skip = scale_rl_run[idx];
+                        val  = scale_rl_level[idx];
+                        sign = get_bits1(&s->gb)-1;
+                    }
+
+                    i += skip;
+                    if (i >= s->num_bands) {
+                        av_log(s->avctx, AV_LOG_ERROR,
+                               "invalid scale factor coding\n");
+                        return AVERROR_INVALIDDATA;
+                    }
+                    s->channel[c].scale_factors[i] += (val ^ sign) - sign;
+                }
+            }
+            /** swap buffers */
+            s->channel[c].scale_factor_idx = !s->channel[c].scale_factor_idx;
+            s->channel[c].table_idx = s->table_idx;
+            s->channel[c].reuse_sf  = 1;
+        }
+
+        /** calculate new scale factor maximum */
+        s->channel[c].max_scale_factor = s->channel[c].scale_factors[0];
+        for (sf = s->channel[c].scale_factors + 1; sf < sf_end; sf++) {
+            s->channel[c].max_scale_factor =
+                FFMAX(s->channel[c].max_scale_factor, *sf);
+        }
+
+    }
+    return 0;
+}
+
+/**
+ *@brief Reconstruct the individual channel data.
+ *@param s codec context
+ */
+static void inverse_channel_transform(WMAProDecodeCtx *s)
+{
+    int i;
+
+    for (i = 0; i < s->num_chgroups; i++) {
+        if (s->chgroup[i].transform) {
+            float data[WMAPRO_MAX_CHANNELS];
+            const int num_channels = s->chgroup[i].num_channels;
+            float** ch_data = s->chgroup[i].channel_data;
+            float** ch_end = ch_data + num_channels;
+            const int8_t* tb = s->chgroup[i].transform_band;
+            int16_t* sfb;
+
+            /** multichannel decorrelation */
+            for (sfb = s->cur_sfb_offsets;
+                 sfb < s->cur_sfb_offsets + s->num_bands; sfb++) {
+                int y;
+                if (*tb++ == 1) {
+                    /** multiply values with the decorrelation_matrix */
+                    for (y = sfb[0]; y < FFMIN(sfb[1], s->subframe_len); y++) {
+                        const float* mat = s->chgroup[i].decorrelation_matrix;
+                        const float* data_end = data + num_channels;
+                        float* data_ptr = data;
+                        float** ch;
+
+                        for (ch = ch_data; ch < ch_end; ch++)
+                            *data_ptr++ = (*ch)[y];
+
+                        for (ch = ch_data; ch < ch_end; ch++) {
+                            float sum = 0;
+                            data_ptr = data;
+                            while (data_ptr < data_end)
+                                sum += *data_ptr++ * *mat++;
+
+                            (*ch)[y] = sum;
+                        }
+                    }
+                } else if (s->num_channels == 2) {
+                    int len = FFMIN(sfb[1], s->subframe_len) - sfb[0];
+                    s->dsp.vector_fmul_scalar(ch_data[0] + sfb[0],
+                                              ch_data[0] + sfb[0],
+                                              181.0 / 128, len);
+                    s->dsp.vector_fmul_scalar(ch_data[1] + sfb[0],
+                                              ch_data[1] + sfb[0],
+                                              181.0 / 128, len);
+                }
+            }
+        }
+    }
+}
+
+/**
+ *@brief Apply sine window and reconstruct the output buffer.
+ *@param s codec context
+ */
+static void wmapro_window(WMAProDecodeCtx *s)
+{
+    int i;
+    for (i = 0; i < s->channels_for_cur_subframe; i++) {
+        int c = s->channel_indexes_for_cur_subframe[i];
+        float* window;
+        int winlen = s->channel[c].prev_block_len;
+        float* start = s->channel[c].coeffs - (winlen >> 1);
+
+        if (s->subframe_len < winlen) {
+            start += (winlen - s->subframe_len) >> 1;
+            winlen = s->subframe_len;
+        }
+
+        window = s->windows[av_log2(winlen) - BLOCK_MIN_BITS];
+
+        winlen >>= 1;
+
+        s->dsp.vector_fmul_window(start, start, start + winlen,
+                                  window, 0, winlen);
+
+        s->channel[c].prev_block_len = s->subframe_len;
+    }
+}
+
+/**
+ *@brief Decode a single subframe (block).
+ *@param s codec context
+ *@return 0 on success, < 0 when decoding failed
+ */
+static int decode_subframe(WMAProDecodeCtx *s)
+{
+    int offset = s->samples_per_frame;
+    int subframe_len = s->samples_per_frame;
+    int i;
+    int total_samples   = s->samples_per_frame * s->num_channels;
+    int transmit_coeffs = 0;
+    int cur_subwoofer_cutoff;
+
+    s->subframe_offset = get_bits_count(&s->gb);
+
+    /** reset channel context and find the next block offset and size
+        == the next block of the channel with the smallest number of
+        decoded samples
+    */
+    for (i = 0; i < s->num_channels; i++) {
+        s->channel[i].grouped = 0;
+        if (offset > s->channel[i].decoded_samples) {
+            offset = s->channel[i].decoded_samples;
+            subframe_len =
+                s->channel[i].subframe_len[s->channel[i].cur_subframe];
+        }
+    }
+
+    dprintf(s->avctx,
+            "processing subframe with offset %i len %i\n", offset, subframe_len);
+
+    /** get a list of all channels that contain the estimated block */
+    s->channels_for_cur_subframe = 0;
+    for (i = 0; i < s->num_channels; i++) {
+        const int cur_subframe = s->channel[i].cur_subframe;
+        /** substract already processed samples */
+        total_samples -= s->channel[i].decoded_samples;
+
+        /** and count if there are multiple subframes that match our profile */
+        if (offset == s->channel[i].decoded_samples &&
+            subframe_len == s->channel[i].subframe_len[cur_subframe]) {
+            total_samples -= s->channel[i].subframe_len[cur_subframe];
+            s->channel[i].decoded_samples +=
+                s->channel[i].subframe_len[cur_subframe];
+            s->channel_indexes_for_cur_subframe[s->channels_for_cur_subframe] = i;
+            ++s->channels_for_cur_subframe;
+        }
+    }
+
+    /** check if the frame will be complete after processing the
+        estimated block */
+    if (!total_samples)
+        s->parsed_all_subframes = 1;
+
+
+    dprintf(s->avctx, "subframe is part of %i channels\n",
+            s->channels_for_cur_subframe);
+
+    /** calculate number of scale factor bands and their offsets */
+    s->table_idx         = av_log2(s->samples_per_frame/subframe_len);
+    s->num_bands         = s->num_sfb[s->table_idx];
+    s->cur_sfb_offsets   = s->sfb_offsets[s->table_idx];
+    cur_subwoofer_cutoff = s->subwoofer_cutoffs[s->table_idx];
+
+    /** configure the decoder for the current subframe */
+    for (i = 0; i < s->channels_for_cur_subframe; i++) {
+        int c = s->channel_indexes_for_cur_subframe[i];
+
+        s->channel[c].coeffs = &s->channel[c].out[(s->samples_per_frame >> 1)
+                                                  + offset];
+    }
+
+    s->subframe_len = subframe_len;
+    s->esc_len = av_log2(s->subframe_len - 1) + 1;
+
+    /** skip extended header if any */
+    if (get_bits1(&s->gb)) {
+        int num_fill_bits;
+        if (!(num_fill_bits = get_bits(&s->gb, 2))) {
+            int len = get_bits(&s->gb, 4);
+            num_fill_bits = get_bits(&s->gb, len) + 1;
+        }
+
+        if (num_fill_bits >= 0) {
+            if (get_bits_count(&s->gb) + num_fill_bits > s->num_saved_bits) {
+                av_log(s->avctx, AV_LOG_ERROR, "invalid number of fill bits\n");
+                return AVERROR_INVALIDDATA;
+            }
+
+            skip_bits_long(&s->gb, num_fill_bits);
+        }
+    }
+
+    /** no idea for what the following bit is used */
+    if (get_bits1(&s->gb)) {
+        av_log_ask_for_sample(s->avctx, "reserved bit set\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+
+    if (decode_channel_transform(s) < 0)
+        return AVERROR_INVALIDDATA;
+
+
+    for (i = 0; i < s->channels_for_cur_subframe; i++) {
+        int c = s->channel_indexes_for_cur_subframe[i];
+        if ((s->channel[c].transmit_coefs = get_bits1(&s->gb)))
+            transmit_coeffs = 1;
+    }
+
+    if (transmit_coeffs) {
+        int step;
+        int quant_step = 90 * s->bits_per_sample >> 4;
+        if ((get_bits1(&s->gb))) {
+            /** FIXME: might change run level mode decision */
+            av_log_ask_for_sample(s->avctx, "unsupported quant step coding\n");
+            return AVERROR_INVALIDDATA;
+        }
+        /** decode quantization step */
+        step = get_sbits(&s->gb, 6);
+        quant_step += step;
+        if (step == -32 || step == 31) {
+            const int sign = (step == 31) - 1;
+            int quant = 0;
+            while (get_bits_count(&s->gb) + 5 < s->num_saved_bits &&
+                   (step = get_bits(&s->gb, 5)) == 31) {
+                quant += 31;
+            }
+            quant_step += ((quant + step) ^ sign) - sign;
+        }
+        if (quant_step < 0) {
+            av_log(s->avctx, AV_LOG_DEBUG, "negative quant step\n");
+        }
+
+        /** decode quantization step modifiers for every channel */
+
+        if (s->channels_for_cur_subframe == 1) {
+            s->channel[s->channel_indexes_for_cur_subframe[0]].quant_step = quant_step;
+        } else {
+            int modifier_len = get_bits(&s->gb, 3);
+            for (i = 0; i < s->channels_for_cur_subframe; i++) {
+                int c = s->channel_indexes_for_cur_subframe[i];
+                s->channel[c].quant_step = quant_step;
+                if (get_bits1(&s->gb)) {
+                    if (modifier_len) {
+                        s->channel[c].quant_step += get_bits(&s->gb, modifier_len) + 1;
+                    } else
+                        ++s->channel[c].quant_step;
+                }
+            }
+        }
+
+        /** decode scale factors */
+        if (decode_scale_factors(s) < 0)
+            return AVERROR_INVALIDDATA;
+    }
+
+    dprintf(s->avctx, "BITSTREAM: subframe header length was %i\n",
+            get_bits_count(&s->gb) - s->subframe_offset);
+
+    /** parse coefficients */
+    for (i = 0; i < s->channels_for_cur_subframe; i++) {
+        int c = s->channel_indexes_for_cur_subframe[i];
+        if (s->channel[c].transmit_coefs &&
+            get_bits_count(&s->gb) < s->num_saved_bits) {
+            decode_coeffs(s, c);
+        } else
+            memset(s->channel[c].coeffs, 0,
+                   sizeof(*s->channel[c].coeffs) * subframe_len);
+    }
+
+    dprintf(s->avctx, "BITSTREAM: subframe length was %i\n",
+            get_bits_count(&s->gb) - s->subframe_offset);
+
+    if (transmit_coeffs) {
+        /** reconstruct the per channel data */
+        inverse_channel_transform(s);
+        for (i = 0; i < s->channels_for_cur_subframe; i++) {
+            int c = s->channel_indexes_for_cur_subframe[i];
+            const int* sf = s->channel[c].scale_factors;
+            int b;
+
+            if (c == s->lfe_channel)
+                memset(&s->tmp[cur_subwoofer_cutoff], 0, sizeof(*s->tmp) *
+                       (subframe_len - cur_subwoofer_cutoff));
+
+            /** inverse quantization and rescaling */
+            for (b = 0; b < s->num_bands; b++) {
+                const int end = FFMIN(s->cur_sfb_offsets[b+1], s->subframe_len);
+                const int exp = s->channel[c].quant_step -
+                            (s->channel[c].max_scale_factor - *sf++) *
+                            s->channel[c].scale_factor_step;
+                const float quant = pow(10.0, exp / 20.0);
+                int start = s->cur_sfb_offsets[b];
+                s->dsp.vector_fmul_scalar(s->tmp + start,
+                                          s->channel[c].coeffs + start,
+                                          quant, end - start);
+            }
+
+            /** apply imdct (ff_imdct_half == DCTIV with reverse) */
+            ff_imdct_half(&s->mdct_ctx[av_log2(subframe_len) - BLOCK_MIN_BITS],
+                          s->channel[c].coeffs, s->tmp);
+        }
+    }
+
+    /** window and overlapp-add */
+    wmapro_window(s);
+
+    /** handled one subframe */
+    for (i = 0; i < s->channels_for_cur_subframe; i++) {
+        int c = s->channel_indexes_for_cur_subframe[i];
+        if (s->channel[c].cur_subframe >= s->channel[c].num_subframes) {
+            av_log(s->avctx, AV_LOG_ERROR, "broken subframe\n");
+            return AVERROR_INVALIDDATA;
+        }
+        ++s->channel[c].cur_subframe;
+    }
+
+    return 0;
+}
+
+/**
+ *@brief Decode one WMA frame.
+ *@param s codec context
+ *@return 0 if the trailer bit indicates that this is the last frame,
+ *        1 if there are additional frames
+ */
+static int decode_frame(WMAProDecodeCtx *s)
+{
+    GetBitContext* gb = &s->gb;
+    int more_frames = 0;
+    int len = 0;
+    int i;
+
+    /** check for potential output buffer overflow */
+    if (s->num_channels * s->samples_per_frame > s->samples_end - s->samples) {
+        /** return an error if no frame could be decoded at all */
+        av_log(s->avctx, AV_LOG_ERROR,
+               "not enough space for the output samples\n");
+        s->packet_loss = 1;
+        return 0;
+    }
+
+    /** get frame length */
+    if (s->len_prefix)
+        len = get_bits(gb, s->log2_frame_size);
+
+    dprintf(s->avctx, "decoding frame with length %x\n", len);
+
+    /** decode tile information */
+    if (decode_tilehdr(s)) {
+        s->packet_loss = 1;
+        return 0;
+    }
+
+    /** read postproc transform */
+    if (s->num_channels > 1 && get_bits1(gb)) {
+        av_log_ask_for_sample(s->avctx, "Unsupported postproc transform found\n");
+        s->packet_loss = 1;
+        return 0;
+    }
+
+    /** read drc info */
+    if (s->dynamic_range_compression) {
+        s->drc_gain = get_bits(gb, 8);
+        dprintf(s->avctx, "drc_gain %i\n", s->drc_gain);
+    }
+
+    /** no idea what these are for, might be the number of samples
+        that need to be skipped at the beginning or end of a stream */
+    if (get_bits1(gb)) {
+        int skip;
+
+        /** usually true for the first frame */
+        if (get_bits1(gb)) {
+            skip = get_bits(gb, av_log2(s->samples_per_frame * 2));
+            dprintf(s->avctx, "start skip: %i\n", skip);
+        }
+
+        /** sometimes true for the last frame */
+        if (get_bits1(gb)) {
+            skip = get_bits(gb, av_log2(s->samples_per_frame * 2));
+            dprintf(s->avctx, "end skip: %i\n", skip);
+        }
+
+    }
+
+    dprintf(s->avctx, "BITSTREAM: frame header length was %i\n",
+            get_bits_count(gb) - s->frame_offset);
+
+    /** reset subframe states */
+    s->parsed_all_subframes = 0;
+    for (i = 0; i < s->num_channels; i++) {
+        s->channel[i].decoded_samples = 0;
+        s->channel[i].cur_subframe    = 0;
+        s->channel[i].reuse_sf        = 0;
+    }
+
+    /** decode all subframes */
+    while (!s->parsed_all_subframes) {
+        if (decode_subframe(s) < 0) {
+            s->packet_loss = 1;
+            return 0;
+        }
+    }
+
+    /** interleave samples and write them to the output buffer */
+    for (i = 0; i < s->num_channels; i++) {
+        float* ptr  = s->samples + i;
+        int incr = s->num_channels;
+        float* iptr = s->channel[i].out;
+        float* iend = iptr + s->samples_per_frame;
+
+        while (iptr < iend) {
+            *ptr = av_clipf(*iptr++, -1.0, 32767.0 / 32768.0);
+            ptr += incr;
+        }
+
+        /** reuse second half of the IMDCT output for the next frame */
+        memcpy(&s->channel[i].out[0],
+               &s->channel[i].out[s->samples_per_frame],
+               s->samples_per_frame * sizeof(*s->channel[i].out) >> 1);
+    }
+
+    if (s->skip_frame) {
+        s->skip_frame = 0;
+    } else
+        s->samples += s->num_channels * s->samples_per_frame;
+
+    if (len != (get_bits_count(gb) - s->frame_offset) + 2) {
+        /** FIXME: not sure if this is always an error */
+        av_log(s->avctx, AV_LOG_ERROR, "frame[%i] would have to skip %i bits\n",
+               s->frame_num, len - (get_bits_count(gb) - s->frame_offset) - 1);
+        s->packet_loss = 1;
+        return 0;
+    }
+
+    /** skip the rest of the frame data */
+    skip_bits_long(gb, len - (get_bits_count(gb) - s->frame_offset) - 1);
+
+    /** decode trailer bit */
+    more_frames = get_bits1(gb);
+
+    ++s->frame_num;
+    return more_frames;
+}
+
+/**
+ *@brief Calculate remaining input buffer length.
+ *@param s codec context
+ *@param gb bitstream reader context
+ *@return remaining size in bits
+ */
+static int remaining_bits(WMAProDecodeCtx *s, GetBitContext *gb)
+{
+    return s->buf_bit_size - get_bits_count(gb);
+}
+
+/**
+ *@brief Fill the bit reservoir with a (partial) frame.
+ *@param s codec context
+ *@param gb bitstream reader context
+ *@param len length of the partial frame
+ *@param append decides wether to reset the buffer or not
+ */
+static void save_bits(WMAProDecodeCtx *s, GetBitContext* gb, int len,
+                      int append)
+{
+    int buflen;
+
+    /** when the frame data does not need to be concatenated, the input buffer
+        is resetted and additional bits from the previous frame are copyed
+        and skipped later so that a fast byte copy is possible */
+
+    if (!append) {
+        s->frame_offset = get_bits_count(gb) & 7;
+        s->num_saved_bits = s->frame_offset;
+        init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
+    }
+
+    buflen = (s->num_saved_bits + len + 8) >> 3;
+
+    if (len <= 0 || buflen > MAX_FRAMESIZE) {
+        av_log_ask_for_sample(s->avctx, "input buffer too small\n");
+        s->packet_loss = 1;
+        return;
+    }
+
+    s->num_saved_bits += len;
+    if (!append) {
+        ff_copy_bits(&s->pb, gb->buffer + (get_bits_count(gb) >> 3),
+                     s->num_saved_bits);
+    } else {
+        int align = 8 - (get_bits_count(gb) & 7);
+        align = FFMIN(align, len);
+        put_bits(&s->pb, align, get_bits(gb, align));
+        len -= align;
+        ff_copy_bits(&s->pb, gb->buffer + (get_bits_count(gb) >> 3), len);
+    }
+    skip_bits_long(gb, len);
+
+    {
+        PutBitContext tmp = s->pb;
+        flush_put_bits(&tmp);
+    }
+
+    init_get_bits(&s->gb, s->frame_data, s->num_saved_bits);
+    skip_bits(&s->gb, s->frame_offset);
+}
+
+/**
+ *@brief Decode a single WMA packet.
+ *@param avctx codec context
+ *@param data the output buffer
+ *@param data_size number of bytes that were written to the output buffer
+ *@param avpkt input packet
+ *@return number of bytes that were read from the input buffer
+ */
+static int decode_packet(AVCodecContext *avctx,
+                         void *data, int *data_size, AVPacket* avpkt)
+{
+    WMAProDecodeCtx *s = avctx->priv_data;
+    GetBitContext* gb  = &s->pgb;
+    const uint8_t* buf = avpkt->data;
+    int buf_size       = avpkt->size;
+    int num_bits_prev_frame;
+    int packet_sequence_number;
+
+    s->samples       = data;
+    s->samples_end   = (float*)((int8_t*)data + *data_size);
+    *data_size = 0;
+
+    if (s->packet_done || s->packet_loss) {
+        s->packet_done = 0;
+        s->buf_bit_size = buf_size << 3;
+
+        /** sanity check for the buffer length */
+        if (buf_size < avctx->block_align)
+            return 0;
+
+        buf_size = avctx->block_align;
+
+        /** parse packet header */
+        init_get_bits(gb, buf, s->buf_bit_size);
+        packet_sequence_number = get_bits(gb, 4);
+        skip_bits(gb, 2);
+
+        /** get number of bits that need to be added to the previous frame */
+        num_bits_prev_frame = get_bits(gb, s->log2_frame_size);
+        dprintf(avctx, "packet[%d]: nbpf %x\n", avctx->frame_number,
+                num_bits_prev_frame);
+
+        /** check for packet loss */
+        if (!s->packet_loss &&
+            ((s->packet_sequence_number + 1) & 0xF) != packet_sequence_number) {
+            s->packet_loss = 1;
+            av_log(avctx, AV_LOG_ERROR, "Packet loss detected! seq %x vs %x\n",
+                   s->packet_sequence_number, packet_sequence_number);
+        }
+        s->packet_sequence_number = packet_sequence_number;
+
+        if (num_bits_prev_frame > 0) {
+            /** append the previous frame data to the remaining data from the
+                previous packet to create a full frame */
+            save_bits(s, gb, num_bits_prev_frame, 1);
+            dprintf(avctx, "accumulated %x bits of frame data\n",
+                    s->num_saved_bits - s->frame_offset);
+
+            /** decode the cross packet frame if it is valid */
+            if (!s->packet_loss)
+                decode_frame(s);
+        } else if (s->num_saved_bits - s->frame_offset) {
+            dprintf(avctx, "ignoring %x previously saved bits\n",
+                    s->num_saved_bits - s->frame_offset);
+        }
+
+        s->packet_loss = 0;
+
+    } else {
+        int frame_size;
+        s->buf_bit_size = avpkt->size << 3;
+        init_get_bits(gb, avpkt->data, s->buf_bit_size);
+        skip_bits(gb, s->packet_offset);
+        if (remaining_bits(s, gb) > s->log2_frame_size &&
+            (frame_size = show_bits(gb, s->log2_frame_size)) &&
+            frame_size <= remaining_bits(s, gb)) {
+            save_bits(s, gb, frame_size, 0);
+            s->packet_done = !decode_frame(s);
+        } else
+            s->packet_done = 1;
+    }
+
+    if (s->packet_done && !s->packet_loss &&
+        remaining_bits(s, gb) > 0) {
+        /** save the rest of the data so that it can be decoded
+            with the next packet */
+        save_bits(s, gb, remaining_bits(s, gb), 0);
+    }
+
+    *data_size = (int8_t *)s->samples - (int8_t *)data;
+    s->packet_offset = get_bits_count(gb) & 7;
+
+    return (s->packet_loss) ? AVERROR_INVALIDDATA : get_bits_count(gb) >> 3;
+}
+
+/**
+ *@brief Clear decoder buffers (for seeking).
+ *@param avctx codec context
+ */
+static void flush(AVCodecContext *avctx)
+{
+    WMAProDecodeCtx *s = avctx->priv_data;
+    int i;
+    /** reset output buffer as a part of it is used during the windowing of a
+        new frame */
+    for (i = 0; i < s->num_channels; i++)
+        memset(s->channel[i].out, 0, s->samples_per_frame *
+               sizeof(*s->channel[i].out));
+    s->packet_loss = 1;
+}
+
+
+/**
+ *@brief wmapro decoder
+ */
+AVCodec wmapro_decoder = {
+    "wmapro",
+    AVMEDIA_TYPE_AUDIO,
+    CODEC_ID_WMAPRO,
+    sizeof(WMAProDecodeCtx),
+    decode_init,
+    NULL,
+    decode_end,
+    decode_packet,
+    .capabilities = CODEC_CAP_SUBFRAMES,
+    .flush= flush,
+    .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 9 Professional"),
+};
-- 
cgit