[libav-devel] [PATCH] Put inline asm behind a macro.

Ronald S. Bultje Wed, 20 Jun 2012 15:19:16 -0700

From: "Ronald S. Bultje" <[email protected]>

---
 arch.mak                          |    2 +
 libavcodec/cabac_functions.h      |    2 +-
 libavcodec/cavsdsp.c              |    3 +-
 libavcodec/dct-test.c             |    4 +-
 libavcodec/dnxhdenc.c             |    2 +-
 libavcodec/dwt.c                  |    2 +-
 libavcodec/h264_cabac.c           |    2 +-
 libavcodec/imgconvert.c           |    2 +-
 libavcodec/lpc.c                  |    2 +-
 libavcodec/mathops.h              |    2 +-
 libavcodec/mlpdsp.c               |    2 +-
 libavcodec/mpegvideo.c            |    2 +-
 libavcodec/x86/Makefile           |   25 ++++----
 libavcodec/x86/ac3dsp_mmx.c       |    3 +-
 libavcodec/x86/deinterlace_mmx.h  |   39 ++++++++++++
 libavcodec/x86/dsputil_mmx.c      |  118 ++++++++++++++++++++++-------------
 libavcodec/x86/dsputil_mmx.h      |   15 -----
 libavcodec/x86/dsputilenc_mmx.c   |   81 +++++++++++++-----------
 libavcodec/x86/fft.c              |    4 +-
 libavcodec/x86/h264_qpel_mmx.c    |    4 +-
 libavcodec/x86/h264dsp_mmx.c      |    7 +++
 libavcodec/x86/mpegaudiodec_mmx.c |    6 +-
 libavcodec/x86/rv40dsp_init.c     |    7 ++-
 libavcodec/x86/vc1dsp_mmx.c       |    7 ++-
 libavcodec/x86/vp3dsp.asm         |  122 ++++++++++++++++++++++++-------------
 libavfilter/vf_gradfun.c          |    2 +
 libavfilter/vf_yadif.c            |    6 +-
 libavfilter/x86/Makefile          |    4 +-
 libavutil/internal.h              |    9 ++-
 libavutil/intmath.h               |    2 +-
 libavutil/x86/cpu.c               |   37 +++++++++++
 libavutil/x86/timer.h             |    8 +++
 libavutil/x86_cpu.h               |    2 +
 libswscale/rgb2rgb.c              |    2 +-
 libswscale/swscale.c              |    4 +-
 libswscale/utils.c                |   10 +--
 libswscale/x86/Makefile           |    5 +-
 libswscale/x86/swscale_mmx.c      |    6 ++
 libswscale/yuv2rgb.c              |    2 +-
 39 files changed, 383 insertions(+), 181 deletions(-)
 create mode 100644 libavcodec/x86/deinterlace_mmx.h


diff --git a/arch.mak b/arch.mak
index 33018f3..0564833 100644
--- a/arch.mak
+++ b/arch.mak
@@ -10,4 +10,6 @@ OBJS-$(HAVE_ALTIVEC) += $(ALTIVEC-OBJS) $(ALTIVEC-OBJS-yes)
 OBJS-$(HAVE_VIS)     += $(VIS-OBJS)     $(VIS-OBJS-yes)
 
 OBJS-$(HAVE_MMX)     += $(MMX-OBJS)     $(MMX-OBJS-yes)
+OBJS-$(HAVE_INLINE_ASM) += $(INLINEASM-OBJS) $(INLINEASM-OBJS-yes)
+YASM-OBJS-$(HAVE_INLINE_ASM) += $(INLINEYASM-OBJS) $(INLINEYASM-OBJS-yes)
 OBJS-$(HAVE_YASM)    += $(YASM-OBJS)    $(YASM-OBJS-yes)
diff --git a/libavcodec/cabac_functions.h b/libavcodec/cabac_functions.h
index 484ba85..d9a7a46 100644
--- a/libavcodec/cabac_functions.h
+++ b/libavcodec/cabac_functions.h
@@ -32,7 +32,7 @@
 #include "cabac.h"
 #include "config.h"
 
-#if ARCH_X86
+#if ARCH_X86 && HAVE_INLINE_ASM
 #   include "x86/cabac.h"
 #endif
 
diff --git a/libavcodec/cavsdsp.c b/libavcodec/cavsdsp.c
index 04e521b..36ac8bb 100644
--- a/libavcodec/cavsdsp.c
+++ b/libavcodec/cavsdsp.c
@@ -544,5 +544,6 @@ av_cold void ff_cavsdsp_init(CAVSDSPContext* c, AVCodecContext *avctx) {
     c->cavs_filter_ch = cavs_filter_ch_c;
     c->cavs_idct8_add = cavs_idct8_add_c;
 
-    if (HAVE_MMX) ff_cavsdsp_init_mmx(c, avctx);
+    if (HAVE_MMX && HAVE_INLINE_ASM)
+        ff_cavsdsp_init_mmx(c, avctx);
 }
diff --git a/libavcodec/dct-test.c b/libavcodec/dct-test.c
index 4647642..e56bb4e 100644
--- a/libavcodec/dct-test.c
+++ b/libavcodec/dct-test.c
@@ -82,7 +82,7 @@ static const struct algo fdct_tab[] = {
     { "IJG-AAN-INT",    ff_fdct_ifast,         SCALE_PERM },
     { "IJG-LLM-INT",    ff_jpeg_fdct_islow_8,  NO_PERM    },
 
-#if HAVE_MMX
+#if HAVE_MMX && HAVE_INLINE_ASM
     { "MMX",            ff_fdct_mmx,           NO_PERM,   AV_CPU_FLAG_MMX     },
     { "MMX2",           ff_fdct_mmx2,          NO_PERM,   AV_CPU_FLAG_MMX2    },
     { "SSE2",           ff_fdct_sse2,          NO_PERM,   AV_CPU_FLAG_SSE2    },
@@ -105,7 +105,7 @@ static const struct algo idct_tab[] = {
     { "INT",            ff_j_rev_dct,          MMX_PERM },
     { "SIMPLE-C",       ff_simple_idct_8,      NO_PERM  },
 
-#if HAVE_MMX
+#if HAVE_MMX && HAVE_INLINE_ASM
 #if CONFIG_GPL
     { "LIBMPEG2-MMX",   ff_mmx_idct,           MMX_PERM,  AV_CPU_FLAG_MMX,  1 },
     { "LIBMPEG2-MMX2",  ff_mmxext_idct,        MMX_PERM,  AV_CPU_FLAG_MMX2, 1 },
diff --git a/libavcodec/dnxhdenc.c b/libavcodec/dnxhdenc.c
index 7ceae92..7741ed9 100644
--- a/libavcodec/dnxhdenc.c
+++ b/libavcodec/dnxhdenc.c
@@ -275,7 +275,7 @@ static int dnxhd_encode_init(AVCodecContext *avctx)
        ctx->block_width_l2 = 3;
     }
 
-#if HAVE_MMX
+#if HAVE_MMX && HAVE_INLINE_ASM
     ff_dnxhd_init_mmx(ctx);
 #endif
 
diff --git a/libavcodec/dwt.c b/libavcodec/dwt.c
index 56e4a57..f2fb34a 100644
--- a/libavcodec/dwt.c
+++ b/libavcodec/dwt.c
@@ -992,6 +992,6 @@ void ff_dwt_init(DWTContext *c)
     c->horizontal_compose97i = ff_snow_horizontal_compose97i;
     c->inner_add_yblock      = ff_snow_inner_add_yblock;
 
-    if (HAVE_MMX)
+    if (HAVE_MMX && HAVE_INLINE_ASM)
         ff_dwt_init_x86(c);
 }
diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c
index 08a6a5b..dc67724 100644
--- a/libavcodec/h264_cabac.c
+++ b/libavcodec/h264_cabac.c
@@ -38,7 +38,7 @@
 #include "h264_mvpred.h"
 #include "golomb.h"
 
-#if ARCH_X86
+#if ARCH_X86 && HAVE_INLINE_ASM
 #include "x86/h264_i386.h"
 #endif
 
diff --git a/libavcodec/imgconvert.c b/libavcodec/imgconvert.c
index 90c9b7b..5eaa5b6 100644
--- a/libavcodec/imgconvert.c
+++ b/libavcodec/imgconvert.c
@@ -39,7 +39,7 @@
 #include "libavutil/imgutils.h"
 
 #if HAVE_MMX && HAVE_YASM
-#include "x86/dsputil_mmx.h"
+#include "x86/deinterlace_mmx.h"
 #endif
 
 #define FF_COLOR_RGB      0 /**< RGB color space */
diff --git a/libavcodec/lpc.c b/libavcodec/lpc.c
index 0d6910f..9e5d93e 100644
--- a/libavcodec/lpc.c
+++ b/libavcodec/lpc.c
@@ -258,7 +258,7 @@ av_cold int ff_lpc_init(LPCContext *s, int blocksize, int max_order,
     s->lpc_apply_welch_window = lpc_apply_welch_window_c;
     s->lpc_compute_autocorr   = lpc_compute_autocorr_c;
 
-    if (HAVE_MMX)
+    if (HAVE_MMX && HAVE_INLINE_ASM)
         ff_lpc_init_x86(s);
 
     return 0;
diff --git a/libavcodec/mathops.h b/libavcodec/mathops.h
index d6eb98d..0b232e0 100644
--- a/libavcodec/mathops.h
+++ b/libavcodec/mathops.h
@@ -35,7 +35,7 @@
 #   include "mips/mathops.h"
 #elif ARCH_PPC
 #   include "ppc/mathops.h"
-#elif ARCH_X86
+#elif ARCH_X86 && HAVE_INLINE_ASM
 #   include "x86/mathops.h"
 #endif
 
diff --git a/libavcodec/mlpdsp.c b/libavcodec/mlpdsp.c
index 7d01c75..a27b1fc 100644
--- a/libavcodec/mlpdsp.c
+++ b/libavcodec/mlpdsp.c
@@ -58,6 +58,6 @@ static void ff_mlp_filter_channel(int32_t *state, const int32_t *coeff,
 void ff_mlp_init(DSPContext* c, AVCodecContext *avctx)
 {
     c->mlp_filter_channel = ff_mlp_filter_channel;
-    if (ARCH_X86)
+    if (ARCH_X86 && HAVE_INLINE_ASM)
         ff_mlp_init_x86(c, avctx);
 }
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 708199a..e042fc6 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -187,7 +187,7 @@ av_cold int ff_dct_common_init(MpegEncContext *s)
         s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
     s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;
 
-#if HAVE_MMX
+#if HAVE_MMX && HAVE_INLINE_ASM
     ff_MPV_common_init_mmx(s);
 #elif ARCH_ALPHA
     ff_MPV_common_init_axp(s);
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 6602cce..7ee76e9 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -1,10 +1,11 @@
-OBJS-$(CONFIG_MLP_DECODER)             += x86/mlpdsp.o
-OBJS-$(CONFIG_TRUEHD_DECODER)          += x86/mlpdsp.o
+INLINEASM-OBJS-$(CONFIG_MLP_DECODER)   += x86/mlpdsp.o
+INLINEASM-OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
 OBJS-$(CONFIG_XMM_CLOBBER_TEST)        += x86/w64xmmtest.o
 
 MMX-OBJS                               += x86/dsputil_mmx.o             \
-                                          x86/fdct_mmx.o                \
                                           x86/fmtconvert_mmx.o          \
+
+INLINEASM-OBJS                         += x86/fdct_mmx.o                \
                                           x86/idct_mmx_xvid.o           \
                                           x86/idct_sse2_xvid.o          \
                                           x86/motion_est_mmx.o          \
@@ -13,15 +14,15 @@ MMX-OBJS                               += x86/dsputil_mmx.o             \
 
 MMX-OBJS-$(CONFIG_AAC_DECODER)         += x86/sbrdsp_init.o
 MMX-OBJS-$(CONFIG_AC3DSP)              += x86/ac3dsp_mmx.o
-MMX-OBJS-$(CONFIG_CAVS_DECODER)        += x86/cavsdsp_mmx.o
-MMX-OBJS-$(CONFIG_DNXHD_ENCODER)       += x86/dnxhd_mmx.o
-MMX-OBJS-$(CONFIG_DWT)                 += x86/snowdsp_mmx.o
+INLINEASM-OBJS-$(CONFIG_CAVS_DECODER)  += x86/cavsdsp_mmx.o
+INLINEASM-OBJS-$(CONFIG_DNXHD_ENCODER) += x86/dnxhd_mmx.o
+INLINEASM-OBJS-$(CONFIG_DWT)           += x86/snowdsp_mmx.o
 MMX-OBJS-$(CONFIG_ENCODERS)            += x86/dsputilenc_mmx.o
 MMX-OBJS-$(CONFIG_FFT)                 += x86/fft.o
-MMX-OBJS-$(CONFIG_GPL)                 += x86/idct_mmx.o
+INLINEASM-OBJS-$(CONFIG_GPL)           += x86/idct_mmx.o
 MMX-OBJS-$(CONFIG_H264DSP)             += x86/h264dsp_mmx.o
 MMX-OBJS-$(CONFIG_H264PRED)            += x86/h264_intrapred_init.o
-MMX-OBJS-$(CONFIG_LPC)                 += x86/lpc_mmx.o
+INLINEASM-OBJS-$(CONFIG_LPC)           += x86/lpc_mmx.o
 MMX-OBJS-$(CONFIG_MPEGAUDIODSP)        += x86/mpegaudiodec_mmx.o
 MMX-OBJS-$(CONFIG_PNG_DECODER)         += x86/pngdsp-init.o
 MMX-OBJS-$(CONFIG_PRORES_DECODER)      += x86/proresdsp-init.o
@@ -37,10 +38,10 @@ YASM-OBJS-$(CONFIG_AAC_DECODER)        += x86/sbrdsp.o
 YASM-OBJS-$(CONFIG_AC3DSP)             += x86/ac3dsp.o
 YASM-OBJS-$(CONFIG_DCT)                += x86/dct32_sse.o
 YASM-OBJS-$(CONFIG_ENCODERS)           += x86/dsputilenc_yasm.o
-YASM-OBJS-FFT-$(HAVE_AMD3DNOW)         += x86/fft_3dn.o
-YASM-OBJS-FFT-$(HAVE_AMD3DNOWEXT)      += x86/fft_3dn2.o
-YASM-OBJS-FFT-$(HAVE_SSE)              += x86/fft_sse.o
-YASM-OBJS-$(CONFIG_FFT)                += x86/fft_mmx.o                 \
+INLINEYASM-OBJS-FFT-$(HAVE_AMD3DNOW)   += x86/fft_3dn.o
+INLINEYASM-OBJS-FFT-$(HAVE_AMD3DNOWEXT) += x86/fft_3dn2.o
+INLINEYASM-OBJS-FFT-$(HAVE_SSE)        += x86/fft_sse.o
+INLINEYASM-OBJS-$(CONFIG_FFT)         += x86/fft_mmx.o                 \
-                                          $(YASM-OBJS-FFT-yes)
+                                          $(INLINEYASM-OBJS-FFT-yes)
 YASM-OBJS-$(CONFIG_H264CHROMA)         += x86/h264_chromamc.o           \
                                           x86/h264_chromamc_10bit.o
diff --git a/libavcodec/x86/ac3dsp_mmx.c b/libavcodec/x86/ac3dsp_mmx.c
index 1a43183..3ba55d7 100644
--- a/libavcodec/x86/ac3dsp_mmx.c
+++ b/libavcodec/x86/ac3dsp_mmx.c
@@ -19,8 +19,9 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
 #include "libavutil/x86_cpu.h"
-#include "dsputil_mmx.h"
 #include "libavcodec/ac3dsp.h"
 
 extern void ff_ac3_exponent_min_mmx   (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
diff --git a/libavcodec/x86/deinterlace_mmx.h b/libavcodec/x86/deinterlace_mmx.h
new file mode 100644
index 0000000..7a90d86
--- /dev/null
+++ b/libavcodec/x86/deinterlace_mmx.h
@@ -0,0 +1,39 @@
+/*
+ * MMX optimized DSP utils
+ * Copyright (c) 2007  Aurelien Jacobs <[email protected]>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_DEINTERLACE_MMX_H
+#define AVCODEC_X86_DEINTERLACE_MMX_H
+
+#include <stdint.h>
+
+void ff_deinterlace_line_mmx(uint8_t *dst,
+                             const uint8_t *lum_m4, const uint8_t *lum_m3,
+                             const uint8_t *lum_m2, const uint8_t *lum_m1,
+                             const uint8_t *lum,
+                             int size);
+
+void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4,
+                                     const uint8_t *lum_m3,
+                                     const uint8_t *lum_m2,
+                                     const uint8_t *lum_m1,
+                                     const uint8_t *lum, int size);
+
+#endif /* AVCODEC_X86_DEINTERLACE_MMX_H */
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 434d185..c2c59f0 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -29,8 +29,10 @@
 #include "libavcodec/mpegvideo.h"
 #include "libavcodec/simple_idct.h"
 #include "libavcodec/ac3dec.h"
+#if HAVE_INLINE_ASM
 #include "dsputil_mmx.h"
 #include "idct_xvid.h"
+#endif
 
 //#undef NDEBUG
 //#include <assert.h>
@@ -84,6 +86,8 @@ DECLARE_ALIGNED(16, const xmm_reg,  ff_pb_FE)   = { 0xFEFEFEFEFEFEFEFEULL, 0xFEF
 DECLARE_ALIGNED(16, const double, ff_pd_1)[2] = { 1.0, 1.0 };
 DECLARE_ALIGNED(16, const double, ff_pd_2)[2] = { 2.0, 2.0 };
 
+#if HAVE_INLINE_ASM
+
 #define JUMPALIGN()     __asm__ volatile (".p2align 3"::)
 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%"#regd", %%"#regd ::)
 
@@ -1808,6 +1812,8 @@ void ff_avg_rv40_qpel16_mc33_mmx(uint8_t *dst, uint8_t *src, int stride)
   avg_pixels16_xy2_mmx(dst, src, stride, 16);
 }
 
+#endif /* HAVE_INLINE_ASM */
+
 #if HAVE_YASM
 typedef void emu_edge_core_func(uint8_t *buf, const uint8_t *src,
                                 x86_reg linesize, x86_reg start_y,
@@ -1876,6 +1882,8 @@ static av_noinline void emulated_edge_mc_sse(uint8_t *buf, const uint8_t *src,
 }
 #endif /* HAVE_YASM */
 
+#if HAVE_INLINE_ASM
+
 typedef void emulated_edge_mc_func(uint8_t *dst, const uint8_t *src,
                                    int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h);
@@ -2045,6 +2053,8 @@ PREFETCH(prefetch_mmx2,  prefetcht0)
 PREFETCH(prefetch_3dnow, prefetch)
 #undef PREFETCH
 
+#endif /* HAVE_INLINE_ASM */
+
 #include "h264_qpel_mmx.c"
 
 void ff_put_h264_chroma_mc8_mmx_rnd  (uint8_t *dst, uint8_t *src,
@@ -2090,6 +2100,8 @@ CHROMA_MC(avg, 8, 10, sse2)
 CHROMA_MC(put, 8, 10, avx)
 CHROMA_MC(avg, 8, 10, avx)
 
+#if HAVE_INLINE_ASM
+
 /* CAVS-specific */
 void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride)
 {
@@ -2448,6 +2460,8 @@ static void vector_clipf_sse(float *dst, const float *src,
     );
 }
 
+#endif /* HAVE_INLINE_ASM */
+
 void ff_vp3_idct_mmx(int16_t *input_data);
 void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block);
 void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block);
@@ -2574,6 +2588,7 @@ static void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
 {
     const int high_bit_depth = avctx->bits_per_raw_sample > 8;
 
+#if HAVE_INLINE_ASM
     c->put_pixels_clamped        = ff_put_pixels_clamped_mmx;
     c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
     c->add_pixels_clamped        = ff_add_pixels_clamped_mmx;
@@ -2596,10 +2611,6 @@ static void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
 #if ARCH_X86_32 || !HAVE_YASM
     c->gmc = gmc_mmx;
 #endif
-#if ARCH_X86_32 && HAVE_YASM
-    if (!high_bit_depth)
-        c->emulated_edge_mc = emulated_edge_mc_mmx;
-#endif
 
     c->add_bytes = add_bytes_mmx;
 
@@ -2607,8 +2618,14 @@ static void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
         c->h263_v_loop_filter = h263_v_loop_filter_mmx;
         c->h263_h_loop_filter = h263_h_loop_filter_mmx;
     }
+#endif /* HAVE_INLINE_ASM */
 
 #if HAVE_YASM
+#if ARCH_X86_32
+    if (!high_bit_depth)
+        c->emulated_edge_mc = emulated_edge_mc_mmx;
+#endif
+
     if (!high_bit_depth && CONFIG_H264CHROMA) {
         c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_mmx_rnd;
         c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx;
@@ -2625,6 +2642,7 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx,
     const int bit_depth      = avctx->bits_per_raw_sample;
     const int high_bit_depth = bit_depth > 8;
 
+#if HAVE_INLINE_ASM
     c->prefetch = prefetch_mmx2;
 
     if (!high_bit_depth) {
@@ -2683,8 +2701,18 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx,
             SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2, );
             SET_QPEL_FUNCS(avg_h264_qpel, 1,  8, mmx2, );
             SET_QPEL_FUNCS(avg_h264_qpel, 2,  4, mmx2, );
-        } else if (bit_depth == 10) {
+        }
+
+        SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2, );
+        SET_QPEL_FUNCS(put_2tap_qpel, 1,  8, mmx2, );
+        SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2, );
+        SET_QPEL_FUNCS(avg_2tap_qpel, 1,  8, mmx2, );
+    }
+#endif /* HAVE_INLINE_ASM */
+
 #if HAVE_YASM
+    if (CONFIG_H264QPEL) {
+        if (bit_depth == 10) {
 #if !ARCH_X86_64
             SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_mmxext, ff_);
             SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 10_mmxext, ff_);
@@ -2693,16 +2721,9 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx,
 #endif
             SET_QPEL_FUNCS(put_h264_qpel, 2, 4,  10_mmxext, ff_);
             SET_QPEL_FUNCS(avg_h264_qpel, 2, 4,  10_mmxext, ff_);
-#endif
         }
-
-        SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2, );
-        SET_QPEL_FUNCS(put_2tap_qpel, 1,  8, mmx2, );
-        SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2, );
-        SET_QPEL_FUNCS(avg_2tap_qpel, 1,  8, mmx2, );
     }
 
-#if HAVE_YASM
     if (!high_bit_depth && CONFIG_H264CHROMA) {
         c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_mmx2_rnd;
         c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmx2;
@@ -2734,6 +2755,7 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
 {
     const int high_bit_depth = avctx->bits_per_raw_sample > 8;
 
+#if HAVE_INLINE_ASM
     c->prefetch = prefetch_3dnow;
 
     if (!high_bit_depth) {
@@ -2791,24 +2813,25 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
         SET_QPEL_FUNCS(avg_2tap_qpel, 1,  8, 3dnow, );
     }
 
+    c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
+
+#if HAVE_7REGS
+    c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov;
+#endif
+#endif /* HAVE_INLINE_ASM */
+
 #if HAVE_YASM
     if (!high_bit_depth && CONFIG_H264CHROMA) {
         c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_3dnow_rnd;
         c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow;
     }
 #endif
-
-    c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
-
-#if HAVE_7REGS
-    c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov;
-#endif
 }
 
 static void dsputil_init_3dnow2(DSPContext *c, AVCodecContext *avctx,
                                 int mm_flags)
 {
-#if HAVE_6REGS
+#if HAVE_6REGS && HAVE_INLINE_ASM
     c->vector_fmul_window  = vector_fmul_window_3dnow2;
 #endif
 }
@@ -2817,6 +2840,7 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags)
 {
     const int high_bit_depth = avctx->bits_per_raw_sample > 8;
 
+#if HAVE_INLINE_ASM
     if (!high_bit_depth) {
         if (!(CONFIG_MPEG_XVMC_DECODER && avctx->xvmc_acceleration > 1)) {
             /* XvMCCreateBlocks() may not allocate 16-byte aligned blocks */
@@ -2827,24 +2851,27 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags)
 
     c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
     c->ac3_downmix             = ac3_downmix_sse;
-#if HAVE_YASM
-    c->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
-    c->vector_fmul_add     = ff_vector_fmul_add_sse;
-#endif
 
 #if HAVE_6REGS
     c->vector_fmul_window = vector_fmul_window_sse;
 #endif
-
+
     c->vector_clipf = vector_clipf_sse;
 
 #if HAVE_YASM
+    c->gmc = gmc_sse;
+#endif
+#endif /* HAVE_INLINE_ASM */
+
+#if HAVE_YASM
+    c->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
+    c->vector_fmul_add     = ff_vector_fmul_add_sse;
+
     c->scalarproduct_float          = ff_scalarproduct_float_sse;
     c->butterflies_float_interleave = ff_butterflies_float_interleave_sse;
 
     if (!high_bit_depth)
         c->emulated_edge_mc = emulated_edge_mc_sse;
-    c->gmc = gmc_sse;
 #endif
 }
 
@@ -2854,6 +2881,7 @@ static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
     const int bit_depth      = avctx->bits_per_raw_sample;
     const int high_bit_depth = bit_depth > 8;
 
+#if HAVE_INLINE_ASM
     if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
         // these functions are slower than mmx on AMD, but faster on Intel
         if (!high_bit_depth) {
@@ -2879,6 +2907,7 @@ static void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
         H264_QPEL_FUNCS(3, 2, sse2);
         H264_QPEL_FUNCS(3, 3, sse2);
     }
+#endif /* HAVE_INLINE_ASM */
 
 #if HAVE_YASM
     if (bit_depth == 10) {
@@ -2920,6 +2949,7 @@ static void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
     const int high_bit_depth = avctx->bits_per_raw_sample > 8;
     const int bit_depth      = avctx->bits_per_raw_sample;
 
+#if HAVE_INLINE_ASM
     if (!high_bit_depth && CONFIG_H264QPEL) {
         H264_QPEL_FUNCS(1, 0, ssse3);
         H264_QPEL_FUNCS(1, 1, ssse3);
@@ -2934,8 +2964,10 @@ static void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
         H264_QPEL_FUNCS(3, 2, ssse3);
         H264_QPEL_FUNCS(3, 3, ssse3);
     }
+#endif /* HAVE_INLINE_ASM */
+
 #if HAVE_YASM
-    else if (bit_depth == 10 && CONFIG_H264QPEL) {
+    if (bit_depth == 10 && CONFIG_H264QPEL) {
         H264_QPEL_FUNCS_10(1, 0, ssse3_cache64);
         H264_QPEL_FUNCS_10(2, 0, ssse3_cache64);
         H264_QPEL_FUNCS_10(3, 0, ssse3_cache64);
@@ -3017,6 +3049,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
         const int idct_algo = avctx->idct_algo;
 
         if (avctx->bits_per_raw_sample <= 8) {
+#if HAVE_INLINE_ASM
             if (idct_algo == FF_IDCT_AUTO || idct_algo == FF_IDCT_SIMPLEMMX) {
                 c->idct_put              = ff_simple_idct_put_mmx;
                 c->idct_add              = ff_simple_idct_add_mmx;
@@ -3035,22 +3068,8 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
                 }
                 c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
 #endif
-            } else if ((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER ||
-                        CONFIG_VP6_DECODER) &&
-                       idct_algo == FF_IDCT_VP3 && HAVE_YASM) {
-                if (mm_flags & AV_CPU_FLAG_SSE2) {
-                    c->idct_put              = ff_vp3_idct_put_sse2;
-                    c->idct_add              = ff_vp3_idct_add_sse2;
-                    c->idct                  = ff_vp3_idct_sse2;
-                    c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
-                } else {
-                    c->idct_put              = ff_vp3_idct_put_mmx;
-                    c->idct_add              = ff_vp3_idct_add_mmx;
-                    c->idct                  = ff_vp3_idct_mmx;
-                    c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM;
-                }
             } else if (idct_algo == FF_IDCT_CAVS) {
-                    c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
+                c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
             } else if (idct_algo == FF_IDCT_XVIDMMX) {
                 if (mm_flags & AV_CPU_FLAG_SSE2) {
                     c->idct_put              = ff_idct_xvid_sse2_put;
@@ -3067,6 +3086,23 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
                     c->idct                  = ff_idct_xvid_mmx;
                 }
             }
+#endif /* HAVE_INLINE_ASM */
+#if HAVE_YASM
+            if ((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER ||
+                 CONFIG_VP6_DECODER) && idct_algo == FF_IDCT_VP3) {
+                if (mm_flags & AV_CPU_FLAG_SSE2) {
+                    c->idct_put              = ff_vp3_idct_put_sse2;
+                    c->idct_add              = ff_vp3_idct_add_sse2;
+                    c->idct                  = ff_vp3_idct_sse2;
+                    c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
+                } else {
+                    c->idct_put              = ff_vp3_idct_put_mmx;
+                    c->idct_add              = ff_vp3_idct_add_mmx;
+                    c->idct                  = ff_vp3_idct_mmx;
+                    c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM;
+                }
+            }
+#endif /* HAVE_YASM */
         }
 
         dsputil_init_mmx(c, avctx, mm_flags);
diff --git a/libavcodec/x86/dsputil_mmx.h b/libavcodec/x86/dsputil_mmx.h
index 37f4581..3150b03 100644
--- a/libavcodec/x86/dsputil_mmx.h
+++ b/libavcodec/x86/dsputil_mmx.h
@@ -26,8 +26,6 @@
 #include "libavcodec/dsputil.h"
 #include "libavutil/x86_cpu.h"
 
-typedef struct { uint64_t a, b; } xmm_reg;
-
 extern const uint64_t ff_bone;
 extern const uint64_t ff_wtwo;
 
@@ -207,17 +205,4 @@ void ff_avg_rv40_qpel16_mc33_mmx(uint8_t *block, uint8_t *pixels, int line_size)
 void ff_mmx_idct(DCTELEM *block);
 void ff_mmxext_idct(DCTELEM *block);
 
-
-void ff_deinterlace_line_mmx(uint8_t *dst,
-                             const uint8_t *lum_m4, const uint8_t *lum_m3,
-                             const uint8_t *lum_m2, const uint8_t *lum_m1,
-                             const uint8_t *lum,
-                             int size);
-
-void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4,
-                                     const uint8_t *lum_m3,
-                                     const uint8_t *lum_m2,
-                                     const uint8_t *lum_m1,
-                                     const uint8_t *lum, int size);
-
 #endif /* AVCODEC_X86_DSPUTIL_MMX_H */
diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c
index d8a60e1..83b35a6 100644
--- a/libavcodec/x86/dsputilenc_mmx.c
+++ b/libavcodec/x86/dsputilenc_mmx.c
@@ -27,8 +27,9 @@
 #include "libavcodec/dsputil.h"
 #include "libavcodec/mpegvideo.h"
 #include "libavcodec/mathops.h"
-#include "dsputil_mmx.h"
 
+#if HAVE_INLINE_ASM
+#include "dsputil_mmx.h"
 
 static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size)
 {
@@ -323,8 +324,6 @@ static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int
     return tmp;
 }
 
-int ff_sse16_sse2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h);
-
 static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) {
     int tmp;
   __asm__ volatile (
@@ -925,17 +924,6 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *src1, c
     "paddusw "#t", "#a"               \n\t"\
     "movd "#a", "#dst"                \n\t"\
 
-#define hadamard_func(cpu) \
-int ff_hadamard8_diff_##cpu  (void *s, uint8_t *src1, uint8_t *src2, \
-                              int stride, int h); \
-int ff_hadamard8_diff16_##cpu(void *s, uint8_t *src1, uint8_t *src2, \
-                              int stride, int h);
-
-hadamard_func(mmx)
-hadamard_func(mmx2)
-hadamard_func(sse2)
-hadamard_func(ssse3)
-
 #define DCT_SAD4(m,mm,o)\
     "mov"#m" "#o"+ 0(%1), "#mm"2      \n\t"\
     "mov"#m" "#o"+16(%1), "#mm"3      \n\t"\
@@ -1094,12 +1082,27 @@ static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, int si
 #undef PHADDD
 #endif //HAVE_SSSE3
 
+#endif /* HAVE_INLINE_ASM */
+
+int ff_sse16_sse2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h);
+
+#define hadamard_func(cpu) \
+int ff_hadamard8_diff_##cpu  (void *s, uint8_t *src1, uint8_t *src2, \
+                              int stride, int h); \
+int ff_hadamard8_diff16_##cpu(void *s, uint8_t *src1, uint8_t *src2, \
+                              int stride, int h);
+
+hadamard_func(mmx)
+hadamard_func(mmx2)
+hadamard_func(sse2)
+hadamard_func(ssse3)
 
 void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
 {
     int mm_flags = av_get_cpu_flags();
     int bit_depth = avctx->bits_per_raw_sample;
 
+#if HAVE_INLINE_ASM
     if (mm_flags & AV_CPU_FLAG_MMX) {
         const int dct_algo = avctx->dct_algo;
         if (avctx->bits_per_raw_sample <= 8 &&
@@ -1121,11 +1124,6 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
         c->diff_bytes= diff_bytes_mmx;
         c->sum_abs_dctelem= sum_abs_dctelem_mmx;
 
-#if HAVE_YASM
-        c->hadamard8_diff[0]= ff_hadamard8_diff16_mmx;
-        c->hadamard8_diff[1]= ff_hadamard8_diff_mmx;
-#endif
-
         c->pix_norm1 = pix_norm1_mmx;
         c->sse[0] = sse16_mmx;
         c->sse[1] = sse8_mmx;
@@ -1147,10 +1145,6 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
 
         if (mm_flags & AV_CPU_FLAG_MMX2) {
             c->sum_abs_dctelem= sum_abs_dctelem_mmx2;
-#if HAVE_YASM
-            c->hadamard8_diff[0]= ff_hadamard8_diff16_mmx2;
-            c->hadamard8_diff[1]= ff_hadamard8_diff_mmx2;
-#endif
             c->vsad[4]= vsad_intra16_mmx2;
 
             if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
@@ -1164,13 +1158,6 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
             if (bit_depth <= 8)
                 c->get_pixels = get_pixels_sse2;
             c->sum_abs_dctelem= sum_abs_dctelem_sse2;
-#if HAVE_YASM
-            c->sse[0] = ff_sse16_sse2;
-#if HAVE_ALIGNED_STACK
-            c->hadamard8_diff[0]= ff_hadamard8_diff16_sse2;
-            c->hadamard8_diff[1]= ff_hadamard8_diff_sse2;
-#endif
-#endif
         }
 
 #if HAVE_SSSE3
@@ -1180,10 +1167,6 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
             }
             c->add_8x8basis= add_8x8basis_ssse3;
             c->sum_abs_dctelem= sum_abs_dctelem_ssse3;
-#if HAVE_YASM && HAVE_ALIGNED_STACK
-            c->hadamard8_diff[0]= ff_hadamard8_diff16_ssse3;
-            c->hadamard8_diff[1]= ff_hadamard8_diff_ssse3;
-#endif
         }
 #endif
 
@@ -1196,4 +1179,34 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
     }
 
     ff_dsputil_init_pix_mmx(c, avctx);
+#endif /* HAVE_INLINE_ASM */
+
+#if HAVE_YASM
+    if (mm_flags & AV_CPU_FLAG_MMX) {
+        c->hadamard8_diff[0]= ff_hadamard8_diff16_mmx;
+        c->hadamard8_diff[1]= ff_hadamard8_diff_mmx;
+
+        if (mm_flags & AV_CPU_FLAG_MMX2) {
+            c->hadamard8_diff[0]= ff_hadamard8_diff16_mmx2;
+            c->hadamard8_diff[1]= ff_hadamard8_diff_mmx2;
+        }
+
+        if(mm_flags & AV_CPU_FLAG_SSE2){
+            c->sse[0] = ff_sse16_sse2;
+#if HAVE_ALIGNED_STACK
+            c->hadamard8_diff[0]= ff_hadamard8_diff16_sse2;
+            c->hadamard8_diff[1]= ff_hadamard8_diff_sse2;
+#endif
+        }
+
+#if HAVE_SSSE3
+        if(mm_flags & AV_CPU_FLAG_SSSE3){
+#if HAVE_ALIGNED_STACK
+            c->hadamard8_diff[0]= ff_hadamard8_diff16_ssse3;
+            c->hadamard8_diff[1]= ff_hadamard8_diff_ssse3;
+#endif
+        }
+#endif
+    }
+#endif /* HAVE_YASM */
 }
diff --git a/libavcodec/x86/fft.c b/libavcodec/x86/fft.c
index 6349c23..b0fdc55 100644
--- a/libavcodec/x86/fft.c
+++ b/libavcodec/x86/fft.c
@@ -23,7 +23,7 @@
 
 av_cold void ff_fft_init_mmx(FFTContext *s)
 {
-#if HAVE_YASM
+#if HAVE_YASM && HAVE_INLINE_ASM
     int has_vectors = av_get_cpu_flags();
     if (has_vectors & AV_CPU_FLAG_3DNOW && HAVE_AMD3DNOW) {
         /* 3DNow! for K6-2/3 */
@@ -39,8 +39,8 @@ av_cold void ff_fft_init_mmx(FFTContext *s)
     }
     if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) {
         /* SSE for P3/P4/K8 */
-        s->imdct_calc  = ff_imdct_calc_sse;
         s->imdct_half  = ff_imdct_half_sse;
+        s->imdct_calc  = ff_imdct_calc_sse;
         s->fft_permute = ff_fft_permute_sse;
         s->fft_calc    = ff_fft_calc_sse;
         s->fft_permutation = FF_FFT_PERM_SWAP_LSBS;
diff --git a/libavcodec/x86/h264_qpel_mmx.c b/libavcodec/x86/h264_qpel_mmx.c
index 85ae07e..113d80a 100644
--- a/libavcodec/x86/h264_qpel_mmx.c
+++ b/libavcodec/x86/h264_qpel_mmx.c
@@ -19,6 +19,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#if HAVE_INLINE_ASM
+
 #include "dsputil_mmx.h"
 
 /***********************************/
@@ -1191,7 +1193,7 @@ H264_MC_816(H264_MC_H, ssse3)
 H264_MC_816(H264_MC_HV, ssse3)
 #endif
 
-
+#endif /* HAVE_INLINE_ASM */
 
 //10bit
 #define LUMA_MC_OP(OP, NUM, DEPTH, TYPE, OPT) \
diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c
index dcd9180..0b0ab09 100644
--- a/libavcodec/x86/h264dsp_mmx.c
+++ b/libavcodec/x86/h264dsp_mmx.c
@@ -21,9 +21,12 @@
 #include "libavutil/cpu.h"
 #include "libavutil/x86_cpu.h"
 #include "libavcodec/h264dsp.h"
+
+#if HAVE_INLINE_ASM
 #include "dsputil_mmx.h"
 
 DECLARE_ALIGNED(8, static const uint64_t, ff_pb_3_1  ) = 0x0103010301030103ULL;
+#endif
 
 /***********************************/
 /* IDCT */
@@ -88,6 +91,7 @@ void ff_h264_luma_dc_dequant_idct_sse2(DCTELEM *output, 
DCTELEM *input, int qmul
 /***********************************/
 /* deblocking */
 
+#if HAVE_INLINE_ASM
 #define h264_loop_filter_strength_iteration_mmx2(bS, nz, ref, mv, bidir, 
edges, step, mask_mv, dir, d_idx, mask_dir) \
     do { \
         x86_reg b_idx; \
@@ -240,6 +244,7 @@ static void h264_loop_filter_strength_mmx2( int16_t 
bS[2][4][4], uint8_t nnz[40]
         :"memory"
     );
 }
+#endif /* HAVE_INLINE_ASM */
 
 #define LF_FUNC(DIR, TYPE, DEPTH, OPT) \
 void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *pix, 
int stride, \
@@ -344,9 +349,11 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int 
bit_depth, const int chrom
 {
     int mm_flags = av_get_cpu_flags();
 
+#if HAVE_INLINE_ASM
     if (chroma_format_idc == 1 && mm_flags & AV_CPU_FLAG_MMX2) {
         c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2;
     }
+#endif
 
     if (bit_depth == 8) {
 #if HAVE_YASM
diff --git a/libavcodec/x86/mpegaudiodec_mmx.c 
b/libavcodec/x86/mpegaudiodec_mmx.c
index f51a06d..996ce46 100644
--- a/libavcodec/x86/mpegaudiodec_mmx.c
+++ b/libavcodec/x86/mpegaudiodec_mmx.c
@@ -36,6 +36,8 @@ void ff_four_imdct36_float_avx(float *out, float *buf, float 
*in, float *win,
 
 DECLARE_ALIGNED(16, static float, mdct_win_sse)[2][4][4*40];
 
+#if HAVE_INLINE_ASM
+
 #define MACS(rt, ra, rb) rt+=(ra)*(rb)
 #define MLSS(rt, ra, rb) rt-=(ra)*(rb)
 
@@ -177,7 +179,7 @@ static void apply_window_mp3(float *in, float *win, int 
*unused, float *out,
     SUM8(MLSS, sum, win + 16 + 32, in + 32);
     *out = sum;
 }
-
+#endif /* HAVE_INLINE_ASM */
 
 #define DECL_IMDCT_BLOCKS(CPU1, CPU2)                                       \
 static void imdct36_blocks_ ## CPU1(float *out, float *buf, float *in,      \
@@ -235,9 +237,11 @@ void ff_mpadsp_init_mmx(MPADSPContext *s)
         }
     }
 
+#if HAVE_INLINE_ASM
     if (mm_flags & AV_CPU_FLAG_SSE2) {
         s->apply_window_float = apply_window_mp3;
     }
+#endif /* HAVE_INLINE_ASM */
 #if HAVE_YASM
     if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) {
         s->imdct36_blocks_float = imdct36_blocks_avx;
diff --git a/libavcodec/x86/rv40dsp_init.c b/libavcodec/x86/rv40dsp_init.c
index cc1ea45..32ca4fd 100644
--- a/libavcodec/x86/rv40dsp_init.c
+++ b/libavcodec/x86/rv40dsp_init.c
@@ -26,7 +26,10 @@
  * 3,3 is bugged in the rv40 format and maps to _xy2 version
  */
 
-#include "libavcodec/x86/dsputil_mmx.h"
+#include "config.h"
+#if HAVE_INLINE_ASM
+#include "dsputil_mmx.h"
+#endif
 #include "libavcodec/rv34dsp.h"
 
 void ff_put_rv40_chroma_mc8_mmx  (uint8_t *dst, uint8_t *src,
@@ -190,10 +193,12 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext 
*dsp)
     if (mm_flags & AV_CPU_FLAG_MMX) {
         c->put_chroma_pixels_tab[0] = ff_put_rv40_chroma_mc8_mmx;
         c->put_chroma_pixels_tab[1] = ff_put_rv40_chroma_mc4_mmx;
+#if HAVE_INLINE_ASM
         c->put_pixels_tab[0][15] = ff_put_rv40_qpel16_mc33_mmx;
         c->put_pixels_tab[1][15] = ff_put_rv40_qpel8_mc33_mmx;
         c->avg_pixels_tab[0][15] = ff_avg_rv40_qpel16_mc33_mmx;
         c->avg_pixels_tab[1][15] = ff_avg_rv40_qpel8_mc33_mmx;
+#endif
 #if ARCH_X86_32
         QPEL_MC_SET(put_, _mmx)
 #endif
diff --git a/libavcodec/x86/vc1dsp_mmx.c b/libavcodec/x86/vc1dsp_mmx.c
index e1f5145..a6da9b4 100644
--- a/libavcodec/x86/vc1dsp_mmx.c
+++ b/libavcodec/x86/vc1dsp_mmx.c
@@ -27,8 +27,9 @@
 #include "libavutil/cpu.h"
 #include "libavutil/x86_cpu.h"
 #include "libavcodec/dsputil.h"
-#include "dsputil_mmx.h"
 #include "libavcodec/vc1dsp.h"
+#if HAVE_INLINE_ASM
+#include "dsputil_mmx.h"
 
 #define OP_PUT(S,D)
 #define OP_AVG(S,D) "pavgb " #S ", " #D " \n\t"
@@ -682,6 +683,8 @@ static void vc1_inv_trans_8x8_dc_mmx2(uint8_t *dest, int 
linesize, DCTELEM *bloc
     );
 }
 
+#endif /* HAVE_INLINE_ASM */
+
 #define LOOP_FILTER(EXT) \
 void ff_vc1_v_loop_filter4_ ## EXT(uint8_t *src, int stride, int pq); \
 void ff_vc1_h_loop_filter4_ ## EXT(uint8_t *src, int stride, int pq); \
@@ -730,6 +733,7 @@ void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
 {
     int mm_flags = av_get_cpu_flags();
 
+#if HAVE_INLINE_ASM
     if (mm_flags & AV_CPU_FLAG_MMX) {
         dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_mmx;
         dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_mmx;
@@ -778,6 +782,7 @@ void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
         dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_mmx2;
         dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_mmx2;
     }
+#endif /* HAVE_INLINE_ASM */
 
 #define ASSIGN_LF(EXT) \
         dsp->vc1_v_loop_filter4  = ff_vc1_v_loop_filter4_ ## EXT; \
diff --git a/libavcodec/x86/vp3dsp.asm b/libavcodec/x86/vp3dsp.asm
index 791cc8e..6d07214 100644
--- a/libavcodec/x86/vp3dsp.asm
+++ b/libavcodec/x86/vp3dsp.asm
@@ -38,13 +38,11 @@ cextern pb_1
 cextern pb_3
 cextern pb_7
 cextern pb_1F
+cextern pb_80
 cextern pb_81
 
 cextern pw_8
 
-cextern put_signed_pixels_clamped_mmx
-cextern add_pixels_clamped_mmx
-
 SECTION .text
 
 ; this is off by one or two for some cases when filter_limit is greater than 63
@@ -523,60 +521,100 @@ cglobal vp3_h_loop_filter_mmx2, 3, 4
         PUT_BLOCK 0, 1, 2, 3, 4, 5, 6, 7
 %endmacro
 
-%macro vp3_idct_funcs 3
-cglobal vp3_idct_%1, 1, 1, %2
+%macro vp3_idct_funcs 1
+cglobal vp3_idct_%1, 1, 1, 9
     VP3_IDCT_%1   r0
     RET
 
-cglobal vp3_idct_put_%1, 3, %3, %2
+cglobal vp3_idct_put_%1, 3, 4, 9
     VP3_IDCT_%1   r2
-%if ARCH_X86_64
-    mov           r3, r2
-    mov           r2, r1
-    mov           r1, r0
-    mov           r0, r3
+
+    movsxdifnidn  r1, r1d
+    mova          m4, [pb_80]
+    lea           r3, [r1*3]
+%assign %%i 0
+%rep 16/mmsize
+    mova          m0, [r2+mmsize*0+%%i]
+    mova          m1, [r2+mmsize*2+%%i]
+    mova          m2, [r2+mmsize*4+%%i]
+    mova          m3, [r2+mmsize*6+%%i]
+    packsswb      m0, [r2+mmsize*1+%%i]
+    packsswb      m1, [r2+mmsize*3+%%i]
+    packsswb      m2, [r2+mmsize*5+%%i]
+    packsswb      m3, [r2+mmsize*7+%%i]
+    paddb         m0, m4
+    paddb         m1, m4
+    paddb         m2, m4
+    paddb         m3, m4
+    movq   [r0     ], m0
+%if mmsize == 8
+    movq   [r0+r1  ], m1
+    movq   [r0+r1*2], m2
+    movq   [r0+r3  ], m3
 %else
-    mov          r0m, r2
-    mov          r1m, r0
-    mov          r2m, r1
+    movhps [r0+r1  ], m0
+    movq   [r0+r1*2], m1
+    movhps [r0+r3  ], m1
 %endif
-%if WIN64
-    call put_signed_pixels_clamped_mmx
-    RET
-%else
-    jmp put_signed_pixels_clamped_mmx
+%if %%i == 0
+    lea           r0, [r0+r1*4]
+%endif
+%if mmsize == 16
+    movq   [r0     ], m2
+    movhps [r0+r1  ], m2
+    movq   [r0+r1*2], m3
+    movhps [r0+r3  ], m3
 %endif
+%assign %%i %%i+64
+%endrep
+    RET
 
-cglobal vp3_idct_add_%1, 3, %3, %2
+cglobal vp3_idct_add_%1, 3, 4, 9
     VP3_IDCT_%1   r2
-%if ARCH_X86_64
-    mov           r3, r2
-    mov           r2, r1
-    mov           r1, r0
-    mov           r0, r3
-%else
-    mov          r0m, r2
-    mov          r1m, r0
-    mov          r2m, r1
+
+    mov           r3, 4
+    pxor          m4, m4
+    movsxdifnidn  r1, r1d
+.loop:
+    movq          m0, [r0]
+    movq          m1, [r0+r1]
+%if mmsize == 8
+    mova          m2, m0
+    mova          m3, m1
 %endif
-%if WIN64
-    call add_pixels_clamped_mmx
-    RET
-%else
-    jmp add_pixels_clamped_mmx
+    punpcklbw     m0, m4
+    punpcklbw     m1, m4
+%if mmsize == 8
+    punpckhbw     m2, m4
+    punpckhbw     m3, m4
+%endif
+    paddsw        m0, [r2+ 0]
+    paddsw        m1, [r2+16]
+%if mmsize == 8
+    paddsw        m2, [r2+ 8]
+    paddsw        m3, [r2+24]
+    packuswb      m0, m2
+    packuswb      m1, m3
+%else ; mmsize == 16
+    packuswb      m0, m1
 %endif
+    movq     [r0   ], m0
+%if mmsize == 8
+    movq     [r0+r1], m1
+%else ; mmsize == 16
+    movhps   [r0+r1], m0
+%endif
+    lea           r0, [r0+r1*2]
+    add           r2, 32
+    dec           r3
+    jg .loop
+    RET
 %endmacro
 
-%if ARCH_X86_64
-%define REGS 4
-%else
-%define REGS 3
-%endif
 INIT_MMX
-vp3_idct_funcs mmx,  0, REGS
+vp3_idct_funcs mmx
 INIT_XMM
-vp3_idct_funcs sse2, 9, REGS
-%undef REGS
+vp3_idct_funcs sse2
 
 %macro DC_ADD 0
     movq          m2, [r0     ]
diff --git a/libavfilter/vf_gradfun.c b/libavfilter/vf_gradfun.c
index 303e54c..0965e6d 100644
--- a/libavfilter/vf_gradfun.c
+++ b/libavfilter/vf_gradfun.c
@@ -135,12 +135,14 @@ static av_cold int init(AVFilterContext *ctx, const char 
*args, void *opaque)
     gf->blur_line = ff_gradfun_blur_line_c;
     gf->filter_line = ff_gradfun_filter_line_c;
 
+#if HAVE_INLINE_ASM
     if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX2)
         gf->filter_line = ff_gradfun_filter_line_mmx2;
     if (HAVE_SSSE3 && cpu_flags & AV_CPU_FLAG_SSSE3)
         gf->filter_line = ff_gradfun_filter_line_ssse3;
     if (HAVE_SSE && cpu_flags & AV_CPU_FLAG_SSE2)
         gf->blur_line = ff_gradfun_blur_line_sse2;
+#endif
 
     av_log(ctx, AV_LOG_INFO, "threshold:%.2f radius:%d\n", thresh, gf->radius);
 
diff --git a/libavfilter/vf_yadif.c b/libavfilter/vf_yadif.c
index baf8b7a..94aac83 100644
--- a/libavfilter/vf_yadif.c
+++ b/libavfilter/vf_yadif.c
@@ -171,9 +171,7 @@ static void filter(AVFilterContext *ctx, AVFilterBufferRef 
*dstpic,
             }
         }
     }
-#if HAVE_MMX
-    __asm__ volatile("emms \n\t" : : : "memory");
-#endif
+    emms_c();
 }
 
 static AVFilterBufferRef *get_video_buffer(AVFilterLink *link, int perms, int 
w, int h)
@@ -407,12 +405,14 @@ static av_cold int init(AVFilterContext *ctx, const char 
*args, void *opaque)
     if (args) sscanf(args, "%d:%d:%d", &yadif->mode, &yadif->parity, 
&yadif->auto_enable);
 
     yadif->filter_line = filter_line_c;
+#if HAVE_INLINE_ASM
     if (HAVE_SSSE3 && cpu_flags & AV_CPU_FLAG_SSSE3)
         yadif->filter_line = ff_yadif_filter_line_ssse3;
     else if (HAVE_SSE && cpu_flags & AV_CPU_FLAG_SSE2)
         yadif->filter_line = ff_yadif_filter_line_sse2;
     else if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX)
         yadif->filter_line = ff_yadif_filter_line_mmx;
+#endif
 
     av_log(ctx, AV_LOG_INFO, "mode:%d parity:%d auto_enable:%d\n", 
yadif->mode, yadif->parity, yadif->auto_enable);
 
diff --git a/libavfilter/x86/Makefile b/libavfilter/x86/Makefile
index e98693d..a8e5e2d 100644
--- a/libavfilter/x86/Makefile
+++ b/libavfilter/x86/Makefile
@@ -1,2 +1,2 @@
-MMX-OBJS-$(CONFIG_YADIF_FILTER)              += x86/yadif.o
-MMX-OBJS-$(CONFIG_GRADFUN_FILTER)            += x86/gradfun.o
+INLINEASM-OBJS-$(CONFIG_YADIF_FILTER)              += x86/yadif.o
+INLINEASM-OBJS-$(CONFIG_GRADFUN_FILTER)            += x86/gradfun.o
diff --git a/libavutil/internal.h b/libavutil/internal.h
index ae678d5..14fc303 100644
--- a/libavutil/internal.h
+++ b/libavutil/internal.h
@@ -35,6 +35,9 @@
 #include <stddef.h>
 #include <assert.h>
 #include "config.h"
+#if HAVE_MMX && !HAVE_INLINE_ASM
+#include <mmintrin.h> /* _mm_empty(), used by emms_c() when inline asm is unavailable */
+#endif /* HAVE_MMX && !HAVE_INLINE_ASM */
 #include "attributes.h"
 #include "timer.h"
 #include "dict.h"
@@ -110,7 +113,7 @@ struct AVDictionary {
 
 /* math */
 
-#if ARCH_X86
+#if ARCH_X86 && HAVE_INLINE_ASM
 #define MASK_ABS(mask, level)\
             __asm__ volatile(\
                 "cltd                   \n\t"\
@@ -239,7 +242,11 @@ struct AVDictionary {
  */
 static av_always_inline void emms_c(void)
 {
+#if HAVE_INLINE_ASM
     __asm__ volatile ("emms" ::: "memory");
+#else
+    _mm_empty();
+#endif
 }
 #else /* HAVE_MMX */
 #define emms_c()
diff --git a/libavutil/intmath.h b/libavutil/intmath.h
index e6a2e10..f7baff5 100644
--- a/libavutil/intmath.h
+++ b/libavutil/intmath.h
@@ -34,7 +34,7 @@ extern const uint32_t ff_inverse[257];
 
 #if   ARCH_ARM
 #   include "arm/intmath.h"
-#elif ARCH_X86
+#elif ARCH_X86 && HAVE_INLINE_ASM
 #   include "x86/intmath.h"
 #endif
 
diff --git a/libavutil/x86/cpu.c b/libavutil/x86/cpu.c
index 2424fe4..122f20f 100644
--- a/libavutil/x86/cpu.c
+++ b/libavutil/x86/cpu.c
@@ -25,6 +25,27 @@
 #include "libavutil/x86_cpu.h"
 #include "libavutil/cpu.h"
 
+#if !HAVE_INLINE_ASM
+#include <intrin.h>
+#include <immintrin.h>
+
+#define cpuid(index, eax, ebx, ecx, edx) \
+    do { \
+        int info[4]; /* receives EAX, EBX, ECX, EDX in that order */ \
+        __cpuid(info, index); \
+        eax = info[0]; \
+        ebx = info[1]; \
+        ecx = info[2]; \
+        edx = info[3]; \
+    } while (0)
+
+#define xgetbv(index, a, d) \
+    do { \
+        uint64_t res = __xgetbv(index); \
+        a = res; \
+        d = res >> 32; \
+    } while (0)
+#else
 /* ebx saving is necessary for PIC. gcc seems unable to see it alone */
 #define cpuid(index,eax,ebx,ecx,edx)\
     __asm__ volatile\
@@ -37,6 +58,7 @@
 
 #define xgetbv(index,eax,edx)                                   \
     __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c" (index))
+#endif /* HAVE_INLINE_ASM */
 
 /* Function to test if multimedia instructions are supported...  */
 int ff_get_cpu_flags_x86(void)
@@ -49,6 +71,20 @@ int ff_get_cpu_flags_x86(void)
 
 #if ARCH_X86_32
     x86_reg a, c;
+#if !HAVE_INLINE_ASM
+    __asm {
+        pushfd
+        pop    eax
+        mov    ebx, eax
+        xor    eax, 0x200000
+        push   eax
+        popfd
+        pushfd
+        pop    eax
+        mov      a, eax
+        mov      c, ebx
+    }
+#else
     __asm__ volatile (
         /* See if CPUID instruction is supported ... */
         /* ... Get copies of EFLAGS into eax and ecx */
@@ -69,6 +105,7 @@ int ff_get_cpu_flags_x86(void)
         :
         : "cc"
         );
+#endif /* HAVE_INLINE_ASM */
 
     if (a == c)
         return 0; /* CPUID not supported */
diff --git a/libavutil/x86/timer.h b/libavutil/x86/timer.h
index 7f51816..31bf6f9 100644
--- a/libavutil/x86/timer.h
+++ b/libavutil/x86/timer.h
@@ -22,14 +22,22 @@
 #define AVUTIL_X86_TIMER_H
 
 #include <stdint.h>
+#include "config.h"
+#if !HAVE_INLINE_ASM
+#include <intrin.h>
+#endif
 
 #define AV_READ_TIME read_time
 
 static inline uint64_t read_time(void)
 {
+#if HAVE_INLINE_ASM
     uint32_t a, d;
     __asm__ volatile("rdtsc" : "=a" (a), "=d" (d));
     return ((uint64_t)d << 32) + a;
+#else
+    return __rdtsc();
+#endif
 }
 
 #endif /* AVUTIL_X86_TIMER_H */
diff --git a/libavutil/x86_cpu.h b/libavutil/x86_cpu.h
index f84eba6..4e8508c 100644
--- a/libavutil/x86_cpu.h
+++ b/libavutil/x86_cpu.h
@@ -95,4 +95,6 @@ typedef int x86_reg;
 #    define XMM_CLOBBERS_ONLY(...)
 #endif
 
+typedef struct { uint64_t a, b; } xmm_reg;
+
 #endif /* AVUTIL_X86_CPU_H */
diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
index 14b595f..eefc001 100644
--- a/libswscale/rgb2rgb.c
+++ b/libswscale/rgb2rgb.c
@@ -128,7 +128,7 @@ void (*yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t 
*vdst,
 void sws_rgb2rgb_init(void)
 {
     rgb2rgb_init_c();
-    if (HAVE_MMX)
+    if (HAVE_MMX && HAVE_INLINE_ASM)
         rgb2rgb_init_x86();
 }
 
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 7ae5af3..0f8ef2b 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -518,7 +518,7 @@ static int swScale(SwsContext *c, const uint8_t *src[],
         if (!enough_lines)
             break;  // we can't output a dstY line so let's try with the next 
slice
 
-#if HAVE_MMX
+#if HAVE_MMX && HAVE_INLINE_ASM
         updateMMXDitherTables(c, dstY, lumBufIndex, chrBufIndex,
                               lastInLumBuf, lastInChrBuf);
 #endif
@@ -661,7 +661,7 @@ static int swScale(SwsContext *c, const uint8_t *src[],
     if (isPlanar(dstFormat) && isALPHA(dstFormat) && !alpPixBuf)
         fillPlane(dst[3], dstStride[3], dstW, dstY - lastDstY, lastDstY, 255);
 
-#if HAVE_MMX2
+#if HAVE_MMX2 && HAVE_INLINE_ASM
     if (av_get_cpu_flags() & AV_CPU_FLAG_MMX2)
         __asm__ volatile ("sfence" ::: "memory");
 #endif
diff --git a/libswscale/utils.c b/libswscale/utils.c
index d8fee58..a6b5a18 100644
--- a/libswscale/utils.c
+++ b/libswscale/utils.c
@@ -576,7 +576,7 @@ fail:
     return ret;
 }
 
-#if HAVE_MMX2
+#if HAVE_MMX2 && HAVE_INLINE_ASM
 static int initMMX2HScaler(int dstW, int xInc, uint8_t *filterCode,
                            int16_t *filter, int32_t *filterPos, int numSplits)
 {
@@ -739,7 +739,7 @@ static int initMMX2HScaler(int dstW, int xInc, uint8_t 
*filterCode,
 
     return fragmentPos + 1;
 }
-#endif /* HAVE_MMX2 */
+#endif /* HAVE_MMX2 && HAVE_INLINE_ASM */
 
 static void getSubSampleFactors(int *h, int *v, enum PixelFormat format)
 {
@@ -971,7 +971,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, 
SwsFilter *dstFilter)
     FF_ALLOC_OR_GOTO(c, c->formatConvBuffer,
                      (FFALIGN(srcW, 16) * 2 * FFALIGN(c->srcBpc, 8) >> 3) + 16,
                      fail);
-    if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2 &&
+    if (HAVE_MMX2 && HAVE_INLINE_ASM && cpu_flags & AV_CPU_FLAG_MMX2 &&
         c->srcBpc == 8 && c->dstBpc <= 10) {
         c->canMMX2BeUsed = (dstW >= srcW && (dstW & 31) == 0 &&
                             (srcW & 15) == 0) ? 1 : 0;
@@ -1010,7 +1010,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, 
SwsFilter *dstFilter)
 
     /* precalculate horizontal scaler filter coefficients */
     {
-#if HAVE_MMX2
+#if HAVE_MMX2 && HAVE_INLINE_ASM
 // can't downscale !!!
         if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR)) {
             c->lumMmx2FilterCodeSize = initMMX2HScaler(dstW, c->lumXInc, NULL,
@@ -1046,7 +1046,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, 
SwsFilter *dstFilter)
             mprotect(c->chrMmx2FilterCode, c->chrMmx2FilterCodeSize, PROT_EXEC 
| PROT_READ);
 #endif
         } else
-#endif /* HAVE_MMX2 */
+#endif /* HAVE_MMX2 && HAVE_INLINE_ASM */
         {
             const int filterAlign =
                 (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) ? 4 :
diff --git a/libswscale/x86/Makefile b/libswscale/x86/Makefile
index 7f37799..0ff5f78 100644
--- a/libswscale/x86/Makefile
+++ b/libswscale/x86/Makefile
@@ -1,7 +1,8 @@
 OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o
 
-MMX-OBJS                        += x86/rgb2rgb.o                        \
-                                   x86/swscale_mmx.o                    \
+MMX-OBJS                        += x86/swscale_mmx.o
+
+INLINEASM-OBJS                  += x86/rgb2rgb.o                        \
                                    x86/yuv2rgb_mmx.o                    \
 
 YASM-OBJS                       += x86/input.o                          \
diff --git a/libswscale/x86/swscale_mmx.c b/libswscale/x86/swscale_mmx.c
index 99b3262..87eb2ee 100644
--- a/libswscale/x86/swscale_mmx.c
+++ b/libswscale/x86/swscale_mmx.c
@@ -27,6 +27,8 @@
 #include "libavutil/cpu.h"
 #include "libavutil/pixdesc.h"
 
+#if HAVE_INLINE_ASM
+
 #define DITHER1XBPP
 
 DECLARE_ASM_CONST(8, uint64_t, bF8)=       0xF8F8F8F8F8F8F8F8LL;
@@ -199,6 +201,8 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int 
lumBufIndex, int chrBufI
     }
 }
 
+#endif /* HAVE_INLINE_ASM */
+
 #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
 extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## 
opt( \
                                                 SwsContext *c, int16_t *data, \
@@ -300,12 +304,14 @@ void ff_sws_init_swScale_mmx(SwsContext *c)
 {
     int cpu_flags = av_get_cpu_flags();
 
+#if HAVE_INLINE_ASM
     if (cpu_flags & AV_CPU_FLAG_MMX)
         sws_init_swScale_MMX(c);
 #if HAVE_MMX2
     if (cpu_flags & AV_CPU_FLAG_MMX2)
         sws_init_swScale_MMX2(c);
 #endif
+#endif /* HAVE_INLINE_ASM */
 
 #if HAVE_YASM
 #define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt1, opt2) do { \
diff --git a/libswscale/yuv2rgb.c b/libswscale/yuv2rgb.c
index 1c44a2f..6aae098 100644
--- a/libswscale/yuv2rgb.c
+++ b/libswscale/yuv2rgb.c
@@ -533,7 +533,7 @@ SwsFunc ff_yuv2rgb_get_func_ptr(SwsContext *c)
 {
     SwsFunc t = NULL;
 
-    if (HAVE_MMX)
+    if (HAVE_MMX && HAVE_INLINE_ASM)
         t = ff_yuv2rgb_init_mmx(c);
     else if (HAVE_VIS)
         t = ff_yuv2rgb_init_vis(c);
-- 
1.7.9.2

_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

[libav-devel] [PATCH] Put inline asm behind a macro.

Reply via email to