Re: [FFmpeg-devel] [PATCH] Remove REP_RET usage throughout x86 asm files

2017-11-13 Thread Henrik Gramner
On Sun, Nov 12, 2017 at 9:59 PM, Rostislav Pehlivanov wrote:
> No longer needed as AUTO_REP_RET deals with it on normal RETs.

Only when the RET follows a branch instruction. If the RET is a branch
target (and isn't itself preceded by a branch instruction), there
is no way of automatically detecting whether or not a rep prefix
should be used, as far as I know.

On the other hand, the CPUs where it even matters in the first place
are old legacy stuff which isn't relevant any more, and eliminating
those few cases (if there are any, I didn't check) for simplicity is
probably worth it anyway.
___
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel


[FFmpeg-devel] [PATCH] Remove REP_RET usage throughout x86 asm files

2017-11-12 Thread Rostislav Pehlivanov
No longer needed as AUTO_REP_RET deals with it on normal RETs.

Signed-off-by: Rostislav Pehlivanov 
---
 libavcodec/x86/aacpsdsp.asm | 10 
 libavcodec/x86/ac3dsp.asm   | 10 
 libavcodec/x86/alacdsp.asm  |  4 +--
 libavcodec/x86/audiodsp.asm |  2 +-
 libavcodec/x86/dirac_dwt.asm| 14 +--
 libavcodec/x86/fft.asm  |  8 +++---
 libavcodec/x86/flacdsp.asm  |  8 +++---
 libavcodec/x86/h264_chromamc.asm| 18 +++---
 libavcodec/x86/h264_chromamc_10bit.asm  | 10 
 libavcodec/x86/h264_deblock_10bit.asm   | 10 
 libavcodec/x86/h264_idct.asm| 22 -
 libavcodec/x86/h264_idct_10bit.asm  |  8 +++---
 libavcodec/x86/h264_intrapred.asm   | 30 +++---
 libavcodec/x86/h264_intrapred_10bit.asm | 16 ++--
 libavcodec/x86/h264_qpel_10bit.asm  |  2 +-
 libavcodec/x86/h264_qpel_8bit.asm   | 26 +--
 libavcodec/x86/h264_weight.asm  | 16 ++--
 libavcodec/x86/h264_weight_10bit.asm| 12 -
 libavcodec/x86/hevc_sao.asm |  2 +-
 libavcodec/x86/hevc_sao_10bit.asm   |  2 +-
 libavcodec/x86/hpeldsp.asm  | 22 -
 libavcodec/x86/hpeldsp_vp3.asm  |  4 +--
 libavcodec/x86/huffyuvdsp.asm   |  2 +-
 libavcodec/x86/jpeg2000dsp.asm  |  4 +--
 libavcodec/x86/lossless_videodsp.asm|  2 +-
 libavcodec/x86/lossless_videoencdsp.asm |  2 +-
 libavcodec/x86/mdct15.asm   |  2 +-
 libavcodec/x86/me_cmp.asm   |  2 +-
 libavcodec/x86/pixblockdsp.asm  |  2 +-
 libavcodec/x86/pngdsp.asm   |  2 +-
 libavcodec/x86/qpel.asm |  6 ++---
 libavcodec/x86/qpeldsp.asm  | 12 -
 libavcodec/x86/rv34dsp.asm  |  2 +-
 libavcodec/x86/rv40dsp.asm  | 10 
 libavcodec/x86/sbrdsp.asm   | 12 -
 libavcodec/x86/takdsp.asm   |  8 +++---
 libavcodec/x86/utvideodsp.asm   |  4 +--
 libavcodec/x86/v210.asm |  2 +-
 libavcodec/x86/vc1dsp_mc.asm|  2 +-
 libavcodec/x86/videodsp.asm |  2 +-
 libavcodec/x86/vp8dsp.asm   | 30 +++---
 libavcodec/x86/vp8dsp_loopfilter.asm|  6 ++---
 libavfilter/x86/af_afir.asm |  2 +-
 libavfilter/x86/af_volume.asm   |  6 ++---
 libavfilter/x86/avf_showcqt.asm |  4 +--
 libavfilter/x86/vf_blend.asm|  2 +-
 libavfilter/x86/vf_gradfun.asm  |  6 ++---
 libavfilter/x86/vf_hqdn3d.asm   |  2 +-
 libavfilter/x86/vf_interlace.asm|  6 ++---
 libavfilter/x86/vf_maskedmerge.asm  |  2 +-
 libavfilter/x86/vf_stereo3d.asm |  2 +-
 libavfilter/x86/vf_w3fdif.asm   | 10 
 libavresample/x86/audio_convert.asm | 44 -
 libavresample/x86/audio_mix.asm | 10 
 libavresample/x86/dither.asm|  6 ++---
 libavutil/x86/float_dsp.asm | 18 +++---
 libavutil/x86/lls.asm   |  4 +--
 libavutil/x86/x86inc.asm| 16 
 libswresample/x86/audio_convert.asm | 12 -
 libswresample/x86/rematrix.asm  |  8 +++---
 libswscale/x86/input.asm| 14 +--
 libswscale/x86/output.asm   | 10 
 libswscale/x86/scale.asm|  2 +-
 tests/checkasm/x86/checkasm.asm |  2 +-
 64 files changed, 271 insertions(+), 287 deletions(-)

diff --git a/libavcodec/x86/aacpsdsp.asm b/libavcodec/x86/aacpsdsp.asm
index 4acd087c85..73c7c09514 100644
--- a/libavcodec/x86/aacpsdsp.asm
+++ b/libavcodec/x86/aacpsdsp.asm
@@ -49,7 +49,7 @@ align 16
 add  dstq, mmsize
 add    nq, mmsize*2
 jl .loop
-REP_RET
+RET
 %endmacro
 
 INIT_XMM sse
@@ -83,7 +83,7 @@ align 16
 add   src2q, mmsize
 add  nq, mmsize*2
 jl .loop
-REP_RET
+RET
 
 ;***
 ;void ff_ps_stereo_interpolate_sse3(float (*l)[2], float (*r)[2],
@@ -116,7 +116,7 @@ align 16
 movhps [rq+nq], m2
 add  nq, 8
 jl .loop
-REP_RET
+RET
 
 ;***
 ;void ps_stereo_interpolate_ipdopd_sse3(float (*l)[2], float (*r)[2],
@@ -164,7 +164,7 @@ align 16
 movhps [rq+nq], m2
 add  nq, 8
 jl .loop
-REP_RET
+RET
 
 ;**
 ;void ps_hybrid_analysis_ileave_sse(float out[2][38][64],
@@ -478,7 +478,7 @@ align 16
 add  outq, strideq
 add  nq, 64
 jl .loop
-REP_RET
+RET
 %endmacro
 
 INIT_XMM sse
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index 675ade3101..304c6cfd8c 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -64,7 +64,7 @@ cgloba