from:"Ronald S. Bultje"

[FFmpeg-cvslog] avcodec/x86/hevc: fix luma 12b overflow

2024-02-26 Thread Ronald S . Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Sun Feb 25 
10:49:35 2024 -0500| [d6083f503d5bd7f9a2540c3e30d95e7add765d1e] | committer: J. 
Dekker

avcodec/x86/hevc: fix luma 12b overflow

Signed-off-by: J. Dekker 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d6083f503d5bd7f9a2540c3e30d95e7add765d1e
---

 libavcodec/x86/hevc_deblock.asm | 40 +++-
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/libavcodec/x86/hevc_deblock.asm b/libavcodec/x86/hevc_deblock.asm
index 85ee4800bb..61b79f8079 100644
--- a/libavcodec/x86/hevc_deblock.asm
+++ b/libavcodec/x86/hevc_deblock.asm
@@ -541,19 +541,41 @@ ALIGN 16
 add betaq, r13
 shr betaq, 3; ((beta + (beta >> 1)) >> 3))
 
-mova    m13, [pw_8]
 psubw   m12, m4, m3 ; q0 - p0
-psllw   m10, m12, 3; 8 * (q0 - p0)
-paddw   m12, m10 ; 9 * (q0 - p0)
-
+paddw   m10, m12, m12
+paddw   m12, m10 ; 3 * (q0 - p0)
 psubw   m10, m5, m2 ; q1 - p1
-psllw   m8, m10, 1; 2 * ( q1 - p1 )
-paddw   m10, m8; 3 * ( q1 - p1 )
-psubw   m12, m10; 9 * (q0 - p0) - 3 * ( q1 - p1 )
-paddw   m12, m13; + 8
+psubw   m12, m10 ; 3 * (q0 - p0) - (q1 - p1)
+%if %1 < 12
+paddw   m10, m12, m12
+paddw   m12, [pw_8]; + 8
+paddw   m12, m10 ; 9 * (q0 - p0) - 3 * ( q1 - p1 )
 psraw   m12, 4; >> 4 , delta0
 PABSW   m13, m12; abs(delta0)
-
+%elif cpuflag(ssse3)
+pabsw   m13, m12
+paddw   m10, m13, m13
+paddw   m13, [pw_8]
+paddw   m13, m10 ; abs(9 * (q0 - p0) - 3 * ( q1 - p1 ))
+pxor    m10, m10
+pcmpgtw m10, m12
+paddw   m13, m10
+psrlw   m13, 4; >> 4, abs(delta0)
+psignw  m10, m13, m12
+SWAP 10, 12
+%else
+pxor    m10, m10
+pcmpgtw m10, m12
+pxor    m12, m10
+psubw   m12, m10 ; abs()
+paddw   m13, m12, m12
+paddw   m12, [pw_8]
+paddw   m13, m12 ; 3*abs(m12)
+paddw   m13, m10
+psrlw   m13, 4
+pxor    m12, m13, m10
+psubw   m12, m10
+%endif
 
 psllw   m10, m9, 2; 8 * tc
 paddw   m10, m9; 10 * tc

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-cvslog] vp9: don't overread by 4 pixels in ff_vp9_avg4_mmxext().

2022-06-01 Thread Ronald S . Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Tue May 31 
08:30:37 2022 -0400| [6e13c30a8fd45e14303ef4a8e4da3554d5b4ba8e] | committer: 
Ronald S. Bultje

vp9: don't overread by 4 pixels in ff_vp9_avg4_mmxext().

If the block is at the end of the allocated buffer and there is no
padding, this will over-read, which may cause crashes. Reported by
Firefox.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6e13c30a8fd45e14303ef4a8e4da3554d5b4ba8e
---

 libavcodec/x86/vp9mc.asm | 5 +
 1 file changed, 5 insertions(+)

diff --git a/libavcodec/x86/vp9mc.asm b/libavcodec/x86/vp9mc.asm
index f64161b2c2..efc4cfbef1 100644
--- a/libavcodec/x86/vp9mc.asm
+++ b/libavcodec/x86/vp9mc.asm
@@ -604,7 +604,12 @@ cglobal vp9_%1%2 %+ %%szsuf, 5, 5, %8, dst, dstride, src, 
sstride, h
 %%pavg  m0, [dstq]
 %%pavg  m1, [dstq+d%3]
 %%pavg  m2, [dstq+d%4]
+%if %2 == 4
+%%srcfn m4, [dstq+d%5]
+%%pavg  m3, m4
+%else
 %%pavg  m3, [dstq+d%5]
+%endif
 %if %2/mmsize == 8
 %%pavg  m4, [dstq+mmsize*4]
 %%pavg  m5, [dstq+mmsize*5]

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-cvslog] ivfenc: write duration for frame_cnt=1.

2021-03-03 Thread Ronald S . Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Tue Mar  2 
13:52:44 2021 -0500| [d29ec02d48a7fae1e3ed5a7bd79ab3fd73b42a96] | committer: 
Ronald S. Bultje

ivfenc: write duration for frame_cnt=1.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d29ec02d48a7fae1e3ed5a7bd79ab3fd73b42a96
---

 libavformat/ivfenc.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/libavformat/ivfenc.c b/libavformat/ivfenc.c
index 0951f56c92..889c00438c 100644
--- a/libavformat/ivfenc.c
+++ b/libavformat/ivfenc.c
@@ -23,7 +23,7 @@
 
 typedef struct IVFEncContext {
 unsigned frame_cnt;
-uint64_t last_pts, sum_delta_pts;
+uint64_t last_pts, sum_delta_pts, last_pkt_duration;
 } IVFEncContext;
 
 static int ivf_init(AVFormatContext *s)
@@ -86,6 +86,7 @@ static int ivf_write_packet(AVFormatContext *s, AVPacket *pkt)
 avio_write(pb, pkt->data, pkt->size);
 if (ctx->frame_cnt)
 ctx->sum_delta_pts += pkt->pts - ctx->last_pts;
+ctx->last_pkt_duration = pkt->duration;
 ctx->frame_cnt++;
 ctx->last_pts = pkt->pts;
 
@@ -97,12 +98,15 @@ static int ivf_write_trailer(AVFormatContext *s)
 AVIOContext *pb = s->pb;
 IVFEncContext *ctx = s->priv_data;
 
-if ((pb->seekable & AVIO_SEEKABLE_NORMAL) && ctx->frame_cnt > 1) {
+if ((pb->seekable & AVIO_SEEKABLE_NORMAL) &&
+(ctx->frame_cnt > 1 || (ctx->frame_cnt == 1 && ctx->last_pkt_duration))) {
 int64_t end = avio_tell(pb);
 
 avio_seek(pb, 24, SEEK_SET);
 // overwrite the "length" field (duration)
-avio_wl32(pb, ctx->frame_cnt * ctx->sum_delta_pts / (ctx->frame_cnt - 1));
+avio_wl32(pb, ctx->last_pkt_duration ?
+  ctx->sum_delta_pts + ctx->last_pkt_duration :
+  ctx->frame_cnt * ctx->sum_delta_pts / (ctx->frame_cnt - 1));
 avio_wl32(pb, 0); // zero out unused bytes
 avio_seek(pb, end, SEEK_SET);
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

To unsubscribe, visit link above, or email
ffmpeg-cvslog-requ...@ffmpeg.org with subject "unsubscribe".

[FFmpeg-cvslog] png: split header state and data state in two separate variables.

2020-07-05 Thread Ronald S . Bultje

ffmpeg | branch: release/2.8 | Ronald S. Bultje  | Mon Apr  
3 10:08:29 2017 -0400| [453519af0d4a50bbc62999b312de9692e429bf22] | committer: 
Michael Niedermayer

png: split header state and data state in two separate variables.

Fixes a reported (but false) race condition in tsan for fate-apng:

WARNING: ThreadSanitizer: data race (pid=6274)
  Read of size 4 at 0x7d680001ec78 by main thread (mutexes: write M1338):
#0 update_thread_context src/libavcodec/pngdec.c:1456 
(ffmpeg+0x00dacf0c)
[..]
  Previous write of size 4 at 0x7d680001ec78 by thread T1 (mutexes: write 
M1335):
#0 decode_idat_chunk src/libavcodec/pngdec.c:737 (ffmpeg+0x00dae951)

(cherry picked from commit 478f1c3d5e5463a284ea7efecfc62d47ba3be11a)
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=453519af0d4a50bbc62999b312de9692e429bf22
---

 libavcodec/png.h|  5 -
 libavcodec/pngdec.c | 65 -
 2 files changed, 39 insertions(+), 31 deletions(-)

diff --git a/libavcodec/png.h b/libavcodec/png.h
index 948c2f714f..e967fcf38f 100644
--- a/libavcodec/png.h
+++ b/libavcodec/png.h
@@ -42,11 +42,6 @@
 #define PNG_FILTER_VALUE_PAETH 4
 #define PNG_FILTER_VALUE_MIXED 5
 
-#define PNG_IHDR  0x0001
-#define PNG_IDAT  0x0002
-#define PNG_ALLIMAGE  0x0004
-#define PNG_PLTE  0x0008
-
 #define NB_PASSES 7
 
 #define PNGSIG 0x89504e470d0a1a0a
diff --git a/libavcodec/pngdec.c b/libavcodec/pngdec.c
index 3fa4e4e4c0..bfc7c9da0b 100644
--- a/libavcodec/pngdec.c
+++ b/libavcodec/pngdec.c
@@ -34,6 +34,16 @@
 
 #include <zlib.h>
 
+enum PNGHeaderState {
+PNG_IHDR = 1 << 0,
+PNG_PLTE = 1 << 1,
+};
+
+enum PNGImageState {
+PNG_IDAT = 1 << 0,
+PNG_ALLIMAGE = 1 << 1,
+};
+
 typedef struct PNGDecContext {
 PNGDSPContext dsp;
 AVCodecContext *avctx;
@@ -43,7 +53,8 @@ typedef struct PNGDecContext {
 ThreadFrame last_picture;
 ThreadFrame picture;
 
-int state;
+enum PNGHeaderState hdr_state;
+enum PNGImageState pic_state;
 int width, height;
 int cur_w, cur_h;
 int last_w, last_h;
@@ -332,7 +343,7 @@ static void png_handle_row(PNGDecContext *s)
 }
 s->y++;
 if (s->y == s->cur_h) {
-s->state |= PNG_ALLIMAGE;
+s->pic_state |= PNG_ALLIMAGE;
 if (s->filter_type == PNG_FILTER_TYPE_LOCO) {
 if (s->bit_depth == 16) {
 deloco_rgb16((uint16_t *)ptr, s->row_size / 2,
@@ -367,7 +378,7 @@ static void png_handle_row(PNGDecContext *s)
 memset(s->last_row, 0, s->row_size);
 for (;;) {
 if (s->pass == NB_PASSES - 1) {
-s->state |= PNG_ALLIMAGE;
+s->pic_state |= PNG_ALLIMAGE;
 goto the_end;
 } else {
 s->pass++;
@@ -402,7 +413,7 @@ static int png_decode_idat(PNGDecContext *s, int length)
 return AVERROR_EXTERNAL;
 }
 if (s->zstream.avail_out == 0) {
-if (!(s->state & PNG_ALLIMAGE)) {
+if (!(s->pic_state & PNG_ALLIMAGE)) {
 png_handle_row(s);
 }
 s->zstream.avail_out = s->crow_size;
@@ -539,12 +550,12 @@ static int decode_ihdr_chunk(AVCodecContext *avctx, 
PNGDecContext *s,
 if (length != 13)
 return AVERROR_INVALIDDATA;
 
-if (s->state & PNG_IDAT) {
+if (s->pic_state & PNG_IDAT) {
 av_log(avctx, AV_LOG_ERROR, "IHDR after IDAT\n");
 return AVERROR_INVALIDDATA;
 }
 
-if (s->state & PNG_IHDR) {
+if (s->hdr_state & PNG_IHDR) {
 av_log(avctx, AV_LOG_ERROR, "Multiple IHDR\n");
 return AVERROR_INVALIDDATA;
 }
@@ -571,7 +582,7 @@ static int decode_ihdr_chunk(AVCodecContext *avctx, 
PNGDecContext *s,
 s->filter_type  = bytestream2_get_byte(&s->gb);
 s->interlace_type   = bytestream2_get_byte(&s->gb);
 bytestream2_skip(&s->gb, 4); /* crc */
-s->state |= PNG_IHDR;
+s->hdr_state |= PNG_IHDR;
 if (avctx->debug & FF_DEBUG_PICT_INFO)
 av_log(avctx, AV_LOG_DEBUG, "width=%d height=%d depth=%d color_type=%d 
"
 "compression_type=%d filter_type=%d interlace_type=%d\n",
@@ -587,7 +598,7 @@ error:
 
 static int decode_phys_chunk(AVCodecContext *avctx, PNGDecContext *s)
 {
-if (s->state & PNG_IDAT) {
+if (s->pic_state & PNG_IDAT) {
 av_log(avctx, AV_LOG_ERROR, "pHYs after IDAT\n");
 return AVERROR_INVALIDDATA;
 }
@@ -607,11 +618,11 @@ static int decode_idat_chunk(AVCodecContext *avctx, 
PNGDecContext *s,
 int ret;
 size_t byte_depth = s->bit_depth > 8 ? 2 : 1;
 
-if (!(s->state & PNG_IHDR))

[FFmpeg-cvslog] libvmaf: exit gracefully if the library fails.

2017-12-18 Thread Ronald S . Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Mon Dec 18 
07:59:39 2017 -0500| [df3222d4bb18cbceda443def17b1b29067ed6e3f] | committer: 
Ronald S. Bultje

libvmaf: exit gracefully if the library fails.

Fixes trac issue #6884 and Netflix/vmaf issue #124.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=df3222d4bb18cbceda443def17b1b29067ed6e3f
---

 libavfilter/vf_libvmaf.c | 29 +++--
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/libavfilter/vf_libvmaf.c b/libavfilter/vf_libvmaf.c
index e83b89b4f1..dfe474c40c 100644
--- a/libavfilter/vf_libvmaf.c
+++ b/libavfilter/vf_libvmaf.c
@@ -61,6 +61,7 @@ typedef struct LIBVMAFContext {
 int ssim;
 int ms_ssim;
 char *pool;
+int error;
 } LIBVMAFContext;
 
 #define OFFSET(x) offsetof(LIBVMAFContext, x)
@@ -158,18 +159,26 @@ static void compute_vmaf_score(LIBVMAFContext *s)
 
 format = (char *) s->desc->name;
 
-s->vmaf_score = compute_vmaf(format, s->width, s->height, read_frame, s,
- s->model_path, s->log_path, s->log_fmt, 0, 0,
- s->enable_transform, s->phone_model, s->psnr,
- s->ssim, s->ms_ssim, s->pool);
+s->error = compute_vmaf(&s->vmaf_score, format, s->width, s->height,
+read_frame, s, s->model_path, s->log_path,
+s->log_fmt, 0, 0, s->enable_transform,
+s->phone_model, s->psnr, s->ssim,
+s->ms_ssim, s->pool);
 }
 
 static void *call_vmaf(void *ctx)
 {
 LIBVMAFContext *s = (LIBVMAFContext *) ctx;
 compute_vmaf_score(s);
-av_log(ctx, AV_LOG_INFO, "VMAF score: %f\n",s->vmaf_score);
+if (!s->error) {
+av_log(ctx, AV_LOG_INFO, "VMAF score: %f\n",s->vmaf_score);
+} else {
+pthread_mutex_lock(&s->lock);
+pthread_cond_signal(&s->cond);
+pthread_mutex_unlock(&s->lock);
+}
 pthread_exit(NULL);
+return NULL;
 }
 
 static int do_vmaf(FFFrameSync *fs)
@@ -187,10 +196,17 @@ static int do_vmaf(FFFrameSync *fs)
 
 pthread_mutex_lock(&s->lock);
 
-while (s->frame_set != 0) {
+while (s->frame_set && !s->error) {
 pthread_cond_wait(&s->cond, &s->lock);
 }
 
+if (s->error) {
+av_log(ctx, AV_LOG_ERROR,
+   "libvmaf encountered an error, check log for details\n");
+pthread_mutex_unlock(&s->lock);
+return AVERROR(EINVAL);
+}
+
 av_frame_ref(s->gref, ref);
 av_frame_ref(s->gmain, master);
 
@@ -208,6 +224,7 @@ static av_cold int init(AVFilterContext *ctx)
 
 s->gref = av_frame_alloc();
 s->gmain = av_frame_alloc();
+s->error = 0;
 
 pthread_mutex_init(&s->lock, NULL);
 pthread_cond_init (&s->cond, NULL);

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] vp9: Add bsf to merge superframes

2017-10-26 Thread Ronald S . Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Mon Feb 29 
09:43:07 2016 -0500| [0cf949a01193dcf6f83fd95d46792dd94479b4e4] | committer: 
Mark Thompson

vp9: Add bsf to merge superframes

From ffmpeg commit 2e6636aa87303d37b112e79f093ca39500f92364.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=0cf949a01193dcf6f83fd95d46792dd94479b4e4
---

 Changelog   |   1 +
 doc/bitstream_filters.texi  |   4 +
 libavcodec/Makefile |   1 +
 libavcodec/bitstream_filters.c  |   1 +
 libavcodec/vp9_superframe_bsf.c | 207 
 5 files changed, 214 insertions(+)

diff --git a/Changelog b/Changelog
index 04972399fd..b3aed3cd7e 100644
--- a/Changelog
+++ b/Changelog
@@ -12,6 +12,7 @@ version :
 - The x86 assembler default switched from yasm to nasm, pass
   --x86asmexe=yasm to configure to restore the old behavior.
 - Cineform HD decoder
+- VP9 superframe split/merge bitstream filters
 
 
 version 12:
diff --git a/doc/bitstream_filters.texi b/doc/bitstream_filters.texi
index 49b8a645d0..ddadd7653e 100644
--- a/doc/bitstream_filters.texi
+++ b/doc/bitstream_filters.texi
@@ -95,6 +95,10 @@ This bitstream filter passes the packets through unchanged.
 
 @section remove_extradata
 
+@section vp9_superframe
+
+Combine VP9 frames into superframes.
+
 @section vp9_superframe_split
 
 Split VP9 superframes into single frames.
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 3a55a28e27..80c572c7c4 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -772,6 +772,7 @@ OBJS-$(CONFIG_NOISE_BSF)  += noise_bsf.o
 OBJS-$(CONFIG_NULL_BSF)   += null_bsf.o
 OBJS-$(CONFIG_REMOVE_EXTRADATA_BSF)   += remove_extradata_bsf.o
 OBJS-$(CONFIG_TEXT2MOVSUB_BSF)+= movsub_bsf.o
+OBJS-$(CONFIG_VP9_SUPERFRAME_BSF) += vp9_superframe_bsf.o
 OBJS-$(CONFIG_VP9_SUPERFRAME_SPLIT_BSF)   += vp9_superframe_split_bsf.o
 
 # thread libraries
diff --git a/libavcodec/bitstream_filters.c b/libavcodec/bitstream_filters.c
index 1cea6d77af..d46fdad81b 100644
--- a/libavcodec/bitstream_filters.c
+++ b/libavcodec/bitstream_filters.c
@@ -38,6 +38,7 @@ extern const AVBitStreamFilter ff_null_bsf;
 extern const AVBitStreamFilter ff_text2movsub_bsf;
 extern const AVBitStreamFilter ff_noise_bsf;
 extern const AVBitStreamFilter ff_remove_extradata_bsf;
+extern const AVBitStreamFilter ff_vp9_superframe_bsf;
 extern const AVBitStreamFilter ff_vp9_superframe_split_bsf;
 
 #include "libavcodec/bsf_list.c"
diff --git a/libavcodec/vp9_superframe_bsf.c b/libavcodec/vp9_superframe_bsf.c
new file mode 100644
index 0000000000..be83ba3e75
--- /dev/null
+++ b/libavcodec/vp9_superframe_bsf.c
@@ -0,0 +1,207 @@
+/*
+ * VP9 invisible (alt-ref) frame to superframe merge bitstream filter
+ * Copyright (c) 2016 Ronald S. Bultje 
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+#include "avcodec.h"
+#include "bsf.h"
+#include "get_bits.h"
+
+#define MAX_CACHE 8
+typedef struct VP9BSFContext {
+int n_cache;
+struct CachedBuf {
+uint8_t *data;
+int size;
+} cache[MAX_CACHE];
+} VP9BSFContext;
+
+static void stats(const struct CachedBuf *in, int n_in,
+  unsigned *_max, unsigned *_sum)
+{
+int n;
+unsigned max = 0, sum = 0;
+
+for (n = 0; n < n_in; n++) {
+unsigned sz = in[n].size;
+
+if (sz > max)
+max = sz;
+sum += sz;
+}
+
+*_max = max;
+*_sum = sum;
+}
+
+static int merge_superframe(const struct CachedBuf *in, int n_in, AVPacket 
*out)
+{
+unsigned max, sum, mag, marker, n, sz;
+uint8_t *ptr;
+int res;
+
+stats(in, n_in, &max, &sum);
+mag = av_log2(max) >> 3;
+marker = 0xC0 + (mag << 3) + (n_in - 1);
+sz = sum + 2 + (mag + 1) * n_in;
+res = av_new_packet(out, sz);
+if (res < 0)
+return res;
+ptr = out->data;
+for (n = 0; n < n_in; n++) {
+memcpy(ptr, in[n].data, in[n].size);
+ptr += in[n].size;
+}
+
+#define wloop(mag, wr) do { \
+for (n = 0; n < n_in; n++) { \
+wr; \
+ptr += mag + 1; \
+

Re: [FFmpeg-cvslog] [FFmpeg-devel] avfilter: add vmafmotion filter

2017-10-07 Thread Ronald S. Bultje

Hi,

On Thu, Oct 5, 2017 at 7:52 PM, Michael Niedermayer 
wrote:

> On Sat, Sep 30, 2017 at 03:51:41PM +, Ashish Singh wrote:
> > ffmpeg | branch: master | Ashish Singh  | Sat Sep
> 16 02:35:58 2017 +0530| [148c8e88c43cfbabd6aee9f01ef30942cee9d359] |
> committer: Ronald S. Bultje
> >
> > avfilter: add vmafmotion filter
> >
> > Signed-off-by: Ashish Singh 
> > Signed-off-by: Ronald S. Bultje 
> >
> > > http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=
> 148c8e88c43cfbabd6aee9f01ef30942cee9d359
> > ---
> >
> >  Changelog   |   1 +
> >  doc/filters.texi|  14 ++
> >  libavfilter/Makefile|   1 +
> >  libavfilter/allfilters.c|   1 +
> >  libavfilter/vf_vmafmotion.c | 365 ++
> ++
> >  libavfilter/vmaf_motion.h   |  58 +++
> >  6 files changed, 440 insertions(+)
> [...]
> > +static av_cold int init(AVFilterContext *ctx)
> > +{
> > +VMAFMotionContext *s = ctx->priv;
> > +
> > +if (s->stats_file_str) {
> > +if (!strcmp(s->stats_file_str, "-")) {
>
> > +s->stats_file = stdout;
>
> Using stdout can interfere with the user application using the filter
>
>
> > +} else {
>
> > +s->stats_file = fopen(s->stats_file_str, "w");
>
> Opening a filter parameter provided string for writing is a dangerous
> way to output data. It allows one with access to the parameters to
> overwrite any writable file
>
> data should only be output in a safe way
>

The same mechanism is present in ssim/psnr filters. I'm open to any
alternative method you suggest. These are only settable using explicit user
interaction (and are disabled by default) so I don't particularly see the
problem.

Ronald
___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] vp9: fix explicit memory order for report_progress.

2017-09-12 Thread Ronald S . Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Tue Sep 12 
15:02:20 2017 -0400| [1db03e952b4ee998f2a19c037f60d17dc90e8f6c] | committer: 
Ronald S. Bultje

vp9: fix explicit memory order for report_progress.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1db03e952b4ee998f2a19c037f60d17dc90e8f6c
---

 libavcodec/vp9.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index 66ccb6c49c..6b5de19266 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -73,7 +73,7 @@ static int vp9_alloc_entries(AVCodecContext *avctx, int n) {
 
 static void vp9_report_tile_progress(VP9Context *s, int field, int n) {
 pthread_mutex_lock(&s->progress_mutex);
-atomic_fetch_add_explicit(&s->entries[field], n, memory_order_relaxed);
+atomic_fetch_add_explicit(&s->entries[field], n, memory_order_release);
 pthread_cond_signal(&s->progress_cond);
 pthread_mutex_unlock(&s->progress_mutex);
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] frame_thread_encoder: make 'exit' member atomic.

2017-09-12 Thread Ronald S . Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Mon Sep 11 
09:58:47 2017 -0400| [183216b21870f21c86c904a7530d53682d7db46d] | committer: 
Ronald S. Bultje

frame_thread_encoder: make 'exit' member atomic.

Should fix the following tsan warning:

WARNING: ThreadSanitizer: data race (pid=19806)
  Read of size 4 at 0x7b8412f0 by thread T9:
#0 worker src/libavcodec/frame_thread_encoder.c:66 (ffmpeg+0x007f349e)
[..]
  Previous write of size 4 at 0x7b8412f0 by main thread (mutexes: write 
M1395):
#0 ff_frame_thread_encoder_free src/libavcodec/frame_thread_encoder.c:239 
(ffmpeg+0x007f379e)
[..]

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=183216b21870f21c86c904a7530d53682d7db46d
---

 libavcodec/frame_thread_encoder.c | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/libavcodec/frame_thread_encoder.c 
b/libavcodec/frame_thread_encoder.c
index 33928fe5f0..35a37c4372 100644
--- a/libavcodec/frame_thread_encoder.c
+++ b/libavcodec/frame_thread_encoder.c
@@ -18,6 +18,8 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include <stdatomic.h>
+
 #include "frame_thread_encoder.h"
 
 #include "libavutil/fifo.h"
@@ -55,7 +57,7 @@ typedef struct{
 unsigned finished_task_index;
 
 pthread_t worker[MAX_THREADS];
-int exit;
+atomic_int exit;
 } ThreadContext;
 
 static void * attribute_align_arg worker(void *v){
@@ -63,7 +65,7 @@ static void * attribute_align_arg worker(void *v){
 ThreadContext *c = avctx->internal->frame_thread_encoder;
 AVPacket *pkt = NULL;
 
-while(!c->exit){
+while (!atomic_load(&c->exit)) {
 int got_packet, ret;
 AVFrame *frame;
 Task task;
@@ -73,8 +75,8 @@ static void * attribute_align_arg worker(void *v){
 av_init_packet(pkt);
 
 pthread_mutex_lock(&c->task_fifo_mutex);
-while (av_fifo_size(c->task_fifo) <= 0 || c->exit) {
-if(c->exit){
+while (av_fifo_size(c->task_fifo) <= 0 || atomic_load(&c->exit)) {
+if (atomic_load(&c->exit)) {
 pthread_mutex_unlock(&c->task_fifo_mutex);
 goto end;
 }
@@ -187,6 +189,7 @@ int ff_frame_thread_encoder_init(AVCodecContext *avctx, 
AVDictionary *options){
 pthread_mutex_init(&c->buffer_mutex, NULL);
 pthread_cond_init(&c->task_fifo_cond, NULL);
 pthread_cond_init(&c->finished_task_cond, NULL);
+atomic_init(&c->exit, 0);
 
 for(i=0; i<avctx->thread_count ; i++){
 AVDictionary *tmp = NULL;
@@ -236,7 +239,7 @@ void ff_frame_thread_encoder_free(AVCodecContext *avctx){
 ThreadContext *c= avctx->internal->frame_thread_encoder;
 
 pthread_mutex_lock(&c->task_fifo_mutex);
-c->exit = 1;
+atomic_store(&c->exit, 1);
 pthread_cond_broadcast(&c->task_fifo_cond);
 pthread_mutex_unlock(&c->task_fifo_mutex);
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] vp9: assert -> av_assert and fix associated compile error.

2017-09-11 Thread Ronald S . Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Mon Sep 11 
15:41:24 2017 -0400| [4ce99e96d6115ccd1fc82f826d4c628240ef53ed] | committer: 
Ronald S. Bultje

vp9: assert -> av_assert and fix associated compile error.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4ce99e96d6115ccd1fc82f826d4c628240ef53ed
---

 libavcodec/vp9.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index f626f815b9..66ccb6c49c 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -1603,7 +1603,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
 if (avctx->active_thread_type == FF_THREAD_SLICE) {
 int tile_row, tile_col;
 
-assert(!pass);
+av_assert1(!s->pass);
 
 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) 
{
 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; 
tile_col++) {

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] vp9: fix compilation with threading disabled.

2017-09-11 Thread Ronald S . Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Sat Sep  9 
23:24:31 2017 -0400| [9bab39dee52a44ff97975aafc70b8b428d8ca7b6] | committer: 
Ronald S. Bultje

vp9: fix compilation with threading disabled.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9bab39dee52a44ff97975aafc70b8b428d8ca7b6
---

 libavcodec/vp9.c| 15 +--
 libavcodec/vp9dec.h |  2 ++
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index a71045e081..f626f815b9 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -88,10 +88,8 @@ static void vp9_await_tile_progress(VP9Context *s, int 
field, int n) {
 pthread_mutex_unlock(&s->progress_mutex);
 }
 #else
-static void vp9_free_entries(VP9Context *s) {}
+static void vp9_free_entries(AVCodecContext *avctx) {}
 static int vp9_alloc_entries(AVCodecContext *avctx, int n) { return 0; }
-static void vp9_report_tile_progress(VP9Context *s, int field, int n) {}
-static void vp9_await_tile_progress(VP9Context *s, int field, int n) {}
 #endif
 
 static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
@@ -1343,7 +1341,7 @@ static int decode_tiles(AVCodecContext *avctx,
 return 0;
 }
 
-
+#if HAVE_THREADS
 static av_always_inline
 int decode_tiles_mt(AVCodecContext *avctx, void *tdata, int jobnr,
   int threadnr)
@@ -1451,7 +1449,7 @@ int loopfilter_proc(AVCodecContext *avctx)
 }
 return 0;
 }
-
+#endif
 
 static int vp9_decode_frame(AVCodecContext *avctx, void *frame,
 int *got_frame, AVPacket *pkt)
@@ -1583,10 +1581,12 @@ FF_ENABLE_DEPRECATION_WARNINGS
 ff_thread_finish_setup(avctx);
 }
 
+#if HAVE_THREADS
 if (avctx->active_thread_type & FF_THREAD_SLICE) {
 for (i = 0; i < s->sb_rows; i++)
 atomic_store(&s->entries[i], 0);
 }
+#endif
 
 do {
 for (i = 0; i < s->active_tile_cols; i++) {
@@ -1599,6 +1599,7 @@ FF_ENABLE_DEPRECATION_WARNINGS
 s->td[i].uveob[1] = s->td[i].uveob_base[1];
 }
 
+#if HAVE_THREADS
 if (avctx->active_thread_type == FF_THREAD_SLICE) {
 int tile_row, tile_col;
 
@@ -1629,7 +1630,9 @@ FF_ENABLE_DEPRECATION_WARNINGS
 }
 
 ff_slice_thread_execute_with_mainfunc(avctx, decode_tiles_mt, 
loopfilter_proc, s->td, NULL, s->s.h.tiling.tile_cols);
-} else {
+} else
+#endif
+{
 ret = decode_tiles(avctx, data, size);
 if (ret < 0)
 return ret;
diff --git a/libavcodec/vp9dec.h b/libavcodec/vp9dec.h
index 15e0122918..96c0e43cd2 100644
--- a/libavcodec/vp9dec.h
+++ b/libavcodec/vp9dec.h
@@ -98,9 +98,11 @@ typedef struct VP9Context {
 VP56RangeCoder c;
 int pass, active_tile_cols;
 
+#if HAVE_THREADS
 pthread_mutex_t progress_mutex;
 pthread_cond_t progress_cond;
 atomic_int *entries;
+#endif
 
 uint8_t ss_h, ss_v;
 uint8_t last_bpp, bpp_index, bytesperpixel;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] vf_spp: only assign function pointers if permutation matches expectations.

2017-06-24 Thread Ronald S . Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Fri Jun 23 
11:01:38 2017 -0400| [97f7f831691f2a2bddbd258bcbe332516d64a91b] | committer: 
Ronald S. Bultje

vf_spp: only assign function pointers if permutation matches expectations.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=97f7f831691f2a2bddbd258bcbe332516d64a91b
---

 libavfilter/x86/vf_spp.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/libavfilter/x86/vf_spp.c b/libavfilter/x86/vf_spp.c
index 45a9eb068c..7d5da610da 100644
--- a/libavfilter/x86/vf_spp.c
+++ b/libavfilter/x86/vf_spp.c
@@ -21,6 +21,7 @@
 
 #include "libavutil/attributes.h"
 #include "libavutil/cpu.h"
+#include "libavutil/crc.h"
 #include "libavutil/mem.h"
 #include "libavutil/x86/asm.h"
 #include "libavfilter/vf_spp.h"
@@ -223,10 +224,15 @@ av_cold void ff_spp_init_x86(SPPContext *s)
 int cpu_flags = av_get_cpu_flags();
 
 if (cpu_flags & AV_CPU_FLAG_MMX) {
+static const uint32_t mmx_idct_perm_crc = 0xe5e8adc4;
+uint32_t idct_perm_crc =
+av_crc(av_crc_get_table(AV_CRC_32_IEEE), 0,
+   s->dct->idct_permutation,
+   sizeof(s->dct->idct_permutation));
 int64_t bps;
 s->store_slice = store_slice_mmx;
 av_opt_get_int(s->dct, "bits_per_sample", 0, &bps);
-if (bps <= 8) {
+if (bps <= 8 && idct_perm_crc == mmx_idct_perm_crc) {
 switch (s->mode) {
 case 0: s->requantize = hardthresh_mmx; break;
 case 1: s->requantize = softthresh_mmx; break;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] mdec: stop preferring the simple IDCT.

2017-06-21 Thread Ronald S . Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Wed Jun 21 
09:01:20 2017 -0400| [e639d09199dd7818a24786fb6e8ddd048ede4372] | committer: 
Ronald S. Bultje

mdec: stop preferring the simple IDCT.

This was added in e3e3c82555e2382125195c1ba9f34b5a43299abc, probably
as a workaround for the fact that the quant table was not permutated
and the IDCT coefficients are, meaning that you'd only get correct
reconstruction if the IDCT permutation was an identity matrix, which
happens to be the case when you use the simple IDCT. The quant table
permutation bug was fixed in 42dd1434bf6a7230e4175c08fcfabc3ba51a0463,
meaning this workaround is no longer necessary.

In practical terms, before 42dd1434bf6a7230e4175c08fcfabc3ba51a0463,
the PSNR between decodes of the fate-mdec using simple (C) or simplemmx
IDCTs was 35. After 42dd1434bf6a7230e4175c08fcfabc3ba51a0463, it's 90.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e639d09199dd7818a24786fb6e8ddd048ede4372
---

 libavcodec/mdec.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/libavcodec/mdec.c b/libavcodec/mdec.c
index fabc19fd9a..59658b331d 100644
--- a/libavcodec/mdec.c
+++ b/libavcodec/mdec.c
@@ -215,9 +215,6 @@ static av_cold int decode_init(AVCodecContext *avctx)
 MDECContext * const a = avctx->priv_data;
 int i;
 
-if (avctx->idct_algo == FF_IDCT_AUTO)
-avctx->idct_algo = FF_IDCT_SIMPLE;
-
 a->mb_width  = (avctx->coded_width  + 15) / 16;
 a->mb_height = (avctx->coded_height + 15) / 16;
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] mdec: use correctly permutated quant matrix for dequantization.

2017-06-21 Thread Ronald S . Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Wed Jun 21 
08:53:43 2017 -0400| [42dd1434bf6a7230e4175c08fcfabc3ba51a0463] | committer: 
Ronald S. Bultje

mdec: use correctly permutated quant matrix for dequantization.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=42dd1434bf6a7230e4175c08fcfabc3ba51a0463
---

 libavcodec/mdec.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/libavcodec/mdec.c b/libavcodec/mdec.c
index 97bfebbeb7..fabc19fd9a 100644
--- a/libavcodec/mdec.c
+++ b/libavcodec/mdec.c
@@ -49,6 +49,7 @@ typedef struct MDECContext {
 int mb_height;
 int mb_x, mb_y;
 DECLARE_ALIGNED(16, int16_t, block)[6][64];
+DECLARE_ALIGNED(16, uint16_t, quant_matrix)[64];
 uint8_t *bitstream_buffer;
 unsigned int bitstream_buffer_size;
 int block_last_index[6];
@@ -61,7 +62,7 @@ static inline int mdec_decode_block_intra(MDECContext *a, 
int16_t *block, int n)
 int component;
 RLTable *rl = &ff_rl_mpeg1;
 uint8_t * const scantable = a->scantable.permutated;
-const uint16_t *quant_matrix = ff_mpeg1_default_intra_matrix;
+const uint16_t *quant_matrix = a->quant_matrix;
 const int qscale = a->qscale;
 
 /* DC coefficient */
@@ -212,6 +213,7 @@ static int decode_frame(AVCodecContext *avctx,
 static av_cold int decode_init(AVCodecContext *avctx)
 {
 MDECContext * const a = avctx->priv_data;
+int i;
 
 if (avctx->idct_algo == FF_IDCT_AUTO)
 avctx->idct_algo = FF_IDCT_SIMPLE;
@@ -231,6 +233,13 @@ static av_cold int decode_init(AVCodecContext *avctx)
 avctx->pix_fmt  = AV_PIX_FMT_YUVJ420P;
 avctx->color_range = AVCOL_RANGE_JPEG;
 
+/* init q matrix */
+for (i = 0; i < 64; i++) {
+int j = a->idsp.idct_permutation[i];
+
+a->quant_matrix[j] = ff_mpeg1_default_intra_matrix[i];
+}
+
 return 0;
 }
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] vp9: fix overwrite in ff_vp9_ipred_dr_16x16_16_avx2.

2017-06-14 Thread Ronald S . Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Wed Jun 14 
09:44:26 2017 -0400| [d35ff98e270d904481ab75d58d6cf6badf85e1b2] | committer: 
Ronald S. Bultje

vp9: fix overwrite in ff_vp9_ipred_dr_16x16_16_avx2.

Fixes trac issue 6459.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d35ff98e270d904481ab75d58d6cf6badf85e1b2
---

 libavcodec/x86/vp9intrapred_16bpp.asm | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libavcodec/x86/vp9intrapred_16bpp.asm 
b/libavcodec/x86/vp9intrapred_16bpp.asm
index 764f7041d3..6d4400ba8b 100644
--- a/libavcodec/x86/vp9intrapred_16bpp.asm
+++ b/libavcodec/x86/vp9intrapred_16bpp.asm
@@ -1219,9 +1219,7 @@ cglobal vp9_ipred_dr_16x16_16, 4, 5, 6, dst, stride, l, a
 vpalignrm4, m1, m5, 14
 mova  [dstq+strideq*8], m3 ; 8
 mova  [dstq+strideq*0], m4 ; 0
-sub   dstq, strideq
 mova [dst3q+strideq*4], m5 ; 7
-mova [ dstq+strideq*0], m1 ; -1
 RET
 %endif
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] hevc: fix race condition in max_ra/seq_decode.

2017-05-25 Thread Ronald S . Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Wed May 24 
11:46:07 2017 -0400| [ca2209d67af0a73fe0edb2fce1cea2445dbfd8db] | committer: 
Ronald S. Bultje

hevc: fix race condition in max_ra/seq_decode.

These variables are shared between frame threads, but they are updated
post-setup_finished() if an EOB/EOS slice type occurs. Moving the EOB/EOS
slices to the next frame thread instance (by parsing them leading into
the next picture instead of trailing behind the last picture) effectively
prevents this race condition.

This fixes tsan failures on hevc-conformance-NoOutPrior_A_Qualcomm_1.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=ca2209d67af0a73fe0edb2fce1cea2445dbfd8db
---

 libavcodec/hevc_parser.c |  2 +-
 libavcodec/hevcdec.c | 12 ++--
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/libavcodec/hevc_parser.c b/libavcodec/hevc_parser.c
index c72b1465f1..2b58eb62fe 100644
--- a/libavcodec/hevc_parser.c
+++ b/libavcodec/hevc_parser.c
@@ -267,7 +267,7 @@ static int hevc_find_frame_end(AVCodecParserContext *s, 
const uint8_t *buf,
 
 nut = (pc->state64 >> 2 * 8 + 1) & 0x3F;
 // Beginning of access unit
-if ((nut >= HEVC_NAL_VPS && nut <= HEVC_NAL_AUD) || nut == 
HEVC_NAL_SEI_PREFIX ||
+if ((nut >= HEVC_NAL_VPS && nut <= HEVC_NAL_EOB_NUT) || nut == 
HEVC_NAL_SEI_PREFIX ||
 (nut >= 41 && nut <= 44) || (nut >= 48 && nut <= 55)) {
 if (pc->frame_start_found) {
 pc->frame_start_found = 0;
diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index ee001fd9f2..fc9a5b749e 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -2890,6 +2890,7 @@ fail:
 static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
 {
 int i, ret = 0;
+int eos_at_start = 1;
 
 s->ref = NULL;
 s->last_eos = s->eos;
@@ -2907,8 +2908,15 @@ static int decode_nal_units(HEVCContext *s, const 
uint8_t *buf, int length)
 
 for (i = 0; i < s->pkt.nb_nals; i++) {
 if (s->pkt.nals[i].type == HEVC_NAL_EOB_NUT ||
-s->pkt.nals[i].type == HEVC_NAL_EOS_NUT)
-s->eos = 1;
+s->pkt.nals[i].type == HEVC_NAL_EOS_NUT) {
+if (eos_at_start) {
+s->last_eos = 1;
+} else {
+s->eos = 1;
+}
+} else {
+eos_at_start = 0;
+}
 }
 
 /* decode the NAL units */

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] frame_thread_encoder: extend critical code covered by finished_task_mutex.

2017-05-25 Thread Ronald S . Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Thu May 25 
08:20:21 2017 -0400| [d98f34d7d440ab3671be4e55c00d2cc7bf007eab] | committer: 
Ronald S. Bultje

frame_thread_encoder: extend critical code covered by finished_task_mutex.

Should fix tsan errors in utvideoenc_rgb_left and related tests.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d98f34d7d440ab3671be4e55c00d2cc7bf007eab
---

 libavcodec/frame_thread_encoder.c | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/libavcodec/frame_thread_encoder.c 
b/libavcodec/frame_thread_encoder.c
index 27ae356af3..6cf1a68fe7 100644
--- a/libavcodec/frame_thread_encoder.c
+++ b/libavcodec/frame_thread_encoder.c
@@ -272,15 +272,16 @@ int ff_thread_video_encode_frame(AVCodecContext *avctx, 
AVPacket *pkt, const AVF
 pthread_mutex_unlock(&c->task_fifo_mutex);
 
 c->task_index = (c->task_index+1) % BUFFER_SIZE;
-
-if(!c->finished_tasks[c->finished_task_index].outdata && 
(c->task_index - c->finished_task_index) % BUFFER_SIZE <= avctx->thread_count)
-return 0;
 }
 
-if(c->task_index == c->finished_task_index)
-return 0;
-
 pthread_mutex_lock(&c->finished_task_mutex);
+if (c->task_index == c->finished_task_index ||
+(frame && !c->finished_tasks[c->finished_task_index].outdata &&
+ (c->task_index - c->finished_task_index) % BUFFER_SIZE <= 
avctx->thread_count)) {
+pthread_mutex_unlock(&c->finished_task_mutex);
+return 0;
+}
+
 while (!c->finished_tasks[c->finished_task_index].outdata) {
 pthread_cond_wait(&c->finished_task_cond, &c->finished_task_mutex);
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] pthread_frame: make accesses to debug field be protected by owner lock.

2017-04-12 Thread Ronald S . Bultje

ffmpeg | branch: release/3.3 | Ronald S. Bultje  | Thu Apr  
6 13:58:59 2017 -0400| [1968a1eef1cae22e162259d7082c2eea98d81e32] | committer: 
Michael Niedermayer

pthread_frame: make accesses to debug field be protected by owner lock.

The av_log() is done outside the lock, but this way the accesses to the
field (reads and writes) are always protected by a mutex. The av_log()
is not run inside the lock context because it may involve user callbacks
and doing that in performance-sensitive code is probably not a good idea.

This should fix occasional tsan warnings when running fate-h264, like:

WARNING: ThreadSanitizer: data race (pid=10916)
  Write of size 4 at 0x7d64000174fc by main thread (mutexes: write M2313):
#0 update_context_from_user src/libavcodec/pthread_frame.c:335 
(ffmpeg+0x00df7b06)
[..]
  Previous read of size 4 at 0x7d64000174fc by thread T1 (mutexes: write M2311):
#0 ff_thread_await_progress src/libavcodec/pthread_frame.c:592 
(ffmpeg+0x00df8b3e)

(cherry picked from commit 2e664b9c1e73c80aab91070c1eb7676f04bdd12d)
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1968a1eef1cae22e162259d7082c2eea98d81e32
---

 libavcodec/pthread_frame.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index c246c2fded..0ba87b5929 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -566,12 +566,11 @@ void ff_thread_report_progress(ThreadFrame *f, int n, int 
field)
 
 p = f->owner[field]->internal->thread_ctx;
 
+pthread_mutex_lock(&p->progress_mutex);
 if (f->owner[field]->debug&FF_DEBUG_THREADS)
 av_log(f->owner[field], AV_LOG_DEBUG,
"%p finished %d field %d\n", progress, n, field);
 
-pthread_mutex_lock(&p->progress_mutex);
-
 atomic_store_explicit(&progress[field], n, memory_order_release);
 
 pthread_cond_broadcast(&p->progress_cond);
@@ -589,11 +588,10 @@ void ff_thread_await_progress(ThreadFrame *f, int n, int 
field)
 
 p = f->owner[field]->internal->thread_ctx;
 
+pthread_mutex_lock(&p->progress_mutex);
 if (f->owner[field]->debug&FF_DEBUG_THREADS)
 av_log(f->owner[field], AV_LOG_DEBUG,
"thread awaiting %d field %d from %p\n", n, field, progress);
-
-pthread_mutex_lock(&p->progress_mutex);
 while (atomic_load_explicit(&progress[field], memory_order_relaxed) < n)
 pthread_cond_wait(&p->progress_cond, &p->progress_mutex);
 pthread_mutex_unlock(&p->progress_mutex);

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] pthread_frame: allow per-field ThreadFrame owners.

2017-04-12 Thread Ronald S . Bultje

ffmpeg | branch: release/3.3 | Ronald S. Bultje  | Mon Apr  
3 10:24:05 2017 -0400| [f4f3bf3c94a9aa485c09d0c03d68ca79a4785866] | committer: 
Michael Niedermayer

pthread_frame: allow per-field ThreadFrame owners.

This tries to handle cases where separate invocations of decode_frame()
(each running in separate threads) write to respective fields in the
same AVFrame->data[]. Having per-field owners makes interaction between
readers (the referencing thread) and writers (the decoding thread)
slightly more optimal if both accesses are field-based, since they will
use the respective producer's thread objects (mutex/cond) instead of
sharing the thread objects of the first field's producer.

In practice, this fixes the following tsan-warning in fate-h264:

WARNING: ThreadSanitizer: data race (pid=21615)
  Read of size 4 at 0x7d64d9fc by thread T2 (mutexes: write M1006):
#0 ff_thread_report_progress pthread_frame.c:569 (ffmpeg:x86_64+0x100f7cf54)
[..]
  Previous write of size 4 at 0x7d64d9fc by main thread (mutexes: write 
M1004):
#0 update_context_from_user pthread_frame.c:335 (ffmpeg:x86_64+0x100f81abb)

(cherry picked from commit 083300bea935d125b83f60d7030f78a7ffb0f3df)
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f4f3bf3c94a9aa485c09d0c03d68ca79a4785866
---

 libavcodec/h264_slice.c|  8 +---
 libavcodec/pthread_frame.c | 18 ++
 libavcodec/thread.h|  2 +-
 libavcodec/utils.c |  7 ---
 4 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 70ad05f640..acf6a73f60 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -1423,14 +1423,14 @@ static int h264_field_start(H264Context *h, const 
H264SliceContext *sl,
  * We have to do that before the "dummy" in-between frame allocation,
  * since that can modify h->cur_pic_ptr. */
 if (h->first_field) {
+int last_field = last_pic_structure == PICT_BOTTOM_FIELD;
 av_assert0(h->cur_pic_ptr);
 av_assert0(h->cur_pic_ptr->f->buf[0]);
 assert(h->cur_pic_ptr->reference != DELAYED_PIC_REF);
 
 /* Mark old field/frame as completed */
-if (h->cur_pic_ptr->tf.owner == h->avctx) {
-ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX,
-  last_pic_structure == PICT_BOTTOM_FIELD);
+if (h->cur_pic_ptr->tf.owner[last_field] == h->avctx) {
+ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX, 
last_field);
 }
 
 /* figure out if we have a complementary field pair */
@@ -1568,7 +1568,9 @@ static int h264_field_start(H264Context *h, const 
H264SliceContext *sl,
 return AVERROR_INVALIDDATA;
 }
 } else {
+int field = h->picture_structure == PICT_BOTTOM_FIELD;
 release_unused_pictures(h, 0);
+h->cur_pic_ptr->tf.owner[field] = h->avctx;
 }
 /* Some macroblocks can be accessed before they're available in case
 * of lost slices, MBAFF or threading. */
diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index 9a6b83ac45..c246c2fded 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -564,10 +564,11 @@ void ff_thread_report_progress(ThreadFrame *f, int n, int 
field)
 atomic_load_explicit(&progress[field], memory_order_relaxed) >= n)
 return;
 
-p = f->owner->internal->thread_ctx;
+p = f->owner[field]->internal->thread_ctx;
 
-if (f->owner->debug&FF_DEBUG_THREADS)
-av_log(f->owner, AV_LOG_DEBUG, "%p finished %d field %d\n", progress, 
n, field);
+if (f->owner[field]->debug&FF_DEBUG_THREADS)
+av_log(f->owner[field], AV_LOG_DEBUG,
+   "%p finished %d field %d\n", progress, n, field);
 
 pthread_mutex_lock(&p->progress_mutex);
 
@@ -586,10 +587,11 @@ void ff_thread_await_progress(ThreadFrame *f, int n, int 
field)
 atomic_load_explicit(&progress[field], memory_order_acquire) >= n)
 return;
 
-p = f->owner->internal->thread_ctx;
+p = f->owner[field]->internal->thread_ctx;
 
-if (f->owner->debug&FF_DEBUG_THREADS)
-av_log(f->owner, AV_LOG_DEBUG, "thread awaiting %d field %d from 
%p\n", n, field, progress);
+if (f->owner[field]->debug&FF_DEBUG_THREADS)
+av_log(f->owner[field], AV_LOG_DEBUG,
+   "thread awaiting %d field %d from %p\n", n, field, progress);
 
 pthread_mutex_lock(&p->progress_mutex);
 while (atomic_load_explicit(&progress[field], memory_order_relaxed) < n)
@@ -882,7 +884,7 @@ static int thread_get_buffer_internal(AVCodecContext 
*avctx, ThreadFrame *f, in

[FFmpeg-cvslog] vp8: make mv_min/max thread-local if using partition threading.

2017-04-12 Thread Ronald S . Bultje

ffmpeg | branch: release/3.3 | Ronald S. Bultje  | Wed Apr  
5 16:19:55 2017 -0400| [6557ea8e2bd768f5d82bce0fab28262437a292bf] | committer: 
Michael Niedermayer

vp8: make mv_min/max thread-local if using partition threading.

Fixes tsan warnings like this in fate-vp8-test-vector-007:

WARNING: ThreadSanitizer: data race (pid=65909)
  Write of size 4 at 0x7d8ce088 by thread T1:
#0 vp8_decode_mb_row_sliced vp8.c:2519 (ffmpeg:x86_64+0x100995ede)
[..]
  Previous write of size 4 at 0x7d8ce088 by thread T2:
#0 vp8_decode_mb_row_sliced vp8.c:2519 (ffmpeg:x86_64+0x100995ede)

(cherry picked from commit fed92adbb3fc6cbf735e3df9a2f7d0a2917fcfbd)
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6557ea8e2bd768f5d82bce0fab28262437a292bf
---

 libavcodec/vp8.c | 53 -
 libavcodec/vp8.h | 19 ---
 2 files changed, 40 insertions(+), 32 deletions(-)

diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index 9bc1d95220..fe7aa23491 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -772,7 +772,7 @@ static int vp8_decode_frame_header(VP8Context *s, const 
uint8_t *buf, int buf_si
 }
 
 static av_always_inline
-void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
+void clamp_mv(VP8mvbounds *s, VP56mv *dst, const VP56mv *src)
 {
 dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
  av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
@@ -1031,7 +1031,7 @@ void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
 }
 
 static av_always_inline
-void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
+void vp8_decode_mvs(VP8Context *s, VP8mvbounds *mv_bounds, VP8Macroblock *mb,
 int mb_x, int mb_y, int layout)
 {
 VP8Macroblock *mb_edge[3] = { 0  /* top */,
@@ -1102,7 +1102,7 @@ void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
 if (vp56_rac_get_prob_branchy(c, 
vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
 if (vp56_rac_get_prob_branchy(c, 
vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
 /* Choose the best mv out of 0,0 and the nearest mv */
-clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= 
cnt[CNT_ZERO])]);
+clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + 
(cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode== 
VP8_MVMODE_SPLIT) +
 (mb_edge[VP8_EDGE_TOP]->mode == 
VP8_MVMODE_SPLIT)) * 2 +
 (mb_edge[VP8_EDGE_TOPLEFT]->mode == 
VP8_MVMODE_SPLIT);
@@ -1116,11 +1116,11 @@ void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
 mb->bmv[0] = mb->mv;
 }
 } else {
-clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
+clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]);
 mb->bmv[0] = mb->mv;
 }
 } else {
-clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
+clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]);
 mb->bmv[0] = mb->mv;
 }
 } else {
@@ -1166,7 +1166,8 @@ void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder 
*c, VP8Macroblock *mb,
 }
 
 static av_always_inline
-void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
+void decode_mb_mode(VP8Context *s, VP8mvbounds *mv_bounds,
+VP8Macroblock *mb, int mb_x, int mb_y,
 uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
 {
 VP56RangeCoder *c = &s->c;
@@ -1230,7 +1231,7 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int 
mb_x, int mb_y,
 if (is_vp7)
 vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
 else
-vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
+vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
 } else {
 // intra MB, 16.1
 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, 
s->prob->pred16x16);
@@ -2205,8 +2206,8 @@ void vp78_decode_mv_mb_modes(AVCodecContext *avctx, 
VP8Frame *curframe,
 VP8Context *s = avctx->priv_data;
 int mb_x, mb_y;
 
-s->mv_min.y = -MARGIN;
-s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
+s->mv_bounds.mv_min.y = -MARGIN;
+s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
 VP8Macroblock *mb = s->macroblocks_base +
 ((s->mb_width + 1) * (mb_y + 1) + 1);
@@ -2214,20 +2215,20 @@ void vp78_decode_mv_mb_modes(AVCodecContext *avctx, 
VP8Frame *curframe,
 
 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
 
-s->mv_min.x = -MARGIN;
-

[FFmpeg-cvslog] vp8: make wait/thread_mb_pos atomic.

2017-04-12 Thread Ronald S . Bultje

ffmpeg | branch: release/3.3 | Ronald S. Bultje  | Wed Apr  
5 16:18:54 2017 -0400| [9d742f774a85fa82cbfd667f69b0ba4d14556d54] | committer: 
Michael Niedermayer

vp8: make wait/thread_mb_pos atomic.

Fixes tsan warnings like this in fate-vp8-test-vector-007:

WARNING: ThreadSanitizer: data race (pid=3590)
  Write of size 4 at 0x7d8ce07c by thread T2:
#0 decode_mb_row_no_filter src/libavcodec/vp8.c:2330 (ffmpeg+0x00ffb59e)
[..]
  Previous write of size 4 at 0x7d8ce07c by thread T1:
#0 decode_mb_row_no_filter src/libavcodec/vp8.c:2330 (ffmpeg+0x00ffb59e)

(cherry picked from commit 9a54c6f243412f62bae498ddcac337cb18ae6290)
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9d742f774a85fa82cbfd667f69b0ba4d14556d54
---

 libavcodec/vp8.c | 29 ++---
 libavcodec/vp8.h |  6 --
 2 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index 1e8808c46f..9bc1d95220 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -2247,15 +2247,15 @@ static void vp8_decode_mv_mb_modes(AVCodecContext 
*avctx, VP8Frame *cur_frame,
 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
 do {  \
 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
-if (otd->thread_mb_pos < tmp) {   \
+if (atomic_load(&otd->thread_mb_pos) < tmp) { \
 pthread_mutex_lock(&otd->lock);   \
-td->wait_mb_pos = tmp;\
+atomic_store(&td->wait_mb_pos, tmp);  \
 do {  \
-if (otd->thread_mb_pos >= tmp)\
+if (atomic_load(&otd->thread_mb_pos) >= tmp)  \
 break;\
 pthread_cond_wait(&otd->cond, &otd->lock);\
 } while (1);  \
-td->wait_mb_pos = INT_MAX;\
+atomic_store(&td->wait_mb_pos, INT_MAX);  \
 pthread_mutex_unlock(&otd->lock); \
 } \
 } while (0)
@@ -2266,12 +2266,10 @@ static void vp8_decode_mv_mb_modes(AVCodecContext 
*avctx, VP8Frame *cur_frame,
 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) 
&& \
(num_jobs > 1);\
 int is_null  = !next_td || !prev_td;  \
-int pos_check= (is_null) ? 1  \
- : (next_td != td &&  \
-pos >= next_td->wait_mb_pos) ||   \
-   (prev_td != td &&  \
-pos >= prev_td->wait_mb_pos); \
-td->thread_mb_pos = pos;  \
+int pos_check= (is_null) ? 1 :\
+(next_td != td && pos >= atomic_load(&next_td->wait_mb_pos)) ||   \
+(prev_td != td && pos >= atomic_load(&prev_td->wait_mb_pos)); \
+atomic_store(&td->thread_mb_pos, pos);\
 if (sliced_threading && pos_check) {  \
 pthread_mutex_lock(&td->lock);\
 pthread_cond_broadcast(&td->cond);\
@@ -2288,7 +2286,7 @@ static av_always_inline int 
decode_mb_row_no_filter(AVCodecContext *avctx, void
 {
 VP8Context *s = avctx->priv_data;
 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
-int mb_y = td->thread_mb_pos >> 16;
+int mb_y = atomic_load(&td->thread_mb_pos) >> 16;
 int mb_x, mb_xy = mb_y * s->mb_width;
 int num_jobs = s->num_jobs;
 VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
@@ -2428,7 +2426,7 @@ static av_always_inline void filter_mb_row(AVCodecContext 
*avctx, void *tdata,
 {
 VP8Context *s = avctx->priv_data;
 VP8ThreadData *td = &s->thread_data[threadnr];
-int mb_x, mb_y = td->thr

[FFmpeg-cvslog] huffyuv: assign correct per-thread avctx pointer to HYuvContext::avctx.

2017-04-12 Thread Ronald S . Bultje

ffmpeg | branch: release/3.3 | Ronald S. Bultje  | Mon Apr  
3 22:28:22 2017 -0400| [5e84c94f6962e23b552809280bd89c70d4a7ef5e] | committer: 
Michael Niedermayer

huffyuv: assign correct per-thread avctx pointer to HYuvContext::avctx.

Fixes the following tsan warning when running fate-vsynth_lena-ffvhuff:

WARNING: ThreadSanitizer: data race (pid=6484)
  Write of size 8 at 0x7d64000154b8 by main thread (mutexes: write M1331):
#0 update_context_from_user src/libavcodec/pthread_frame.c:331 
(ffmpeg+0x00dca887)
[..]
  Previous read of size 8 at 0x7d64000154b8 by thread T2 (mutexes: write M1334):
#0 draw_slice src/libavcodec/huffyuvdec.c:857 (ffmpeg+0x00bcc86f)

(cherry picked from commit 7c7e7c44a6eb68eca861e45cb2ce78f582b12c69)
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5e84c94f6962e23b552809280bd89c70d4a7ef5e
---

 libavcodec/huffyuvdec.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavcodec/huffyuvdec.c b/libavcodec/huffyuvdec.c
index 5572b9819f..979c4b9d5c 100644
--- a/libavcodec/huffyuvdec.c
+++ b/libavcodec/huffyuvdec.c
@@ -579,6 +579,8 @@ static av_cold int decode_init_thread_copy(AVCodecContext 
*avctx)
 HYuvContext *s = avctx->priv_data;
 int i, ret;
 
+s->avctx = avctx;
+
 if ((ret = ff_huffyuv_alloc_temp(s)) < 0) {
 ff_huffyuv_common_end(s);
 return ret;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] png: set AVFrame flags/fields before calling setup_finished().

2017-04-12 Thread Ronald S . Bultje

ffmpeg | branch: release/3.3 | Ronald S. Bultje  | Mon Apr  
3 14:43:40 2017 -0400| [e90de50195d4b4b61f3c2c4ea3bb8a09b433de8e] | committer: 
Michael Niedermayer

png: set AVFrame flags/fields before calling setup_finished().

Fixes tsan warnings in fate-apng:

WARNING: ThreadSanitizer: data race (pid=51230)
  Read of size 4 at 0x7d5042fc by main thread (mutexes: write M1000):
#0 frame_copy_props frame.c:302 (ffmpeg:x86_64+0x1019a35d6)
[..]
  Previous write of size 4 at 0x7d5042fc by thread T1 (mutexes: write M997):
#0 decode_idat_chunk pngdec.c:708 (ffmpeg:x86_64+0x100f5562a)

(cherry picked from commit eff2861a757b8a46398e6fcb844b960b4775daad)
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e90de50195d4b4b61f3c2c4ea3bb8a09b433de8e
---

 libavcodec/pngdec.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/pngdec.c b/libavcodec/pngdec.c
index d184c34b81..102551972e 100644
--- a/libavcodec/pngdec.c
+++ b/libavcodec/pngdec.c
@@ -701,12 +701,12 @@ static int decode_idat_chunk(AVCodecContext *avctx, 
PNGDecContext *s,
 if ((ret = ff_thread_get_buffer(avctx, &s->previous_picture, 
AV_GET_BUFFER_FLAG_REF)) < 0)
 return ret;
 }
-ff_thread_finish_setup(avctx);
-
 p->pict_type= AV_PICTURE_TYPE_I;
 p->key_frame= 1;
 p->interlaced_frame = !!s->interlace_type;
 
+ff_thread_finish_setup(avctx);
+
 /* compute the compressed row size */
 if (!s->interlace_type) {
 s->crow_size = s->row_size + 1;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] ffmpeg: make transcode_init_done atomic.

2017-04-12 Thread Ronald S . Bultje

ffmpeg | branch: release/3.3 | Ronald S. Bultje  | Fri Mar 
31 11:27:20 2017 -0400| [f5f0b2f44ce9df08f7a79b32d9209b23ba6b3447] | committer: 
Michael Niedermayer

ffmpeg: make transcode_init_done atomic.

Should fix tsan warnings in fate-fifo-muxer-h264/wav:

WARNING: ThreadSanitizer: data race (pid=26552)
  Write of size 4 at 0x01e0d7c0 by main thread:
#0 transcode_init src/ffmpeg.c:3761 (ffmpeg+0x0050ca1c)
[..]
  Previous read of size 4 at 0x01e0d7c0 by thread T1:
#0 decode_interrupt_cb src/ffmpeg.c:460 (ffmpeg+0x004fde19)

(cherry picked from commit 76d8c77430e9e0110623705bfb54d922cc2ac3ea)
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f5f0b2f44ce9df08f7a79b32d9209b23ba6b3447
---

 ffmpeg.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 11faf0d4a8..ea03179c21 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -30,6 +30,7 @@
 #include 
 #include 
 #include 
+#include <stdatomic.h>
 #include 
 
 #if HAVE_IO_H
@@ -319,7 +320,7 @@ void term_exit(void)
 
 static volatile int received_sigterm = 0;
 static volatile int received_nb_signals = 0;
-static volatile int transcode_init_done = 0;
+static atomic_int transcode_init_done = ATOMIC_VAR_INIT(0);
 static volatile int ffmpeg_exited = 0;
 static int main_return_code = 0;
 
@@ -457,7 +458,7 @@ static int read_key(void)
 
 static int decode_interrupt_cb(void *ctx)
 {
-return received_nb_signals > transcode_init_done;
+return received_nb_signals > atomic_load(&transcode_init_done);
 }
 
 const AVIOInterruptCB int_cb = { decode_interrupt_cb, NULL };
@@ -612,7 +613,7 @@ static void ffmpeg_cleanup(int ret)
 if (received_sigterm) {
 av_log(NULL, AV_LOG_INFO, "Exiting normally, received signal %d.\n",
(int) received_sigterm);
-} else if (ret && transcode_init_done) {
+} else if (ret && atomic_load(&transcode_init_done)) {
 av_log(NULL, AV_LOG_INFO, "Conversion failed!\n");
 }
 term_exit();
@@ -3758,7 +3759,7 @@ static int transcode_init(void)
 return ret;
 }
 
-transcode_init_done = 1;
+atomic_store(&transcode_init_done, 1);
 
 return 0;
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] hevc: only write to max_ra and pocTid0 in the first slice.

2017-04-12 Thread Ronald S . Bultje

ffmpeg | branch: release/3.3 | Ronald S. Bultje  | Mon Apr  
3 09:51:10 2017 -0400| [d1cae50a046763bf14d74e899d90e4ef0be5b551] | committer: 
Michael Niedermayer

hevc: only write to max_ra and pocTid0 in the first slice.

Values from subsequent slices are guaranteed to be identical (since
poc and nal_unit_type are checked to be the same between slices), so
this doesn't affect output in any way, but does resolve the remaining
reported race conditions (by tsan) in fate-hevc.

In practice, this fixes tsan warnings like this:

WARNING: ThreadSanitizer: data race (pid=25334)
  Read of size 4 at 0x7d9c0001adcc by main thread (mutexes: write M1386):
#0 hevc_update_thread_context src/libavcodec/hevcdec.c:3310 
(ffmpeg+0x00b41c7c)
[..]
  Previous write of size 4 at 0x7d9c0001adcc by thread T1 (mutexes: write 
M1383):
#0 hls_slice_header src/libavcodec/hevcdec.c:596 (ffmpeg+0x00b43a22)

(cherry picked from commit 1f50baa2b2da7fdbfccf0662883f38a763ff6619)
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d1cae50a046763bf14d74e899d90e4ef0be5b551
---

 libavcodec/hevcdec.c | 32 
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index ef21595c44..f9e8ff0c9f 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -585,7 +585,7 @@ static int hls_slice_header(HEVCContext *s)
 }
 
 /* 8.3.1 */
-if (s->temporal_id == 0 &&
+if (sh->first_slice_in_pic_flag && s->temporal_id == 0 &&
 s->nal_unit_type != HEVC_NAL_TRAIL_N &&
 s->nal_unit_type != HEVC_NAL_TSA_N   &&
 s->nal_unit_type != HEVC_NAL_STSA_N  &&
@@ -2771,25 +2771,25 @@ static int decode_nal_unit(HEVCContext *s, const 
H2645NAL *nal)
 if (ret < 0)
 return ret;
 
-if (s->max_ra == INT_MAX) {
-if (s->nal_unit_type == HEVC_NAL_CRA_NUT || IS_BLA(s)) {
-s->max_ra = s->poc;
+if (s->sh.first_slice_in_pic_flag) {
+if (s->max_ra == INT_MAX) {
+if (s->nal_unit_type == HEVC_NAL_CRA_NUT || IS_BLA(s)) {
+s->max_ra = s->poc;
+} else {
+if (IS_IDR(s))
+s->max_ra = INT_MIN;
+}
+}
+
+if ((s->nal_unit_type == HEVC_NAL_RASL_R || s->nal_unit_type == 
HEVC_NAL_RASL_N) &&
+s->poc <= s->max_ra) {
+s->is_decoded = 0;
+break;
 } else {
-if (IS_IDR(s))
+if (s->nal_unit_type == HEVC_NAL_RASL_R && s->poc > s->max_ra)
 s->max_ra = INT_MIN;
 }
-}
-
-if ((s->nal_unit_type == HEVC_NAL_RASL_R || s->nal_unit_type == 
HEVC_NAL_RASL_N) &&
-s->poc <= s->max_ra) {
-s->is_decoded = 0;
-break;
-} else {
-if (s->nal_unit_type == HEVC_NAL_RASL_R && s->poc > s->max_ra)
-s->max_ra = INT_MIN;
-}
 
-if (s->sh.first_slice_in_pic_flag) {
 ret = hevc_frame_start(s);
 if (ret < 0)
 return ret;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] png: split header state and data state in two separate variables.

2017-04-12 Thread Ronald S . Bultje

ffmpeg | branch: release/3.3 | Ronald S. Bultje  | Mon Apr  
3 10:08:29 2017 -0400| [51ca6fda0500da24e3d365c9dfce31bad42e8723] | committer: 
Michael Niedermayer

png: split header state and data state in two separate variables.

Fixes a reported (but false) race condition in tsan for fate-apng:

WARNING: ThreadSanitizer: data race (pid=6274)
  Read of size 4 at 0x7d680001ec78 by main thread (mutexes: write M1338):
#0 update_thread_context src/libavcodec/pngdec.c:1456 
(ffmpeg+0x00dacf0c)
[..]
  Previous write of size 4 at 0x7d680001ec78 by thread T1 (mutexes: write 
M1335):
#0 decode_idat_chunk src/libavcodec/pngdec.c:737 (ffmpeg+0x00dae951)

(cherry picked from commit 478f1c3d5e5463a284ea7efecfc62d47ba3be11a)
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=51ca6fda0500da24e3d365c9dfce31bad42e8723
---

 libavcodec/png.h|  5 -
 libavcodec/pngdec.c | 65 -
 2 files changed, 39 insertions(+), 31 deletions(-)

diff --git a/libavcodec/png.h b/libavcodec/png.h
index 948c2f714f..e967fcf38f 100644
--- a/libavcodec/png.h
+++ b/libavcodec/png.h
@@ -42,11 +42,6 @@
 #define PNG_FILTER_VALUE_PAETH 4
 #define PNG_FILTER_VALUE_MIXED 5
 
-#define PNG_IHDR  0x0001
-#define PNG_IDAT  0x0002
-#define PNG_ALLIMAGE  0x0004
-#define PNG_PLTE  0x0008
-
 #define NB_PASSES 7
 
 #define PNGSIG 0x89504e470d0a1a0a
diff --git a/libavcodec/pngdec.c b/libavcodec/pngdec.c
index c08665be7c..d184c34b81 100644
--- a/libavcodec/pngdec.c
+++ b/libavcodec/pngdec.c
@@ -36,6 +36,16 @@
 
 #include <zlib.h>
 
+enum PNGHeaderState {
+PNG_IHDR = 1 << 0,
+PNG_PLTE = 1 << 1,
+};
+
+enum PNGImageState {
+PNG_IDAT = 1 << 0,
+PNG_ALLIMAGE = 1 << 1,
+};
+
 typedef struct PNGDecContext {
 PNGDSPContext dsp;
 AVCodecContext *avctx;
@@ -45,7 +55,8 @@ typedef struct PNGDecContext {
 ThreadFrame last_picture;
 ThreadFrame picture;
 
-int state;
+enum PNGHeaderState hdr_state;
+enum PNGImageState pic_state;
 int width, height;
 int cur_w, cur_h;
 int last_w, last_h;
@@ -334,7 +345,7 @@ static void png_handle_row(PNGDecContext *s)
 }
 s->y++;
 if (s->y == s->cur_h) {
-s->state |= PNG_ALLIMAGE;
+s->pic_state |= PNG_ALLIMAGE;
 if (s->filter_type == PNG_FILTER_TYPE_LOCO) {
 if (s->bit_depth == 16) {
 deloco_rgb16((uint16_t *)ptr, s->row_size / 2,
@@ -369,7 +380,7 @@ static void png_handle_row(PNGDecContext *s)
 memset(s->last_row, 0, s->row_size);
 for (;;) {
 if (s->pass == NB_PASSES - 1) {
-s->state |= PNG_ALLIMAGE;
+s->pic_state |= PNG_ALLIMAGE;
 goto the_end;
 } else {
 s->pass++;
@@ -404,7 +415,7 @@ static int png_decode_idat(PNGDecContext *s, int length)
 return AVERROR_EXTERNAL;
 }
 if (s->zstream.avail_out == 0) {
-if (!(s->state & PNG_ALLIMAGE)) {
+if (!(s->pic_state & PNG_ALLIMAGE)) {
 png_handle_row(s);
 }
 s->zstream.avail_out = s->crow_size;
@@ -541,12 +552,12 @@ static int decode_ihdr_chunk(AVCodecContext *avctx, 
PNGDecContext *s,
 if (length != 13)
 return AVERROR_INVALIDDATA;
 
-if (s->state & PNG_IDAT) {
+if (s->pic_state & PNG_IDAT) {
 av_log(avctx, AV_LOG_ERROR, "IHDR after IDAT\n");
 return AVERROR_INVALIDDATA;
 }
 
-if (s->state & PNG_IHDR) {
+if (s->hdr_state & PNG_IHDR) {
 av_log(avctx, AV_LOG_ERROR, "Multiple IHDR\n");
 return AVERROR_INVALIDDATA;
 }
@@ -569,7 +580,7 @@ static int decode_ihdr_chunk(AVCodecContext *avctx, 
PNGDecContext *s,
 s->filter_type  = bytestream2_get_byte(&s->gb);
 s->interlace_type   = bytestream2_get_byte(&s->gb);
 bytestream2_skip(&s->gb, 4); /* crc */
-s->state |= PNG_IHDR;
+s->hdr_state |= PNG_IHDR;
 if (avctx->debug & FF_DEBUG_PICT_INFO)
 av_log(avctx, AV_LOG_DEBUG, "width=%d height=%d depth=%d color_type=%d 
"
 "compression_type=%d filter_type=%d interlace_type=%d\n",
@@ -585,7 +596,7 @@ error:
 
 static int decode_phys_chunk(AVCodecContext *avctx, PNGDecContext *s)
 {
-if (s->state & PNG_IDAT) {
+if (s->pic_state & PNG_IDAT) {
 av_log(avctx, AV_LOG_ERROR, "pHYs after IDAT\n");
 return AVERROR_INVALIDDATA;
 }
@@ -605,11 +616,11 @@ static int decode_idat_chunk(AVCodecContext *avctx, 
PNGDecContext *s,
 int ret;
 size_t byte_depth = s->bit_depth > 8 ? 2 : 1;
 
-if (!(s->state & PNG_IHDR))

[FFmpeg-cvslog] pthread_frame: call update_context_from_user() after acquiring lock.

2017-04-12 Thread Ronald S . Bultje

ffmpeg | branch: release/3.3 | Ronald S. Bultje  | Mon Apr  
3 09:48:53 2017 -0400| [b51217381dd748fd831ba9403cdcdc8277bd63d5] | committer: 
Michael Niedermayer

pthread_frame: call update_context_from_user() after acquiring lock.

Otherwise the thread may still be in the middle of decoding a previous
frame, which would effectively trigger a race condition on any field
concurrently read and written.

In practice, this fixes tsan warnings like the following:

WARNING: ThreadSanitizer: data race (pid=17380)
  Write of size 4 at 0x7d64000160fc by main thread:
#0 update_context_from_user src/libavcodec/pthread_frame.c:335 
(ffmpeg+0x00dca515)
[..]
  Previous read of size 4 at 0x7d64000160fc by thread T2 (mutexes: write M1821):
#0 ff_thread_report_progress src/libavcodec/pthread_frame.c:565 
(ffmpeg+0x00dcb08a)

(cherry picked from commit 1269cd5b6f540bef5913bf134d2f461aac50d70b)
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b51217381dd748fd831ba9403cdcdc8277bd63d5
---

 libavcodec/pthread_frame.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index 4e1ad9d686..9a6b83ac45 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -380,7 +380,8 @@ static void release_delayed_buffers(PerThreadContext *p)
 }
 }
 
-static int submit_packet(PerThreadContext *p, AVPacket *avpkt)
+static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
+ AVPacket *avpkt)
 {
 FrameThreadContext *fctx = p->parent;
 PerThreadContext *prev_thread = fctx->prev_thread;
@@ -392,6 +393,12 @@ static int submit_packet(PerThreadContext *p, AVPacket 
*avpkt)
 
 pthread_mutex_lock(&p->mutex);
 
+ret = update_context_from_user(p->avctx, user_avctx);
+if (ret) {
+pthread_mutex_unlock(&p->mutex);
+return ret;
+}
+
 release_delayed_buffers(p);
 
 if (prev_thread) {
@@ -480,10 +487,7 @@ int ff_thread_decode_frame(AVCodecContext *avctx,
  */
 
 p = &fctx->threads[fctx->next_decoding];
-err = update_context_from_user(p->avctx, avctx);
-if (err)
-goto finish;
-err = submit_packet(p, avpkt);
+err = submit_packet(p, avctx, avpkt);
 if (err)
 goto finish;
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] h264: don't sync pic_id between threads.

2017-04-12 Thread Ronald S . Bultje

ffmpeg | branch: release/3.3 | Ronald S. Bultje  | Mon Apr  
3 09:25:15 2017 -0400| [e9fc7a90ba21f2fb2953f24375db2b81e891a6f2] | committer: 
Michael Niedermayer

h264: don't sync pic_id between threads.

This is how the ref list manager links bitstream IDs to H264Picture/Ref
objects, and is local to the producer thread. There is no need for the
consumer thread to know the bitstream IDs of its references in their
respective producer threads.

In practice, this fixes tsan warnings when running fate-h264:

WARNING: ThreadSanitizer: data race (pid=19295)
  Read of size 4 at 0x7dbce614 by main thread (mutexes: write M1914):
#0 ff_h264_ref_picture src/libavcodec/h264_picture.c:112 
(ffmpeg+0x013b3709)
[..]
  Previous write of size 4 at 0x7dbce614 by thread T2 (mutexes: write 
M1917):
#0 build_def_list src/libavcodec/h264_refs.c:91 (ffmpeg+0x013b46cf)

(cherry picked from commit e72690b18da064f6c0f04f09ccde72b6636e3159)
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e9fc7a90ba21f2fb2953f24375db2b81e891a6f2
---

 libavcodec/h264_picture.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libavcodec/h264_picture.c b/libavcodec/h264_picture.c
index db9673793a..2dbe5ee40b 100644
--- a/libavcodec/h264_picture.c
+++ b/libavcodec/h264_picture.c
@@ -109,7 +109,6 @@ int ff_h264_ref_picture(H264Context *h, H264Picture *dst, 
H264Picture *src)
 dst->poc   = src->poc;
 dst->frame_num = src->frame_num;
 dst->mmco_reset= src->mmco_reset;
-dst->pic_id= src->pic_id;
 dst->long_ref  = src->long_ref;
 dst->mbaff = src->mbaff;
 dst->field_picture = src->field_picture;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] h264: don't re-call ff_h264_direct_ref_list_init() w/ frame-mt.

2017-04-12 Thread Ronald S . Bultje

ffmpeg | branch: release/3.3 | Ronald S. Bultje  | Thu Apr  
6 11:47:03 2017 -0400| [414d11fff6453b3d3af75734292f8d16edeba940] | committer: 
Michael Niedermayer

h264: don't re-call ff_h264_direct_ref_list_init() w/ frame-mt.

I'm hoping that this will address the remaining tsan fate-h264 issues:

WARNING: ThreadSanitizer: data race (pid=24478)
  Read of size 8 at 0x7dbc0001c828 by main thread (mutexes: write M3243):
#0 ff_h264_ref_picture src/libavcodec/h264_picture.c:107 
(ffmpeg+0x013b78d8)
[..]
  Previous write of size 1 at 0x7dbc0001c82e by thread T2 (mutexes: write 
M3245):
#0 ff_h264_direct_ref_list_init src/libavcodec/h264_direct.c:137 
(ffmpeg+0x01382c93)

But I'm not sure because I haven't been able to reproduce locally.

(cherry picked from commit 7f05c5cea04112471d8147487aa3b44141922d09)
Signed-off-by: Michael Niedermayer 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=414d11fff6453b3d3af75734292f8d16edeba940
---

 libavcodec/h264_slice.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 44a0b9fb17..70ad05f640 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -1889,7 +1889,8 @@ static int h264_slice_init(H264Context *h, 
H264SliceContext *sl,
 
 if (sl->slice_type_nos == AV_PICTURE_TYPE_B && !sl->direct_spatial_mv_pred)
 ff_h264_direct_dist_scale_factor(h, sl);
-ff_h264_direct_ref_list_init(h, sl);
+if (!h->setup_finished)
+ff_h264_direct_ref_list_init(h, sl);
 
 if (h->avctx->skip_loop_filter >= AVDISCARD_ALL ||
 (h->avctx->skip_loop_filter >= AVDISCARD_NONKEY &&

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] pthread_frame: make accesses to debug field be protected by owner lock.

2017-04-07 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Thu Apr  6 
13:58:59 2017 -0400| [2e664b9c1e73c80aab91070c1eb7676f04bdd12d] | committer: 
Ronald S. Bultje

pthread_frame: make accesses to debug field be protected by owner lock.

The av_log() is done outside the lock, but this way the accesses to the
field (reads and writes) are always protected by a mutex. The av_log()
is not run inside the lock context because it may involve user callbacks
and doing that in performance-sensitive code is probably not a good idea.

This should fix occasional tsan warnings when running fate-h264, like:

WARNING: ThreadSanitizer: data race (pid=10916)
  Write of size 4 at 0x7d64000174fc by main thread (mutexes: write M2313):
#0 update_context_from_user src/libavcodec/pthread_frame.c:335 
(ffmpeg+0x00df7b06)
[..]
  Previous read of size 4 at 0x7d64000174fc by thread T1 (mutexes: write M2311):
#0 ff_thread_await_progress src/libavcodec/pthread_frame.c:592 
(ffmpeg+0x00df8b3e)

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2e664b9c1e73c80aab91070c1eb7676f04bdd12d
---

 libavcodec/pthread_frame.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index aaf576d..46c6292 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -566,12 +566,11 @@ void ff_thread_report_progress(ThreadFrame *f, int n, int 
field)
 
 p = f->owner[field]->internal->thread_ctx;
 
+pthread_mutex_lock(&p->progress_mutex);
 if (f->owner[field]->debug&FF_DEBUG_THREADS)
 av_log(f->owner[field], AV_LOG_DEBUG,
"%p finished %d field %d\n", progress, n, field);
 
-pthread_mutex_lock(&p->progress_mutex);
-
 atomic_store_explicit(&progress[field], n, memory_order_release);
 
 pthread_cond_broadcast(&p->progress_cond);
@@ -589,11 +588,10 @@ void ff_thread_await_progress(ThreadFrame *f, int n, int 
field)
 
 p = f->owner[field]->internal->thread_ctx;
 
+pthread_mutex_lock(&p->progress_mutex);
 if (f->owner[field]->debug&FF_DEBUG_THREADS)
 av_log(f->owner[field], AV_LOG_DEBUG,
"thread awaiting %d field %d from %p\n", n, field, progress);
-
-pthread_mutex_lock(&p->progress_mutex);
 while (atomic_load_explicit(&progress[field], memory_order_relaxed) < n)
 pthread_cond_wait(&p->progress_cond, &p->progress_mutex);
 pthread_mutex_unlock(&p->progress_mutex);

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] h264: don't re-call ff_h264_direct_ref_list_init() w/ frame-mt.

2017-04-07 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Thu Apr  6 
11:47:03 2017 -0400| [7f05c5cea04112471d8147487aa3b44141922d09] | committer: 
Ronald S. Bultje

h264: don't re-call ff_h264_direct_ref_list_init() w/ frame-mt.

I'm hoping that this will address the remaining tsan fate-h264 issues:

WARNING: ThreadSanitizer: data race (pid=24478)
  Read of size 8 at 0x7dbc0001c828 by main thread (mutexes: write M3243):
#0 ff_h264_ref_picture src/libavcodec/h264_picture.c:107 
(ffmpeg+0x013b78d8)
[..]
  Previous write of size 1 at 0x7dbc0001c82e by thread T2 (mutexes: write 
M3245):
#0 ff_h264_direct_ref_list_init src/libavcodec/h264_direct.c:137 
(ffmpeg+0x01382c93)

But I'm not sure because I haven't been able to reproduce locally.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7f05c5cea04112471d8147487aa3b44141922d09
---

 libavcodec/h264_slice.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index 34bd6cf..acf6a73 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -1891,7 +1891,8 @@ static int h264_slice_init(H264Context *h, 
H264SliceContext *sl,
 
 if (sl->slice_type_nos == AV_PICTURE_TYPE_B && !sl->direct_spatial_mv_pred)
 ff_h264_direct_dist_scale_factor(h, sl);
-ff_h264_direct_ref_list_init(h, sl);
+if (!h->setup_finished)
+ff_h264_direct_ref_list_init(h, sl);
 
 if (h->avctx->skip_loop_filter >= AVDISCARD_ALL ||
 (h->avctx->skip_loop_filter >= AVDISCARD_NONKEY &&

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] vp8: make wait/thread_mb_pos atomic.

2017-04-06 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Wed Apr  5 
16:18:54 2017 -0400| [9a54c6f243412f62bae498ddcac337cb18ae6290] | committer: 
Ronald S. Bultje

vp8: make wait/thread_mb_pos atomic.

Fixes tsan warnings like this in fate-vp8-test-vector-007:

WARNING: ThreadSanitizer: data race (pid=3590)
  Write of size 4 at 0x7d8ce07c by thread T2:
#0 decode_mb_row_no_filter src/libavcodec/vp8.c:2330 (ffmpeg+0x00ffb59e)
[..]
  Previous write of size 4 at 0x7d8ce07c by thread T1:
#0 decode_mb_row_no_filter src/libavcodec/vp8.c:2330 (ffmpeg+0x00ffb59e)

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9a54c6f243412f62bae498ddcac337cb18ae6290
---

 libavcodec/vp8.c | 29 ++---
 libavcodec/vp8.h |  6 --
 2 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index 1e8808c..9bc1d95 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -2247,15 +2247,15 @@ static void vp8_decode_mv_mb_modes(AVCodecContext 
*avctx, VP8Frame *cur_frame,
 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
 do {  \
 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
-if (otd->thread_mb_pos < tmp) {   \
+if (atomic_load(&otd->thread_mb_pos) < tmp) { \
 pthread_mutex_lock(&otd->lock);   \
-td->wait_mb_pos = tmp;\
+atomic_store(&td->wait_mb_pos, tmp);  \
 do {  \
-if (otd->thread_mb_pos >= tmp)\
+if (atomic_load(&otd->thread_mb_pos) >= tmp)  \
 break;\
 pthread_cond_wait(&otd->cond, &otd->lock);\
 } while (1);  \
-td->wait_mb_pos = INT_MAX;\
+atomic_store(&td->wait_mb_pos, INT_MAX);  \
 pthread_mutex_unlock(&otd->lock); \
 } \
 } while (0)
@@ -2266,12 +2266,10 @@ static void vp8_decode_mv_mb_modes(AVCodecContext 
*avctx, VP8Frame *cur_frame,
 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) 
&& \
(num_jobs > 1);\
 int is_null  = !next_td || !prev_td;  \
-int pos_check= (is_null) ? 1  \
- : (next_td != td &&  \
-pos >= next_td->wait_mb_pos) ||   \
-   (prev_td != td &&  \
-pos >= prev_td->wait_mb_pos); \
-td->thread_mb_pos = pos;  \
+int pos_check= (is_null) ? 1 :\
+(next_td != td && pos >= atomic_load(&next_td->wait_mb_pos)) ||   \
+(prev_td != td && pos >= atomic_load(&prev_td->wait_mb_pos)); \
+atomic_store(&td->thread_mb_pos, pos);\
 if (sliced_threading && pos_check) {  \
 pthread_mutex_lock(&td->lock);\
 pthread_cond_broadcast(&td->cond);\
@@ -2288,7 +2286,7 @@ static av_always_inline int 
decode_mb_row_no_filter(AVCodecContext *avctx, void
 {
 VP8Context *s = avctx->priv_data;
 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
-int mb_y = td->thread_mb_pos >> 16;
+int mb_y = atomic_load(&td->thread_mb_pos) >> 16;
 int mb_x, mb_xy = mb_y * s->mb_width;
 int num_jobs = s->num_jobs;
 VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
@@ -2428,7 +2426,7 @@ static av_always_inline void filter_mb_row(AVCodecContext 
*avctx, void *tdata,
 {
 VP8Context *s = avctx->priv_data;
 VP8ThreadData *td = &s->thread_data[threadnr];
-int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs;
+int mb_x, mb_y = atomic_load(&td->thread_mb_pos) >> 16, num_j

[FFmpeg-cvslog] x86/simple_idct: add explicit sse2 simple_idct_put/add versions.

2017-04-06 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Tue Apr  4 
12:42:58 2017 -0400| [e0c205677f6b3b7dba6891724cb68bfb81e9b8d6] | committer: 
Ronald S. Bultje

x86/simple_idct: add explicit sse2 simple_idct_put/add versions.

These use the mmx IDCT, but sse2 put/add_pixels_clamped implementations.
This way we don't need to use the ff_put/add_pixels_clamped function
pointers.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e0c205677f6b3b7dba6891724cb68bfb81e9b8d6
---

 libavcodec/x86/idctdsp_init.c | 38 ++++++++++++++++++++++++++------------
 libavcodec/x86/simple_idct.c  | 15 +++++++++++++--
 libavcodec/x86/simple_idct.h  |  3 +++
 3 files changed, 42 insertions(+), 14 deletions(-)

diff --git a/libavcodec/x86/idctdsp_init.c b/libavcodec/x86/idctdsp_init.c
index bcf7e5b..3f078e8 100644
--- a/libavcodec/x86/idctdsp_init.c
+++ b/libavcodec/x86/idctdsp_init.c
@@ -63,27 +63,41 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, 
AVCodecContext *avctx,
 {
 int cpu_flags = av_get_cpu_flags();
 
-if (INLINE_MMX(cpu_flags)) {
-if (!high_bit_depth &&
-avctx->lowres == 0 &&
-(avctx->idct_algo == FF_IDCT_AUTO ||
- avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
- avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
-c->idct_put  = ff_simple_idct_put_mmx;
-c->idct_add  = ff_simple_idct_add_mmx;
-c->idct  = ff_simple_idct_mmx;
-c->perm_type = FF_IDCT_PERM_SIMPLE;
-}
-}
 if (EXTERNAL_MMX(cpu_flags)) {
 c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
 c->put_pixels_clamped= ff_put_pixels_clamped_mmx;
 c->add_pixels_clamped= ff_add_pixels_clamped_mmx;
+
+if (INLINE_MMX(cpu_flags)) {
+if (!high_bit_depth &&
+avctx->lowres == 0 &&
+(avctx->idct_algo == FF_IDCT_AUTO ||
+ avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
+ avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
+c->idct_put  = ff_simple_idct_put_mmx;
+c->idct_add  = ff_simple_idct_add_mmx;
+c->idct  = ff_simple_idct_mmx;
+c->perm_type = FF_IDCT_PERM_SIMPLE;
+}
+}
 }
+
 if (EXTERNAL_SSE2(cpu_flags)) {
 c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2;
 c->put_pixels_clamped= ff_put_pixels_clamped_sse2;
 c->add_pixels_clamped= ff_add_pixels_clamped_sse2;
+
+if (INLINE_SSE2(cpu_flags)) {
+if (!high_bit_depth &&
+avctx->lowres == 0 &&
+(avctx->idct_algo == FF_IDCT_AUTO ||
+ avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
+ avctx->idct_algo == FF_IDCT_SIMPLEMMX)) {
+c->idct_put  = ff_simple_idct_put_sse2;
+c->idct_add  = ff_simple_idct_add_sse2;
+c->perm_type = FF_IDCT_PERM_SIMPLE;
+}
+}
 }
 
 if (ARCH_X86_64 && avctx->lowres == 0) {
diff --git a/libavcodec/x86/simple_idct.c b/libavcodec/x86/simple_idct.c
index d3a19fa..1155920 100644
--- a/libavcodec/x86/simple_idct.c
+++ b/libavcodec/x86/simple_idct.c
@@ -24,6 +24,7 @@
 #include "libavutil/x86/asm.h"
 
 #include "libavcodec/idctdsp.h"
+#include "libavcodec/x86/idctdsp.h"
 
 #include "idctdsp.h"
 #include "simple_idct.h"
@@ -907,12 +908,22 @@ void ff_simple_idct_mmx(int16_t *block)
 void ff_simple_idct_put_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
 {
 idct(block);
-ff_put_pixels_clamped(block, dest, line_size);
+ff_put_pixels_clamped_mmx(block, dest, line_size);
 }
 void ff_simple_idct_add_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
 {
 idct(block);
-ff_add_pixels_clamped(block, dest, line_size);
+ff_add_pixels_clamped_mmx(block, dest, line_size);
+}
+void ff_simple_idct_put_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t 
*block)
+{
+idct(block);
+ff_put_pixels_clamped_sse2(block, dest, line_size);
+}
+void ff_simple_idct_add_sse2(uint8_t *dest, ptrdiff_t line_size, int16_t 
*block)
+{
+idct(block);
+ff_add_pixels_clamped_sse2(block, dest, line_size);
 }
 
 #endif /* HAVE_INLINE_ASM */
diff --git a/libavcodec/x86/simple_idct.h b/libavcodec/x86/simple_idct.h
index ad76baf..d17ef6a 100644
--- a/libavcodec/x86/simple_idct.h
+++ b/libavcodec/x86/simple_idct.h
@@ -26,6 +26,9 @@ void ff_simple_idct_mmx(int16_t *block);
 void ff_simple_idct_add_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t 
*block);
 void ff_simple_idct_put_mmx(uint8_t *dest, ptrdiff_t line_size, int16_t 
*block);
 
+void ff_simple_idct_add_sse2(uint8_t *dest, ptrdiff

[FFmpeg-cvslog] jrev/xvid: hardcode use of C put/add_pixels_clamped.

2017-04-06 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Tue Apr  4 
12:45:51 2017 -0400| [32baeafeee4f8446c2c3720b9223ad2166ca9d30] | committer: 
Ronald S. Bultje

jrev/xvid: hardcode use of C put/add_pixels_clamped.

This removes the last use of the ff_put/add_pixels_clamped global
function pointers, and as such they are removed.

This patch has a negative effect on performance on MIPS, since there's
a SIMD-optimized put/add_pixels_clamped, but no xvid or jrev. From a
code maintenance point of view, that is probably acceptable.

Because the global function pointers are removed, this fixes the following
tsan warnings when running e.g. fate-dnxhr-parse:

WARNING: ThreadSanitizer: data race (pid=29917)
  Write of size 8 at 0x025b12d8 by thread T2 (mutexes: write M1543):
#0 ff_idctdsp_init src/libavcodec/idctdsp.c:313 (ffmpeg+0x0044b68e)
[..]
  Previous write of size 8 at 0x025b12d8 by thread T1 (mutexes: write 
M1541):
#0 ff_idctdsp_init src/libavcodec/idctdsp.c:313 (ffmpeg+0x0044b68e)

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=32baeafeee4f8446c2c3720b9223ad2166ca9d30
---

 libavcodec/idctdsp.c  | 18 ++++++------------
 libavcodec/idctdsp.h  |  6 ++++--
 libavcodec/jrevdct.c  |  4 ++--
 libavcodec/xvididct.c |  4 ++--
 4 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/libavcodec/idctdsp.c b/libavcodec/idctdsp.c
index 84dd645..d596aed 100644
--- a/libavcodec/idctdsp.c
+++ b/libavcodec/idctdsp.c
@@ -80,11 +80,8 @@ av_cold void ff_init_scantable_permutation(uint8_t 
*idct_permutation,
 }
 }
 
-void (*ff_put_pixels_clamped)(const int16_t *block, uint8_t *pixels, ptrdiff_t 
line_size);
-void (*ff_add_pixels_clamped)(const int16_t *block, uint8_t *pixels, ptrdiff_t 
line_size);
-
-static void put_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict 
pixels,
- ptrdiff_t line_size)
+void ff_put_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
+ ptrdiff_t line_size)
 {
 int i;
 
@@ -157,8 +154,8 @@ static void put_signed_pixels_clamped_c(const int16_t 
*block,
 }
 }
 
-static void add_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict 
pixels,
- ptrdiff_t line_size)
+void ff_add_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
+ ptrdiff_t line_size)
 {
 int i;
 
@@ -290,9 +287,9 @@ av_cold void ff_idctdsp_init(IDCTDSPContext *c, 
AVCodecContext *avctx)
 }
 }
 
-c->put_pixels_clamped= put_pixels_clamped_c;
+c->put_pixels_clamped= ff_put_pixels_clamped_c;
 c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
-c->add_pixels_clamped= add_pixels_clamped_c;
+c->add_pixels_clamped= ff_add_pixels_clamped_c;
 
 if (CONFIG_MPEG4_DECODER && avctx->idct_algo == FF_IDCT_XVID)
 ff_xvid_idct_init(c, avctx);
@@ -310,9 +307,6 @@ av_cold void ff_idctdsp_init(IDCTDSPContext *c, 
AVCodecContext *avctx)
 if (ARCH_MIPS)
 ff_idctdsp_init_mips(c, avctx, high_bit_depth);
 
-ff_put_pixels_clamped = c->put_pixels_clamped;
-ff_add_pixels_clamped = c->add_pixels_clamped;
-
 ff_init_scantable_permutation(c->idct_permutation,
   c->perm_type);
 }
diff --git a/libavcodec/idctdsp.h b/libavcodec/idctdsp.h
index f9ba6c3..26221f6 100644
--- a/libavcodec/idctdsp.h
+++ b/libavcodec/idctdsp.h
@@ -97,8 +97,10 @@ typedef struct IDCTDSPContext {
 enum idct_permutation_type perm_type;
 } IDCTDSPContext;
 
-extern void (*ff_put_pixels_clamped)(const int16_t *block, uint8_t *pixels, 
ptrdiff_t line_size);
-extern void (*ff_add_pixels_clamped)(const int16_t *block, uint8_t *pixels, 
ptrdiff_t line_size);
+void ff_put_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
+ ptrdiff_t line_size);
+void ff_add_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
+ ptrdiff_t line_size);
 
 void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx);
 
diff --git a/libavcodec/jrevdct.c b/libavcodec/jrevdct.c
index 89dd9f2..3b15a52 100644
--- a/libavcodec/jrevdct.c
+++ b/libavcodec/jrevdct.c
@@ -1159,11 +1159,11 @@ void ff_j_rev_dct1(DCTBLOCK data){
 void ff_jref_idct_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
 {
 ff_j_rev_dct(block);
-ff_put_pixels_clamped(block, dest, line_size);
+ff_put_pixels_clamped_c(block, dest, line_size);
 }
 
 void ff_jref_idct_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
 {
 ff_j_rev_dct(block);
-ff_add_pixels_clamped(block, dest, line_size);
+ff_add_pixels_clamped_c(block, dest, line_size);
 }
diff --git a/libavcodec/xvididct.c b/libavcodec/xvididct.c
index 4642a30..d8f3dd7 100644
--- a/libavcodec/xvididct.c
+++ b/libavcodec/xvididct.c
@@ -321,13 +321,13 @@ void ff_xvid_idct(int16_t *cons

[FFmpeg-cvslog] x86/idctdsp_init: reindent.

2017-04-06 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Wed Apr  5 
07:49:14 2017 -0400| [83ae7e6350cf12ce64b184fb717011551cc02d62] | committer: 
Ronald S. Bultje

x86/idctdsp_init: reindent.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=83ae7e6350cf12ce64b184fb717011551cc02d62
---

 libavcodec/x86/idctdsp_init.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/libavcodec/x86/idctdsp_init.c b/libavcodec/x86/idctdsp_init.c
index 3f078e8..fd5ef3f 100644
--- a/libavcodec/x86/idctdsp_init.c
+++ b/libavcodec/x86/idctdsp_init.c
@@ -102,22 +102,22 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, 
AVCodecContext *avctx,
 
 if (ARCH_X86_64 && avctx->lowres == 0) {
 if (avctx->bits_per_raw_sample == 10 &&
-(avctx->idct_algo == FF_IDCT_AUTO ||
- avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
- avctx->idct_algo == FF_IDCT_SIMPLE)) {
-if (EXTERNAL_SSE2(cpu_flags)) {
-c->idct_put  = ff_simple_idct10_put_sse2;
-c->idct_add  = NULL;
-c->idct  = ff_simple_idct10_sse2;
-c->perm_type = FF_IDCT_PERM_TRANSPOSE;
+(avctx->idct_algo == FF_IDCT_AUTO ||
+ avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
+ avctx->idct_algo == FF_IDCT_SIMPLE)) {
+if (EXTERNAL_SSE2(cpu_flags)) {
+c->idct_put  = ff_simple_idct10_put_sse2;
+c->idct_add  = NULL;
+c->idct  = ff_simple_idct10_sse2;
+c->perm_type = FF_IDCT_PERM_TRANSPOSE;
 
-}
-if (EXTERNAL_AVX(cpu_flags)) {
-c->idct_put  = ff_simple_idct10_put_avx;
-c->idct_add  = NULL;
-c->idct  = ff_simple_idct10_avx;
-c->perm_type = FF_IDCT_PERM_TRANSPOSE;
-}
+}
+if (EXTERNAL_AVX(cpu_flags)) {
+c->idct_put  = ff_simple_idct10_put_avx;
+c->idct_add  = NULL;
+c->idct  = ff_simple_idct10_avx;
+c->perm_type = FF_IDCT_PERM_TRANSPOSE;
+}
 }
 
 if (avctx->bits_per_raw_sample == 12 &&

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] cavs: convert idct from inline asm to yasm.

2017-04-06 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Tue Apr  4 
11:55:16 2017 -0400| [c9d98c5649ac11617200bf19b1e027505251d3cf] | committer: 
Ronald S. Bultje

cavs: convert idct from inline asm to yasm.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c9d98c5649ac11617200bf19b1e027505251d3cf
---

 libavcodec/x86/Makefile |   1 +
 libavcodec/x86/cavsdsp.c| 165 ++--
 libavcodec/x86/cavsidct.asm | 165 
 3 files changed, 171 insertions(+), 160 deletions(-)

diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 0295a9f..d4cb27f 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -142,6 +142,7 @@ YASM-OBJS-$(CONFIG_ADPCM_G722_DECODER) += x86/g722dsp.o
 YASM-OBJS-$(CONFIG_ADPCM_G722_ENCODER) += x86/g722dsp.o
 YASM-OBJS-$(CONFIG_ALAC_DECODER)   += x86/alacdsp.o
 YASM-OBJS-$(CONFIG_APNG_DECODER)   += x86/pngdsp.o
+YASM-OBJS-$(CONFIG_CAVS_DECODER)   += x86/cavsidct.o
 YASM-OBJS-$(CONFIG_DCA_DECODER)+= x86/dcadsp.o x86/synth_filter.o
 YASM-OBJS-$(CONFIG_DIRAC_DECODER)  += x86/diracdsp.o\
   x86/dirac_dwt.o
diff --git a/libavcodec/x86/cavsdsp.c b/libavcodec/x86/cavsdsp.c
index ecb9b23..add4536 100644
--- a/libavcodec/x86/cavsdsp.c
+++ b/libavcodec/x86/cavsdsp.c
@@ -34,172 +34,19 @@
 #include "idctdsp.h"
 #include "config.h"
 
-#if HAVE_MMX_INLINE
 
-/* in/out: mma=mma+mmb, mmb=mmb-mma */
-#define SUMSUB_BA( a, b ) \
-"paddw "#b", "#a" \n\t"\
-"paddw "#b", "#b" \n\t"\
-"psubw "#a", "#b" \n\t"
-
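-/*****************************************************************************
- *
- * inverse transform
- *
- ****************************************************************************/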
-
-static inline void cavs_idct8_1d(int16_t *block, uint64_t bias)
-{
-__asm__ volatile(
-"movq 112(%0), %%mm4  \n\t" /* mm4 = src7 */
-"movq  16(%0), %%mm5  \n\t" /* mm5 = src1 */
-"movq  80(%0), %%mm2  \n\t" /* mm2 = src5 */
-"movq  48(%0), %%mm7  \n\t" /* mm7 = src3 */
-"movq   %%mm4, %%mm0  \n\t"
-"movq   %%mm5, %%mm3  \n\t"
-"movq   %%mm2, %%mm6  \n\t"
-"movq   %%mm7, %%mm1  \n\t"
-
-"paddw  %%mm4, %%mm4  \n\t" /* mm4 = 2*src7 */
-"paddw  %%mm3, %%mm3  \n\t" /* mm3 = 2*src1 */
-"paddw  %%mm6, %%mm6  \n\t" /* mm6 = 2*src5 */
-"paddw  %%mm1, %%mm1  \n\t" /* mm1 = 2*src3 */
-"paddw  %%mm4, %%mm0  \n\t" /* mm0 = 3*src7 */
-"paddw  %%mm3, %%mm5  \n\t" /* mm5 = 3*src1 */
-"paddw  %%mm6, %%mm2  \n\t" /* mm2 = 3*src5 */
-"paddw  %%mm1, %%mm7  \n\t" /* mm7 = 3*src3 */
-"psubw  %%mm4, %%mm5  \n\t" /* mm5 = 3*src1 - 2*src7 = a0 */
-"paddw  %%mm6, %%mm7  \n\t" /* mm7 = 3*src3 + 2*src5 = a1 */
-"psubw  %%mm2, %%mm1  \n\t" /* mm1 = 2*src3 - 3*src5 = a2 */
-"paddw  %%mm0, %%mm3  \n\t" /* mm3 = 2*src1 + 3*src7 = a3 */
-
-"movq   %%mm5, %%mm4  \n\t"
-"movq   %%mm7, %%mm6  \n\t"
-"movq   %%mm3, %%mm0  \n\t"
-"movq   %%mm1, %%mm2  \n\t"
-SUMSUB_BA( %%mm7, %%mm5 )   /* mm7 = a0 + a1  mm5 = a0 - a1 */
-"paddw  %%mm3, %%mm7  \n\t" /* mm7 = a0 + a1 + a3 */
-"paddw  %%mm1, %%mm5  \n\t" /* mm5 = a0 - a1 + a2 */
-"paddw  %%mm7, %%mm7  \n\t"
-"paddw  %%mm5, %%mm5  \n\t"
-"paddw  %%mm6, %%mm7  \n\t" /* mm7 = b4 */
-"paddw  %%mm4, %%mm5  \n\t" /* mm5 = b5 */
-
-SUMSUB_BA( %%mm1, %%mm3 )   /* mm1 = a3 + a2  mm3 = a3 - a2 */
-"psubw  %%mm1, %%mm4  \n\t" /* mm4 = a0 - a2 - a3 */
-"movq   %%mm4, %%mm1  \n\t" /* mm1 = a0 - a2 - a3 */
-"psubw  %%mm6, %%mm3  \n\t" /* mm3 = a3 - a2 - a1 */
-"paddw  %%mm1, %%mm1  \n\t"
-"paddw  %%mm3, %%mm3  \n\t"
-"psubw  %%mm2, %%mm1  \n\t" /* mm1 = b7 */
-"paddw  %%mm0, %%mm3  \n\t" /* mm3 = b6 */
-
-"movq  32(%0), %%mm2  \n\t" /* mm2 = src2 */
-"movq  96(%0), %%mm6  \n\t" /* mm6 = src6 */
-"movq   %%mm2, %%mm4  \n\t"
-"movq   %%mm6, %%mm0  \n\t"
-"psllw  $2,%%mm4  \n\t" /* mm4 = 4*src2 */
-"psllw  $2,%%mm6  \n\t" /* mm6 = 4*src6 */
-"paddw  %%mm4, %%mm2  \n\t" /* mm2 = 5*src2 */
-"paddw  %%mm6, %%mm0  \n\t" /* mm0 = 5*src6 */
-"paddw  %%mm2, %%mm2  \n\t"
-

[FFmpeg-cvslog] vp8: make mv_min/max thread-local if using partition threading.

2017-04-06 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Wed Apr  5 
16:19:55 2017 -0400| [fed92adbb3fc6cbf735e3df9a2f7d0a2917fcfbd] | committer: 
Ronald S. Bultje

vp8: make mv_min/max thread-local if using partition threading.

Fixes tsan warnings like this in fate-vp8-test-vector-007:

WARNING: ThreadSanitizer: data race (pid=65909)
  Write of size 4 at 0x7d8ce088 by thread T1:
#0 vp8_decode_mb_row_sliced vp8.c:2519 (ffmpeg:x86_64+0x100995ede)
[..]
  Previous write of size 4 at 0x7d8ce088 by thread T2:
#0 vp8_decode_mb_row_sliced vp8.c:2519 (ffmpeg:x86_64+0x100995ede)

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=fed92adbb3fc6cbf735e3df9a2f7d0a2917fcfbd
---

 libavcodec/vp8.c | 53 -
 libavcodec/vp8.h | 19 ---
 2 files changed, 40 insertions(+), 32 deletions(-)

diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index 9bc1d95..fe7aa23 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -772,7 +772,7 @@ static int vp8_decode_frame_header(VP8Context *s, const 
uint8_t *buf, int buf_si
 }
 
 static av_always_inline
-void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
+void clamp_mv(VP8mvbounds *s, VP56mv *dst, const VP56mv *src)
 {
 dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
  av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
@@ -1031,7 +1031,7 @@ void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
 }
 
 static av_always_inline
-void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
+void vp8_decode_mvs(VP8Context *s, VP8mvbounds *mv_bounds, VP8Macroblock *mb,
 int mb_x, int mb_y, int layout)
 {
 VP8Macroblock *mb_edge[3] = { 0  /* top */,
@@ -1102,7 +1102,7 @@ void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
 if (vp56_rac_get_prob_branchy(c, 
vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
 if (vp56_rac_get_prob_branchy(c, 
vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
 /* Choose the best mv out of 0,0 and the nearest mv */
-clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= 
cnt[CNT_ZERO])]);
+clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + 
(cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode== 
VP8_MVMODE_SPLIT) +
 (mb_edge[VP8_EDGE_TOP]->mode == 
VP8_MVMODE_SPLIT)) * 2 +
 (mb_edge[VP8_EDGE_TOPLEFT]->mode == 
VP8_MVMODE_SPLIT);
@@ -1116,11 +1116,11 @@ void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
 mb->bmv[0] = mb->mv;
 }
 } else {
-clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
+clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]);
 mb->bmv[0] = mb->mv;
 }
 } else {
-clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
+clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]);
 mb->bmv[0] = mb->mv;
 }
 } else {
@@ -1166,7 +1166,8 @@ void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder 
*c, VP8Macroblock *mb,
 }
 
 static av_always_inline
-void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
+void decode_mb_mode(VP8Context *s, VP8mvbounds *mv_bounds,
+VP8Macroblock *mb, int mb_x, int mb_y,
 uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
 {
 VP56RangeCoder *c = &s->c;
@@ -1230,7 +1231,7 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int 
mb_x, int mb_y,
 if (is_vp7)
 vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
 else
-vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
+vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
 } else {
 // intra MB, 16.1
 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, 
s->prob->pred16x16);
@@ -2205,8 +2206,8 @@ void vp78_decode_mv_mb_modes(AVCodecContext *avctx, 
VP8Frame *curframe,
 VP8Context *s = avctx->priv_data;
 int mb_x, mb_y;
 
-s->mv_min.y = -MARGIN;
-s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
+s->mv_bounds.mv_min.y = -MARGIN;
+s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
 VP8Macroblock *mb = s->macroblocks_base +
 ((s->mb_width + 1) * (mb_y + 1) + 1);
@@ -2214,20 +2215,20 @@ void vp78_decode_mv_mb_modes(AVCodecContext *avctx, 
VP8Frame *curframe,
 
 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
 
-s->mv_min.x = -MARGIN;
-s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
+s->mv_bounds.mv_min.x = -MARGIN;
+

[FFmpeg-cvslog] cavs: add a sse2 idct implementation.

2017-04-06 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Tue Apr  4 
12:17:08 2017 -0400| [2f0591cfa3b773d7a2fec72b30ec25d4ffb0cb32] | committer: 
Ronald S. Bultje

cavs: add a sse2 idct implementation.

This makes using the function pointer ff_add_pixels_clamped() unnecessary,
since we always know what the best implementation is at compile-time.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2f0591cfa3b773d7a2fec72b30ec25d4ffb0cb32
---

 libavcodec/x86/cavsdsp.c| 15 +-
 libavcodec/x86/cavsidct.asm | 48 -
 2 files changed, 61 insertions(+), 2 deletions(-)

diff --git a/libavcodec/x86/cavsdsp.c b/libavcodec/x86/cavsdsp.c
index add4536..a8a198b 100644
--- a/libavcodec/x86/cavsdsp.c
+++ b/libavcodec/x86/cavsdsp.c
@@ -29,6 +29,7 @@
 #include "libavutil/x86/cpu.h"
 #include "libavcodec/cavsdsp.h"
 #include "libavcodec/idctdsp.h"
+#include "libavcodec/x86/idctdsp.h"
 #include "constants.h"
 #include "fpel.h"
 #include "idctdsp.h"
@@ -43,7 +44,16 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, 
ptrdiff_t stride)
 {
 LOCAL_ALIGNED(16, int16_t, b2, [64]);
 ff_cavs_idct8_mmx(b2, block);
-ff_add_pixels_clamped(b2, dst, stride);
+ff_add_pixels_clamped_mmx(b2, dst, stride);
+}
+
+void ff_cavs_idct8_sse2(int16_t *out, const int16_t *in);
+
+static void cavs_idct8_add_sse2(uint8_t *dst, int16_t *block, ptrdiff_t stride)
+{
+LOCAL_ALIGNED(16, int16_t, b2, [64]);
+ff_cavs_idct8_sse2(b2, block);
+ff_add_pixels_clamped_sse2(b2, dst, stride);
 }
 
 #endif /* HAVE_MMX_EXTERNAL */
@@ -446,6 +456,9 @@ av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, 
AVCodecContext *avctx)
 if (EXTERNAL_SSE2(cpu_flags)) {
 c->put_cavs_qpel_pixels_tab[0][0] = put_cavs_qpel16_mc00_sse2;
 c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_sse2;
+
+c->cavs_idct8_add = cavs_idct8_add_sse2;
+c->idct_perm  = FF_IDCT_PERM_TRANSPOSE;
 }
 #endif
 }
diff --git a/libavcodec/x86/cavsidct.asm b/libavcodec/x86/cavsidct.asm
index 5421196..6c768c2 100644
--- a/libavcodec/x86/cavsidct.asm
+++ b/libavcodec/x86/cavsidct.asm
@@ -29,11 +29,16 @@ cextern pw_64
 
 SECTION .text
 
-%macro CAVS_IDCT8_1D 2 ; source, round
+%macro CAVS_IDCT8_1D 2-3 1 ; source, round, init_load
+%if %3 == 1
 mova    m4, [%1+7*16]   ; m4 = src7
 mova    m5, [%1+1*16]   ; m5 = src1
 mova    m2, [%1+5*16]   ; m2 = src5
 mova    m7, [%1+3*16]   ; m7 = src3
+%else
+SWAP 1, 7
+SWAP 4, 6
+%endif
 mova    m0, m4
 mova    m3, m5
 mova    m6, m2
@@ -163,3 +168,44 @@ cglobal cavs_idct8, 2, 4, 8, 8 * 16, out, in, cnt, tmp
 jg .loop_2
 
 RET
+
+INIT_XMM sse2
+cglobal cavs_idct8, 2, 2, 8 + ARCH_X86_64, 0 - 8 * 16, out, in
+CAVS_IDCT8_1D  inq, [pw_4]
+psraw   m7, 3
+psraw   m6, 3
+psraw   m5, 3
+psraw   m4, 3
+psraw   m3, 3
+psraw   m2, 3
+psraw   m1, 3
+psraw   m0, 3
+%if ARCH_X86_64
+TRANSPOSE8x8W 7, 5, 3, 1, 0, 2, 4, 6, 8
+mova    [rsp+4*16], m0
+%else
+mova    [rsp+0*16], m4
+TRANSPOSE8x8W 7, 5, 3, 1, 0, 2, 4, 6, [rsp+0*16], [rsp+4*16], 1
+%endif
+mova    [rsp+0*16], m7
+mova    [rsp+2*16], m3
+mova    [rsp+6*16], m4
+CAVS_IDCT8_1D  rsp, [pw_64], 0
+psraw   m7, 7
+psraw   m6, 7
+psraw   m5, 7
+psraw   m4, 7
+psraw   m3, 7
+psraw   m2, 7
+psraw   m1, 7
+psraw   m0, 7
+
+mova   [outq+0*16], m7
+mova   [outq+1*16], m5
+mova   [outq+2*16], m3
+mova   [outq+3*16], m1
+mova   [outq+4*16], m0
+mova   [outq+5*16], m2
+mova   [outq+6*16], m4
+mova   [outq+7*16], m6
+RET

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] pthread_frame: allow per-field ThreadFrame owners.

2017-04-06 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Mon Apr  3 
10:24:05 2017 -0400| [083300bea935d125b83f60d7030f78a7ffb0f3df] | committer: 
Ronald S. Bultje

pthread_frame: allow per-field ThreadFrame owners.

This tries to handle cases where separate invocations of decode_frame()
(each running in separate threads) write to respective fields in the
same AVFrame->data[]. Having per-field owners makes interaction between
readers (the referencing thread) and writers (the decoding thread)
slightly more optimal if both accesses are field-based, since they will
use the respective producer's thread objects (mutex/cond) instead of
sharing the thread objects of the first field's producer.

In practice, this fixes the following tsan-warning in fate-h264:

WARNING: ThreadSanitizer: data race (pid=21615)
  Read of size 4 at 0x7d64d9fc by thread T2 (mutexes: write M1006):
#0 ff_thread_report_progress pthread_frame.c:569 (ffmpeg:x86_64+0x100f7cf54)
[..]
  Previous write of size 4 at 0x7d64d9fc by main thread (mutexes: write 
M1004):
#0 update_context_from_user pthread_frame.c:335 (ffmpeg:x86_64+0x100f81abb)

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=083300bea935d125b83f60d7030f78a7ffb0f3df
---

 libavcodec/h264_slice.c|  8 +---
 libavcodec/pthread_frame.c | 18 ++
 libavcodec/thread.h|  2 +-
 libavcodec/utils.c |  7 ---
 4 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index fa1e9ae..d4d31cc 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -1423,14 +1423,14 @@ static int h264_field_start(H264Context *h, const 
H264SliceContext *sl,
  * We have to do that before the "dummy" in-between frame allocation,
  * since that can modify h->cur_pic_ptr. */
 if (h->first_field) {
+int last_field = last_pic_structure == PICT_BOTTOM_FIELD;
 av_assert0(h->cur_pic_ptr);
 av_assert0(h->cur_pic_ptr->f->buf[0]);
 assert(h->cur_pic_ptr->reference != DELAYED_PIC_REF);
 
 /* Mark old field/frame as completed */
-if (h->cur_pic_ptr->tf.owner == h->avctx) {
-ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX,
-  last_pic_structure == PICT_BOTTOM_FIELD);
+if (h->cur_pic_ptr->tf.owner[last_field] == h->avctx) {
+ff_thread_report_progress(&h->cur_pic_ptr->tf, INT_MAX, 
last_field);
 }
 
 /* figure out if we have a complementary field pair */
@@ -1568,7 +1568,9 @@ static int h264_field_start(H264Context *h, const 
H264SliceContext *sl,
 return AVERROR_INVALIDDATA;
 }
 } else {
+int field = h->picture_structure == PICT_BOTTOM_FIELD;
 release_unused_pictures(h, 0);
+h->cur_pic_ptr->tf.owner[field] = h->avctx;
 }
 /* Some macroblocks can be accessed before they're available in case
 * of lost slices, MBAFF or threading. */
diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index 9a6b83a..c246c2f 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -564,10 +564,11 @@ void ff_thread_report_progress(ThreadFrame *f, int n, int 
field)
 atomic_load_explicit(&progress[field], memory_order_relaxed) >= n)
 return;
 
-p = f->owner->internal->thread_ctx;
+p = f->owner[field]->internal->thread_ctx;
 
-if (f->owner->debug&FF_DEBUG_THREADS)
-av_log(f->owner, AV_LOG_DEBUG, "%p finished %d field %d\n", progress, 
n, field);
+if (f->owner[field]->debug&FF_DEBUG_THREADS)
+av_log(f->owner[field], AV_LOG_DEBUG,
+   "%p finished %d field %d\n", progress, n, field);
 
 pthread_mutex_lock(&p->progress_mutex);
 
@@ -586,10 +587,11 @@ void ff_thread_await_progress(ThreadFrame *f, int n, int 
field)
 atomic_load_explicit(&progress[field], memory_order_acquire) >= n)
 return;
 
-p = f->owner->internal->thread_ctx;
+p = f->owner[field]->internal->thread_ctx;
 
-if (f->owner->debug&FF_DEBUG_THREADS)
-av_log(f->owner, AV_LOG_DEBUG, "thread awaiting %d field %d from 
%p\n", n, field, progress);
+if (f->owner[field]->debug&FF_DEBUG_THREADS)
+av_log(f->owner[field], AV_LOG_DEBUG,
+   "thread awaiting %d field %d from %p\n", n, field, progress);
 
 pthread_mutex_lock(&p->progress_mutex);
 while (atomic_load_explicit(&progress[field], memory_order_relaxed) < n)
@@ -882,7 +884,7 @@ static int thread_get_buffer_internal(AVCodecContext 
*avctx, ThreadFrame *f, int
 PerThreadContext *p = avctx->internal->thread_ctx;
 int err;
 
-f->owner = avctx;
+f->owner[0] = f-&g

[FFmpeg-cvslog] idct_arm: remove use of ff_put/add_pixels_clamped function pointer.

2017-04-06 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Tue Apr  4 
11:20:39 2017 -0400| [40cbd686dc846935fb3f50cf77e575bd98649e3f] | committer: 
Ronald S. Bultje

idct_arm: remove use of ff_put/add_pixels_clamped function pointer.

Instead, hardcode the use of the _arm implementation of add_pixels,
and use the C version for put_pixels (as no arm-optimized version
exists). Since there's separate implementations of idct{,_put,_add}
for neon, this has no practical impact on performance.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=40cbd686dc846935fb3f50cf77e575bd98649e3f
---

 libavcodec/arm/idctdsp_init_arm.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavcodec/arm/idctdsp_init_arm.c 
b/libavcodec/arm/idctdsp_init_arm.c
index 43782b2..ebc90e4 100644
--- a/libavcodec/arm/idctdsp_init_arm.c
+++ b/libavcodec/arm/idctdsp_init_arm.c
@@ -39,28 +39,28 @@ static void j_rev_dct_arm_put(uint8_t *dest, ptrdiff_t 
line_size,
   int16_t *block)
 {
 ff_j_rev_dct_arm(block);
-ff_put_pixels_clamped(block, dest, line_size);
+ff_put_pixels_clamped_c(block, dest, line_size);
 }
 
 static void j_rev_dct_arm_add(uint8_t *dest, ptrdiff_t line_size,
   int16_t *block)
 {
 ff_j_rev_dct_arm(block);
-ff_add_pixels_clamped(block, dest, line_size);
+ff_add_pixels_clamped_arm(block, dest, line_size);
 }
 
 static void simple_idct_arm_put(uint8_t *dest, ptrdiff_t line_size,
 int16_t *block)
 {
 ff_simple_idct_arm(block);
-ff_put_pixels_clamped(block, dest, line_size);
+ff_put_pixels_clamped_c(block, dest, line_size);
 }
 
 static void simple_idct_arm_add(uint8_t *dest, ptrdiff_t line_size,
 int16_t *block)
 {
 ff_simple_idct_arm(block);
-ff_add_pixels_clamped(block, dest, line_size);
+ff_add_pixels_clamped_arm(block, dest, line_size);
 }
 
 av_cold void ff_idctdsp_init_arm(IDCTDSPContext *c, AVCodecContext *avctx,

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] x86/xvididct: remove use of ff_put/add_pixels_clamped function pointer.

2017-04-06 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Tue Apr  4 
11:21:43 2017 -0400| [b51d7d89f8bbce2d8bade0f5fbba2bbd5612ca3a] | committer: 
Ronald S. Bultje

x86/xvididct: remove use of ff_put/add_pixels_clamped function pointer.

Since there's separate SSE2 implementations of xvid_idct_put/add, this
patch has no practical impact on performance.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b51d7d89f8bbce2d8bade0f5fbba2bbd5612ca3a
---

 libavcodec/x86/xvididct_init.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libavcodec/x86/xvididct_init.c b/libavcodec/x86/xvididct_init.c
index fd10953..c7b5ead 100644
--- a/libavcodec/x86/xvididct_init.c
+++ b/libavcodec/x86/xvididct_init.c
@@ -30,25 +30,25 @@
 static void xvid_idct_mmx_put(uint8_t *dest, ptrdiff_t line_size, short *block)
 {
 ff_xvid_idct_mmx(block);
-ff_put_pixels_clamped(block, dest, line_size);
+ff_put_pixels_clamped_mmx(block, dest, line_size);
 }
 
 static void xvid_idct_mmx_add(uint8_t *dest, ptrdiff_t line_size, short *block)
 {
 ff_xvid_idct_mmx(block);
-ff_add_pixels_clamped(block, dest, line_size);
+ff_add_pixels_clamped_mmx(block, dest, line_size);
 }
 
 static void xvid_idct_mmxext_put(uint8_t *dest, ptrdiff_t line_size, short 
*block)
 {
 ff_xvid_idct_mmxext(block);
-ff_put_pixels_clamped(block, dest, line_size);
+ff_put_pixels_clamped_mmx(block, dest, line_size);
 }
 
 static void xvid_idct_mmxext_add(uint8_t *dest, ptrdiff_t line_size, short 
*block)
 {
 ff_xvid_idct_mmxext(block);
-ff_add_pixels_clamped(block, dest, line_size);
+ff_add_pixels_clamped_mmx(block, dest, line_size);
 }
 #endif
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] huffyuv: assign correct per-thread avctx pointer to HYuvContext::avctx.

2017-04-05 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Mon Apr  3 
22:28:22 2017 -0400| [7c7e7c44a6eb68eca861e45cb2ce78f582b12c69] | committer: 
Ronald S. Bultje

huffyuv: assign correct per-thread avctx pointer to HYuvContext::avctx.

Fixes the following tsan warning when running fate-vsynth_lena-ffvhuff:

WARNING: ThreadSanitizer: data race (pid=6484)
  Write of size 8 at 0x7d64000154b8 by main thread (mutexes: write M1331):
#0 update_context_from_user src/libavcodec/pthread_frame.c:331 
(ffmpeg+0x00dca887)
[..]
  Previous read of size 8 at 0x7d64000154b8 by thread T2 (mutexes: write M1334):
#0 draw_slice src/libavcodec/huffyuvdec.c:857 (ffmpeg+0x00bcc86f)

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7c7e7c44a6eb68eca861e45cb2ce78f582b12c69
---

 libavcodec/huffyuvdec.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libavcodec/huffyuvdec.c b/libavcodec/huffyuvdec.c
index 5572b98..979c4b9 100644
--- a/libavcodec/huffyuvdec.c
+++ b/libavcodec/huffyuvdec.c
@@ -579,6 +579,8 @@ static av_cold int decode_init_thread_copy(AVCodecContext 
*avctx)
 HYuvContext *s = avctx->priv_data;
 int i, ret;
 
+s->avctx = avctx;
+
 if ((ret = ff_huffyuv_alloc_temp(s)) < 0) {
 ff_huffyuv_common_end(s);
 return ret;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] checkasm: vp9dsp: benchmark all sub-IDCTs (but not WHT or ADST).

2017-04-04 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Fri Nov 18 
00:17:02 2016 +0200| [06fec74cacbb0ef7f3e5ea0e6c9ced1b6fd7565d] | committer: 
Martin Storsjö

checkasm: vp9dsp: benchmark all sub-IDCTs (but not WHT or ADST).

Signed-off-by: Martin Storsjö 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=06fec74cacbb0ef7f3e5ea0e6c9ced1b6fd7565d
---

 tests/checkasm/vp9dsp.c | 21 ++---
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/tests/checkasm/vp9dsp.c b/tests/checkasm/vp9dsp.c
index 690e0cf..25f9dd1 100644
--- a/tests/checkasm/vp9dsp.c
+++ b/tests/checkasm/vp9dsp.c
@@ -269,14 +269,20 @@ static void check_itxfm(void)
 int n_txtps = tx < TX_32X32 ? N_TXFM_TYPES : 1;
 
 for (txtp = 0; txtp < n_txtps; txtp++) {
-if (check_func(dsp.itxfm_add[tx][txtp], "vp9_inv_%s_%dx%d_add",
-   tx == 4 ? "wht_wht" : txtp_types[txtp], sz, sz)) {
-randomize_buffers();
-ftx(coef, tx, txtp, sz, BIT_DEPTH);
-
-for (sub = (txtp == 0) ? 1 : 2; sub <= sz; sub <<= 1) {
+// skip testing sub-IDCTs for WHT or ADST since they don't
+// implement it in any of the SIMD functions. If they do,
+// consider changing this to ensure we have complete test
+// coverage
+for (sub = (txtp == 0 && tx < 4) ? 1 : sz; sub <= sz; sub <<= 1) {
+if (check_func(dsp.itxfm_add[tx][txtp],
+   "vp9_inv_%s_%dx%d_sub%d_add",
+   tx == 4 ? "wht_wht" : txtp_types[txtp],
+   sz, sz, sub)) {
 int eob;
 
+randomize_buffers();
+ftx(coef, tx, txtp, sz, BIT_DEPTH);
+
 if (sub < sz) {
 eob = copy_subcoefs(subcoef0, coef, tx, txtp,
 sz, sub, BIT_DEPTH);
@@ -294,8 +300,9 @@ static void check_itxfm(void)
 !iszero(subcoef0, sz * sz * SIZEOF_COEF) ||
 !iszero(subcoef1, sz * sz * SIZEOF_COEF))
 fail();
+
+bench_new(dst, sz * SIZEOF_PIXEL, coef, eob);
 }
-bench_new(dst, sz * SIZEOF_PIXEL, coef, sz * sz);
 }
 }
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] png: set AVFrame flags/fields before calling setup_finished().

2017-04-03 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Mon Apr  3 
14:43:40 2017 -0400| [eff2861a757b8a46398e6fcb844b960b4775daad] | committer: 
Ronald S. Bultje

png: set AVFrame flags/fields before calling setup_finished().

Fixes tsan warnings in fate-apng:

WARNING: ThreadSanitizer: data race (pid=51230)
  Read of size 4 at 0x7d5042fc by main thread (mutexes: write M1000):
#0 frame_copy_props frame.c:302 (ffmpeg:x86_64+0x1019a35d6)
[..]
  Previous write of size 4 at 0x7d5042fc by thread T1 (mutexes: write M997):
#0 decode_idat_chunk pngdec.c:708 (ffmpeg:x86_64+0x100f5562a)

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=eff2861a757b8a46398e6fcb844b960b4775daad
---

 libavcodec/pngdec.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/pngdec.c b/libavcodec/pngdec.c
index d184c34..1025519 100644
--- a/libavcodec/pngdec.c
+++ b/libavcodec/pngdec.c
@@ -701,12 +701,12 @@ static int decode_idat_chunk(AVCodecContext *avctx, 
PNGDecContext *s,
 if ((ret = ff_thread_get_buffer(avctx, &s->previous_picture, 
AV_GET_BUFFER_FLAG_REF)) < 0)
 return ret;
 }
-ff_thread_finish_setup(avctx);
-
 p->pict_type= AV_PICTURE_TYPE_I;
 p->key_frame= 1;
 p->interlaced_frame = !!s->interlace_type;
 
+ff_thread_finish_setup(avctx);
+
 /* compute the compressed row size */
 if (!s->interlace_type) {
 s->crow_size = s->row_size + 1;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] png: split header state and data state in two separate variables.

2017-04-03 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Mon Apr  3 
10:08:29 2017 -0400| [478f1c3d5e5463a284ea7efecfc62d47ba3be11a] | committer: 
Ronald S. Bultje

png: split header state and data state in two separate variables.

Fixes a reported (but false) race condition in tsan for fate-apng:

WARNING: ThreadSanitizer: data race (pid=6274)
  Read of size 4 at 0x7d680001ec78 by main thread (mutexes: write M1338):
#0 update_thread_context src/libavcodec/pngdec.c:1456 
(ffmpeg+0x00dacf0c)
[..]
  Previous write of size 4 at 0x7d680001ec78 by thread T1 (mutexes: write 
M1335):
#0 decode_idat_chunk src/libavcodec/pngdec.c:737 (ffmpeg+0x00dae951)

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=478f1c3d5e5463a284ea7efecfc62d47ba3be11a
---

 libavcodec/png.h|  5 -
 libavcodec/pngdec.c | 65 -
 2 files changed, 39 insertions(+), 31 deletions(-)

diff --git a/libavcodec/png.h b/libavcodec/png.h
index 948c2f7..e967fcf 100644
--- a/libavcodec/png.h
+++ b/libavcodec/png.h
@@ -42,11 +42,6 @@
 #define PNG_FILTER_VALUE_PAETH 4
 #define PNG_FILTER_VALUE_MIXED 5
 
-#define PNG_IHDR  0x0001
-#define PNG_IDAT  0x0002
-#define PNG_ALLIMAGE  0x0004
-#define PNG_PLTE  0x0008
-
 #define NB_PASSES 7
 
 #define PNGSIG 0x89504e470d0a1a0a
diff --git a/libavcodec/pngdec.c b/libavcodec/pngdec.c
index c08665b..d184c34 100644
--- a/libavcodec/pngdec.c
+++ b/libavcodec/pngdec.c
@@ -36,6 +36,16 @@
 
 #include <zlib.h>
 
+enum PNGHeaderState {
+PNG_IHDR = 1 << 0,
+PNG_PLTE = 1 << 1,
+};
+
+enum PNGImageState {
+PNG_IDAT = 1 << 0,
+PNG_ALLIMAGE = 1 << 1,
+};
+
 typedef struct PNGDecContext {
 PNGDSPContext dsp;
 AVCodecContext *avctx;
@@ -45,7 +55,8 @@ typedef struct PNGDecContext {
 ThreadFrame last_picture;
 ThreadFrame picture;
 
-int state;
+enum PNGHeaderState hdr_state;
+enum PNGImageState pic_state;
 int width, height;
 int cur_w, cur_h;
 int last_w, last_h;
@@ -334,7 +345,7 @@ static void png_handle_row(PNGDecContext *s)
 }
 s->y++;
 if (s->y == s->cur_h) {
-s->state |= PNG_ALLIMAGE;
+s->pic_state |= PNG_ALLIMAGE;
 if (s->filter_type == PNG_FILTER_TYPE_LOCO) {
 if (s->bit_depth == 16) {
 deloco_rgb16((uint16_t *)ptr, s->row_size / 2,
@@ -369,7 +380,7 @@ static void png_handle_row(PNGDecContext *s)
 memset(s->last_row, 0, s->row_size);
 for (;;) {
 if (s->pass == NB_PASSES - 1) {
-s->state |= PNG_ALLIMAGE;
+s->pic_state |= PNG_ALLIMAGE;
 goto the_end;
 } else {
 s->pass++;
@@ -404,7 +415,7 @@ static int png_decode_idat(PNGDecContext *s, int length)
 return AVERROR_EXTERNAL;
 }
 if (s->zstream.avail_out == 0) {
-if (!(s->state & PNG_ALLIMAGE)) {
+if (!(s->pic_state & PNG_ALLIMAGE)) {
 png_handle_row(s);
 }
 s->zstream.avail_out = s->crow_size;
@@ -541,12 +552,12 @@ static int decode_ihdr_chunk(AVCodecContext *avctx, 
PNGDecContext *s,
 if (length != 13)
 return AVERROR_INVALIDDATA;
 
-if (s->state & PNG_IDAT) {
+if (s->pic_state & PNG_IDAT) {
 av_log(avctx, AV_LOG_ERROR, "IHDR after IDAT\n");
 return AVERROR_INVALIDDATA;
 }
 
-if (s->state & PNG_IHDR) {
+if (s->hdr_state & PNG_IHDR) {
 av_log(avctx, AV_LOG_ERROR, "Multiple IHDR\n");
 return AVERROR_INVALIDDATA;
 }
@@ -569,7 +580,7 @@ static int decode_ihdr_chunk(AVCodecContext *avctx, 
PNGDecContext *s,
 s->filter_type  = bytestream2_get_byte(&s->gb);
 s->interlace_type   = bytestream2_get_byte(&s->gb);
 bytestream2_skip(&s->gb, 4); /* crc */
-s->state |= PNG_IHDR;
+s->hdr_state |= PNG_IHDR;
 if (avctx->debug & FF_DEBUG_PICT_INFO)
 av_log(avctx, AV_LOG_DEBUG, "width=%d height=%d depth=%d color_type=%d 
"
 "compression_type=%d filter_type=%d interlace_type=%d\n",
@@ -585,7 +596,7 @@ error:
 
 static int decode_phys_chunk(AVCodecContext *avctx, PNGDecContext *s)
 {
-if (s->state & PNG_IDAT) {
+if (s->pic_state & PNG_IDAT) {
 av_log(avctx, AV_LOG_ERROR, "pHYs after IDAT\n");
 return AVERROR_INVALIDDATA;
 }
@@ -605,11 +616,11 @@ static int decode_idat_chunk(AVCodecContext *avctx, 
PNGDecContext *s,
 int ret;
 size_t byte_depth = s->bit_depth > 8 ? 2 : 1;
 
-if (!(s->state & PNG_IHDR)) {
+if (!(s->hdr_state & PNG_IHDR)) {
 av_log(avctx, AV_LOG_ERROR, "IDAT without IHDR\n"

[FFmpeg-cvslog] hevc: only write to max_ra and pocTid0 in the first slice.

2017-04-03 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Mon Apr  3 
09:51:10 2017 -0400| [1f50baa2b2da7fdbfccf0662883f38a763ff6619] | committer: 
Ronald S. Bultje

hevc: only write to max_ra and pocTid0 in the first slice.

Values from subsequent slices are guaranteed to be identical (since
poc and nal_unit_type are checked to be the same between slices), so
this doesn't affect output in any way, but does resolve the remaining
reported race conditions (by tsan) in fate-hevc.

In practice, this fixes tsan warnings like this:

WARNING: ThreadSanitizer: data race (pid=25334)
  Read of size 4 at 0x7d9c0001adcc by main thread (mutexes: write M1386):
#0 hevc_update_thread_context src/libavcodec/hevcdec.c:3310 
(ffmpeg+0x00b41c7c)
[..]
  Previous write of size 4 at 0x7d9c0001adcc by thread T1 (mutexes: write 
M1383):
#0 hls_slice_header src/libavcodec/hevcdec.c:596 (ffmpeg+0x00b43a22)

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1f50baa2b2da7fdbfccf0662883f38a763ff6619
---

 libavcodec/hevcdec.c | 32 
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index ef21595..f9e8ff0 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -585,7 +585,7 @@ static int hls_slice_header(HEVCContext *s)
 }
 
 /* 8.3.1 */
-if (s->temporal_id == 0 &&
+if (sh->first_slice_in_pic_flag && s->temporal_id == 0 &&
 s->nal_unit_type != HEVC_NAL_TRAIL_N &&
 s->nal_unit_type != HEVC_NAL_TSA_N   &&
 s->nal_unit_type != HEVC_NAL_STSA_N  &&
@@ -2771,25 +2771,25 @@ static int decode_nal_unit(HEVCContext *s, const 
H2645NAL *nal)
 if (ret < 0)
 return ret;
 
-if (s->max_ra == INT_MAX) {
-if (s->nal_unit_type == HEVC_NAL_CRA_NUT || IS_BLA(s)) {
-s->max_ra = s->poc;
+if (s->sh.first_slice_in_pic_flag) {
+if (s->max_ra == INT_MAX) {
+if (s->nal_unit_type == HEVC_NAL_CRA_NUT || IS_BLA(s)) {
+s->max_ra = s->poc;
+} else {
+if (IS_IDR(s))
+s->max_ra = INT_MIN;
+}
+}
+
+if ((s->nal_unit_type == HEVC_NAL_RASL_R || s->nal_unit_type == 
HEVC_NAL_RASL_N) &&
+s->poc <= s->max_ra) {
+s->is_decoded = 0;
+break;
 } else {
-if (IS_IDR(s))
+if (s->nal_unit_type == HEVC_NAL_RASL_R && s->poc > s->max_ra)
 s->max_ra = INT_MIN;
 }
-}
-
-if ((s->nal_unit_type == HEVC_NAL_RASL_R || s->nal_unit_type == 
HEVC_NAL_RASL_N) &&
-s->poc <= s->max_ra) {
-s->is_decoded = 0;
-break;
-} else {
-if (s->nal_unit_type == HEVC_NAL_RASL_R && s->poc > s->max_ra)
-s->max_ra = INT_MIN;
-}
 
-if (s->sh.first_slice_in_pic_flag) {
 ret = hevc_frame_start(s);
 if (ret < 0)
 return ret;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] pthread_frame: call update_context_from_user() after acquiring lock.

2017-04-03 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Mon Apr  3 
09:48:53 2017 -0400| [1269cd5b6f540bef5913bf134d2f461aac50d70b] | committer: 
Ronald S. Bultje

pthread_frame: call update_context_from_user() after acquiring lock.

Otherwise the thread may still be in the middle of decoding a previous
frame, which would effectively trigger a race condition on any field
concurrently read and written.

In practice, this fixes tsan warnings like the following:

WARNING: ThreadSanitizer: data race (pid=17380)
  Write of size 4 at 0x7d64000160fc by main thread:
#0 update_context_from_user src/libavcodec/pthread_frame.c:335 
(ffmpeg+0x00dca515)
[..]
  Previous read of size 4 at 0x7d64000160fc by thread T2 (mutexes: write M1821):
#0 ff_thread_report_progress src/libavcodec/pthread_frame.c:565 
(ffmpeg+0x00dcb08a)

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1269cd5b6f540bef5913bf134d2f461aac50d70b
---

 libavcodec/pthread_frame.c | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index 4e1ad9d..9a6b83a 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -380,7 +380,8 @@ static void release_delayed_buffers(PerThreadContext *p)
 }
 }
 
-static int submit_packet(PerThreadContext *p, AVPacket *avpkt)
+static int submit_packet(PerThreadContext *p, AVCodecContext *user_avctx,
+ AVPacket *avpkt)
 {
 FrameThreadContext *fctx = p->parent;
 PerThreadContext *prev_thread = fctx->prev_thread;
@@ -392,6 +393,12 @@ static int submit_packet(PerThreadContext *p, AVPacket 
*avpkt)
 
 pthread_mutex_lock(&p->mutex);
 
+ret = update_context_from_user(p->avctx, user_avctx);
+if (ret) {
+pthread_mutex_unlock(&p->mutex);
+return ret;
+}
+
 release_delayed_buffers(p);
 
 if (prev_thread) {
@@ -480,10 +487,7 @@ int ff_thread_decode_frame(AVCodecContext *avctx,
  */
 
 p = &fctx->threads[fctx->next_decoding];
-err = update_context_from_user(p->avctx, avctx);
-if (err)
-goto finish;
-err = submit_packet(p, avpkt);
+err = submit_packet(p, avctx, avpkt);
 if (err)
 goto finish;
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] ffmpeg: make transcode_init_done atomic.

2017-04-03 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Fri Mar 31 
11:27:20 2017 -0400| [76d8c77430e9e0110623705bfb54d922cc2ac3ea] | committer: 
Ronald S. Bultje

ffmpeg: make transcode_init_done atomic.

Should fix tsan warnings in fate-fifo-muxer-h264/wav:

WARNING: ThreadSanitizer: data race (pid=26552)
  Write of size 4 at 0x01e0d7c0 by main thread:
#0 transcode_init src/ffmpeg.c:3761 (ffmpeg+0x0050ca1c)
[..]
  Previous read of size 4 at 0x01e0d7c0 by thread T1:
#0 decode_interrupt_cb src/ffmpeg.c:460 (ffmpeg+0x004fde19)

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=76d8c77430e9e0110623705bfb54d922cc2ac3ea
---

 ffmpeg.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/ffmpeg.c b/ffmpeg.c
index 11faf0d..ea03179 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -30,6 +30,7 @@
 #include <stdlib.h>
 #include <errno.h>
 #include <limits.h>
+#include <stdatomic.h>
 #include <stdint.h>
 
 #if HAVE_IO_H
@@ -319,7 +320,7 @@ void term_exit(void)
 
 static volatile int received_sigterm = 0;
 static volatile int received_nb_signals = 0;
-static volatile int transcode_init_done = 0;
+static atomic_int transcode_init_done = ATOMIC_VAR_INIT(0);
 static volatile int ffmpeg_exited = 0;
 static int main_return_code = 0;
 
@@ -457,7 +458,7 @@ static int read_key(void)
 
 static int decode_interrupt_cb(void *ctx)
 {
-return received_nb_signals > transcode_init_done;
+return received_nb_signals > atomic_load(&transcode_init_done);
 }
 
 const AVIOInterruptCB int_cb = { decode_interrupt_cb, NULL };
@@ -612,7 +613,7 @@ static void ffmpeg_cleanup(int ret)
 if (received_sigterm) {
 av_log(NULL, AV_LOG_INFO, "Exiting normally, received signal %d.\n",
(int) received_sigterm);
-} else if (ret && transcode_init_done) {
+} else if (ret && atomic_load(&transcode_init_done)) {
 av_log(NULL, AV_LOG_INFO, "Conversion failed!\n");
 }
 term_exit();
@@ -3758,7 +3759,7 @@ static int transcode_init(void)
 return ret;
 }
 
-transcode_init_done = 1;
+atomic_store(&transcode_init_done, 1);
 
 return 0;
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] codec_desc: mark some lossless audio codecs as intraonly.

2017-04-03 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Mon Apr  3 
09:36:32 2017 -0400| [467a0538100b193d205a922737358dcc8e957e94] | committer: 
Ronald S. Bultje

codec_desc: mark some lossless audio codecs as intraonly.

Fixes tsan warnings in several audio codecs (flac, alac, wavpack, tta
and tak) that look like this:

WARNING: ThreadSanitizer: data race (pid=14340)
  Read of size 4 at 0x7d64000169d8 by main thread (mutexes: write M1335):
#0 update_context_from_thread src/libavcodec/pthread_frame.c:284 
(ffmpeg+0x00dc795f)
[..]
  Previous write of size 4 at 0x7d64000169d8 by thread T1 (mutexes: write 
M1333):
#0 wavpack_decode_block src/libavcodec/wavpack.c:1012 
(ffmpeg+0x0112b175)

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=467a0538100b193d205a922737358dcc8e957e94
---

 libavcodec/avcodec.h|  2 +-
 libavcodec/codec_desc.c | 11 ++-
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index d780477..6f38b3f 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -724,7 +724,7 @@ typedef struct AVCodecDescriptor {
 
 /**
  * Codec uses only intra compression.
- * Video codecs only.
+ * Video and audio codecs only.
  */
 #define AV_CODEC_PROP_INTRA_ONLY    (1 << 0)
 /**
diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index 9711019..041b797 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -2313,7 +2313,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
 .type  = AVMEDIA_TYPE_AUDIO,
 .name  = "flac",
 .long_name = NULL_IF_CONFIG_SMALL("FLAC (Free Lossless Audio Codec)"),
-.props = AV_CODEC_PROP_LOSSLESS,
+.props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
 },
 {
 .id= AV_CODEC_ID_MP3ADU,
@@ -2341,7 +2341,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
 .type  = AVMEDIA_TYPE_AUDIO,
 .name  = "alac",
 .long_name = NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),
-.props = AV_CODEC_PROP_LOSSLESS,
+.props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
 },
 {
 .id= AV_CODEC_ID_WESTWOOD_SND1,
@@ -2383,7 +2383,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
 .type  = AVMEDIA_TYPE_AUDIO,
 .name  = "tta",
 .long_name = NULL_IF_CONFIG_SMALL("TTA (True Audio)"),
-.props = AV_CODEC_PROP_LOSSLESS,
+.props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
 },
 {
 .id= AV_CODEC_ID_SMACKAUDIO,
@@ -2404,7 +2404,8 @@ static const AVCodecDescriptor codec_descriptors[] = {
 .type  = AVMEDIA_TYPE_AUDIO,
 .name  = "wavpack",
 .long_name = NULL_IF_CONFIG_SMALL("WavPack"),
-.props = AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
+.props = AV_CODEC_PROP_INTRA_ONLY |
+ AV_CODEC_PROP_LOSSY | AV_CODEC_PROP_LOSSLESS,
 },
 {
 .id= AV_CODEC_ID_DSICINAUDIO,
@@ -2712,7 +2713,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
 .type  = AVMEDIA_TYPE_AUDIO,
 .name  = "tak",
 .long_name = NULL_IF_CONFIG_SMALL("TAK (Tom's lossless Audio 
Kompressor)"),
-.props = AV_CODEC_PROP_LOSSLESS,
+.props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
 },
 {
 .id= AV_CODEC_ID_METASOUND,

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] h264: don't sync pic_id between threads.

2017-04-03 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Mon Apr  3 
09:25:15 2017 -0400| [e72690b18da064f6c0f04f09ccde72b6636e3159] | committer: 
Ronald S. Bultje

h264: don't sync pic_id between threads.

This is how the ref list manager links bitstream IDs to H264Picture/Ref
objects, and is local to the producer thread. There is no need for the
consumer thread to know the bitstream IDs of its references in their
respective producer threads.

In practice, this fixes tsan warnings when running fate-h264:

WARNING: ThreadSanitizer: data race (pid=19295)
  Read of size 4 at 0x7dbce614 by main thread (mutexes: write M1914):
#0 ff_h264_ref_picture src/libavcodec/h264_picture.c:112 
(ffmpeg+0x013b3709)
[..]
  Previous write of size 4 at 0x7dbce614 by thread T2 (mutexes: write 
M1917):
#0 build_def_list src/libavcodec/h264_refs.c:91 (ffmpeg+0x013b46cf)

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e72690b18da064f6c0f04f09ccde72b6636e3159
---

 libavcodec/h264_picture.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libavcodec/h264_picture.c b/libavcodec/h264_picture.c
index db96737..2dbe5ee 100644
--- a/libavcodec/h264_picture.c
+++ b/libavcodec/h264_picture.c
@@ -109,7 +109,6 @@ int ff_h264_ref_picture(H264Context *h, H264Picture *dst, 
H264Picture *src)
 dst->poc   = src->poc;
 dst->frame_num = src->frame_num;
 dst->mmco_reset= src->mmco_reset;
-dst->pic_id= src->pic_id;
 dst->long_ref  = src->long_ref;
 dst->mbaff = src->mbaff;
 dst->field_picture = src->field_picture;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] checkasm: add vp9dsp.itxfm_add tests.

2017-03-31 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Tue Sep 22 
14:24:27 2015 -0400| [0b37cd09a67c3ba4db044404b99c65a32b4ad932] | committer: 
Martin Storsjö

checkasm: add vp9dsp.itxfm_add tests.

This includes fixes by Henrik Gramner.

The forward transforms are derived from the reference encoder.

Signed-off-by: Martin Storsjö 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=0b37cd09a67c3ba4db044404b99c65a32b4ad932
---

 tests/checkasm/vp9dsp.c | 272 
 1 file changed, 272 insertions(+)

diff --git a/tests/checkasm/vp9dsp.c b/tests/checkasm/vp9dsp.c
index f0cc2a7..690e0cf 100644
--- a/tests/checkasm/vp9dsp.c
+++ b/tests/checkasm/vp9dsp.c
@@ -18,13 +18,16 @@
  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
+#include <math.h>
 #include <string.h>
 
 #include "libavutil/common.h"
 #include "libavutil/internal.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/mathematics.h"
 
 #include "libavcodec/vp9.h"
+#include "libavcodec/vp9data.h"
 
 #include "checkasm.h"
 
@@ -33,6 +36,274 @@ static const uint32_t pixel_mask[3] = { 0xffffffff, 
0x03ff03ff, 0x0fff0fff };
 #define BIT_DEPTH 8
 #define SIZEOF_PIXEL ((BIT_DEPTH + 7) / 8)
 
+#define randomize_buffers() \
+do { \
+uint32_t mask = pixel_mask[(BIT_DEPTH - 8) >> 1];  \
+for (y = 0; y < sz; y++) { \
+for (x = 0; x < sz * SIZEOF_PIXEL; x += 4) {   \
+uint32_t r = rnd() & mask; \
+AV_WN32A(dst + y * sz * SIZEOF_PIXEL + x, r);  \
+AV_WN32A(src + y * sz * SIZEOF_PIXEL + x, rnd() & mask);   \
+}  \
+for (x = 0; x < sz; x++) { \
+if (BIT_DEPTH == 8) {  \
+coef[y * sz + x] = src[y * sz + x] - dst[y * sz + x];  \
+} else {   \
+((int32_t *) coef)[y * sz + x] =   \
+((uint16_t *) src)[y * sz + x] -   \
+((uint16_t *) dst)[y * sz + x];\
+}  \
+}  \
+}  \
+} while(0)
+
+// wht function copied from libvpx
+static void fwht_1d(double *out, const double *in, int sz)
+{
+double t0 = in[0] + in[1];
+double t3 = in[3] - in[2];
+double t4 = trunc((t0 - t3) * 0.5);
+double t1 = t4 - in[1];
+double t2 = t4 - in[2];
+
+out[0] = t0 - t2;
+out[1] = t2;
+out[2] = t3 + t1;
+out[3] = t1;
+}
+
+// standard DCT-II
+static void fdct_1d(double *out, const double *in, int sz)
+{
+int k, n;
+
+for (k = 0; k < sz; k++) {
+out[k] = 0.0;
+for (n = 0; n < sz; n++)
+out[k] += in[n] * cos(M_PI * (2 * n + 1) * k / (sz * 2.0));
+}
+out[0] *= M_SQRT1_2;
+}
+
+// see "Towards jointly optimal spatial prediction and adaptive transform in
+// video/image coding", by J. Han, A. Saxena, and K. Rose
+// IEEE Proc. ICASSP, pp. 726-729, Mar. 2010.
+static void fadst4_1d(double *out, const double *in, int sz)
+{
+int k, n;
+
+for (k = 0; k < sz; k++) {
+out[k] = 0.0;
+for (n = 0; n < sz; n++)
+out[k] += in[n] * sin(M_PI * (n + 1) * (2 * k + 1) / (sz * 2.0 + 
1.0));
+}
+}
+
+// see "A Butterfly Structured Design of The Hybrid Transform Coding Scheme",
+// by Jingning Han, Yaowu Xu, and Debargha Mukherjee
+// 
http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/41418.pdf
+static void fadst_1d(double *out, const double *in, int sz)
+{
+int k, n;
+
+for (k = 0; k < sz; k++) {
+out[k] = 0.0;
+for (n = 0; n < sz; n++)
+out[k] += in[n] * sin(M_PI * (2 * n + 1) * (2 * k + 1) / (sz * 
4.0));
+}
+}
+
+typedef void (*ftx1d_fn)(double *out, const double *in, int sz);
+static void ftx_2d(double *out, const double *in, enum TxfmMode tx,
+   enum TxfmType txtp, int sz)
+{
+static const double scaling_factors[5][4] = {
+{ 4.0, 16.0 * M_SQRT1_2 / 3.0, 16.0 * M_SQRT1_2 / 3.0, 32.0 / 9.0 },
+{ 2.0, 2.0, 2.0, 2.0 },
+{ 1.0, 1.0, 1.0, 1.0 },
+{ 0.25 },
+{ 4.0 }
+};
+static const ftx1d_fn ftx1d_tbl[5][4][2] = {
+{
+{ fdct_1d, fdct_1d },
+{ fadst4_1d, fdct_1d },
+{ fdct_1d, fadst4_1d },
+{ fadst4_1d, fadst4_1d },
+}, {
+

[FFmpeg-cvslog] lagarith: assign correct per-thread value to LagarithContext::avctx.

2017-03-31 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Tue Mar 28 
20:11:43 2017 -0400| [081c21ca55d72921125848c8c2c191a6ff8b5f88] | committer: 
Ronald S. Bultje

lagarith: assign correct per-thread value to LagarithContext::avctx.

This fixes race conditions reported by tsan in fate-lagarith. The races
were because each thread's LagarithContext::avctx was set to the first
thread's AVCodecContext.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=081c21ca55d72921125848c8c2c191a6ff8b5f88
---

 libavcodec/lagarith.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/libavcodec/lagarith.c b/libavcodec/lagarith.c
index f03305f..469eec4 100644
--- a/libavcodec/lagarith.c
+++ b/libavcodec/lagarith.c
@@ -730,6 +730,16 @@ static av_cold int lag_decode_init(AVCodecContext *avctx)
 return 0;
 }
 
+#if HAVE_THREADS
+static av_cold int lag_decode_init_thread_copy(AVCodecContext *avctx)
+{
+LagarithContext *l = avctx->priv_data;
+l->avctx = avctx;
+
+return 0;
+}
+#endif
+
 static av_cold int lag_decode_end(AVCodecContext *avctx)
 {
 LagarithContext *l = avctx->priv_data;
@@ -746,6 +756,7 @@ AVCodec ff_lagarith_decoder = {
 .id = AV_CODEC_ID_LAGARITH,
 .priv_data_size = sizeof(LagarithContext),
 .init   = lag_decode_init,
+.init_thread_copy = ONLY_IF_THREADS_ENABLED(lag_decode_init_thread_copy),
 .close  = lag_decode_end,
 .decode = lag_decode_frame,
 .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] h264: don't write to source picture object in ff_h264_ref_picture().

2017-03-31 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Wed Mar 29 
09:03:49 2017 -0400| [b5300c8ad8c5384ab3654d6cb27693422bc424e7] | committer: 
Ronald S. Bultje

h264: don't write to source picture object in ff_h264_ref_picture().

Doing so is analogous to writing to source data in memcpy(), and causes
(harmless) tsan warnings in fate-h264.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b5300c8ad8c5384ab3654d6cb27693422bc424e7
---

 libavcodec/h264_picture.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/h264_picture.c b/libavcodec/h264_picture.c
index f634d2a..db96737 100644
--- a/libavcodec/h264_picture.c
+++ b/libavcodec/h264_picture.c
@@ -70,8 +70,8 @@ int ff_h264_ref_picture(H264Context *h, H264Picture *dst, 
H264Picture *src)
 
 av_assert0(!dst->f->buf[0]);
 av_assert0(src->f->buf[0]);
+av_assert0(src->tf.f == src->f);
 
-src->tf.f = src->f;
 dst->tf.f = dst->f;
 ret = ff_thread_ref_frame(&dst->tf, &src->tf);
 if (ret < 0)

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] codec_desc: mark fraps as an intra-only codec.

2017-03-31 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Tue Mar 28 
19:40:59 2017 -0400| [9e2050b698b204bcc4af39e014b3e621294a114a] | committer: 
Ronald S. Bultje

codec_desc: mark fraps as an intra-only codec.

Fixes reported race conditions by tsan in fate-avio-direct.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9e2050b698b204bcc4af39e014b3e621294a114a
---

 libavcodec/codec_desc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/codec_desc.c b/libavcodec/codec_desc.c
index 4e98cf9..9711019 100644
--- a/libavcodec/codec_desc.c
+++ b/libavcodec/codec_desc.c
@@ -520,7 +520,7 @@ static const AVCodecDescriptor codec_descriptors[] = {
 .type  = AVMEDIA_TYPE_VIDEO,
 .name  = "fraps",
 .long_name = NULL_IF_CONFIG_SMALL("Fraps"),
-.props = AV_CODEC_PROP_LOSSLESS,
+.props = AV_CODEC_PROP_INTRA_ONLY | AV_CODEC_PROP_LOSSLESS,
 },
 {
 .id= AV_CODEC_ID_TRUEMOTION2,

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] h264: only assign H264Picture::mbaff for first slice.

2017-03-31 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Wed Mar 29 
09:33:47 2017 -0400| [1ddc37051f11bd4bbadbcd17ea49b76a965d6a47] | committer: 
Ronald S. Bultje

h264: only assign H264Picture::mbaff for first slice.

The value must be identical between slices, since mbaff depends on
picture_structure and sps, both of which are checked to be identical
to the first slice before this point.

In practice, this silences some tsan warnings in fate-h264.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1ddc37051f11bd4bbadbcd17ea49b76a965d6a47
---

 libavcodec/h264_direct.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/libavcodec/h264_direct.c b/libavcodec/h264_direct.c
index 4e7202b..a7a107c 100644
--- a/libavcodec/h264_direct.c
+++ b/libavcodec/h264_direct.c
@@ -138,7 +138,11 @@ void ff_h264_direct_ref_list_init(const H264Context *const 
h, H264SliceContext *
 memcpy(cur->ref_poc[1],   cur->ref_poc[0],   sizeof(cur->ref_poc[0]));
 }
 
-cur->mbaff = FRAME_MBAFF(h);
+if (h->current_slice == 0) {
+cur->mbaff = FRAME_MBAFF(h);
+} else {
+av_assert0(cur->mbaff == FRAME_MBAFF(h));
+}
 
 sl->col_fieldoff = 0;
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] fic: set pict_type/key_frame after (instead of during) slice decoding.

2017-03-31 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Tue Mar 28 
19:52:07 2017 -0400| [73f863d751df84db7a0ca1bd83cdff1b95dc94dd] | committer: 
Ronald S. Bultje

fic: set pict_type/key_frame after (instead of during) slice decoding.

This fixes a race condition that was already documented in the source
code, and is also reported by tsan in fate-fic-avi.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=73f863d751df84db7a0ca1bd83cdff1b95dc94dd
---

 libavcodec/fic.c | 29 ++---
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/libavcodec/fic.c b/libavcodec/fic.c
index d3952a4..2bec3d7 100644
--- a/libavcodec/fic.c
+++ b/libavcodec/fic.c
@@ -34,6 +34,7 @@ typedef struct FICThreadContext {
 int slice_h;
 int src_size;
 int y_off;
+int p_frame;
 } FICThreadContext;
 
 typedef struct FICContext {
@@ -133,16 +134,13 @@ static void fic_idct_put(uint8_t *dst, int stride, 
int16_t *block)
 }
 }
 static int fic_decode_block(FICContext *ctx, GetBitContext *gb,
-uint8_t *dst, int stride, int16_t *block)
+uint8_t *dst, int stride, int16_t *block, int 
*is_p)
 {
 int i, num_coeff;
 
 /* Is it a skip block? */
 if (get_bits1(gb)) {
-/* This is a P-frame. */
-ctx->frame->key_frame = 0;
-ctx->frame->pict_type = AV_PICTURE_TYPE_P;
-
+*is_p = 1;
 return 0;
 }
 
@@ -182,7 +180,8 @@ static int fic_decode_slice(AVCodecContext *avctx, void 
*tdata)
 for (x = 0; x < (ctx->aligned_width >> !!p); x += 8) {
 int ret;
 
-if ((ret = fic_decode_block(ctx, &gb, dst + x, stride, 
tctx->block)) != 0)
+if ((ret = fic_decode_block(ctx, &gb, dst + x, stride,
+tctx->block, &tctx->p_frame)) != 0)
 return ret;
 }
 
@@ -348,15 +347,6 @@ static int fic_decode_frame(AVCodecContext *avctx, void 
*data,
 return AVERROR_INVALIDDATA;
 }
 
-/*
- * Set the frametype to I initially. It will be set to P if the frame
- * has any dependencies (skip blocks). There will be a race condition
- * inside the slice decode function to set these, but we do not care.
- * since they will only ever be set to 0/P.
- */
-ctx->frame->key_frame = 1;
-ctx->frame->pict_type = AV_PICTURE_TYPE_I;
-
 /* Allocate slice data. */
 av_fast_malloc(&ctx->slice_data, &ctx->slice_data_size,
nslices * sizeof(ctx->slice_data[0]));
@@ -398,6 +388,15 @@ static int fic_decode_frame(AVCodecContext *avctx, void 
*data,
   NULL, nslices, sizeof(ctx->slice_data[0]))) < 0)
 return ret;
 
+ctx->frame->key_frame = 1;
+ctx->frame->pict_type = AV_PICTURE_TYPE_I;
+for (slice = 0; slice < nslices; slice++) {
+if (ctx->slice_data[slice].p_frame) {
+ctx->frame->key_frame = 0;
+ctx->frame->pict_type = AV_PICTURE_TYPE_P;
+break;
+}
+}
 av_frame_free(&ctx->final_frame);
 ctx->final_frame = av_frame_clone(ctx->frame);
 if (!ctx->final_frame) {

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] dnxhd: initialize DNXHDContext::avctx to each thread's respective one.

2017-03-31 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Tue Mar 28 
19:37:42 2017 -0400| [f800d6508d7e8fbd8d9777b775d333a4f02112ef] | committer: 
Ronald S. Bultje

dnxhd: initialize DNXHDContext::avctx to each thread's respective one.

Otherwise all threads' private contexts have the avctx pointer set to
the AVCodecContext of the first thread, which means all writes to
ctx->avctx->* (in e.g. read_header) are effectively race conditions.

Fixes fate-dnxhd under tsan.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f800d6508d7e8fbd8d9777b775d333a4f02112ef
---

 libavcodec/dnxhddec.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libavcodec/dnxhddec.c b/libavcodec/dnxhddec.c
index f67763e..383e64c 100644
--- a/libavcodec/dnxhddec.c
+++ b/libavcodec/dnxhddec.c
@@ -145,6 +145,7 @@ static av_cold int 
dnxhd_decode_init_thread_copy(AVCodecContext *avctx)
 {
 DNXHDContext *ctx = avctx->priv_data;
 
+ctx->avctx = avctx;
 // make sure VLC tables will be loaded when cid is parsed
 ctx->cid = -1;
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] pthread_frame: don't sync items between threads for intra-only codecs.

2017-03-28 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Tue Mar 28 
15:04:46 2017 -0400| [027ee9b3ed697ff080be0f14b47a11c89ce68cdd] | committer: 
Ronald S. Bultje

pthread_frame: don't sync items between threads for intra-only codecs.

Intra-only codecs should either be able to read these items from the
bitstream, or they should be set upon codec initialization. In both
cases, syncing these items at runtime is unnecessary.

In practice, this fixes race conditions for decoders that read these
values from the bitstream.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=027ee9b3ed697ff080be0f14b47a11c89ce68cdd
---

 libavcodec/pthread_frame.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/pthread_frame.c b/libavcodec/pthread_frame.c
index b618be0..295763a 100644
--- a/libavcodec/pthread_frame.c
+++ b/libavcodec/pthread_frame.c
@@ -244,7 +244,7 @@ static int update_context_from_thread(AVCodecContext *dst, 
AVCodecContext *src,
 {
 int err = 0;
 
-if (dst != src) {
+if (dst != src && (for_user || !(av_codec_get_codec_descriptor(src)->props 
& AV_CODEC_PROP_INTRA_ONLY))) {
 dst->time_base = src->time_base;
 dst->framerate = src->framerate;
 dst->width = src->width;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] h264: revert 1189af429211ac650aac730368a6cf5b23756605.

2017-03-28 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Mon Mar 27 
09:39:24 2017 -0400| [8c2aa45d4a99dc0d9990dfb56782487006f718c3] | committer: 
Ronald S. Bultje

h264: revert 1189af429211ac650aac730368a6cf5b23756605.

The patch introduces race conditions.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8c2aa45d4a99dc0d9990dfb56782487006f718c3
---

 libavcodec/h264_slice.c |  3 ---
 libavcodec/h264dec.c| 24 
 libavcodec/h264dec.h|  8 
 3 files changed, 35 deletions(-)

diff --git a/libavcodec/h264_slice.c b/libavcodec/h264_slice.c
index a703853..fa1e9ae 100644
--- a/libavcodec/h264_slice.c
+++ b/libavcodec/h264_slice.c
@@ -383,9 +383,6 @@ int ff_h264_update_thread_context(AVCodecContext *dst,
 h->picture_structure= h1->picture_structure;
 h->mb_aff_frame = h1->mb_aff_frame;
 h->droppable= h1->droppable;
-h->backup_width = h1->backup_width;
-h->backup_height= h1->backup_height;
-h->backup_pix_fmt   = h1->backup_pix_fmt;
 
 for (i = 0; i < H264_MAX_PICTURE_COUNT; i++) {
 ff_h264_unref_picture(h, &h->DPB[i]);
diff --git a/libavcodec/h264dec.c b/libavcodec/h264dec.c
index 585ce86..25aeba7 100644
--- a/libavcodec/h264dec.c
+++ b/libavcodec/h264dec.c
@@ -307,9 +307,6 @@ static int h264_init_context(AVCodecContext *avctx, 
H264Context *h)
 int i;
 
 h->avctx = avctx;
-h->backup_width  = -1;
-h->backup_height = -1;
-h->backup_pix_fmt= AV_PIX_FMT_NONE;
 h->cur_chroma_format_idc = -1;
 
 h->picture_structure = PICT_FRAME;
@@ -861,14 +858,6 @@ static int output_frame(H264Context *h, AVFrame *dst, 
H264Picture *srcp)
 
 av_dict_set(&dst->metadata, "stereo_mode", 
ff_h264_sei_stereo_mode(&h->sei.frame_packing), 0);
 
-h->backup_width   = h->avctx->width;
-h->backup_height  = h->avctx->height;
-h->backup_pix_fmt = h->avctx->pix_fmt;
-
-h->avctx->width   = dst->width;
-h->avctx->height  = dst->height;
-h->avctx->pix_fmt = dst->format;
-
 if (srcp->sei_recovery_frame_cnt == 0)
 dst->key_frame = 1;
 if (!srcp->crop)
@@ -1003,19 +992,6 @@ static int h264_decode_frame(AVCodecContext *avctx, void 
*data,
 h->setup_finished = 0;
 h->nb_slice_ctx_queued = 0;
 
-if (h->backup_width != -1) {
-avctx->width= h->backup_width;
-h->backup_width = -1;
-}
-if (h->backup_height != -1) {
-avctx->height= h->backup_height;
-h->backup_height = -1;
-}
-if (h->backup_pix_fmt != AV_PIX_FMT_NONE) {
-avctx->pix_fmt= h->backup_pix_fmt;
-h->backup_pix_fmt = AV_PIX_FMT_NONE;
-}
-
 ff_h264_unref_picture(h, &h->last_pic_for_ec);
 
 /* end of stream, output what is still in the buffers */
diff --git a/libavcodec/h264dec.h b/libavcodec/h264dec.h
index 5f868b7..e994f7e 100644
--- a/libavcodec/h264dec.h
+++ b/libavcodec/h264dec.h
@@ -363,14 +363,6 @@ typedef struct H264Context {
 int width, height;
 int chroma_x_shift, chroma_y_shift;
 
-/**
- * Backup frame properties: needed, because they can be different
- * between returned frame and last decoded frame.
- **/
-int backup_width;
-int backup_height;
-enum AVPixelFormat backup_pix_fmt;
-
 int droppable;
 int coded_picture_number;
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] vp9: split out loopfilter functions in their own source file.

2017-03-28 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Mon Mar 27 
16:59:06 2017 -0400| [b823bbc10cc7b8674bb2dea50bd5dfc081e28620] | committer: 
Ronald S. Bultje

vp9: split out loopfilter functions in their own source file.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b823bbc10cc7b8674bb2dea50bd5dfc081e28620
---

 libavcodec/Makefile |   2 +-
 libavcodec/vp9.c| 181 +-
 libavcodec/vp9dec.h |   3 +
 libavcodec/vp9lpf.c | 202 
 4 files changed, 208 insertions(+), 180 deletions(-)

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index cf7ba25..5651526 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -610,7 +610,7 @@ OBJS-$(CONFIG_VP8_DECODER) += vp8.o vp56rac.o
 OBJS-$(CONFIG_VP8_CUVID_DECODER)   += cuvid.o
 OBJS-$(CONFIG_VP8_MEDIACODEC_DECODER)  += mediacodecdec.o
 OBJS-$(CONFIG_VP8_VAAPI_ENCODER)   += vaapi_encode_vp8.o
-OBJS-$(CONFIG_VP9_DECODER) += vp9.o vp9data.o vp9dsp.o \
+OBJS-$(CONFIG_VP9_DECODER) += vp9.o vp9data.o vp9dsp.o vp9lpf.o \
   vp9block.o vp9prob.o vp9mvs.o 
vp56rac.o \
   vp9dsp_8bpp.o vp9dsp_10bpp.o 
vp9dsp_12bpp.o
 OBJS-$(CONFIG_VP9_CUVID_DECODER)   += cuvid.o
diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index 01b6a1b..4d7310f 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -1056,184 +1056,6 @@ static void decode_sb_mem(AVCodecContext *avctx, int 
row, int col, VP9Filter *lf
 }
 }
 
-static av_always_inline void filter_plane_cols(VP9Context *s, int col, int 
ss_h, int ss_v,
-   uint8_t *lvl, uint8_t 
(*mask)[4],
-   uint8_t *dst, ptrdiff_t ls)
-{
-int y, x, bytesperpixel = s->bytesperpixel;
-
-// filter edges between columns (e.g. block1 | block2)
-for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
-uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 
+ ss_v];
-unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
-unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
-unsigned hm = hm1 | hm2 | hm13 | hm23;
-
-for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 * bytesperpixel >> ss_h) {
-if (col || x > 1) {
-if (hm1 & x) {
-int L = *l, H = L >> 4;
-int E = s->filter_lut.mblim_lut[L], I = 
s->filter_lut.lim_lut[L];
-
-if (hmask1[0] & x) {
-if (hmask2[0] & x) {
-av_assert2(l[8 << ss_v] == L);
-s->dsp.loop_filter_16[0](ptr, ls, E, I, H);
-} else {
-s->dsp.loop_filter_8[2][0](ptr, ls, E, I, H);
-}
-} else if (hm2 & x) {
-L = l[8 << ss_v];
-H |= (L >> 4) << 8;
-E |= s->filter_lut.mblim_lut[L] << 8;
-I |= s->filter_lut.lim_lut[L] << 8;
-s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
-   [!!(hmask2[1] & x)]
-   [0](ptr, ls, E, I, H);
-} else {
-s->dsp.loop_filter_8[!!(hmask1[1] & x)]
-[0](ptr, ls, E, I, H);
-}
-} else if (hm2 & x) {
-int L = l[8 << ss_v], H = L >> 4;
-int E = s->filter_lut.mblim_lut[L], I = 
s->filter_lut.lim_lut[L];
-
-s->dsp.loop_filter_8[!!(hmask2[1] & x)]
-[0](ptr + 8 * ls, ls, E, I, H);
-}
-}
-if (ss_h) {
-if (x & 0xAA)
-l += 2;
-} else {
-if (hm13 & x) {
-int L = *l, H = L >> 4;
-int E = s->filter_lut.mblim_lut[L], I = 
s->filter_lut.lim_lut[L];
-
-if (hm23 & x) {
-L = l[8 << ss_v];
-H |= (L >> 4) << 8;
-E |= s->filter_lut.mblim_lut[L] << 8;
-I |= s->filter_lut.lim_lut[L] << 8;
-s->dsp.loop_filter_mix2[0][0][0](ptr + 4 * 
bytesperpixel, ls, E, I, H);
-} else {
-s->dsp.loop_filter_8[0][0](ptr + 4 * bytesperpixel, 
ls, E, I, H);
-}
-} else if (hm23 & x) {

[FFmpeg-cvslog] vp9: split out reconstruction functions in their own source file.

2017-03-28 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Mon Mar 27 
17:32:20 2017 -0400| [6d0d1c4a43f5e5fc195226367fd1c49843d25d71] | committer: 
Ronald S. Bultje

vp9: split out reconstruction functions in their own source file.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6d0d1c4a43f5e5fc195226367fd1c49843d25d71
---

 libavcodec/Makefile  |   2 +-
 libavcodec/vp9_mc_template.c |   6 +-
 libavcodec/vp9block.c| 643 +--
 libavcodec/vp9data.c |  10 +
 libavcodec/vp9data.h |   1 +
 libavcodec/vp9dec.h  |   7 +
 libavcodec/vp9recon.c| 639 ++
 7 files changed, 674 insertions(+), 634 deletions(-)

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 5651526..876a69e 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -610,7 +610,7 @@ OBJS-$(CONFIG_VP8_DECODER) += vp8.o vp56rac.o
 OBJS-$(CONFIG_VP8_CUVID_DECODER)   += cuvid.o
 OBJS-$(CONFIG_VP8_MEDIACODEC_DECODER)  += mediacodecdec.o
 OBJS-$(CONFIG_VP8_VAAPI_ENCODER)   += vaapi_encode_vp8.o
-OBJS-$(CONFIG_VP9_DECODER) += vp9.o vp9data.o vp9dsp.o vp9lpf.o \
+OBJS-$(CONFIG_VP9_DECODER) += vp9.o vp9data.o vp9dsp.o vp9lpf.o 
vp9recon.o \
   vp9block.o vp9prob.o vp9mvs.o 
vp56rac.o \
   vp9dsp_8bpp.o vp9dsp_10bpp.o 
vp9dsp_12bpp.o
 OBJS-$(CONFIG_VP9_CUVID_DECODER)   += cuvid.o
diff --git a/libavcodec/vp9_mc_template.c b/libavcodec/vp9_mc_template.c
index 2d1e9bf..8ff654b 100644
--- a/libavcodec/vp9_mc_template.c
+++ b/libavcodec/vp9_mc_template.c
@@ -405,8 +405,10 @@ static void FN(inter_pred)(AVCodecContext *avctx)
 }
 } else {
 int bwl = bwlog_tab[0][b->bs];
-int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4;
-int uvbw = bwh_tab[s->ss_h][b->bs][0] * 4, uvbh = 
bwh_tab[s->ss_v][b->bs][1] * 4;
+int bw = ff_vp9_bwh_tab[0][b->bs][0] * 4;
+int bh = ff_vp9_bwh_tab[0][b->bs][1] * 4;
+int uvbw = ff_vp9_bwh_tab[s->ss_h][b->bs][0] * 4;
+int uvbh = ff_vp9_bwh_tab[s->ss_v][b->bs][1] * 4;
 
 mc_luma_dir(s, mc[bwl][b->filter][0], s->dst[0], ls_y,
 ref1->data[0], ref1->linesize[0], tref1,
diff --git a/libavcodec/vp9block.c b/libavcodec/vp9block.c
index f91ef1a..ae2f0e4 100644
--- a/libavcodec/vp9block.c
+++ b/libavcodec/vp9block.c
@@ -31,16 +31,6 @@
 #include "vp9data.h"
 #include "vp9dec.h"
 
-static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
-{
-{ 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
-{  4,  4 }, {  4, 2 }, { 2,  4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 
},
-}, {
-{  8,  8 }, {  8, 4 }, { 4,  8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
-{  2,  2 }, {  2, 1 }, { 1,  2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 
},
-}
-};
-
 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
ptrdiff_t stride, int v)
 {
@@ -103,8 +93,8 @@ static void decode_mode(AVCodecContext *avctx)
 VP9Block *b = s->b;
 int row = s->row, col = s->col, row7 = s->row7;
 enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
-int bw4 = bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
-int bh4 = bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
+int bw4 = ff_vp9_bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
+int bh4 = ff_vp9_bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
 int have_a = row > 0, have_l = col > s->tile_col_start;
 int vref, filter_id;
 
@@ -272,8 +262,8 @@ static void decode_mode(AVCodecContext *avctx)
 b->mode[2] =
 b->mode[1] = b->mode[0];
 // FIXME this can probably be optimized
-memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
-memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
+memset(a, b->mode[0], ff_vp9_bwh_tab[0][b->bs][0]);
+memset(l, b->mode[0], ff_vp9_bwh_tab[0][b->bs][1]);
 }
 b->uvmode = vp8_rac_get_tree(&s->c, ff_vp9_intramode_tree,
  
ff_vp9_default_kf_uvmode_probs[b->mode[3]]);
@@ -725,7 +715,7 @@ static void decode_mode(AVCodecContext *avctx)
 }
 #endif
 
-switch (bwh_tab[1][b->bs][0]) {
+switch (ff_vp9_bwh_tab[1][b->bs][0]) {
 #define SET_CTXS(dir, off, n) \
 do { \
 SPLAT_CTX(s->dir##_skip_ctx[off],  b->skip,  n); \
@@ -748,7 +738,7 @@ static void decode_mode(AVCodecContext *avctx)
 case 4: SET_CTXS(above, col, 4); break;
 case 8: SET_CTXS(above, col, 8); break;
 }
-switch (bwh_tab[1][b->bs][1]) {
+switch (ff_vp9_bwh_tab[1][b->bs][1]) {
 case 1: SET_CTXS(lef

[FFmpeg-cvslog] hevc: initialize no_rasl_output_flag in hevc_frame_start().

2017-03-28 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Mon Mar 27 
09:56:38 2017 -0400| [bddabfaab65808e40605181d579ffcd85bfe4c26] | committer: 
Ronald S. Bultje

hevc: initialize no_rasl_output_flag in hevc_frame_start().

This prevents a race condition in files with multiple slices per frame.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=bddabfaab65808e40605181d579ffcd85bfe4c26
---

 libavcodec/hevcdec.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
index 98ed2a0..ef21595 100644
--- a/libavcodec/hevcdec.c
+++ b/libavcodec/hevcdec.c
@@ -826,8 +826,6 @@ static int hls_slice_header(HEVCContext *s)
 s->HEVClc->tu.cu_qp_offset_cb = 0;
 s->HEVClc->tu.cu_qp_offset_cr = 0;
 
-s->no_rasl_output_flag = IS_IDR(s) || IS_BLA(s) || (s->nal_unit_type == 
HEVC_NAL_CRA_NUT && s->last_eos);
-
 return 0;
 }
 
@@ -2677,6 +2675,8 @@ static int hevc_frame_start(HEVCContext *s)
 s->is_decoded= 0;
 s->first_nal_type= s->nal_unit_type;
 
+s->no_rasl_output_flag = IS_IDR(s) || IS_BLA(s) || (s->nal_unit_type == 
HEVC_NAL_CRA_NUT && s->last_eos);
+
 if (s->ps.pps->tiles_enabled_flag)
 lc->end_of_tiles_x = s->ps.pps->column_width[0] << 
s->ps.sps->log2_ctb_size;
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] vp9: split out generic decoding skeleton interface API from VP9 types.

2017-03-28 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Mon Mar 27 
22:05:17 2017 -0400| [0c466417846f80a134dd7078435829c8e47fcbb0] | committer: 
Ronald S. Bultje

vp9: split out generic decoding skeleton interface API from VP9 types.

This allows vp9dsp.h to only include the VP9 types header, and not the
decoder skeleton interface which is for hardware decoders (dxva2/vaapi).

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=0c466417846f80a134dd7078435829c8e47fcbb0
---

 libavcodec/arm/vp9dsp_init_16bpp_arm_template.c |   1 +
 libavcodec/arm/vp9dsp_init_arm.c|   1 +
 libavcodec/dxva2_vp9.c  |   2 +-
 libavcodec/vaapi_vp9.c  |   2 +-
 libavcodec/vp9.h| 145 +---
 libavcodec/vp9dec.h |   1 +
 libavcodec/vp9dsp.h |   4 +-
 libavcodec/vp9shared.h  | 169 
 8 files changed, 178 insertions(+), 147 deletions(-)

diff --git a/libavcodec/arm/vp9dsp_init_16bpp_arm_template.c 
b/libavcodec/arm/vp9dsp_init_16bpp_arm_template.c
index 3620535..1b00078 100644
--- a/libavcodec/arm/vp9dsp_init_16bpp_arm_template.c
+++ b/libavcodec/arm/vp9dsp_init_16bpp_arm_template.c
@@ -21,6 +21,7 @@
 #include 
 
 #include "libavutil/attributes.h"
+#include "libavutil/internal.h"
 #include "libavutil/arm/cpu.h"
 #include "vp9dsp_init.h"
 
diff --git a/libavcodec/arm/vp9dsp_init_arm.c b/libavcodec/arm/vp9dsp_init_arm.c
index 4c57fd6..cb7f48d 100644
--- a/libavcodec/arm/vp9dsp_init_arm.c
+++ b/libavcodec/arm/vp9dsp_init_arm.c
@@ -21,6 +21,7 @@
 #include 
 
 #include "libavutil/attributes.h"
+#include "libavutil/internal.h"
 #include "libavutil/arm/cpu.h"
 #include "libavcodec/vp9dsp.h"
 #include "vp9dsp_init.h"
diff --git a/libavcodec/dxva2_vp9.c b/libavcodec/dxva2_vp9.c
index d53b327..fd7bd98 100644
--- a/libavcodec/dxva2_vp9.c
+++ b/libavcodec/dxva2_vp9.c
@@ -23,7 +23,7 @@
 #include "libavutil/avassert.h"
 #include "libavutil/pixdesc.h"
 
-#include "vp9.h"
+#include "vp9shared.h"
 
 // The headers above may include w32threads.h, which uses the original
 // _WIN32_WINNT define, while dxva2_internal.h redefines it to target a
diff --git a/libavcodec/vaapi_vp9.c b/libavcodec/vaapi_vp9.c
index 7374465..d8ece75 100644
--- a/libavcodec/vaapi_vp9.c
+++ b/libavcodec/vaapi_vp9.c
@@ -24,7 +24,7 @@
 
 #include "hwaccel.h"
 #include "vaapi_decode.h"
-#include "vp9.h"
+#include "vp9shared.h"
 
 static VASurfaceID vaapi_vp9_surface_id(const VP9Frame *vf)
 {
diff --git a/libavcodec/vp9.h b/libavcodec/vp9.h
index 6d2abaf..c8d07ad 100644
--- a/libavcodec/vp9.h
+++ b/libavcodec/vp9.h
@@ -24,13 +24,6 @@
 #ifndef AVCODEC_VP9_H
 #define AVCODEC_VP9_H
 
-#include 
-#include 
-
-#include "avcodec.h"
-#include "thread.h"
-#include "vp56.h"
-
 enum TxfmMode {
 TX_4X4,
 TX_8X8,
@@ -73,142 +66,8 @@ enum FilterMode {
 FILTER_8TAP_REGULAR,
 FILTER_8TAP_SHARP,
 FILTER_BILINEAR,
-FILTER_SWITCHABLE,
-};
-
-enum BlockPartition {
-PARTITION_NONE,// [ ] <-.
-PARTITION_H,   // [-]   |
-PARTITION_V,   // [|]   |
-PARTITION_SPLIT,   // [+] --'
-};
-
-enum InterPredMode {
-NEARESTMV = 10,
-NEARMV= 11,
-ZEROMV= 12,
-NEWMV = 13,
-};
-
-enum CompPredMode {
-PRED_SINGLEREF,
-PRED_COMPREF,
-PRED_SWITCHABLE,
+N_FILTERS,
+FILTER_SWITCHABLE = N_FILTERS,
 };
 
-typedef struct VP9mvrefPair {
-VP56mv mv[2];
-int8_t ref[2];
-} VP9mvrefPair;
-
-typedef struct VP9Frame {
-ThreadFrame tf;
-AVBufferRef *extradata;
-uint8_t *segmentation_map;
-VP9mvrefPair *mv;
-int uses_2pass;
-
-AVBufferRef *hwaccel_priv_buf;
-void *hwaccel_picture_private;
-} VP9Frame;
-
-enum BlockLevel {
-BL_64X64,
-BL_32X32,
-BL_16X16,
-BL_8X8,
-};
-
-enum BlockSize {
-BS_64x64,
-BS_64x32,
-BS_32x64,
-BS_32x32,
-BS_32x16,
-BS_16x32,
-BS_16x16,
-BS_16x8,
-BS_8x16,
-BS_8x8,
-BS_8x4,
-BS_4x8,
-BS_4x4,
-N_BS_SIZES,
-};
-
-typedef struct VP9BitstreamHeader {
-// bitstream header
-uint8_t profile;
-uint8_t bpp;
-uint8_t keyframe;
-uint8_t invisible;
-uint8_t errorres;
-uint8_t intraonly;
-uint8_t resetctx;
-uint8_t refreshrefmask;
-uint8_t highprecisionmvs;
-enum FilterMode filtermode;
-uint8_t allowcompinter;
-uint8_t refreshctx;
-uint8_t parallelmode;
-uint8_t framectxid;
-uint8_t use_last_frame_mvs;
-uint8_t refidx[3];
-uint8_t signbias[3];
-uint8_t fixcompref;
-uint8_t varcompref[2];
-struct {
-uint8_t level;
-int8_t sharpness;
-} filter;
-struct {
-uint8_t enabled;
-

[FFmpeg-cvslog] vp9: re-split the decoder/format/dsp interface header files.

2017-03-28 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Mon Mar 27 
16:47:46 2017 -0400| [f8c019944d45f4ea9786f8690f8a64fd9398ebf3] | committer: 
Ronald S. Bultje

vp9: re-split the decoder/format/dsp interface header files.

The advantage here is that the internal software decoder interface is
not exposed to the DSP functions or the hardware accelerations.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f8c019944d45f4ea9786f8690f8a64fd9398ebf3
---

 libavcodec/aarch64/vp9dsp_init.h|   2 +-
 libavcodec/aarch64/vp9dsp_init_aarch64.c|   2 +-
 libavcodec/arm/vp9dsp_init.h|   2 +-
 libavcodec/arm/vp9dsp_init_arm.c|   2 +-
 libavcodec/mips/vp9_idct_msa.c  |   2 +-
 libavcodec/mips/vp9_intra_msa.c |   2 +-
 libavcodec/mips/vp9_lpf_msa.c   |   2 +-
 libavcodec/mips/vp9_mc_msa.c|   2 +-
 libavcodec/mips/vp9dsp_init_mips.c  |   2 +-
 libavcodec/vp9.c|   2 +-
 libavcodec/vp9.h| 277 
 libavcodec/vp9block.c   |   1 +
 libavcodec/vp9data.h|   2 +-
 libavcodec/vp9dec.h | 206 +
 libavcodec/vp9dsp.c |   2 +-
 libavcodec/vp9dsp.h | 136 ++
 libavcodec/vp9dsp_template.c|   2 +-
 libavcodec/vp9mvs.c |   1 +
 libavcodec/vp9prob.c|   1 +
 libavcodec/x86/vp9dsp_init.c|   2 +-
 libavcodec/x86/vp9dsp_init.h|   2 +-
 libavcodec/x86/vp9dsp_init_16bpp.c  |   2 +-
 libavcodec/x86/vp9dsp_init_16bpp_template.c |   2 +-
 23 files changed, 362 insertions(+), 294 deletions(-)

diff --git a/libavcodec/aarch64/vp9dsp_init.h b/libavcodec/aarch64/vp9dsp_init.h
index e288fb4..9df1752 100644
--- a/libavcodec/aarch64/vp9dsp_init.h
+++ b/libavcodec/aarch64/vp9dsp_init.h
@@ -21,7 +21,7 @@
 #ifndef AVCODEC_AARCH64_VP9DSP_INIT_H
 #define AVCODEC_AARCH64_VP9DSP_INIT_H
 
-#include "libavcodec/vp9.h"
+#include "libavcodec/vp9dsp.h"
 
 void ff_vp9dsp_init_10bpp_aarch64(VP9DSPContext *dsp);
 void ff_vp9dsp_init_12bpp_aarch64(VP9DSPContext *dsp);
diff --git a/libavcodec/aarch64/vp9dsp_init_aarch64.c 
b/libavcodec/aarch64/vp9dsp_init_aarch64.c
index e27c232..91a82d8 100644
--- a/libavcodec/aarch64/vp9dsp_init_aarch64.c
+++ b/libavcodec/aarch64/vp9dsp_init_aarch64.c
@@ -22,7 +22,7 @@
 
 #include "libavutil/attributes.h"
 #include "libavutil/aarch64/cpu.h"
-#include "libavcodec/vp9.h"
+#include "libavcodec/vp9dsp.h"
 #include "vp9dsp_init.h"
 
 #define declare_fpel(type, sz)  \
diff --git a/libavcodec/arm/vp9dsp_init.h b/libavcodec/arm/vp9dsp_init.h
index 0047a24..0dc1c2d 100644
--- a/libavcodec/arm/vp9dsp_init.h
+++ b/libavcodec/arm/vp9dsp_init.h
@@ -21,7 +21,7 @@
 #ifndef AVCODEC_ARM_VP9DSP_INIT_H
 #define AVCODEC_ARM_VP9DSP_INIT_H
 
-#include "libavcodec/vp9.h"
+#include "libavcodec/vp9dsp.h"
 
 void ff_vp9dsp_init_10bpp_arm(VP9DSPContext *dsp);
 void ff_vp9dsp_init_12bpp_arm(VP9DSPContext *dsp);
diff --git a/libavcodec/arm/vp9dsp_init_arm.c b/libavcodec/arm/vp9dsp_init_arm.c
index f9245c3..4c57fd6 100644
--- a/libavcodec/arm/vp9dsp_init_arm.c
+++ b/libavcodec/arm/vp9dsp_init_arm.c
@@ -22,7 +22,7 @@
 
 #include "libavutil/attributes.h"
 #include "libavutil/arm/cpu.h"
-#include "libavcodec/vp9.h"
+#include "libavcodec/vp9dsp.h"
 #include "vp9dsp_init.h"
 
 #define declare_fpel(type, sz)  \
diff --git a/libavcodec/mips/vp9_idct_msa.c b/libavcodec/mips/vp9_idct_msa.c
index e2b7b3c..25ea16c 100644
--- a/libavcodec/mips/vp9_idct_msa.c
+++ b/libavcodec/mips/vp9_idct_msa.c
@@ -19,7 +19,7 @@
  */
 
 #include 
-#include "libavcodec/vp9.h"
+#include "libavcodec/vp9dsp.h"
 #include "libavutil/mips/generic_macros_msa.h"
 #include "vp9dsp_mips.h"
 
diff --git a/libavcodec/mips/vp9_intra_msa.c b/libavcodec/mips/vp9_intra_msa.c
index 4097cf4..54cf0ae 100644
--- a/libavcodec/mips/vp9_intra_msa.c
+++ b/libavcodec/mips/vp9_intra_msa.c
@@ -18,7 +18,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "libavcodec/vp9.h"
+#include "libavcodec/vp9dsp.h"
 #include "libavutil/mips/generic_macros_msa.h"
 #include "vp9dsp_mips.h"
 
diff --git a/libavcodec/mips/vp9_lpf_msa.c b/libavcodec/mips/vp9_lpf_msa.c
index 9164dcd..eef8afc 100644
--- a/libavcodec/mips/vp9_lpf_msa.c
+++ b/libavcodec/mips/vp9_lpf_msa.c
@@ -18,7 +18,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#include "libavcodec/vp9.h"
+#include "libavcodec/vp9dsp.h"
 #inc

[FFmpeg-cvslog] dirac: make initialization of arithmetic coder tables threadsafe.

2017-03-28 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Mon Mar 27 
11:24:43 2017 -0400| [5ba8c3a0ed0e43e6418eabdf8af9549c9e806382] | committer: 
Ronald S. Bultje

dirac: make initialization of arithmetic coder tables threadsafe.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5ba8c3a0ed0e43e6418eabdf8af9549c9e806382
---

 libavcodec/dirac_arith.c | 15 ++-
 libavcodec/dirac_arith.h |  1 +
 libavcodec/diracdec.c|  9 -
 3 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/libavcodec/dirac_arith.c b/libavcodec/dirac_arith.c
index bf91392..7eb9bd6 100644
--- a/libavcodec/dirac_arith.c
+++ b/libavcodec/dirac_arith.c
@@ -83,6 +83,16 @@ const uint8_t ff_dirac_next_ctx[DIRAC_CTX_COUNT] = {
 
 int16_t ff_dirac_prob_branchless[256][2];
 
+av_cold void ff_dirac_init_arith_tables(void)
+{
+int i;
+
+for (i = 0; i < 256; i++) {
+ff_dirac_prob_branchless[i][0] =  ff_dirac_prob[255-i];
+ff_dirac_prob_branchless[i][1] = -ff_dirac_prob[i];
+}
+}
+
 void ff_dirac_init_arith_decoder(DiracArith *c, GetBitContext *gb, int length)
 {
 int i;
@@ -106,11 +116,6 @@ void ff_dirac_init_arith_decoder(DiracArith *c, 
GetBitContext *gb, int length)
 c->counter = -16;
 c->range   = 0xffff;
 
-for (i = 0; i < 256; i++) {
-ff_dirac_prob_branchless[i][0] =  ff_dirac_prob[255-i];
-ff_dirac_prob_branchless[i][1] = -ff_dirac_prob[i];
-}
-
 for (i = 0; i < DIRAC_CTX_COUNT; i++)
 c->contexts[i] = 0x8000;
 }
diff --git a/libavcodec/dirac_arith.h b/libavcodec/dirac_arith.h
index 003430a..24a7ca3 100644
--- a/libavcodec/dirac_arith.h
+++ b/libavcodec/dirac_arith.h
@@ -190,6 +190,7 @@ static inline int dirac_get_arith_int(DiracArith *c, int 
follow_ctx, int data_ct
 return ret;
 }
 
+void ff_dirac_init_arith_tables(void);
 void ff_dirac_init_arith_decoder(DiracArith *c, GetBitContext *gb, int length);
 
 #endif /* AVCODEC_DIRAC_ARITH_H */
diff --git a/libavcodec/diracdec.c b/libavcodec/diracdec.c
index e0604af..202ae94 100644
--- a/libavcodec/diracdec.c
+++ b/libavcodec/diracdec.c
@@ -26,6 +26,7 @@
  * @author Marco Gerards , David Conrad, Jordi Ortiz 

  */
 
+#include "libavutil/thread.h"
 #include "avcodec.h"
 #include "get_bits.h"
 #include "bytestream.h"
@@ -379,10 +380,12 @@ static void free_sequence_buffers(DiracContext *s)
 av_freep(&s->mcscratch);
 }
 
+static AVOnce dirac_arith_init = AV_ONCE_INIT;
+
 static av_cold int dirac_decode_init(AVCodecContext *avctx)
 {
 DiracContext *s = avctx->priv_data;
-int i;
+int i, ret;
 
 s->avctx = avctx;
 s->frame_number = -1;
@@ -404,6 +407,9 @@ static av_cold int dirac_decode_init(AVCodecContext *avctx)
 return AVERROR(ENOMEM);
 }
 }
+ret = ff_thread_once(&dirac_arith_init, ff_dirac_init_arith_tables);
+if (ret != 0)
+return AVERROR_UNKNOWN;
 
 return 0;
 }
@@ -2299,5 +2305,6 @@ AVCodec ff_dirac_decoder = {
 .close  = dirac_decode_end,
 .decode = dirac_decode_frame,
 .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_SLICE_THREADS | 
AV_CODEC_CAP_DR1,
+.caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE,
 .flush  = dirac_decode_flush,
 };

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] vp9lpf/x86: remove unused register from ABSSUB_CMP macro.

2017-03-23 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Sat Dec 20 
11:13:06 2014 -0500| [683da2788e418877808f1407d68140cafaae8b4f] | committer: 
Anton Khirnov

vp9lpf/x86: remove unused register from ABSSUB_CMP macro.

Signed-off-by: Anton Khirnov 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=683da2788e418877808f1407d68140cafaae8b4f
---

 libavcodec/x86/vp9lpf.asm | 42 +-
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm
index d5b3fca..7dc40a3 100644
--- a/libavcodec/x86/vp9lpf.asm
+++ b/libavcodec/x86/vp9lpf.asm
@@ -70,9 +70,9 @@ SECTION .text
 %endmacro
 
 ; %1 = abs(%2-%3) <= %4
-%macro ABSSUB_CMP 6-7 [pb_80]; dst, src1, src2, cmp, tmp1, tmp2, [pb_80]
-ABSSUB  %1, %2, %3, %6  ; dst = abs(src1-src2)
-CMP_LTE %1, %4, %6, %7  ; dst <= cmp
+%macro ABSSUB_CMP 5-6 [pb_80]; dst, src1, src2, cmp, tmp, [pb_80]
+ABSSUB  %1, %2, %3, %5  ; dst = abs(src1-src2)
+CMP_LTE %1, %4, %5, %6  ; dst <= cmp
 %endmacro
 
 %macro MASK_APPLY 4 ; %1=new_data/dst %2=old_data %3=mask %4=tmp
@@ -439,16 +439,16 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3, dst, 
stride, mstride, dst2, stri
 SWAP10,  6, 14
 SWAP11,  7, 15
 %endif
-ABSSUB_CMP  m5,  m8,  m9, m2, m6, m7, m0; m5 = abs(p3-p2) <= I
-ABSSUB_CMP  m1,  m9, m10, m2, m6, m7, m0; m1 = abs(p2-p1) <= I
+ABSSUB_CMP  m5,  m8,  m9, m2, m7, m0; m5 = abs(p3-p2) <= I
+ABSSUB_CMP  m1,  m9, m10, m2, m7, m0; m1 = abs(p2-p1) <= I
 pandm5, m1
-ABSSUB_CMP  m1, m10, m11, m2, m6, m7, m0; m1 = abs(p1-p0) <= I
+ABSSUB_CMP  m1, m10, m11, m2, m7, m0; m1 = abs(p1-p0) <= I
 pandm5, m1
-ABSSUB_CMP  m1, m12, m13, m2, m6, m7, m0; m1 = abs(q1-q0) <= I
+ABSSUB_CMP  m1, m12, m13, m2, m7, m0; m1 = abs(q1-q0) <= I
 pandm5, m1
-ABSSUB_CMP  m1, m13, m14, m2, m6, m7, m0; m1 = abs(q2-q1) <= I
+ABSSUB_CMP  m1, m13, m14, m2, m7, m0; m1 = abs(q2-q1) <= I
 pandm5, m1
-ABSSUB_CMP  m1, m14, m15, m2, m6, m7, m0; m1 = abs(q3-q2) <= I
+ABSSUB_CMP  m1, m14, m15, m2, m7, m0; m1 = abs(q3-q2) <= I
 pandm5, m1
 ABSSUB  m1, m11, m12, m7; abs(p0-q0)
 paddusb m1, m1  ; abs(p0-q0) * 2
@@ -466,9 +466,9 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3, dst, 
stride, mstride, dst2, stri
 ; calc flat8in (if not 44_16) and hev masks
 movam6, [pb_81] ; [1 1 1 1 ...] ^ 0x80
 %if %2 != 44
-ABSSUB_CMP  m2, m8, m11, m6, m4, m5 ; abs(p3 - p0) <= 1
+ABSSUB_CMP  m2, m8, m11, m6, m5 ; abs(p3 - p0) <= 1
 movam8, [pb_80]
-ABSSUB_CMP  m1, m9, m11, m6, m4, m5, m8 ; abs(p2 - p0) <= 1
+ABSSUB_CMP  m1, m9, m11, m6, m5, m8 ; abs(p2 - p0) <= 1
 pandm2, m1
 ABSSUB  m4, m10, m11, m5; abs(p1 - p0)
 %if %2 == 16
@@ -491,9 +491,9 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3, dst, 
stride, mstride, dst2, stri
 por m0, m5  ; hev final value
 CMP_LTE m4, m6, m5  ; abs(q1 - q0) <= 1
 pandm2, m4  ; (flat8in)
-ABSSUB_CMP  m1, m14, m12, m6, m4, m5, m8; abs(q2 - q0) <= 1
+ABSSUB_CMP  m1, m14, m12, m6, m5, m8; abs(q2 - q0) <= 1
 pandm2, m1
-ABSSUB_CMP  m1, m15, m12, m6, m4, m5, m8; abs(q3 - q0) <= 1
+ABSSUB_CMP  m1, m15, m12, m6, m5, m8; abs(q3 - q0) <= 1
 pandm2, m1  ; flat8in final value
 %if %2 == 84 || %2 == 48
 pandm2, [mask_mix%2]
@@ -517,26 +517,26 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3, dst, 
stride, mstride, dst2, stri
 ; calc flat8out mask
 movam8, [P7]
 movam9, [P6]
-ABSSUB_CMP  m1, m8, m11, m6, m4, m5 ; abs(p7 - p0) <= 1
-ABSSUB_CMP  m7, m9, m11, m6, m4, m5 ; abs(p6 - p0) <= 1
+ABSSUB_CMP  m1, m8, m11, m6, m5 ; abs(p7 - p0) <= 1
+ABSSUB_CMP  m7, m9, m11, m6, m5 ; abs(p6 - p0) <= 1
 pandm1, m7
 movam8, [P5]
 movam9, [P4]
-ABSSUB_CMP  m7, m8, m11, m6, m4, m5 ; abs(p5 - p0) <= 1
+ABSSUB_CMP  m7, m8, m11, m6, m5 ; abs(p5 - p0) <= 1

[FFmpeg-cvslog] vp9lpf/x86: move variable assigned inside macro branch.

2017-03-23 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Wed Dec 24 
14:22:19 2014 -0500| [4ce8ba72f9cbdecf3a2ee3533959e097a2095595] | committer: 
Anton Khirnov

vp9lpf/x86: move variable assigned inside macro branch.

The value is not used outside the branch.

Signed-off-by: Anton Khirnov 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=4ce8ba72f9cbdecf3a2ee3533959e097a2095595
---

 libavcodec/x86/vp9lpf.asm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm
index 5d82995..0b72fac 100644
--- a/libavcodec/x86/vp9lpf.asm
+++ b/libavcodec/x86/vp9lpf.asm
@@ -463,8 +463,8 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3, dst, 
stride, mstride, dst2, stri
 
 ; (m3: fm, m8..15: p3 p2 p1 p0 q0 q1 q2 q3)
 ; calc flat8in (if not 44_16) and hev masks
-movam6, [pb_81] ; [1 1 1 1 ...] ^ 0x80
 %if %2 != 44
+movam6, [pb_81] ; [1 1 1 1 ...] ^ 0x80
 ABSSUB_GT   m2, m8, m11, m6, m5 ; abs(p3 - p0) <= 1
 movam8, [pb_80]
 ABSSUB_GT   m1, m9, m11, m6, m5, m8 ; abs(p2 - p0) <= 1

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] vp9lpf/x86: slightly simplify 44/48/84/88 h stores.

2017-03-23 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Fri Dec 19 
22:18:42 2014 -0500| [6e74e9636b1752e777146421ffa2b2498071e28d] | committer: 
Anton Khirnov

vp9lpf/x86: slightly simplify 44/48/84/88 h stores.

Signed-off-by: Anton Khirnov 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6e74e9636b1752e777146421ffa2b2498071e28d
---

 libavcodec/x86/vp9lpf.asm | 88 +--
 1 file changed, 40 insertions(+), 48 deletions(-)

diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm
index 878bc54..d5b3fca 100644
--- a/libavcodec/x86/vp9lpf.asm
+++ b/libavcodec/x86/vp9lpf.asm
@@ -725,34 +725,34 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3, dst, 
stride, mstride, dst2, stri
 SBUTTERFLY  bw, 2, 3, 8
 SBUTTERFLY  wd, 0, 2, 8
 SBUTTERFLY  wd, 1, 3, 8
-SBUTTERFLY  dq, 0, 4, 8
-SBUTTERFLY  dq, 1, 5, 8
-SBUTTERFLY  dq, 2, 6, 8
-SBUTTERFLY  dq, 3, 7, 8
 movd  [P7], m0
-punpckhqdq m0, m8
-movd  [P6], m0
-movd  [Q0], m1
-punpckhqdq  m1, m9
-movd  [Q1], m1
 movd  [P3], m2
-punpckhqdq  m2, m10
-movd  [P2], m2
+movd  [Q0], m1
 movd  [Q4], m3
-punpckhqdq m3, m11
+psrldq  m0, 4
+psrldq  m1, 4
+psrldq  m2, 4
+psrldq  m3, 4
+movd  [P6], m0
+movd  [P2], m2
+movd  [Q1], m1
 movd  [Q5], m3
-movd  [P5], m4
-punpckhqdq m4, m12
-movd  [P4], m4
-movd  [Q2], m5
-punpckhqdq m5, m13
-movd  [Q3], m5
-movd  [P1], m6
-punpckhqdq m6, m14
-movd  [P0], m6
-movd  [Q6], m7
-punpckhqdq m7, m8
-movd  [Q7], m7
+psrldq  m0, 4
+psrldq  m1, 4
+psrldq  m2, 4
+psrldq  m3, 4
+movd  [P5], m0
+movd  [P1], m2
+movd  [Q2], m1
+movd  [Q6], m3
+psrldq  m0, 4
+psrldq  m1, 4
+psrldq  m2, 4
+psrldq  m3, 4
+movd  [P4], m0
+movd  [P0], m2
+movd  [Q3], m1
+movd  [Q7], m3
 %else
 ; the following code do a transpose of 8 full lines to 16 half
 ; lines (high part). It is inlined to avoid the need of a staging area
@@ -777,30 +777,22 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3, dst, 
stride, mstride, dst2, stri
 SBUTTERFLY  dq,  1,  5, 8
 SBUTTERFLY  dq,  2,  6, 8
 SBUTTERFLY  dq,  3,  7, 8
-movh  [P7], m0
-punpckhqdq m0, m8
-movh  [P6], m0
-movh  [Q0], m1
-punpckhqdq  m1, m9
-movh  [Q1], m1
-movh  [P3], m2
-punpckhqdq  m2, m10
-movh  [P2], m2
-movh  [Q4], m3
-punpckhqdq m3, m11
-movh  [Q5], m3
-movh  [P5], m4
-punpckhqdq m4, m12
-movh  [P4], m4
-movh  [Q2], m5
-punpckhqdq m5, m13
-movh  [Q3], m5
-movh  [P1], m6
-punpckhqdq m6, m14
-movh  [P0], m6
-movh  [Q6], m7
-punpckhqdq m7, m8
-movh  [Q7], m7
+movh   [P7], m0
+movhps [P6], m0
+movh   [Q0], m1
+movhps [Q1], m1
+movh   [P3], m2
+movhps [P2], m2
+movh   [Q4], m3
+movhps [Q5], m3
+movh   [P5], m4
+movhps [P4], m4
+movh   [Q2], m5
+movhps [Q3], m5
+movh   [P1], m6
+movhps [P0], m6
+movh   [Q6], m7
+movhps [Q7], m7
 %endif
 %endif
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] vp9lpf/x86: simplify ABSSUB_CMP by inverting the comparison meaning.

2017-03-23 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Sun Dec 21 
19:34:03 2014 -0500| [e4961035b288043b2b00bdc2ccbe3c31393e12d5] | committer: 
Anton Khirnov

vp9lpf/x86: simplify ABSSUB_CMP by inverting the comparison meaning.

Signed-off-by: Anton Khirnov 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e4961035b288043b2b00bdc2ccbe3c31393e12d5
---

 libavcodec/x86/vp9lpf.asm | 103 +++---
 1 file changed, 52 insertions(+), 51 deletions(-)

diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm
index 7dc40a3..5d82995 100644
--- a/libavcodec/x86/vp9lpf.asm
+++ b/libavcodec/x86/vp9lpf.asm
@@ -36,6 +36,7 @@ pb_40:  times 16 db 0x40
 pb_81:  times 16 db 0x81
 pb_f8:  times 16 db 0xf8
 pb_fe:  times 16 db 0xfe
+pb_ff:  times 16 db 0xff
 
 pw_4:   times  8 dw 4
 pw_8:   times  8 dw 8
@@ -59,20 +60,18 @@ SECTION .text
 por %1, %4
 %endmacro
 
-; %1 = %1<=%2
-%macro CMP_LTE 3-4 ; src/dst, cmp, tmp, pb_80
-%if %0 == 4
-pxor%1, %4
+; %1 = %1>%2
+%macro CMP_GT 2-3 ; src/dst, cmp, pb_80
+%if %0 == 3
+pxor%1, %3
 %endif
-pcmpgtb %3, %2, %1  ; cmp > src?
-pcmpeqb %1, %2  ; cmp == src? XXX: avoid this with 
a -1/+1 well placed?
-por %1, %3  ; cmp >= src?
+pcmpgtb %1, %2
 %endmacro
 
-; %1 = abs(%2-%3) <= %4
-%macro ABSSUB_CMP 5-6 [pb_80]; dst, src1, src2, cmp, tmp, [pb_80]
+; %1 = abs(%2-%3) > %4
+%macro ABSSUB_GT 5-6 [pb_80]; dst, src1, src2, cmp, tmp, [pb_80]
 ABSSUB  %1, %2, %3, %5  ; dst = abs(src1-src2)
-CMP_LTE %1, %4, %5, %6  ; dst <= cmp
+CMP_GT  %1, %4, %6  ; dst > cmp
 %endmacro
 
 %macro MASK_APPLY 4 ; %1=new_data/dst %2=old_data %3=mask %4=tmp
@@ -439,17 +438,17 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3, dst, 
stride, mstride, dst2, stri
 SWAP10,  6, 14
 SWAP11,  7, 15
 %endif
-ABSSUB_CMP  m5,  m8,  m9, m2, m7, m0; m5 = abs(p3-p2) <= I
-ABSSUB_CMP  m1,  m9, m10, m2, m7, m0; m1 = abs(p2-p1) <= I
-pandm5, m1
-ABSSUB_CMP  m1, m10, m11, m2, m7, m0; m1 = abs(p1-p0) <= I
-pandm5, m1
-ABSSUB_CMP  m1, m12, m13, m2, m7, m0; m1 = abs(q1-q0) <= I
-pandm5, m1
-ABSSUB_CMP  m1, m13, m14, m2, m7, m0; m1 = abs(q2-q1) <= I
-pandm5, m1
-ABSSUB_CMP  m1, m14, m15, m2, m7, m0; m1 = abs(q3-q2) <= I
-pandm5, m1
+ABSSUB_GT   m5,  m8,  m9, m2, m7, m0; m5 = abs(p3-p2) <= I
+ABSSUB_GT   m1,  m9, m10, m2, m7, m0; m1 = abs(p2-p1) <= I
+por m5, m1
+ABSSUB_GT   m1, m10, m11, m2, m7, m0; m1 = abs(p1-p0) <= I
+por m5, m1
+ABSSUB_GT   m1, m12, m13, m2, m7, m0; m1 = abs(q1-q0) <= I
+por m5, m1
+ABSSUB_GT   m1, m13, m14, m2, m7, m0; m1 = abs(q2-q1) <= I
+por m5, m1
+ABSSUB_GT   m1, m14, m15, m2, m7, m0; m1 = abs(q3-q2) <= I
+por m5, m1
 ABSSUB  m1, m11, m12, m7; abs(p0-q0)
 paddusb m1, m1  ; abs(p0-q0) * 2
 ABSSUB  m2, m10, m13, m7; abs(p1-q1)
@@ -457,19 +456,19 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3, dst, 
stride, mstride, dst2, stri
 psrlq   m2, 1   ; abs(p1-q1)/2
 paddusb m1, m2  ; abs(p0-q0)*2 + 
abs(p1-q1)/2
 pxorm1, m0
-pcmpgtb m4, m3, m1  ; E > X?
-pcmpeqb m3, m1  ; E == X?
-por m3, m4  ; E >= X?
-pandm3, m5  ; fm final value
+pcmpgtb m1, m3
+por m1, m5  ; fm final value
+SWAP 1, 3
+pxorm3, [pb_ff]
 
 ; (m3: fm, m8..15: p3 p2 p1 p0 q0 q1 q2 q3)
 ; calc flat8in (if not 44_16) and hev masks
 movam6, [pb_81] ; [1 1 1 1 ...] ^ 0x80
 %if %2 != 44
-ABSSUB_CMP  m2, m8, m11, m6, m5 ; abs(p3 - p0) <= 1
+ABSSUB_GT   m2, m8, m11, m6, m5 ; abs(p3 - p0) <= 1
 movam8, [pb_80]
-ABSSUB_CMP  m1, m9, m11, m6, m5, m8 ; abs(p2 - p0) <= 1
-pandm2, m1
+ABSSUB_GT   m1, m9, m11, m6, m5, m8 ; abs(p2 - p0) <= 1
+por m2, m1
 ABSSUB  m4, m10, m11, m5

[FFmpeg-cvslog] vp9lpf/x86: store unpacked intermediates for filter6/14 on stack.

2017-03-23 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Wed Dec 24 
14:17:28 2014 -0500| [c6375a83d1ad512ed24e8fef044f3ba17237e03e] | committer: 
Anton Khirnov

vp9lpf/x86: store unpacked intermediates for filter6/14 on stack.

filter16 goes from 508 to 482 (h) or 346 to 314 (v) cycles; filter88
goes from 240 to 238 (h) or 174 to 165 (v) cycles, measured on TOS.

Signed-off-by: Anton Khirnov 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c6375a83d1ad512ed24e8fef044f3ba17237e03e
---

 libavcodec/x86/vp9lpf.asm | 151 --
 1 file changed, 79 insertions(+), 72 deletions(-)

diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm
index 0b72fac..e337132 100644
--- a/libavcodec/x86/vp9lpf.asm
+++ b/libavcodec/x86/vp9lpf.asm
@@ -80,39 +80,42 @@ SECTION .text
 por %1, %4  ; new&mask | old&~mask
 %endmacro
 
-%macro FILTER_SUBx2_ADDx2 8 ; %1=dst %2=h/l %3=cache %4=sub1 %5=sub2 %6=add1 
%7=add2 %8=rshift
-punpck%2bw  %3, %4, m0
-psubw   %1, %3
-punpck%2bw  %3, %5, m0
-psubw   %1, %3
-punpck%2bw  %3, %6, m0
-paddw   %1, %3
-punpck%2bw  %3, %7, m0
+%macro FILTER_SUBx2_ADDx2 9-10 "" ; %1=dst %2=h/l %3=cache %4=stack_off 
%5=sub1 %6=sub2 %7=add1 %8=add2 %9=rshift, [unpack]
+psubw   %3, [rsp+%4+%5*32]
+psubw   %3, [rsp+%4+%6*32]
+paddw   %3, [rsp+%4+%7*32]
+%ifnidn %10, ""
+punpck%2bw  %1, %10, m0
+mova[rsp+%4+%8*32], %1
 paddw   %3, %1
-psraw   %1, %3, %8
+%else
+paddw   %3, [rsp+%4+%8*32]
+%endif
+psraw   %1, %3, %9
 %endmacro
 
-%macro FILTER_INIT 8 ; tmp1, tmp2, cacheL, cacheH, dstp, filterid, mask, source
-FILTER%6_INIT   %1, l, %3
-FILTER%6_INIT   %2, h, %4
+; FIXME interleave l/h better (for instruction pairing)
+%macro FILTER_INIT 9 ; tmp1, tmp2, cacheL, cacheH, dstp, stack_off, filterid, 
mask, source
+FILTER%7_INIT   %1, l, %3, %6 +  0
+FILTER%7_INIT   %2, h, %4, %6 + 16
 packuswb%1, %2
-MASK_APPLY  %1, %8, %7, %2
+MASK_APPLY  %1, %9, %8, %2
 mova%5, %1
 %endmacro
 
-%macro FILTER_UPDATE 11-14 ; tmp1, tmp2, cacheL, cacheH, dstp, -, -, +, +, 
rshift, mask, [source], [preload reg + value]
-%if %0 == 13 ; no source + preload
-mova%12, %13
-%elif %0 == 14 ; source + preload
-mova%13, %14
+
+%macro FILTER_UPDATE 12-15 "", "" ; tmp1, tmp2, cacheL, cacheH, dstp, 
stack_off, -, -, +, +, rshift, mask, [source], [unpack]
+; FIXME interleave this properly with the subx2/addx2
+%if %0 == 15
+mova   %14, %15
 %endif
-FILTER_SUBx2_ADDx2  %1, l, %3, %6, %7, %8, %9, %10
-FILTER_SUBx2_ADDx2  %2, h, %4, %6, %7, %8, %9, %10
+FILTER_SUBx2_ADDx2  %1, l, %3, %6 +  0, %7, %8, %9, %10, %11, %14
+FILTER_SUBx2_ADDx2  %2, h, %4, %6 + 16, %7, %8, %9, %10, %11, %14
 packuswb%1, %2
-%if %0 == 12 || %0 == 14
-MASK_APPLY  %1, %12, %11, %2
+%ifnidn %13, ""
+MASK_APPLY  %1, %13, %12, %2
 %else
-MASK_APPLY  %1, %5, %11, %2
+MASK_APPLY  %1, %5, %12, %2
 %endif
 mova%5, %1
 %endmacro
@@ -152,44 +155,48 @@ SECTION .text
 paddusb %1, %4  ; add the negatives
 %endmacro
 
-%macro FILTER6_INIT 3 ; %1=dst %2=h/l %3=cache
+%macro FILTER6_INIT 4 ; %1=dst %2=h/l %3=cache, %4=stack_off
 punpck%2bw  %1, m14, m0 ; p3: B->W
+mova [rsp+%4+0*32], %1
 paddw   %3, %1, %1  ; p3*2
 paddw   %3, %1  ; p3*3
 punpck%2bw  %1, m15, m0 ; p2: B->W
+mova [rsp+%4+1*32], %1
 paddw   %3, %1  ; p3*3 + p2
 paddw   %3, %1  ; p3*3 + p2*2
 punpck%2bw  %1, m10, m0 ; p1: B->W
+mova [rsp+%4+2*32], %1
 paddw   %3, %1  ; p3*3 + p2*2 + p1
 punpck%2bw  %1, m11, m0 ; p0: B->W
+mova [rsp+%4+3*32], %1
 paddw   %3, %1  ; p3*3 + p2*2 + p1 + p0
 punpck%2bw  %1, m12, m0 ; q0: B->W
+mova [rsp+%4+4*32], %1
 paddw   %3, %1  ; p3*3 + p2*2 + p1 + 
p0 + q0
 paddw   %3, [pw_4]  ; p3*3 + p2*2 + p1 + 
p0 + q0 + 4
 psraw   %1, %3, 3   ; (p3*3 + p2*2 + p1 + 
p0 + q0 + 4) >> 3
 %endmacro
 
-%macro FILTER14_INIT 3 ; %1=dst %2=h/l %3=cache
+%macro FILTER14_INIT 4 ; %1=dst %2=h/l %3=cache, %4=stack

[FFmpeg-cvslog] vp9lpf/x86: save one register in SIGN_ADD/SUB.

2017-03-23 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Sat Dec 27 
14:47:07 2014 -0500| [7c62891efedf0102934bc18d62c6561152a4d0bc] | committer: 
Anton Khirnov

vp9lpf/x86: save one register in SIGN_ADD/SUB.

Signed-off-by: Anton Khirnov 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7c62891efedf0102934bc18d62c6561152a4d0bc
---

 libavcodec/x86/vp9lpf.asm | 28 ++--
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm
index e337132..c2afc44 100644
--- a/libavcodec/x86/vp9lpf.asm
+++ b/libavcodec/x86/vp9lpf.asm
@@ -142,17 +142,17 @@ SECTION .text
 %endmacro
 
 ; clip_u8(u8 + i8)
-%macro SIGN_ADD 5 ; dst, u8, i8, tmp1, tmp2
-EXTRACT_POS_NEG %3, %4, %5
-psubusb %1, %2, %4  ; sub the negatives
-paddusb %1, %5  ; add the positives
+%macro SIGN_ADD 4 ; dst, u8, i8, tmp1
+EXTRACT_POS_NEG %3, %4, %1
+paddusb %1, %2  ; add the positives
+psubusb %1, %4  ; sub the negatives
 %endmacro
 
 ; clip_u8(u8 - i8)
-%macro SIGN_SUB 5 ; dst, u8, i8, tmp1, tmp2
-EXTRACT_POS_NEG %3, %4, %5
-psubusb %1, %2, %5  ; sub the positives
-paddusb %1, %4  ; add the negatives
+%macro SIGN_SUB 4 ; dst, u8, i8, tmp1
+EXTRACT_POS_NEG %3, %1, %4
+paddusb %1, %2  ; add the negatives
+psubusb %1, %4  ; sub the positives
 %endmacro
 
 %macro FILTER6_INIT 4 ; %1=dst %2=h/l %3=cache, %4=stack_off
@@ -578,8 +578,8 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4, dst, 
stride, mstride, dst2,
 paddsb  m4, [pb_3]  ; m4: f2 = clip(f + 3, 
127)
 movam14, [pb_10]; will be reused in 
filter4()
 SRSHIFT3B_2Xm6, m4, m14, m7 ; f1 and f2 sign byte 
shift by 3
-SIGN_SUBm7, m12, m6, m5, m9 ; m7 = q0 - f1
-SIGN_ADDm8, m11, m4, m5, m9 ; m8 = p0 + f2
+SIGN_SUBm7, m12, m6, m5 ; m7 = q0 - f1
+SIGN_ADDm8, m11, m4, m5 ; m8 = p0 + f2
 %if %2 != 44
 pandn   m6, m2, m3  ;  ~mask(in) & mask(fm)
 pandm6, m0  ; (~mask(in) & 
mask(fm)) & mask(hev)
@@ -607,18 +607,18 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4, dst, 
stride, mstride, dst2,
 %define q0tmp m2
 pandn   m0, m3
 %endif
-SIGN_SUBq0tmp, m12, m6, m4, m14 ; q0 - f1
+SIGN_SUBq0tmp, m12, m6, m4  ; q0 - f1
 MASK_APPLY  q0tmp, m7, m0, m5   ; filter4(q0) & mask
 mova[Q0], q0tmp
-SIGN_ADDp0tmp, m11, m15, m4, m14; p0 + f2
+SIGN_ADDp0tmp, m11, m15, m4 ; p0 + f2
 MASK_APPLY  p0tmp, m8, m0, m5   ; filter4(p0) & mask
 mova[P0], p0tmp
 paddb   m6, [pb_80] ;
 pxorm8, m8  ;   f=(f1+1)>>1
 pavgb   m6, m8  ;
 psubb   m6, [pb_40] ;
-SIGN_ADDm7, m10, m6, m8, m9 ; p1 + f
-SIGN_SUBm4, m13, m6, m8, m9 ; q1 - f
+SIGN_ADDm7, m10, m6, m8 ; p1 + f
+SIGN_SUBm4, m13, m6, m8 ; q1 - f
 MASK_APPLY  m7, m10, m0, m14; m7 = filter4(p1)
 MASK_APPLY  m4, m13, m0, m14; m4 = filter4(q1)
 mova[P1], m7

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] vp9lpf/x86: make filter_16_h work on 32-bit.

2017-03-23 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Fri Dec 26 
17:50:38 2014 -0500| [715f139c9bd407ef7f4d1f564ad683140ec61e6d] | committer: 
Anton Khirnov

vp9lpf/x86: make filter_16_h work on 32-bit.

Signed-off-by: Anton Khirnov 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=715f139c9bd407ef7f4d1f564ad683140ec61e6d
---

 libavcodec/x86/vp9dsp_init.c |   4 +-
 libavcodec/x86/vp9lpf.asm| 191 ++-
 2 files changed, 154 insertions(+), 41 deletions(-)

diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index 76ea48f..3b9e1bb 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -279,9 +279,7 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
 init_subpel2(4, idx,  4, type, opt)
 
 #define init_lpf(opt) do { \
-if (ARCH_X86_64) { \
-dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
-} \
+dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
 dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
 dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
 dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm
index c20eeb8..54f20fe 100644
--- a/libavcodec/x86/vp9lpf.asm
+++ b/libavcodec/x86/vp9lpf.asm
@@ -291,6 +291,30 @@ SECTION .text
 SWAP %12, %14
 %endmacro
 
+%macro TRANSPOSE8x8B 13
+SBUTTERFLY bw,  %1, %2, %7
+movdq%10 m%7, %9
+movdqa %11, m%2
+SBUTTERFLY bw,  %3, %4, %2
+SBUTTERFLY bw,  %5, %6, %2
+SBUTTERFLY bw,  %7, %8, %2
+SBUTTERFLY wd,  %1, %3, %2
+movdqa m%2, %11
+movdqa %11, m%3
+SBUTTERFLY wd,  %2, %4, %3
+SBUTTERFLY wd,  %5, %7, %3
+SBUTTERFLY wd,  %6, %8, %3
+SBUTTERFLY dq, %1, %5, %3
+SBUTTERFLY dq, %2, %6, %3
+movdqa m%3, %11
+movh   %12, m%2
+movhps %13, m%2
+SBUTTERFLY dq, %3, %7, %2
+SBUTTERFLY dq, %4, %8, %2
+SWAP %2, %5
+SWAP %4, %7
+%endmacro
+
 %macro DEFINE_REAL_P7_TO_Q7 0-1 0
 %define P7 dstq  + 4*mstrideq  + %1
 %define P6 dstq  +   mstride3q + %1
@@ -310,6 +334,25 @@ SECTION .text
 %define Q7 dst2q +stride3q + %1
 %endmacro
 
+%macro DEFINE_TRANSPOSED_P7_TO_Q7 0-1 0
+%define P3 rsp +   0 + %1
+%define P2 rsp +  16 + %1
+%define P1 rsp +  32 + %1
+%define P0 rsp +  48 + %1
+%define Q0 rsp +  64 + %1
+%define Q1 rsp +  80 + %1
+%define Q2 rsp +  96 + %1
+%define Q3 rsp + 112 + %1
+%define P7 rsp + 128 + %1
+%define P6 rsp + 144 + %1
+%define P5 rsp + 160 + %1
+%define P4 rsp + 176 + %1
+%define Q4 rsp + 192 + %1
+%define Q5 rsp + 208 + %1
+%define Q6 rsp + 224 + %1
+%define Q7 rsp + 240 + %1
+%endmacro
+
 ; ..............AB -> AAAAAAAABBBBBBBB
 %macro SPLATB_MIX 1-2 [mask_mix]
 %if cpuflag(ssse3)
@@ -364,7 +407,9 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, 
dst, stride, mstride,
 movxm3, [P4]
 movxm4, [P3]
 movxm5, [P2]
+%if ARCH_X86_64 || %2 != 16
 movxm6, [P1]
+%endif
 movxm7, [P0]
 %if ARCH_X86_64
 movxm8, [Q0]
@@ -375,21 +420,14 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, 
dst, stride, mstride,
 movx   m13, [Q5]
 movx   m14, [Q6]
 movx   m15, [Q7]
+DEFINE_TRANSPOSED_P7_TO_Q7
 %if %2 == 16
 TRANSPOSE16x16B 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, [rsp]
-%define P7 rsp + 128
-%define P6 rsp + 144
-%define P5 rsp + 160
-%define P4 rsp + 176
-%define Q4 rsp + 192
-%define Q5 rsp + 208
-%define Q6 rsp + 224
-%define Q7 rsp + 240
 mova   [P7],  m0
 mova   [P6],  m1
 mova   [P5],  m2
 mova   [P4],  m3
-%else
+%else ; %2 == 44/48/84/88
 ; 8x16 transpose
 punpcklbwm0,  m1
 punpcklbwm2,  m3
@@ -407,8 +445,65 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, 
dst, stride, mstride,
 SWAP 10,  9
 SWAP 12, 10
 SWAP 14, 11
-%endif
+%endif ; %2
+mova   [P3],  m4
+mova   [P2],  m5
+mova   [P1],  m6
+mova   [P0],  m7
+mova   [Q0],  m8
+mova   [Q1],  m9
+mova   [Q2], m10
+mova   [Q3], m11
+%if %2 == 16
+mova   [Q4], m12
+mova   [Q5], m13
+mova   [Q6], m14
+mova   [Q7], m15
+%endif ; %2
 %else ; x86-32
+%if %2 == 16
+TRANSPOSE8x8B0, 1, 2, 3, 4, 5, 6, 7, [P1], u, [rsp+%3+%4], [rsp+64], 
[rsp+80]
+DEFINE_TRANSPOSED_P7_TO_Q7
+movh  [P7], m0
+movh  [P5], m1
+movh  [P3], m2
+movh  [P1], m3
+movh  [Q2], m5
+movh  [Q4], m6
+movh  [Q6], m7
+movhps[P6], m0
+movhps[P4], m1
+movhps

[FFmpeg-cvslog] vp9lpf/x86: make filter_88_v work on 32-bit.

2017-03-23 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Sat Dec 27 
15:12:01 2014 -0500| [37637e65907b1a8c3731ea69c638792cb2438d0c] | committer: 
Anton Khirnov

vp9lpf/x86: make filter_88_v work on 32-bit.

Signed-off-by: Anton Khirnov 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=37637e65907b1a8c3731ea69c638792cb2438d0c
---

 libavcodec/x86/vp9dsp_init.c |   2 +-
 libavcodec/x86/vp9lpf.asm| 155 ++-
 2 files changed, 109 insertions(+), 48 deletions(-)

diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index daced21..523e92c 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -291,8 +291,8 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
 dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \
 dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \
 dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \
-dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \
 } \
+dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \
 } while (0)
 
 if (EXTERNAL_MMX(cpu_flags)) {
diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm
index d9a6215..e1c2b7b 100644
--- a/libavcodec/x86/vp9lpf.asm
+++ b/libavcodec/x86/vp9lpf.asm
@@ -52,6 +52,22 @@ mask_mix48: times 8 db 0x00
 
 SECTION .text
 
+%macro SCRATCH 3
+%if ARCH_X86_64
+SWAP%1, %2
+%else
+mova  [%3], m%1
+%endif
+%endmacro
+
+%macro UNSCRATCH 3
+%if ARCH_X86_64
+SWAP%1, %2
+%else
+mova   m%1, [%3]
+%endif
+%endmacro
+
 ; %1 = abs(%2-%3)
 %macro ABSSUB 4 ; dst, src1 (RO), src2 (RO), tmp
 %if ARCH_X86_64
@@ -86,12 +102,26 @@ SECTION .text
 por %1, %4  ; new&mask | old&~mask
 %endmacro
 
-%macro FILTER_SUBx2_ADDx2 9-10 "" ; %1=dst %2=h/l %3=cache %4=stack_off 
%5=sub1 %6=sub2 %7=add1 %8=add2 %9=rshift, [unpack]
+%macro UNPACK 4
+%if ARCH_X86_64
+punpck%1bw  %2, %3, %4
+%else
+mova%2, %3
+punpck%1bw  %2, %4
+%endif
+%endmacro
+
+%macro FILTER_SUBx2_ADDx2 11 ; %1=dst %2=h/l %3=cache %4=stack_off %5=sub1 
%6=sub2 %7=add1
+ ; %8=add2 %9=rshift, [unpack], 
[unpack_is_mem_on_x86_32]
 psubw   %3, [rsp+%4+%5*32]
 psubw   %3, [rsp+%4+%6*32]
 paddw   %3, [rsp+%4+%7*32]
 %ifnidn %10, ""
+%if %11 == 0
 punpck%2bw  %1, %10, m0
+%else
+UNPACK  %2, %1, %10, m0
+%endif
 mova[rsp+%4+%8*32], %1
 paddw   %3, %1
 %else
@@ -110,13 +140,14 @@ SECTION .text
 %endmacro
 
 
-%macro FILTER_UPDATE 12-15 "", "" ; tmp1, tmp2, cacheL, cacheH, dstp, 
stack_off, -, -, +, +, rshift, mask, [source], [unpack]
+%macro FILTER_UPDATE 12-16 "", "", "", 0 ; tmp1, tmp2, cacheL, cacheH, dstp, 
stack_off, -, -, +, +, rshift,
+ ; mask, [source], [unpack + src], 
[unpack_is_mem_on_x86_32]
 ; FIXME interleave this properly with the subx2/addx2
-%if %0 == 15
+%ifnidn %15, ""
 mova   %14, %15
 %endif
-FILTER_SUBx2_ADDx2  %1, l, %3, %6 +  0, %7, %8, %9, %10, %11, %14
-FILTER_SUBx2_ADDx2  %2, h, %4, %6 + 16, %7, %8, %9, %10, %11, %14
+FILTER_SUBx2_ADDx2  %1, l, %3, %6 +  0, %7, %8, %9, %10, %11, %14, %16
+FILTER_SUBx2_ADDx2  %2, h, %4, %6 + 16, %7, %8, %9, %10, %11, %14, %16
 packuswb%1, %2
 %ifnidn %13, ""
 MASK_APPLY  %1, %13, %12, %2
@@ -162,21 +193,21 @@ SECTION .text
 %endmacro
 
 %macro FILTER6_INIT 4 ; %1=dst %2=h/l %3=cache, %4=stack_off
-punpck%2bw  %1, m14, m0 ; p3: B->W
+UNPACK  %2, %1, rp3, m0 ; p3: B->W
 mova [rsp+%4+0*32], %1
 paddw   %3, %1, %1  ; p3*2
 paddw   %3, %1  ; p3*3
-punpck%2bw  %1, m15, m0 ; p2: B->W
+punpck%2bw  %1, m2,  m0 ; p2: B->W
 mova [rsp+%4+1*32], %1
 paddw   %3, %1  ; p3*3 + p2
 paddw   %3, %1  ; p3*3 + p2*2
-punpck%2bw  %1, m10, m0 ; p1: B->W
+UNPACK  %2, %1, rp1, m0 ; p1: B->W
 mova [rsp+%4+2*32], %1
 paddw   %3, %1  ; p3*3 + p2*2 + p1
-punpck%2bw  %1, m11, m0 ; p0: B->W
+UNPACK  %2, %1, rp0, m0 ; p0: B->W
 mova [rsp+%4+3*32], %1
 paddw   %3, %1  ; p3*3 + p2*2 + p1 + p0
-punpck%2bw  %1, m12, m0 ;

[FFmpeg-cvslog] vp9lpf/x86: make cglobal statement more conservative in register allocation.

2017-03-23 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Fri Dec 19 
22:09:30 2014 -0500| [6411c328a233b80faa5aa3ef4266f9a16e499699] | committer: 
Anton Khirnov

vp9lpf/x86: make cglobal statement more conservative in register allocation.

Signed-off-by: Anton Khirnov 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=6411c328a233b80faa5aa3ef4266f9a16e499699
---

 libavcodec/x86/vp9lpf.asm | 21 -
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm
index dc22705..878bc54 100644
--- a/libavcodec/x86/vp9lpf.asm
+++ b/libavcodec/x86/vp9lpf.asm
@@ -308,7 +308,20 @@ SECTION .text
 %endif
 %endmacro
 
-%macro LOOPFILTER 2 ; %1=v/h %2=size1
+%macro LOOPFILTER 3 ; %1=v/h %2=size1 %3=stack
+%if UNIX64
+cglobal vp9_loop_filter_%1_%2_16, 5, 9, 16, %3, dst, stride, E, I, H, mstride, 
dst2, stride3, mstride3
+%else
+%if WIN64
+cglobal vp9_loop_filter_%1_%2_16, 4, 8, 16, %3, dst, stride, E, I, mstride, 
dst2, stride3, mstride3
+%else
+cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3, dst, stride, mstride, dst2, 
stride3, mstride3
+%define Ed dword r2m
+%define Id dword r3m
+%endif
+%define Hd dword r4m
+%endif
+
 mov   mstrideq, strideq
 neg   mstrideq
 
@@ -796,10 +809,8 @@ SECTION .text
 
 %macro LPF_16_VH 2
 INIT_XMM %2
-cglobal vp9_loop_filter_v_%1_16, 5,10,16,  dst, stride, E, I, H, mstride, 
dst2, stride3, mstride3
-LOOPFILTER v, %1
-cglobal vp9_loop_filter_h_%1_16, 5,10,16, 256, dst, stride, E, I, H, mstride, 
dst2, stride3, mstride3
-LOOPFILTER h, %1
+LOOPFILTER v, %1, 0
+LOOPFILTER h, %1, 256
 %endmacro
 
 %macro LPF_16_VH_ALL_OPTS 1

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] vp9lpf/x86: make filter_44_v work on 32-bit.

2017-03-23 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Sat Dec 27 
15:08:48 2014 -0500| [be10834bd9dde81fc10568b7da8ffd1493df8589] | committer: 
Anton Khirnov

vp9lpf/x86: make filter_44_v work on 32-bit.

Signed-off-by: Anton Khirnov 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=be10834bd9dde81fc10568b7da8ffd1493df8589
---

 libavcodec/x86/vp9dsp_init.c |   4 +-
 libavcodec/x86/vp9lpf.asm| 155 +++
 2 files changed, 100 insertions(+), 59 deletions(-)

diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index 88267b9..daced21 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -283,7 +283,9 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
 dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
 dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
 dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
-dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
+} \
+dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
+if (ARCH_X86_64) { \
 dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
 dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \
 dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \
diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm
index c2afc44..d9a6215 100644
--- a/libavcodec/x86/vp9lpf.asm
+++ b/libavcodec/x86/vp9lpf.asm
@@ -2,6 +2,7 @@
 ;* VP9 loop filter SIMD optimizations
 ;*
 ;* Copyright (C) 2013-2014 Clément Bœsch 
+;* Copyright (C) 2014 Ronald S. Bultje 
 ;*
 ;* This file is part of Libav.
 ;*
@@ -23,8 +24,6 @@
 
 %include "libavutil/x86/x86util.asm"
 
-%if ARCH_X86_64
-
 SECTION_RODATA
 
 cextern pb_3
@@ -55,8 +54,15 @@ SECTION .text
 
 ; %1 = abs(%2-%3)
 %macro ABSSUB 4 ; dst, src1 (RO), src2 (RO), tmp
+%if ARCH_X86_64
 psubusb %1, %3, %2
 psubusb %4, %2, %3
+%else
+mova%1, %3
+mova%4, %2
+psubusb %1, %2
+psubusb %4, %3
+%endif
 por %1, %4
 %endmacro
 
@@ -428,6 +434,7 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4, dst, 
stride, mstride, dst2,
 movam0, [pb_80]
 pxorm2, m0
 pxorm3, m0
+%if ARCH_X86_64
 %ifidn %1, v
 movam8, [P3]
 movam9, [P2]
@@ -445,20 +452,38 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4, dst, 
stride, mstride, dst2,
 SWAP10,  6, 14
 SWAP11,  7, 15
 %endif
-ABSSUB_GT   m5,  m8,  m9, m2, m7, m0; m5 = abs(p3-p2) <= I
-ABSSUB_GT   m1,  m9, m10, m2, m7, m0; m1 = abs(p2-p1) <= I
+%define rp3 m8
+%define rp2 m9
+%define rp1 m10
+%define rp0 m11
+%define rq0 m12
+%define rq1 m13
+%define rq2 m14
+%define rq3 m15
+%else
+%define rp3 [P3]
+%define rp2 [P2]
+%define rp1 [P1]
+%define rp0 [P0]
+%define rq0 [Q0]
+%define rq1 [Q1]
+%define rq2 [Q2]
+%define rq3 [Q3]
+%endif
+ABSSUB_GT   m5, rp3, rp2, m2, m7, m0; m5 = abs(p3-p2) <= I
+ABSSUB_GT   m1, rp2, rp1, m2, m7, m0; m1 = abs(p2-p1) <= I
 por m5, m1
-ABSSUB_GT   m1, m10, m11, m2, m7, m0; m1 = abs(p1-p0) <= I
+ABSSUB_GT   m1, rp1, rp0, m2, m7, m0; m1 = abs(p1-p0) <= I
 por m5, m1
-ABSSUB_GT   m1, m12, m13, m2, m7, m0; m1 = abs(q1-q0) <= I
+ABSSUB_GT   m1, rq0, rq1, m2, m7, m0; m1 = abs(q1-q0) <= I
 por m5, m1
-ABSSUB_GT   m1, m13, m14, m2, m7, m0; m1 = abs(q2-q1) <= I
+ABSSUB_GT   m1, rq1, rq2, m2, m7, m0; m1 = abs(q2-q1) <= I
 por m5, m1
-ABSSUB_GT   m1, m14, m15, m2, m7, m0; m1 = abs(q3-q2) <= I
+ABSSUB_GT   m1, rq2, rq3, m2, m7, m0; m1 = abs(q3-q2) <= I
 por m5, m1
-ABSSUB  m1, m11, m12, m7; abs(p0-q0)
+ABSSUB  m1, rp0, rq0, m7; abs(p0-q0)
 paddusb m1, m1  ; abs(p0-q0) * 2
-ABSSUB  m2, m10, m13, m7; abs(p1-q1)
+ABSSUB  m2, rp1, rq1, m7; abs(p1-q1)
 pandm2, [pb_fe] ; drop lsb so shift 
can work
 psrlq   m2, 1   ; abs(p1-q1)/2
 paddusb m1, m2  ; abs(p0-q0)*2 + 
abs(p1-q1)/2
@@ -510,10 +535,10 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4, dst, 
stride, mstride, dst2,
 movdm7, Hd
 SPLATB_MIX  m7
 pxorm7,

[FFmpeg-cvslog] vp9lpf/x86: make filter_48/84/88_h work on 32-bit.

2017-03-23 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Fri Dec 26 
15:15:50 2014 -0500| [8915320db94c9b3ceb97d6ad92addda690af8c18] | committer: 
Anton Khirnov

vp9lpf/x86: make filter_48/84/88_h work on 32-bit.

Signed-off-by: Anton Khirnov 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=8915320db94c9b3ceb97d6ad92addda690af8c18
---

 libavcodec/x86/vp9dsp_init.c | 12 +++--
 libavcodec/x86/vp9lpf.asm| 62 
 2 files changed, 48 insertions(+), 26 deletions(-)

diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index 6438644..76ea48f 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -285,17 +285,11 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
 dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
 dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
 dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
-if (ARCH_X86_64) { \
-dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
-} \
+dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
 dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \
-if (ARCH_X86_64) { \
-dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \
-} \
+dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \
 dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \
-if (ARCH_X86_64) { \
-dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \
-} \
+dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \
 dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \
 } while (0)
 
diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm
index 881bdab..c20eeb8 100644
--- a/libavcodec/x86/vp9lpf.asm
+++ b/libavcodec/x86/vp9lpf.asm
@@ -939,9 +939,12 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, 
dst, stride, mstride,
 movam3, [P0]
 movam4, [Q0]
 movam5, [Q1]
+%if ARCH_X86_64
 movam6, [Q2]
+%endif
 movam7, [Q3]
 DEFINE_REAL_P7_TO_Q7
+%if ARCH_X86_64
 SBUTTERFLY  bw,  0,  1, 8
 SBUTTERFLY  bw,  2,  3, 8
 SBUTTERFLY  bw,  4,  5, 8
@@ -954,22 +957,47 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, 
dst, stride, mstride,
 SBUTTERFLY  dq,  1,  5, 8
 SBUTTERFLY  dq,  2,  6, 8
 SBUTTERFLY  dq,  3,  7, 8
-movh   [P7], m0
-movhps [P6], m0
-movh   [Q0], m1
-movhps [Q1], m1
-movh   [P3], m2
-movhps [P2], m2
-movh   [Q4], m3
-movhps [Q5], m3
-movh   [P5], m4
-movhps [P4], m4
-movh   [Q2], m5
-movhps [Q3], m5
-movh   [P1], m6
-movhps [P0], m6
-movh   [Q6], m7
-movhps [Q7], m7
+%else
+SBUTTERFLY  bw,  0,  1, 6
+mova  [rsp+64], m1
+movam6, [rsp+96]
+SBUTTERFLY  bw,  2,  3, 1
+SBUTTERFLY  bw,  4,  5, 1
+SBUTTERFLY  bw,  6,  7, 1
+SBUTTERFLY  wd,  0,  2, 1
+mova  [rsp+96], m2
+movam1, [rsp+64]
+SBUTTERFLY  wd,  1,  3, 2
+SBUTTERFLY  wd,  4,  6, 2
+SBUTTERFLY  wd,  5,  7, 2
+SBUTTERFLY  dq,  0,  4, 2
+SBUTTERFLY  dq,  1,  5, 2
+movh  [Q0], m1
+movhps[Q1], m1
+movam2, [rsp+96]
+SBUTTERFLY  dq,  2,  6, 1
+SBUTTERFLY  dq,  3,  7, 1
+%endif
+SWAP 3, 6
+SWAP 1, 4
+movh  [P7], m0
+movhps[P6], m0
+movh  [P5], m1
+movhps[P4], m1
+movh  [P3], m2
+movhps[P2], m2
+movh  [P1], m3
+movhps[P0], m3
+%if ARCH_X86_64
+movh  [Q0], m4
+movhps[Q1], m4
+%endif
+movh  [Q2], m5
+movhps[Q3], m5
+movh  [Q4], m6
+movhps[Q5], m6
+movh  [Q6], m7
+movhps[Q7], m7
 %endif
 %endif
 
@@ -979,7 +1007,7 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, 
dst, stride, mstride,
 %macro LPF_16_VH 5
 INIT_XMM %5
 LOOPFILTER v, %1, %2,  0, %4
-%if ARCH_X86_64 || %1 == 44
+%if ARCH_X86_64 || %1 != 16
 LOOPFILTER h, %1, %2, %3, %4
 %endif
 %endmacro

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] vp9lpf/x86: make filter_16_v work on 32-bit.

2017-03-23 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Fri Dec 26 
14:05:23 2014 -0500| [5bfa96c4b30d9fdb59a8f2a9d0769a3fa2e622be] | committer: 
Anton Khirnov

vp9lpf/x86: make filter_16_v work on 32-bit.

Signed-off-by: Anton Khirnov 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5bfa96c4b30d9fdb59a8f2a9d0769a3fa2e622be
---

 libavcodec/x86/vp9dsp_init.c |   4 +-
 libavcodec/x86/vp9lpf.asm| 135 ++-
 2 files changed, 99 insertions(+), 40 deletions(-)

diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index 9f09053..76bb06e 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -281,7 +281,9 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
 #define init_lpf(opt) do { \
 if (ARCH_X86_64) { \
 dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
-dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
+} \
+dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
+if (ARCH_X86_64) { \
 dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
 } \
 dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm
index 150cd73..57536b9 100644
--- a/libavcodec/x86/vp9lpf.asm
+++ b/libavcodec/x86/vp9lpf.asm
@@ -144,8 +144,10 @@ SECTION .text
  ; mask, [source], [unpack + src], 
[unpack_is_mem_on_x86_32]
 ; FIXME interleave this properly with the subx2/addx2
 %ifnidn %15, ""
+%if %16 == 0 || ARCH_X86_64
 mova   %14, %15
 %endif
+%endif
 FILTER_SUBx2_ADDx2  %1, l, %3, %6 +  0, %7, %8, %9, %10, %11, %14, %16
 FILTER_SUBx2_ADDx2  %2, h, %4, %6 + 16, %7, %8, %9, %10, %11, %14, %16
 packuswb%1, %2
@@ -197,7 +199,7 @@ SECTION .text
 mova [rsp+%4+0*32], %1
 paddw   %3, %1, %1  ; p3*2
 paddw   %3, %1  ; p3*3
-punpck%2bw  %1, m2,  m0 ; p2: B->W
+punpck%2bw  %1, m1,  m0 ; p2: B->W
 mova [rsp+%4+1*32], %1
 paddw   %3, %1  ; p3*3 + p2
 paddw   %3, %1  ; p3*3 + p2*2
@@ -223,10 +225,10 @@ SECTION .text
 mova[rsp+%4+ 9*32], %1
 paddw   %3, %1  ; p7*7 + p6
 paddw   %3, %1  ; p7*7 + p6*2
-punpck%2bw  %1, m8, m0  ; p5: B->W
+UNPACK  %2, %1, rp5, m0 ; p5: B->W
 mova[rsp+%4+10*32], %1
 paddw   %3, %1  ; p7*7 + p6*2 + p5
-punpck%2bw  %1, m9, m0  ; p4: B->W
+UNPACK  %2, %1, rp4, m0 ; p4: B->W
 mova[rsp+%4+11*32], %1
 paddw   %3, %1  ; p7*7 + p6*2 + p5 + p4
 paddw   %3, [rsp+%4+ 0*32]  ; p7*7 + p6*2 + p5 + 
p4 + p3
@@ -583,28 +585,56 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, 
dst, stride, mstride,
 %if %2 == 16
 ; (m0: hev, m2: flat8in, m3: fm, m6: pb_81, m9..15: p2 p1 p0 q0 q1 q2 q3)
 ; calc flat8out mask
+%if ARCH_X86_64
 movam8, [P7]
 movam9, [P6]
-ABSSUB_GT   m1, m8, m11, m6, m5 ; abs(p7 - p0) <= 1
-ABSSUB_GT   m7, m9, m11, m6, m5 ; abs(p6 - p0) <= 1
+%define rp7 m8
+%define rp6 m9
+%else
+%define rp7 [P7]
+%define rp6 [P6]
+%endif
+ABSSUB_GT   m1, rp7, rp0, m6, m5; abs(p7 - p0) <= 1
+ABSSUB_GT   m7, rp6, rp0, m6, m5; abs(p6 - p0) <= 1
 por m1, m7
+%if ARCH_X86_64
 movam8, [P5]
 movam9, [P4]
-ABSSUB_GT   m7, m8, m11, m6, m5 ; abs(p5 - p0) <= 1
+%define rp5 m8
+%define rp4 m9
+%else
+%define rp5 [P5]
+%define rp4 [P4]
+%endif
+ABSSUB_GT   m7, rp5, rp0, m6, m5; abs(p5 - p0) <= 1
 por m1, m7
-ABSSUB_GT   m7, m9, m11, m6, m5 ; abs(p4 - p0) <= 1
+ABSSUB_GT   m7, rp4, rp0, m6, m5; abs(p4 - p0) <= 1
 por m1, m7
+%if ARCH_X86_64
 movam14, [Q4]
 movam15, [Q5]
-ABSSUB_GT   m7, m14, m12, m6, m5; abs(q4 - q0) <= 1
+%define rq4 m14
+%define rq5 m15
+%else
+%define rq4 [Q4]
+%define rq5 [Q5]
+%endif
+ABSSUB_GT   m7, rq4, rq0, m6, m5; abs(q4 - q0) <= 1
 por m1, m7
-ABSSUB_GT   m7, m15, m12, m6, m5; abs(q5 - q0) <= 1
+ABSSUB_GT   m7, rq5, rq0, m6, m5; abs(q5 - q0) <= 1

[FFmpeg-cvslog] vp9lpf/x86: make filter_48/84_v work on 32-bit.

2017-03-23 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Fri Dec 26 
12:10:26 2014 -0500| [b905e8d2fe03da1bf34ffa6e04b322f19a479143] | committer: 
Anton Khirnov

vp9lpf/x86: make filter_48/84_v work on 32-bit.

Signed-off-by: Anton Khirnov 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b905e8d2fe03da1bf34ffa6e04b322f19a479143
---

 libavcodec/x86/vp9dsp_init.c | 8 ++--
 libavcodec/x86/vp9lpf.asm| 8 +++-
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index 523e92c..9f09053 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -287,9 +287,13 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
 dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
 if (ARCH_X86_64) { \
 dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
-dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \
+} \
+dsp->loop_filter_mix2[0][1][1] = ff_vp9_loop_filter_v_48_16_##opt; \
+if (ARCH_X86_64) { \
 dsp->loop_filter_mix2[1][0][0] = ff_vp9_loop_filter_h_84_16_##opt; \
-dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \
+} \
+dsp->loop_filter_mix2[1][0][1] = ff_vp9_loop_filter_v_84_16_##opt; \
+if (ARCH_X86_64) { \
 dsp->loop_filter_mix2[1][1][0] = ff_vp9_loop_filter_h_88_16_##opt; \
 } \
 dsp->loop_filter_mix2[1][1][1] = ff_vp9_loop_filter_v_88_16_##opt; \
diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm
index e1c2b7b..150cd73 100644
--- a/libavcodec/x86/vp9lpf.asm
+++ b/libavcodec/x86/vp9lpf.asm
@@ -920,9 +920,7 @@ LPF_16_VH %1, %2, %3, avx
 %if ARCH_X86_64
 LPF_16_VH_ALL_OPTS 16, 512
 %endif
-LPF_16_VH_ALL_OPTS 44,   0, 0
-%if ARCH_X86_64
-LPF_16_VH_ALL_OPTS 48, 256
-LPF_16_VH_ALL_OPTS 84, 256
-%endif
+LPF_16_VH_ALL_OPTS 44,   0,  0
+LPF_16_VH_ALL_OPTS 48, 256, 16
+LPF_16_VH_ALL_OPTS 84, 256, 16
 LPF_16_VH_ALL_OPTS 88, 256, 16

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] vp9lpf/x86: save one register in loopfilter surface coverage.

2017-03-23 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Fri Dec 19 
21:44:57 2014 -0500| [a6e288d62414c25ed173b17b48ddea947bede73e] | committer: 
Anton Khirnov

vp9lpf/x86: save one register in loopfilter surface coverage.

Signed-off-by: Anton Khirnov 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a6e288d62414c25ed173b17b48ddea947bede73e
---

 libavcodec/x86/vp9lpf.asm | 56 +++
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm
index 6138da1..dc22705 100644
--- a/libavcodec/x86/vp9lpf.asm
+++ b/libavcodec/x86/vp9lpf.asm
@@ -279,22 +279,22 @@ SECTION .text
 %endmacro
 
 %macro DEFINE_REAL_P7_TO_Q7 0-1 0
-%define P7 dst1q + 2*mstrideq  + %1
-%define P6 dst1q +   mstrideq  + %1
-%define P5 dst1q   + %1
-%define P4 dst1q +strideq  + %1
-%define P3 dstq  + 4*mstrideq  + %1
-%define P2 dstq  +   mstride3q + %1
-%define P1 dstq  + 2*mstrideq  + %1
-%define P0 dstq  +   mstrideq  + %1
-%define Q0 dstq+ %1
-%define Q1 dstq  +   strideq   + %1
-%define Q2 dstq  + 2*strideq   + %1
-%define Q3 dstq  +   stride3q  + %1
-%define Q4 dstq  + 4*strideq   + %1
-%define Q5 dst2q + mstrideq+ %1
-%define Q6 dst2q   + %1
-%define Q7 dst2q +  strideq+ %1
+%define P7 dstq  + 4*mstrideq  + %1
+%define P6 dstq  +   mstride3q + %1
+%define P5 dstq  + 2*mstrideq  + %1
+%define P4 dstq  +   mstrideq  + %1
+%define P3 dstq+ %1
+%define P2 dstq  +strideq  + %1
+%define P1 dstq  + 2* strideq  + %1
+%define P0 dstq  +stride3q + %1
+%define Q0 dstq  + 4* strideq  + %1
+%define Q1 dst2q +   mstride3q + %1
+%define Q2 dst2q + 2*mstrideq  + %1
+%define Q3 dst2q +   mstrideq  + %1
+%define Q4 dst2q   + %1
+%define Q5 dst2q +strideq  + %1
+%define Q6 dst2q + 2* strideq  + %1
+%define Q7 dst2q +stride3q + %1
 %endmacro
 
 ; ..............AB -> AAAAAAAABBBBBBBB
@@ -309,26 +309,26 @@ SECTION .text
 %endmacro
 
 %macro LOOPFILTER 2 ; %1=v/h %2=size1
-lea mstrideq, [strideq]
-neg mstrideq
+mov   mstrideq, strideq
+neg   mstrideq
 
-lea stride3q, [strideq+2*strideq]
-mov mstride3q, stride3q
-neg mstride3q
+lea   stride3q, [strideq*3]
+lea  mstride3q, [mstrideq*3]
 
 %ifidn %1, h
 %if %2 > 16
 %define movx movh
-lea dstq, [dstq + 8*strideq - 4]
+lea   dstq, [dstq + 4*strideq - 4]
 %else
 %define movx movu
-lea dstq, [dstq + 8*strideq - 8] ; go from top center (h pos) to center 
left (v pos)
+lea   dstq, [dstq + 4*strideq - 8] ; go from top center (h 
pos) to center left (v pos)
 %endif
+lea  dst2q, [dstq + 8*strideq]
+%else
+lea   dstq, [dstq + 4*mstrideq]
+lea  dst2q, [dstq + 8*strideq]
 %endif
 
-lea dst1q, [dstq + 2*mstride3q] ; dst1q = 
&dst[stride * -6]
-lea dst2q, [dstq + 2* stride3q] ; dst2q = 
&dst[stride * +6]
-
 DEFINE_REAL_P7_TO_Q7
 
 %ifidn %1, h
@@ -796,9 +796,9 @@ SECTION .text
 
 %macro LPF_16_VH 2
 INIT_XMM %2
-cglobal vp9_loop_filter_v_%1_16, 5,10,16,  dst, stride, E, I, H, mstride, 
dst1, dst2, stride3, mstride3
+cglobal vp9_loop_filter_v_%1_16, 5,10,16,  dst, stride, E, I, H, mstride, 
dst2, stride3, mstride3
 LOOPFILTER v, %1
-cglobal vp9_loop_filter_h_%1_16, 5,10,16, 256, dst, stride, E, I, H, mstride, 
dst1, dst2, stride3, mstride3
+cglobal vp9_loop_filter_h_%1_16, 5,10,16, 256, dst, stride, E, I, H, mstride, 
dst2, stride3, mstride3
 LOOPFILTER h, %1
 %endmacro
 

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] vp9lpf/x86: make filter_44_h work on 32-bit.

2017-03-23 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Fri Dec 26 
14:48:01 2014 -0500| [725a216481c422a71a727771706d6343a0eaeaf8] | committer: 
Anton Khirnov

vp9lpf/x86: make filter_44_h work on 32-bit.

Signed-off-by: Anton Khirnov 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=725a216481c422a71a727771706d6343a0eaeaf8
---

 libavcodec/x86/vp9dsp_init.c |   4 +-
 libavcodec/x86/vp9lpf.asm| 140 ---
 2 files changed, 78 insertions(+), 66 deletions(-)

diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index 76bb06e..6438644 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -283,9 +283,7 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
 dsp->loop_filter_16[0] = ff_vp9_loop_filter_h_16_16_##opt; \
 } \
 dsp->loop_filter_16[1] = ff_vp9_loop_filter_v_16_16_##opt; \
-if (ARCH_X86_64) { \
-dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
-} \
+dsp->loop_filter_mix2[0][0][0] = ff_vp9_loop_filter_h_44_16_##opt; \
 dsp->loop_filter_mix2[0][0][1] = ff_vp9_loop_filter_v_44_16_##opt; \
 if (ARCH_X86_64) { \
 dsp->loop_filter_mix2[0][1][0] = ff_vp9_loop_filter_h_48_16_##opt; \
diff --git a/libavcodec/x86/vp9lpf.asm b/libavcodec/x86/vp9lpf.asm
index 57536b9..881bdab 100644
--- a/libavcodec/x86/vp9lpf.asm
+++ b/libavcodec/x86/vp9lpf.asm
@@ -291,38 +291,6 @@ SECTION .text
 SWAP %12, %14
 %endmacro
 
-; transpose 16 half lines (high part) to 8 full centered lines
-%macro TRANSPOSE16x8B 16
-punpcklbw   m%1,  m%2
-punpcklbw   m%3,  m%4
-punpcklbw   m%5,  m%6
-punpcklbw   m%7,  m%8
-punpcklbw   m%9,  m%10
-punpcklbw   m%11, m%12
-punpcklbw   m%13, m%14
-punpcklbw   m%15, m%16
-SBUTTERFLY  wd,  %1,  %3,  %2
-SBUTTERFLY  wd,  %5,  %7,  %2
-SBUTTERFLY  wd,  %9,  %11, %2
-SBUTTERFLY  wd,  %13, %15, %2
-SBUTTERFLY  dq,  %1,  %5,  %2
-SBUTTERFLY  dq,  %3,  %7,  %2
-SBUTTERFLY  dq,  %9,  %13, %2
-SBUTTERFLY  dq,  %11, %15, %2
-SBUTTERFLY  qdq, %1,  %9,  %2
-SBUTTERFLY  qdq, %3,  %11, %2
-SBUTTERFLY  qdq, %5,  %13, %2
-SBUTTERFLY  qdq, %7,  %15, %2
-SWAP %5, %1
-SWAP %6, %9
-SWAP %7, %1
-SWAP %8, %13
-SWAP %9, %3
-SWAP %10, %11
-SWAP %11, %1
-SWAP %12, %15
-%endmacro
-
 %macro DEFINE_REAL_P7_TO_Q7 0-1 0
 %define P7 dstq  + 4*mstrideq  + %1
 %define P6 dstq  +   mstride3q + %1
@@ -398,6 +366,7 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, 
dst, stride, mstride,
 movxm5, [P2]
 movxm6, [P1]
 movxm7, [P0]
+%if ARCH_X86_64
 movxm8, [Q0]
 movxm9, [Q1]
 movx   m10, [Q2]
@@ -406,32 +375,67 @@ cglobal vp9_loop_filter_%1_%2_16, 2, 6, 16, %3 + %4 + %5, 
dst, stride, mstride,
 movx   m13, [Q5]
 movx   m14, [Q6]
 movx   m15, [Q7]
-%define P7 rsp +   0
-%define P6 rsp +  16
-%define P5 rsp +  32
-%define P4 rsp +  48
-%define P3 rsp +  64
-%define P2 rsp +  80
-%define P1 rsp +  96
-%define P0 rsp + 112
-%define Q0 rsp + 128
-%define Q1 rsp + 144
-%define Q2 rsp + 160
-%define Q3 rsp + 176
+%if %2 == 16
+TRANSPOSE16x16B 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, [rsp]
+%define P7 rsp + 128
+%define P6 rsp + 144
+%define P5 rsp + 160
+%define P4 rsp + 176
 %define Q4 rsp + 192
 %define Q5 rsp + 208
 %define Q6 rsp + 224
 %define Q7 rsp + 240
-
-%if %2 == 16
-TRANSPOSE16x16B 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, [rsp]
 mova   [P7],  m0
 mova   [P6],  m1
 mova   [P5],  m2
 mova   [P4],  m3
 %else
-TRANSPOSE16x8B 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
-%endif
+; 8x16 transpose
+punpcklbwm0,  m1
+punpcklbwm2,  m3
+punpcklbwm4,  m5
+punpcklbwm6,  m7
+punpcklbwm8,  m9
+punpcklbw   m10, m11
+punpcklbw   m12, m13
+punpcklbw   m14, m15
+TRANSPOSE8x8W 0, 2, 4, 6, 8, 10, 12, 14, 15
+SWAP  0,  4
+SWAP  2,  5
+SWAP  0,  6
+SWAP  0,  7
+SWAP 10,  9
+SWAP 12, 10
+SWAP 14, 11
+%endif
+%else ; x86-32
+punpcklbwm0, m1
+punpcklbwm2, m3
+punpcklbwm4, m5
+punpcklbwm6, m7
+movx m1, [Q0]
+movx m3, [Q1]
+movx m5, [Q2]
+movx m7, [Q3]
+punpcklbwm1, m3
+punpcklbwm5, m7
+movx m3, [Q4]
+movx m7, [Q5]
+punpcklbwm3, m7
+mova  [rsp], m3
+movx m3, [Q6]
+movx m7, [Q7]
+punpcklbwm3, m7
+%endif
+%define P3 rsp +   0
+%define

[FFmpeg-cvslog] vp9: ignore reference segmentation map if error_resilience flag is set.

2017-03-23 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Tue Aug 12 
18:11:05 2014 -0400| [a451324dddf5d2ab4bcd6aa0f546596f71bdada3] | committer: 
Anton Khirnov

vp9: ignore reference segmentation map if error_resilience flag is set.

Fixes ffvp9_fails_where_libvpx.succeeds.webm.

Bug-Id: ffmpeg/3849.

Signed-off-by: Anton Khirnov 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=a451324dddf5d2ab4bcd6aa0f546596f71bdada3
---

 libavcodec/vp9.c  |  2 +-
 libavcodec/vp9block.c | 28 
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index 32d995f..9062185 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -64,7 +64,7 @@ static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
 f->mv   = (VP9MVRefPair*)f->mv_buf->data;
 
 if (s->segmentation.enabled && !s->segmentation.update_map &&
-!s->keyframe && !s->intraonly)
+!s->keyframe && !s->intraonly && !s->errorres)
 memcpy(f->segmentation_map, s->frames[LAST_FRAME].segmentation_map, 
sz);
 
 return 0;
diff --git a/libavcodec/vp9block.c b/libavcodec/vp9block.c
index 9b0d836..cd40c38 100644
--- a/libavcodec/vp9block.c
+++ b/libavcodec/vp9block.c
@@ -70,18 +70,22 @@ static void decode_mode(VP9Context *s, VP9Block *const b)
 vp56_rac_get_prob_branchy(&s->c,
   
s->prob.segpred[s->above_segpred_ctx[col] +
   
s->left_segpred_ctx[row7]]))) {
-uint8_t *refsegmap = s->frames[LAST_FRAME].segmentation_map;
-int pred = MAX_SEGMENT - 1;
-int x;
-
-if (!s->last_uses_2pass)
-ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 0);
-
-for (y = 0; y < h4; y++)
-for (x = 0; x < w4; x++)
-pred = FFMIN(pred,
- refsegmap[(y + row) * 8 * s->sb_cols + x + col]);
-b->seg_id = pred;
+if (!s->errorres) {
+uint8_t *refsegmap = s->frames[LAST_FRAME].segmentation_map;
+int pred = MAX_SEGMENT - 1;
+int x;
+
+if (!s->last_uses_2pass)
+ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 
0);
+
+for (y = 0; y < h4; y++)
+for (x = 0; x < w4; x++)
+pred = FFMIN(pred,
+ refsegmap[(y + row) * 8 * s->sb_cols + x + 
col]);
+b->seg_id = pred;
+} else {
+b->seg_id = 0;
+}
 
 memset(&s->above_segpred_ctx[col], 1, w4);
 memset(&s->left_segpred_ctx[row7], 1, h4);

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] checkasm: add VP9 loopfilter tests.

2017-03-23 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Thu Sep 17 
11:58:10 2015 -0400| [c935b54bd6a12714fc08d88791dadee2ba07176a] | committer: 
Anton Khirnov

checkasm: add VP9 loopfilter tests.

The randomize_buffer() implementation assures that "most of the time",
we'll do a good mix of wide16/wide8/hev/regular/no filters for complete
code coverage. However, this is not mathematically assured because that
would make the code either much more complex, or much less random.

Some fixes and improvements by Rodger Combs 

Signed-off-by: Anton Khirnov 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=c935b54bd6a12714fc08d88791dadee2ba07176a
---

 tests/checkasm/vp9dsp.c | 159 
 1 file changed, 159 insertions(+)

diff --git a/tests/checkasm/vp9dsp.c b/tests/checkasm/vp9dsp.c
index dd37077..f0d9372 100644
--- a/tests/checkasm/vp9dsp.c
+++ b/tests/checkasm/vp9dsp.c
@@ -32,6 +32,164 @@ static const uint32_t pixel_mask[3] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff };
 
 #define BIT_DEPTH 8
 #define SIZEOF_PIXEL ((BIT_DEPTH + 7) / 8)
+
+#define setpx(a,b,c) \
+do { \
+if (SIZEOF_PIXEL == 1) { \
+buf0[(a) + (b) * jstride] = av_clip_uint8(c); \
+} else { \
+((uint16_t *)buf0)[(a) + (b) * jstride] = av_clip_uintp2(c, BIT_DEPTH); \
+} \
+} while (0)
+#define setdx(a,b,c,d) setpx(a,b,c-(d)+(rnd()%((d)*2+1)))
+#define setsx(a,b,c,d) setdx(a,b,c,(d) << (BIT_DEPTH - 8))
+
+static void randomize_loopfilter_buffers(int bidx, int lineoff, int str,
+ int bit_depth, int dir,
+ const int *E, const int *F,
+ const int *H, const int *I,
+ uint8_t *buf0, uint8_t *buf1)
+{
+uint32_t mask = (1 << BIT_DEPTH) - 1;
+int off = dir ? lineoff : lineoff * 16;
+int istride = dir ? 1 : 16;
+int jstride = dir ? str : 1;
+int i, j;
+for (i = 0; i < 2; i++) /* flat16 */ {
+int idx = off + i * istride, p0, q0;
+setpx(idx,  0, q0 = rnd() & mask);
+setsx(idx, -1, p0 = q0, E[bidx] >> 2);
+for (j = 1; j < 8; j++) {
+setsx(idx, -1 - j, p0, F[bidx]);
+setsx(idx, j, q0, F[bidx]);
+}
+}
+for (i = 2; i < 4; i++) /* flat8 */ {
+int idx = off + i * istride, p0, q0;
+setpx(idx,  0, q0 = rnd() & mask);
+setsx(idx, -1, p0 = q0, E[bidx] >> 2);
+for (j = 1; j < 4; j++) {
+setsx(idx, -1 - j, p0, F[bidx]);
+setsx(idx, j, q0, F[bidx]);
+}
+for (j = 4; j < 8; j++) {
+setpx(idx, -1 - j, rnd() & mask);
+setpx(idx, j, rnd() & mask);
+}
+}
+for (i = 4; i < 6; i++) /* regular */ {
+int idx = off + i * istride, p2, p1, p0, q0, q1, q2;
+setpx(idx,  0, q0 = rnd() & mask);
+setsx(idx,  1, q1 = q0, I[bidx]);
+setsx(idx,  2, q2 = q1, I[bidx]);
+setsx(idx,  3, q2,  I[bidx]);
+setsx(idx, -1, p0 = q0, E[bidx] >> 2);
+setsx(idx, -2, p1 = p0, I[bidx]);
+setsx(idx, -3, p2 = p1, I[bidx]);
+setsx(idx, -4, p2,  I[bidx]);
+for (j = 4; j < 8; j++) {
+setpx(idx, -1 - j, rnd() & mask);
+setpx(idx, j, rnd() & mask);
+}
+}
+for (i = 6; i < 8; i++) /* off */ {
+int idx = off + i * istride;
+for (j = 0; j < 8; j++) {
+setpx(idx, -1 - j, rnd() & mask);
+setpx(idx, j, rnd() & mask);
+}
+}
+}
+
+#define randomize_buffers(bidx, lineoff, str)\
+randomize_loopfilter_buffers(bidx, lineoff, str, BIT_DEPTH, dir, \
+ E, F, H, I, buf0, buf1)
+
+static void check_loopfilter(void)
+{
+LOCAL_ALIGNED_32(uint8_t, base0, [32 + 16 * 16 * 2]);
+LOCAL_ALIGNED_32(uint8_t, base1, [32 + 16 * 16 * 2]);
+VP9DSPContext dsp;
+int dir, wd, wd2;
+static const char *const dir_name[2] = { "h", "v" };
+static const int E[2] = { 20, 28 }, I[2] = { 10, 16 };
+static const int H[2] = {  7, 11 }, F[2] = {  1,  1 };
+declare_func(void, uint8_t *dst, ptrdiff_t stride, int E, int I, int H);
+
+ff_vp9dsp_init(&dsp);
+
+for (dir = 0; dir < 2; dir++) {
+uint8_t *buf0, *buf1;
+int midoff = (dir ? 8 * 8 : 8) * SIZEOF_PIXEL;
+int midoff_aligned = (dir ? 8 * 8 : 16) * SIZEOF_PIXEL;
+
+buf0 = base0 + midoff_aligned;
+buf1 = base1 + midoff_aligned;
+
+for (wd = 0; wd < 3; wd++) {
+// 4/8/16wd_8px
+if (check_func(dsp.loop_filter_8[wd][dir],
+   "vp9_loop_filter_%s_%d_8",
+   dir_name[dir], 4 << wd)) {
+randomi

[FFmpeg-cvslog] wmavoice: remove unused or write-only variables.

2017-03-20 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Thu Dec 22 
09:02:32 2016 -0500| [f3cd2302a9c9724f57fda4afb5ad7a588fb8b304] | committer: 
Ronald S. Bultje

wmavoice: remove unused or write-only variables.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=f3cd2302a9c9724f57fda4afb5ad7a588fb8b304
---

 libavcodec/wmavoice.c | 44 +---
 1 file changed, 17 insertions(+), 27 deletions(-)

diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index c2390a9..2ec4499 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c
@@ -104,26 +104,24 @@ static const struct frame_type_desc {
 uint8_t dbl_pulses;   ///< how many pulse vectors have pulse pairs
   ///< (rather than just one single pulse)
   ///< only if #fcb_type == #FCB_TYPE_EXC_PULSES
-uint16_t frame_size;  ///< the amount of bits that make up the block
-  ///< data (per frame)
 } frame_descs[17] = {
-{ 1, 0, ACB_TYPE_NONE,   FCB_TYPE_SILENCE,0,   0 },
-{ 2, 1, ACB_TYPE_NONE,   FCB_TYPE_HARDCODED,  0,  28 },
-{ 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES,  0,  46 },
-{ 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2,  80 },
-{ 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 },
-{ 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 },
-{ 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 },
-{ 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 },
-{ 2, 1, ACB_TYPE_HAMMING,FCB_TYPE_EXC_PULSES, 0,  64 },
-{ 2, 1, ACB_TYPE_HAMMING,FCB_TYPE_EXC_PULSES, 2,  80 },
-{ 2, 1, ACB_TYPE_HAMMING,FCB_TYPE_EXC_PULSES, 5, 104 },
-{ 4, 2, ACB_TYPE_HAMMING,FCB_TYPE_EXC_PULSES, 0, 108 },
-{ 4, 2, ACB_TYPE_HAMMING,FCB_TYPE_EXC_PULSES, 2, 132 },
-{ 4, 2, ACB_TYPE_HAMMING,FCB_TYPE_EXC_PULSES, 5, 168 },
-{ 8, 3, ACB_TYPE_HAMMING,FCB_TYPE_EXC_PULSES, 0, 176 },
-{ 8, 3, ACB_TYPE_HAMMING,FCB_TYPE_EXC_PULSES, 2, 208 },
-{ 8, 3, ACB_TYPE_HAMMING,FCB_TYPE_EXC_PULSES, 5, 256 }
+{ 1, 0, ACB_TYPE_NONE,   FCB_TYPE_SILENCE,0 },
+{ 2, 1, ACB_TYPE_NONE,   FCB_TYPE_HARDCODED,  0 },
+{ 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES,  0 },
+{ 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2 },
+{ 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5 },
+{ 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0 },
+{ 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2 },
+{ 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5 },
+{ 2, 1, ACB_TYPE_HAMMING,FCB_TYPE_EXC_PULSES, 0 },
+{ 2, 1, ACB_TYPE_HAMMING,FCB_TYPE_EXC_PULSES, 2 },
+{ 2, 1, ACB_TYPE_HAMMING,FCB_TYPE_EXC_PULSES, 5 },
+{ 4, 2, ACB_TYPE_HAMMING,FCB_TYPE_EXC_PULSES, 0 },
+{ 4, 2, ACB_TYPE_HAMMING,FCB_TYPE_EXC_PULSES, 2 },
+{ 4, 2, ACB_TYPE_HAMMING,FCB_TYPE_EXC_PULSES, 5 },
+{ 8, 3, ACB_TYPE_HAMMING,FCB_TYPE_EXC_PULSES, 0 },
+{ 8, 3, ACB_TYPE_HAMMING,FCB_TYPE_EXC_PULSES, 2 },
+{ 8, 3, ACB_TYPE_HAMMING,FCB_TYPE_EXC_PULSES, 5 }
 };
 
 /**
@@ -160,10 +158,6 @@ typedef struct WMAVoiceContext {
 int lsp_q_mode;   ///< defines quantizer defaults [0, 1]
 int lsp_def_mode; ///< defines different sets of LSP defaults
   ///< [0, 1]
-int frame_lsp_bitsize;///< size (in bits) of LSPs, when encoded
-  ///< per-frame (independent coding)
-int sframe_lsp_bitsize;   ///< size (in bits) of LSPs, when encoded
-  ///< per superframe (residual coding)
 
 int min_pitch_val;///< base value for pitch parsing code
 int max_pitch_val;///< max value + 1 for pitch parsing
@@ -423,12 +417,8 @@ static av_cold int wmavoice_decode_init(AVCodecContext 
*ctx)
 lsp16_flag   =flags & 0x1000;
 if (lsp16_flag) {
 s->lsps   = 16;
-s->frame_lsp_bitsize  = 34;
-s->sframe_lsp_bitsize = 60;
 } else {
 s->lsps   = 10;
-s->frame_lsp_bitsize  = 24;
-s->sframe_lsp_bitsize = 48;
 }
 for (n = 0; n < s->lsps; n++)
 s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] vp9: add frame threading

2017-03-18 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Tue Aug  2 
07:55:31 2016 +0200| [1730a67ab99de0648dd55e81ea7fec12ab70225c] | committer: 
Anton Khirnov

vp9: add frame threading

Signed-off-by: Anton Khirnov 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1730a67ab99de0648dd55e81ea7fec12ab70225c
---

 libavcodec/vp9.c  | 273 +-
 libavcodec/vp9.h  |  11 ++
 libavcodec/vp9block.c | 111 ++--
 libavcodec/vp9mvs.c   |   4 +
 4 files changed, 318 insertions(+), 81 deletions(-)

diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index 497dcf2..7989ca8 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -105,14 +105,20 @@ static void vp9_decode_flush(AVCodecContext *avctx)
 
 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++)
 ff_thread_release_buffer(avctx, &s->refs[i]);
+
+s->use_last_frame_mvs = 0;
+
+s->alloc_width  = 0;
+s->alloc_height = 0;
 }
 
 static int update_size(AVCodecContext *avctx, int w, int h)
 {
 VP9Context *s = avctx->priv_data;
 uint8_t *p;
+int nb_blocks, nb_superblocks;
 
-if (s->above_partition_ctx && w == avctx->width && h == avctx->height)
+if (s->above_partition_ctx && w == s->alloc_width && h == s->alloc_height)
 return 0;
 
 vp9_decode_flush(avctx);
@@ -154,16 +160,26 @@ static int update_size(AVCodecContext *avctx, int w, int 
h)
 
 av_freep(&s->b_base);
 av_freep(&s->block_base);
-s->b_base = av_malloc(sizeof(*s->b_base));
-s->block_base = av_mallocz((64 * 64 + 128) * 3);
+
+if (avctx->active_thread_type & FF_THREAD_FRAME) {
+nb_blocks  = s->cols * s->rows;
+nb_superblocks = s->sb_cols * s->sb_rows;
+} else {
+nb_blocks = nb_superblocks = 1;
+}
+
+s->b_base = av_malloc_array(nb_blocks, sizeof(*s->b_base));
+s->block_base = av_mallocz_array(nb_superblocks, (64 * 64 + 128) * 3);
 if (!s->b_base || !s->block_base)
 return AVERROR(ENOMEM);
+s->uvblock_base[0] = s->block_base  + nb_superblocks * 64 * 64;
+s->uvblock_base[1] = s->uvblock_base[0] + nb_superblocks * 32 * 32;
+s->eob_base= (uint8_t *)(s->uvblock_base[1] + nb_superblocks * 32 
* 32);
+s->uveob_base[0]   = s->eob_base + nb_superblocks * 256;
+s->uveob_base[1]   = s->uveob_base[0] + nb_superblocks * 64;
 
-s->uvblock_base[0] = s->block_base + 64 * 64;
-s->uvblock_base[1] = s->uvblock_base[0] + 32 * 32;
-s->eob_base= (uint8_t *) (s->uvblock_base[1] + 32 * 32);
-s->uveob_base[0]   = s->eob_base + 256;
-s->uveob_base[1]   = s->uveob_base[0] + 64;
+s->alloc_width  = w;
+s->alloc_height = h;
 
 return 0;
 }
@@ -278,7 +294,6 @@ static int decode_frame_header(AVCodecContext *avctx,
 last_invisible = s->invisible;
 s->invisible   = !get_bits1(&s->gb);
 s->errorres= get_bits1(&s->gb);
-// FIXME disable this upon resolution change
 s->use_last_frame_mvs = !s->errorres && !last_invisible;
 
 if (s->keyframe) {
@@ -851,6 +866,61 @@ static int decode_subblock(AVCodecContext *avctx, int row, 
int col,
 return ret;
 }
 
+static int decode_superblock_mem(AVCodecContext *avctx, int row, int col, 
struct VP9Filter *lflvl,
+ ptrdiff_t yoff, ptrdiff_t uvoff, enum 
BlockLevel bl)
+{
+VP9Context *s = avctx->priv_data;
+VP9Block *b = s->b;
+ptrdiff_t hbs = 4 >> bl;
+AVFrame *f = s->frames[CUR_FRAME].tf.f;
+ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
+int res;
+
+if (bl == BL_8X8) {
+av_assert2(b->bl == BL_8X8);
+res = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, b->bl, 
b->bp);
+} else if (s->b->bl == bl) {
+if ((res = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, 
b->bl, b->bp)) < 0)
+return res;
+if (b->bp == PARTITION_H && row + hbs < s->rows) {
+yoff  += hbs * 8 * y_stride;
+uvoff += hbs * 4 * uv_stride;
+res = ff_vp9_decode_block(avctx, row + hbs, col, lflvl, yoff, 
uvoff, b->bl, b->bp);
+} else if (b->bp == PARTITION_V && col + hbs < s->cols) {
+yoff  += hbs * 8;
+uvoff += hbs * 4;
+res = ff_vp9_decode_block(avctx, row, col + hbs, lflvl, yoff, 
uvoff, b->bl, b->bp);
+}
+} else {
+if ((res = decode_superblock_mem(avctx, row, col, lflvl, yoff, uvoff, 
bl + 1)) < 0)
+return res;
+if (col + hbs < s->cols) { // FIXME why not <=?
+if (row + hbs < s->rows) {
+if ((res = decode_supe

[FFmpeg-cvslog] vp9: split last/cur_frame from the reference buffers.

2017-03-18 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Sat Nov 23 
10:27:18 2013 -0500| [bc6e0b64a9100652c1ce52292408d8fd79930d53] | committer: 
Anton Khirnov

vp9: split last/cur_frame from the reference buffers.

We need more information from last/cur_frame than from reference
buffers, so we can use a simplified structure for reference buffers,
and then store mvs and segmentation map information in last/cur.

This prepares the decoder for frame threading support.

Signed-off-by: Anton Khirnov 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=bc6e0b64a9100652c1ce52292408d8fd79930d53
---

 libavcodec/vp9.c  | 197 --
 libavcodec/vp9.h  |  22 +-
 libavcodec/vp9block.c |  74 +++
 libavcodec/vp9mvs.c   |  12 +--
 4 files changed, 207 insertions(+), 98 deletions(-)

diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index c11e9b8..11ed00e 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -34,13 +34,77 @@
 #define VP9_SYNCCODE 0x498342
 #define MAX_PROB 255
 
+static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
+{
+ff_thread_release_buffer(avctx, &f->tf);
+av_buffer_unref(&f->segmentation_map_buf);
+av_buffer_unref(&f->mv_buf);
+f->segmentation_map = NULL;
+f->mv   = NULL;
+}
+
+static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
+{
+VP9Context *s = avctx->priv_data;
+int ret, sz;
+
+ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
+if (ret < 0)
+return ret;
+
+sz = 64 * s->sb_cols * s->sb_rows;
+f->segmentation_map_buf = av_buffer_allocz(sz * 
sizeof(*f->segmentation_map));
+f->mv_buf   = av_buffer_allocz(sz * sizeof(*f->mv));
+if (!f->segmentation_map_buf || !f->mv_buf) {
+vp9_frame_unref(avctx, f);
+return AVERROR(ENOMEM);
+}
+
+f->segmentation_map = f->segmentation_map_buf->data;
+f->mv   = (VP9MVRefPair*)f->mv_buf->data;
+
+if (s->segmentation.enabled && !s->segmentation.update_map &&
+!s->keyframe && !s->intraonly)
+memcpy(f->segmentation_map, s->frames[LAST_FRAME].segmentation_map, 
sz);
+
+return 0;
+}
+
+static int vp9_frame_ref(VP9Frame *dst, VP9Frame *src)
+{
+int ret;
+
+dst->segmentation_map_buf = av_buffer_ref(src->segmentation_map_buf);
+dst->mv_buf   = av_buffer_ref(src->mv_buf);
+if (!dst->segmentation_map_buf || !dst->mv_buf) {
+ret = AVERROR(ENOMEM);
+goto fail;
+}
+
+ret = ff_thread_ref_frame(&dst->tf, &src->tf);
+if (ret < 0)
+goto fail;
+
+dst->segmentation_map = src->segmentation_map;
+dst->mv   = src->mv;
+
+return 0;
+fail:
+av_buffer_unref(&dst->segmentation_map_buf);
+av_buffer_unref(&dst->mv_buf);
+return ret;
+}
+
 static void vp9_decode_flush(AVCodecContext *avctx)
 {
 VP9Context *s = avctx->priv_data;
 int i;
 
+for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
+vp9_frame_unref(avctx, &s->frames[i]);
+
 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++)
-av_frame_unref(s->refs[i]);
+ff_thread_release_buffer(avctx, &s->refs[i]);
 }
 
 static int update_size(AVCodecContext *avctx, int w, int h)
@@ -66,8 +130,7 @@ static int update_size(AVCodecContext *avctx, int w, int h)
 #define assign(var, type, n) var = (type)p; p += s->sb_cols * n * sizeof(*var)
 av_free(s->above_partition_ctx);
 p = av_malloc(s->sb_cols *
-  (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx) +
-   64 * s->sb_rows * (1 + sizeof(*s->mv[0]) * 2)));
+  (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
 if (!p)
 return AVERROR(ENOMEM);
 assign(s->above_partition_ctx, uint8_t *, 8);
@@ -87,9 +150,6 @@ static int update_size(AVCodecContext *avctx, int w, int h)
 assign(s->above_filter_ctx,uint8_t *, 8);
 assign(s->lflvl,   VP9Filter *,   1);
 assign(s->above_mv_ctx,VP56mv(*)[2], 16);
-assign(s->segmentation_map,uint8_t *,  64 * s->sb_rows);
-assign(s->mv[0],   VP9MVRefPair *, 64 * s->sb_rows);
-assign(s->mv[1],   VP9MVRefPair *, 64 * s->sb_rows);
 #undef assign
 
 return 0;
@@ -268,22 +328,22 @@ static int decode_frame_header(AVCodecContext *avctx,
 s->signbias[1]= get_bits1(&s->gb);
 s->refidx[2]  = get_bits(&s->gb, 3);
 s->signbias[2]= get_bits1(&s->gb);
-if (!s->refs[s->refidx[0]]->buf[0] ||
-!s->refs[s->refidx[1]]->buf[0] ||
-

[FFmpeg-cvslog] vp9: allocate 'b', 'block/uvblock' and 'eob/uveob' dynamically.

2017-03-18 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Sat Nov 23 
12:10:12 2013 -0500| [5b995452a63ed754545a0ac90be79fac63b3390d] | committer: 
Anton Khirnov

vp9: allocate 'b', 'block/uvblock' and 'eob/uveob' dynamically.

This will be needed for frame threading.

Signed-off-by: Anton Khirnov 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=5b995452a63ed754545a0ac90be79fac63b3390d
---

 libavcodec/vp9.c  | 24 
 libavcodec/vp9.h  |  9 -
 libavcodec/vp9block.c |  8 
 libavcodec/vp9mvs.c   |  4 ++--
 4 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c
index 11ed00e..497dcf2 100644
--- a/libavcodec/vp9.c
+++ b/libavcodec/vp9.c
@@ -152,6 +152,19 @@ static int update_size(AVCodecContext *avctx, int w, int h)
 assign(s->above_mv_ctx,VP56mv(*)[2], 16);
 #undef assign
 
+av_freep(&s->b_base);
+av_freep(&s->block_base);
+s->b_base = av_malloc(sizeof(*s->b_base));
+s->block_base = av_mallocz((64 * 64 + 128) * 3);
+if (!s->b_base || !s->block_base)
+return AVERROR(ENOMEM);
+
+s->uvblock_base[0] = s->block_base + 64 * 64;
+s->uvblock_base[1] = s->uvblock_base[0] + 32 * 32;
+s->eob_base= (uint8_t *) (s->uvblock_base[1] + 32 * 32);
+s->uveob_base[0]   = s->eob_base + 256;
+s->uveob_base[1]   = s->uveob_base[0] + 64;
+
 return 0;
 }
 
@@ -1155,6 +1168,15 @@ static int vp9_decode_frame(AVCodecContext *avctx, 
AVFrame *frame,
 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
 memset(s->above_segpred_ctx, 0, s->cols);
+
+s->b  = s->b_base;
+s->block  = s->block_base;
+s->uvblock[0] = s->uvblock_base[0];
+s->uvblock[1] = s->uvblock_base[1];
+s->eob= s->eob_base;
+s->uveob[0]   = s->uveob_base[0];
+s->uveob[1]   = s->uveob_base[1];
+
 for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
 set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
 tile_row, s->tiling.log2_tile_rows, s->sb_rows);
@@ -1351,6 +1373,8 @@ static av_cold int vp9_decode_free(AVCodecContext *avctx)
 
 av_freep(&s->c_b);
 av_freep(&s->above_partition_ctx);
+av_freep(&s->b_base);
+av_freep(&s->block_base);
 
 return 0;
 }
diff --git a/libavcodec/vp9.h b/libavcodec/vp9.h
index 8711987..e591298 100644
--- a/libavcodec/vp9.h
+++ b/libavcodec/vp9.h
@@ -280,7 +280,8 @@ typedef struct VP9Context {
 VP56RangeCoder c;
 VP56RangeCoder *c_b;
 unsigned c_b_size;
-VP9Block b;
+VP9Block *b;
+VP9Block *b_base;
 
 // bitstream header
 uint8_t profile;
@@ -412,10 +413,8 @@ typedef struct VP9Context {
 DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[71 * 80];
 
 // block reconstruction intermediates
-DECLARE_ALIGNED(32, int16_t, block)[4096];
-DECLARE_ALIGNED(32, int16_t, uvblock)[2][1024];
-uint8_t eob[256];
-uint8_t uveob[2][64];
+int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
+uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
 struct { int x, y; } min_mv, max_mv;
 DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64];
 DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32 * 32];
diff --git a/libavcodec/vp9block.c b/libavcodec/vp9block.c
index c018fa0..feb5e6c 100644
--- a/libavcodec/vp9block.c
+++ b/libavcodec/vp9block.c
@@ -823,7 +823,7 @@ skip_eob:
 static int decode_coeffs(AVCodecContext *avctx)
 {
 VP9Context *s = avctx->priv_data;
-VP9Block *const b = &s->b;
+VP9Block *b = s->b;
 int row = b->row, col = b->col;
 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
@@ -1074,7 +1074,7 @@ static av_always_inline int check_intra_mode(VP9Context 
*s, int mode,
 static void intra_recon(AVCodecContext *avctx, ptrdiff_t y_off, ptrdiff_t 
uv_off)
 {
 VP9Context *s = avctx->priv_data;
-VP9Block *const b = &s->b;
+VP9Block *b = s->b;
 AVFrame *f = s->frames[CUR_FRAME].tf.f;
 int row = b->row, col = b->col;
 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
@@ -1227,7 +1227,7 @@ static int inter_recon(AVCodecContext *avctx)
 { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
 };
 VP9Context *s = avctx->priv_data;
-VP9Block *const b = &s->b;
+VP9Block *b = s->b;
 int row = b->row, col = b->col;
 
 ThreadFrame *tref1 = &s->refs[s->refidx[b->ref[0]]];
@@ -1555,7 +1555,7 @@ int ff_vp9_decode_block(AVCodecContext *avctx, int row, 
int col,
 enum BlockLe

[FFmpeg-cvslog] vp9: make mv bounds 32bit.

2017-03-16 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Tue Jan  7 
07:24:03 2014 -0500| [0df4801105d84883071b0978cb3afc7cd5184ce8] | committer: 
Anton Khirnov

vp9: make mv bounds 32bit.

The frame dimensions are 16bit, so the mv bounds can easily overflow
int16 for large videos.

Bug-Id: Handbrake/46
CC: libav-sta...@libav.org
Signed-off-by: Anton Khirnov 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=0df4801105d84883071b0978cb3afc7cd5184ce8
---

 libavcodec/vp9.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavcodec/vp9.h b/libavcodec/vp9.h
index b83bd61..31509bf 100644
--- a/libavcodec/vp9.h
+++ b/libavcodec/vp9.h
@@ -402,7 +402,7 @@ typedef struct VP9Context {
 DECLARE_ALIGNED(32, int16_t, uvblock)[2][1024];
 uint8_t eob[256];
 uint8_t uveob[2][64];
-VP56mv min_mv, max_mv;
+struct { int x, y; } min_mv, max_mv;
 DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64];
 DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32 * 32];
 } VP9Context;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] vp9mc/x86: add 16px functions (64bit only).

2017-03-16 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Tue Dec 24 
16:17:03 2013 -0500| [3a09494939ddb2f2fd0f8d015162d5174ec07d4c] | committer: 
Anton Khirnov

vp9mc/x86: add 16px functions (64bit only).

Signed-off-by: Anton Khirnov 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3a09494939ddb2f2fd0f8d015162d5174ec07d4c
---

 libavcodec/x86/vp9dsp_init.c |   5 ++
 libavcodec/x86/vp9mc.asm | 122 +++
 2 files changed, 127 insertions(+)

diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index 833d983..dc08e60 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -64,6 +64,9 @@ ff_ ## avg ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(uint8_t 
*dst, \
 
 mc_funcs(4);
 mc_funcs(8);
+#if ARCH_X86_64
+mc_funcs(16);
+#endif
 
 #undef mc_funcs
 #undef mc_func
@@ -95,7 +98,9 @@ ff_ ## avg ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(uint8_t 
*dst, \
 mc_rep_func(put, sz, hsz, v, ssse3); \
 mc_rep_func(avg, sz, hsz, v, ssse3)
 
+#if ARCH_X86_32
 mc_rep_funcs(16, 8);
+#endif
 mc_rep_funcs(32, 16);
 mc_rep_funcs(64, 32);
 
diff --git a/libavcodec/x86/vp9mc.asm b/libavcodec/x86/vp9mc.asm
index 59e5668..152715c 100644
--- a/libavcodec/x86/vp9mc.asm
+++ b/libavcodec/x86/vp9mc.asm
@@ -144,6 +144,62 @@ INIT_XMM ssse3
 filter_h_fn put
 filter_h_fn avg
 
+%if ARCH_X86_64
+%macro filter_hx2_fn 1
+%assign %%px mmsize
+cglobal %1_8tap_1d_h_ %+ %%px, 6, 6, 14, dst, src, dstride, sstride, h, filtery
+mova   m13, [pw_256]
+movam8, [filteryq+ 0]
+movam9, [filteryq+16]
+mova   m10, [filteryq+32]
+mova   m11, [filteryq+48]
+.loop:
+movum0, [srcq-3]
+movum1, [srcq-2]
+movum2, [srcq-1]
+movum3, [srcq+0]
+movum4, [srcq+1]
+movum5, [srcq+2]
+movum6, [srcq+3]
+movum7, [srcq+4]
+add   srcq, sstrideq
+SBUTTERFLY  bw, 0, 1, 12
+SBUTTERFLY  bw, 2, 3, 12
+SBUTTERFLY  bw, 4, 5, 12
+SBUTTERFLY  bw, 6, 7, 12
+pmaddubsw   m0, m8
+pmaddubsw   m1, m8
+pmaddubsw   m2, m9
+pmaddubsw   m3, m9
+pmaddubsw   m4, m10
+pmaddubsw   m5, m10
+pmaddubsw   m6, m11
+pmaddubsw   m7, m11
+paddw   m0, m2
+paddw   m1, m3
+paddw   m4, m6
+paddw   m5, m7
+paddsw  m0, m4
+paddsw  m1, m5
+pmulhrswm0, m13
+pmulhrswm1, m13
+packuswbm0, m1
+%ifidn %1, avg
+pavgb   m0, [dstq]
+%endif
+mova[dstq], m0
+add   dstq, dstrideq
+dec hd
+jg .loop
+RET
+%endmacro
+
+INIT_XMM ssse3
+filter_hx2_fn put
+filter_hx2_fn avg
+
+%endif ; ARCH_X86_64
+
 %macro filter_v_fn 1
 %assign %%px mmsize/2
 %if ARCH_X86_64
@@ -218,6 +274,72 @@ INIT_XMM ssse3
 filter_v_fn put
 filter_v_fn avg
 
+%if ARCH_X86_64
+
+%macro filter_vx2_fn 1
+%assign %%px mmsize
+cglobal %1_8tap_1d_v_ %+ %%px, 6, 8, 14, dst, src, dstride, sstride, h, 
filtery, src4, sstride3
+sub   srcq, sstrideq
+lea  sstride3q, [sstrideq*3]
+sub   srcq, sstrideq
+mova   m13, [pw_256]
+sub   srcq, sstrideq
+movam8, [filteryq+ 0]
+lea  src4q, [srcq+sstrideq*4]
+movam9, [filteryq+16]
+mova   m10, [filteryq+32]
+mova   m11, [filteryq+48]
+.loop:
+; FIXME maybe reuse loads from previous rows, or just
+; more generally unroll this to prevent multiple loads of
+; the same data?
+movum0, [srcq]
+movum1, [srcq+sstrideq]
+movum2, [srcq+sstrideq*2]
+movum3, [srcq+sstride3q]
+movum4, [src4q]
+movum5, [src4q+sstrideq]
+movum6, [src4q+sstrideq*2]
+movum7, [src4q+sstride3q]
+add   srcq, sstrideq
+add  src4q, sstrideq
+SBUTTERFLY  bw, 0, 1, 12
+SBUTTERFLY  bw, 2, 3, 12
+SBUTTERFLY  bw, 4, 5, 12
+SBUTTERFLY  bw, 6, 7, 12
+pmaddubsw   m0, m8
+pmaddubsw   m1, m8
+pmaddubsw   m2, m9
+pmaddubsw   m3, m9
+pmaddubsw   m4, m10
+pmaddubsw   m5, m10
+pmaddubsw   m6, m11
+pmaddubsw   m7, m11
+paddw   m0, m2
+paddw   m1, m3
+paddw   m4, m6
+paddw   m5, m7
+paddsw  m0, m4
+paddsw  m1, m5
+pmulhrswm0, m13
+pmulhrswm1, m13
+packuswbm0, m1
+%ifidn %1, avg
+pavgb   m0, [dstq]
+%endif
+mova[dstq], m0
+add   dstq, dstrideq
+dec hd
+jg .loop
+RET
+%endmacro
+
+INIT_XMM ssse3
+filter_vx2_fn put
+filter_vx2_fn avg
+
+%endif ; ARCH_X86_64
+
 %macro fpel_fn 6
 %if %2 == 4
 %define %%srcfn movh

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] checkasm: add vp9 MC tests.

2017-03-16 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Tue Sep 15 
16:41:29 2015 -0400| [e99ecda55082cb9dde8fd349361e169dc383943a] | committer: 
Anton Khirnov

checkasm: add vp9 MC tests.

Signed-off-by: Anton Khirnov 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=e99ecda55082cb9dde8fd349361e169dc383943a
---

 tests/checkasm/Makefile   |   1 +
 tests/checkasm/checkasm.c |   3 ++
 tests/checkasm/checkasm.h |   1 +
 tests/checkasm/vp9dsp.c   | 127 ++
 4 files changed, 132 insertions(+)

diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index d339b75..8d3d03a 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -11,6 +11,7 @@ AVCODECOBJS-$(CONFIG_VP8DSP)+= vp8dsp.o
 AVCODECOBJS-$(CONFIG_DCA_DECODER)   += dcadsp.o synth_filter.o
 AVCODECOBJS-$(CONFIG_HEVC_DECODER)  += hevc_mc.o hevc_idct.o
 AVCODECOBJS-$(CONFIG_V210_ENCODER)  += v210enc.o
+AVCODECOBJS-$(CONFIG_VP9_DECODER)   += vp9dsp.o
 
 CHECKASMOBJS-$(CONFIG_AVCODEC)  += $(AVCODECOBJS-yes)
 
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 34f49c0..86d3bab 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -93,6 +93,9 @@ static const struct {
 #if CONFIG_VP8DSP
 { "vp8dsp", checkasm_check_vp8dsp },
 #endif
+#if CONFIG_VP9_DECODER
+{ "vp9dsp", checkasm_check_vp9dsp },
+#endif
 { NULL }
 };
 
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index 73109c3..5e67b7d 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -42,6 +42,7 @@ void checkasm_check_hevc_mc(void);
 void checkasm_check_synth_filter(void);
 void checkasm_check_v210enc(void);
 void checkasm_check_vp8dsp(void);
+void checkasm_check_vp9dsp(void);
 
 void *checkasm_check_func(void *func, const char *name, ...) 
av_printf_format(2, 3);
 int checkasm_bench_func(void);
diff --git a/tests/checkasm/vp9dsp.c b/tests/checkasm/vp9dsp.c
new file mode 100644
index 0000000..dd37077
--- /dev/null
+++ b/tests/checkasm/vp9dsp.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2015 Ronald S. Bultje 
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with Libav; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include 
+
+#include "libavutil/common.h"
+#include "libavutil/internal.h"
+#include "libavutil/intreadwrite.h"
+
+#include "libavcodec/vp9.h"
+
+#include "checkasm.h"
+
+static const uint32_t pixel_mask[3] = { 0xffffffff, 0x03ff03ff, 0x0fff0fff };
+
+#define BIT_DEPTH 8
+#define SIZEOF_PIXEL ((BIT_DEPTH + 7) / 8)
+#define DST_BUF_SIZE (size * size * SIZEOF_PIXEL)
+#define SRC_BUF_STRIDE 72
+#define SRC_BUF_SIZE ((size + 7) * SRC_BUF_STRIDE * SIZEOF_PIXEL)
+#define src (buf + 3 * SIZEOF_PIXEL * (SRC_BUF_STRIDE + 1))
+
+#define randomize_buffers()   \
+do {  \
+uint32_t mask = pixel_mask[(BIT_DEPTH - 8) >> 1]; \
+int k;\
+for (k = 0; k < SRC_BUF_SIZE; k += 4) {   \
+uint32_t r = rnd() & mask;\
+AV_WN32A(buf + k, r); \
+} \
+if (op == 1) {\
+for (k = 0; k < DST_BUF_SIZE; k += 4) {   \
+uint32_t r = rnd() & mask;\
+AV_WN32A(dst0 + k, r);\
+AV_WN32A(dst1 + k, r);\
+} \
+} \
+} while (0)
+
+static void check_mc(void)
+{
+static const char *const filter_names[4] = {
+"8tap_smooth", "8tap_regular", "8tap_sharp", "bilin"
+};
+static const char *const subpel_names[2][2] = { { "", "h" }, { "v", "hv" } 
};
+static const char *const op_names[2] = { "put", "avg" };
+
+LOCAL_ALIGNED_32(uint8_t, buf,  [72 * 72 * 2]);
+LOCAL_ALIGNED_32(uint8_t, dst0, [64 * 64 * 2]);
+LOCAL_ALIGNED_32(uint8_t, dst1, [64 * 64 * 2])

[FFmpeg-cvslog] vp9mc/x86: sse2 MC assembly.

2017-03-16 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Sun Dec 14 
20:13:24 2014 -0500| [9790b44a89d191a07a9d8b361fb4d18ea15f51a1] | committer: 
Anton Khirnov

vp9mc/x86: sse2 MC assembly.

Also a slight change to the ssse3 code, which prevents a theoretical
overflow in the sharp filter.

Signed-off-by: Anton Khirnov 

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=9790b44a89d191a07a9d8b361fb4d18ea15f51a1
---

 libavcodec/x86/vp9dsp_init.c | 184 +---
 libavcodec/x86/vp9mc.asm | 246 +++
 2 files changed, 324 insertions(+), 106 deletions(-)

diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index 8c4af83..41fa35a 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -51,39 +51,41 @@ fpel_func(avg, 32, avx2);
 fpel_func(avg, 64, avx2);
 #undef fpel_func
 
-#define mc_func(avg, sz, dir, opt) 
 \
+#define mc_func(avg, sz, dir, opt, type, f_sz) 
 \
 void   
 \
 ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(uint8_t *dst,
 \
   const uint8_t *src,  
 \
   ptrdiff_t 
dst_stride, \
   ptrdiff_t 
src_stride, \
   int h,   
 \
-  const int8_t 
(*filter)[32])
-
-#define mc_funcs(sz, opt) \
-mc_func(put, sz, h, opt); \
-mc_func(avg, sz, h, opt); \
-mc_func(put, sz, v, opt); \
-mc_func(avg, sz, v, opt)
-
-mc_funcs(4, ssse3);
-mc_funcs(8, ssse3);
+  const type 
(*filter)[f_sz])
+
+#define mc_funcs(sz, opt, type, f_sz) \
+mc_func(put, sz, h, opt, type, f_sz); \
+mc_func(avg, sz, h, opt, type, f_sz); \
+mc_func(put, sz, v, opt, type, f_sz); \
+mc_func(avg, sz, v, opt, type, f_sz)
+
+mc_funcs(4, mmxext, int16_t,  8);
+mc_funcs(8, sse2,   int16_t,  8);
+mc_funcs(4, ssse3,  int8_t,  32);
+mc_funcs(8, ssse3,  int8_t,  32);
 #if ARCH_X86_64
-mc_funcs(16, ssse3);
-mc_funcs(32, avx2);
+mc_funcs(16, ssse3, int8_t,  32);
+mc_funcs(32, avx2,  int8_t,  32);
 #endif
 
 #undef mc_funcs
 #undef mc_func
 
-#define mc_rep_func(avg, sz, hsz, dir, opt) \
+#define mc_rep_func(avg, sz, hsz, dir, opt, type, f_sz) \
 static av_always_inline void\
 ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## sz ## _ ## opt(uint8_t *dst, \
   const uint8_t *src,   \
   ptrdiff_t dst_stride, \
   ptrdiff_t src_stride, \
   int h,\
-  const int8_t 
(*filter)[32]) \
+  const type 
(*filter)[f_sz]) \
 {   \
 ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## hsz ## _ ## opt(dst, src,\
dst_stride,  \
@@ -97,27 +99,31 @@ ff_vp9_ ## avg ## _8tap_1d_ ## dir ## _ ## sz ## _ ## 
opt(uint8_t *dst, \
h, filter);  \
 }
 
-#define mc_rep_funcs(sz, hsz, opt) \
-mc_rep_func(put, sz, hsz, h, opt); \
-mc_rep_func(avg, sz, hsz, h, opt); \
-mc_rep_func(put, sz, hsz, v, opt); \
-mc_rep_func(avg, sz, hsz, v, opt)
+#define mc_rep_funcs(sz, hsz, opt, type, f_sz) \
+mc_rep_func(put, sz, hsz, h, opt, type, f_sz); \
+mc_rep_func(avg, sz, hsz, h, opt, type, f_sz); \
+mc_rep_func(put, sz, hsz, v, opt, type, f_sz); \
+mc_rep_func(avg, sz, hsz, v, opt, type, f_sz)
 
+mc_rep_funcs(16, 8,  sse2,  int16_t,  8);
 #if ARCH_X86_32
-mc_rep_funcs(16, 8, ssse3);
+mc_rep_funcs(16, 8,  ssse3, int8_t,  32);
 #endif
-mc_rep_funcs(32, 16, ssse3);
-mc_rep_funcs(64, 32, ssse3);
+mc_rep_funcs(32, 16, sse2,  int16_t,  8);
+mc_rep_funcs(32, 16, ssse3, int8_t,  32);
+mc_rep_funcs(64, 32, sse2,  int16_t,  8);
+mc_rep_funcs(64, 32, ssse3, int8_t,  32);
 #if ARCH_X86_64 && HAVE_AVX2_EXTERNAL
-mc_rep_funcs(64, 32, avx2);
+mc_rep_funcs(64, 32, avx2,  int8_t,  32);
 #endif
 
 #undef mc_rep_funcs
 #undef mc_rep_func
 
 extern const int8_t ff_filters_ssse3[3][15][4][32];
+extern const int16_t ff_filters_sse2[3][15][8][8];
 
-#define filter_8tap_2d_fn(op, sz, f, fname, align, opt)
  \
+#define filter_8tap_2d_fn(op, sz, f, f_opt, fname,

[FFmpeg-cvslog] wmavoice: move overflow handling to common code.

2016-12-27 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Tue Dec 20 
17:10:33 2016 -0500| [7b27dd5c16de785297ce4de4b88afa0b6685f61d] | committer: 
Ronald S. Bultje

wmavoice: move overflow handling to common code.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=7b27dd5c16de785297ce4de4b88afa0b6685f61d
---

 libavcodec/wmavoice.c | 17 +
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index f31c9d2..90dfe20 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c
@@ -1800,6 +1800,11 @@ static int synth_superframe(AVCodecContext *ctx, AVFrame 
*frame,
 skip_bits(gb, 10 * (res + 1));
 }
 
+if (get_bits_left(gb) < 0) {
+wmavoice_flush(ctx);
+return AVERROR_INVALIDDATA;
+}
+
 *got_frame_ptr = 1;
 
 /* Update history */
@@ -1925,12 +1930,6 @@ static int wmavoice_decode_packet(AVCodecContext *ctx, 
void *data,
 cnt += s->spillover_nbits;
 s->skip_bits_next = cnt & 7;
 res = cnt >> 3;
-if (res > avpkt->size) {
-av_log(ctx, AV_LOG_ERROR,
-   "Trying to skip %d bytes in packet of size %d\n",
-   res, avpkt->size);
-return AVERROR_INVALIDDATA;
-}
 return res;
 } else
 skip_bits_long (gb, s->spillover_nbits - cnt +
@@ -1955,12 +1954,6 @@ static int wmavoice_decode_packet(AVCodecContext *ctx, 
void *data,
 int cnt = get_bits_count(gb);
 s->skip_bits_next = cnt & 7;
 res = cnt >> 3;
-if (res > avpkt->size) {
-av_log(ctx, AV_LOG_ERROR,
-   "Trying to skip %d bytes in packet of size %d\n",
-   res, avpkt->size);
-return AVERROR_INVALIDDATA;
-}
 return res;
 }
 } else if ((s->sframe_cache_size = pos) > 0) {

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] wmavoice: reindent.

2016-12-27 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Tue Dec 20 
17:14:41 2016 -0500| [b011bb5f8b2ce5f21bc4f07f50a56a26310383af] | committer: 
Ronald S. Bultje

wmavoice: reindent.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=b011bb5f8b2ce5f21bc4f07f50a56a26310383af
---

 libavcodec/wmavoice.c | 72 +--
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index ae100fb..f31c9d2 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c
@@ -1915,29 +1915,29 @@ static int wmavoice_decode_packet(AVCodecContext *ctx, 
void *data,
 /* If the packet header specifies a s->spillover_nbits, then we want
  * to push out all data of the previous packet (+ spillover) before
  * continuing to parse new superframes in the current packet. */
-if (s->sframe_cache_size > 0) {
-int cnt = get_bits_count(gb);
-copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
-flush_put_bits(&s->pb);
-s->sframe_cache_size += s->spillover_nbits;
-if ((res = synth_superframe(ctx, data, got_frame_ptr)) == 0 &&
-*got_frame_ptr) {
-cnt += s->spillover_nbits;
-s->skip_bits_next = cnt & 7;
-res = cnt >> 3;
-if (res > avpkt->size) {
-av_log(ctx, AV_LOG_ERROR,
-   "Trying to skip %d bytes in packet of size 
%d\n",
-   res, avpkt->size);
-return AVERROR_INVALIDDATA;
-}
-return res;
-} else
-skip_bits_long (gb, s->spillover_nbits - cnt +
-get_bits_count(gb)); // resync
-} else if (s->spillover_nbits) {
-skip_bits_long(gb, s->spillover_nbits);  // resync
-}
+if (s->sframe_cache_size > 0) {
+int cnt = get_bits_count(gb);
+copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
+flush_put_bits(&s->pb);
+s->sframe_cache_size += s->spillover_nbits;
+if ((res = synth_superframe(ctx, data, got_frame_ptr)) == 0 &&
+*got_frame_ptr) {
+cnt += s->spillover_nbits;
+s->skip_bits_next = cnt & 7;
+res = cnt >> 3;
+if (res > avpkt->size) {
+av_log(ctx, AV_LOG_ERROR,
+   "Trying to skip %d bytes in packet of size %d\n",
+   res, avpkt->size);
+return AVERROR_INVALIDDATA;
+}
+return res;
+} else
+skip_bits_long (gb, s->spillover_nbits - cnt +
+get_bits_count(gb)); // resync
+} else if (s->spillover_nbits) {
+skip_bits_long(gb, s->spillover_nbits);  // resync
+}
 } else if (s->skip_bits_next)
 skip_bits(gb, s->skip_bits_next);
 
@@ -1949,20 +1949,20 @@ static int wmavoice_decode_packet(AVCodecContext *ctx, 
void *data,
 *got_frame_ptr = 0;
 return size;
 } else if (s->nb_superframes > 0) {
-if ((res = synth_superframe(ctx, data, got_frame_ptr)) < 0) {
-return res;
-} else if (*got_frame_ptr) {
-int cnt = get_bits_count(gb);
-s->skip_bits_next = cnt & 7;
-res = cnt >> 3;
-if (res > avpkt->size) {
-av_log(ctx, AV_LOG_ERROR,
-   "Trying to skip %d bytes in packet of size %d\n",
-   res, avpkt->size);
-return AVERROR_INVALIDDATA;
+if ((res = synth_superframe(ctx, data, got_frame_ptr)) < 0) {
+return res;
+} else if (*got_frame_ptr) {
+int cnt = get_bits_count(gb);
+s->skip_bits_next = cnt & 7;
+res = cnt >> 3;
+if (res > avpkt->size) {
+av_log(ctx, AV_LOG_ERROR,
+   "Trying to skip %d bytes in packet of size %d\n",
+   res, avpkt->size);
+return AVERROR_INVALIDDATA;
+}
+return res;
 }
-return res;
-}
 } else if ((s->sframe_cache_size = pos) > 0) {
 /* ... cache it for spillover in next packet */
 init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] wmavoice: protect against zero-energy in adaptive gain control.

2016-12-27 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Tue Dec 20 
18:01:05 2016 -0500| [33d7f822f8ed2d1870babc1d04d4d48cf8b6f240] | committer: 
Ronald S. Bultje

wmavoice: protect against zero-energy in adaptive gain control.

Otherwise the scale factor becomes NaN, resulting in corrupt output.
Fixes #5426.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=33d7f822f8ed2d1870babc1d04d4d48cf8b6f240
---

 libavcodec/wmavoice.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index 90dfe20..cd5958c 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c
@@ -512,7 +512,8 @@ static void adaptive_gain_control(float *out, const float 
*in,
 speech_energy += fabsf(speech_synth[i]);
 postfilter_energy += fabsf(in[i]);
 }
-gain_scale_factor = (1.0 - alpha) * speech_energy / postfilter_energy;
+gain_scale_factor = postfilter_energy == 0.0 ? 0.0 :
+(1.0 - alpha) * speech_energy / postfilter_energy;
 
 for (i = 0; i < size; i++) {
 mem = alpha * mem + gain_scale_factor;

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] wmavoice: move wmavoice_flush() up.

2016-12-27 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Tue Dec 20 
17:03:46 2016 -0500| [992cb15e671332650ddd5020c00cf08a40bb7cf0] | committer: 
Ronald S. Bultje

wmavoice: move wmavoice_flush() up.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=992cb15e671332650ddd5020c00cf08a40bb7cf0
---

 libavcodec/wmavoice.c | 56 +--
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index ceac61f..4b3ab43 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c
@@ -337,6 +337,34 @@ static av_cold void wmavoice_init_static_data(AVCodec 
*codec)
 bits, 1, 1, codes, 2, 2, 132);
 }
 
+static av_cold void wmavoice_flush(AVCodecContext *ctx)
+{
+WMAVoiceContext *s = ctx->priv_data;
+int n;
+
+s->postfilter_agc= 0;
+s->sframe_cache_size = 0;
+s->skip_bits_next= 0;
+for (n = 0; n < s->lsps; n++)
+s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
+memset(s->excitation_history, 0,
+   sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
+memset(s->synth_history,  0,
+   sizeof(*s->synth_history)  * MAX_LSPS);
+memset(s->gain_pred_err,  0,
+   sizeof(s->gain_pred_err));
+
+if (s->do_apf) {
+memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
+   sizeof(*s->synth_filter_out_buf) * s->lsps);
+memset(s->dcf_mem,  0,
+   sizeof(*s->dcf_mem)  * 2);
+memset(s->zero_exc_pf,  0,
+   sizeof(*s->zero_exc_pf)  * s->history_nsamples);
+memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
+}
+}
+
 /**
  * Set up decoder with parameters from demuxer (extradata etc.).
  */
@@ -2046,34 +2074,6 @@ static av_cold int wmavoice_decode_end(AVCodecContext 
*ctx)
 return 0;
 }
 
-static av_cold void wmavoice_flush(AVCodecContext *ctx)
-{
-WMAVoiceContext *s = ctx->priv_data;
-int n;
-
-s->postfilter_agc= 0;
-s->sframe_cache_size = 0;
-s->skip_bits_next= 0;
-for (n = 0; n < s->lsps; n++)
-s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
-memset(s->excitation_history, 0,
-   sizeof(*s->excitation_history) * MAX_SIGNAL_HISTORY);
-memset(s->synth_history,  0,
-   sizeof(*s->synth_history)  * MAX_LSPS);
-memset(s->gain_pred_err,  0,
-   sizeof(s->gain_pred_err));
-
-if (s->do_apf) {
-memset(&s->synth_filter_out_buf[MAX_LSPS_ALIGN16 - s->lsps], 0,
-   sizeof(*s->synth_filter_out_buf) * s->lsps);
-memset(s->dcf_mem,  0,
-   sizeof(*s->dcf_mem)  * 2);
-memset(s->zero_exc_pf,  0,
-   sizeof(*s->zero_exc_pf)  * s->history_nsamples);
-memset(s->denoise_filter_cache, 0, sizeof(s->denoise_filter_cache));
-}
-}
-
 AVCodec ff_wmavoice_decoder = {
 .name = "wmavoice",
 .long_name= NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] checkasm/vp9: benchmark all sub-IDCTs (but not WHT or ADST).

2016-12-27 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Thu Nov 17 
16:08:20 2016 -0500| [1c8fbd7b90469f69fe3a3f78ba7886195d97c34f] | committer: 
Ronald S. Bultje

checkasm/vp9: benchmark all sub-IDCTs (but not WHT or ADST).

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=1c8fbd7b90469f69fe3a3f78ba7886195d97c34f
---

 tests/checkasm/vp9dsp.c | 22 ++
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/tests/checkasm/vp9dsp.c b/tests/checkasm/vp9dsp.c
index 441041c..f32b97c 100644
--- a/tests/checkasm/vp9dsp.c
+++ b/tests/checkasm/vp9dsp.c
@@ -331,15 +331,20 @@ static void check_itxfm(void)
 int n_txtps = tx < TX_32X32 ? N_TXFM_TYPES : 1;
 
 for (txtp = 0; txtp < n_txtps; txtp++) {
-if (check_func(dsp.itxfm_add[tx][txtp], 
"vp9_inv_%s_%dx%d_add_%d",
-   tx == 4 ? "wht_wht" : txtp_types[txtp], sz, sz,
-   bit_depth)) {
-randomize_buffers();
-ftx(coef, tx, txtp, sz, bit_depth);
-
-for (sub = (txtp == 0) ? 1 : 2; sub <= sz; sub <<= 1) {
+// skip testing sub-IDCTs for WHT or ADST since they don't
+// implement it in any of the SIMD functions. If they do,
+// consider changing this to ensure we have complete test
+// coverage
+for (sub = (txtp == 0 && tx < 4) ? 1 : sz; sub <= sz; sub <<= 
1) {
+if (check_func(dsp.itxfm_add[tx][txtp],
+   "vp9_inv_%s_%dx%d_sub%d_add_%d",
+   tx == 4 ? "wht_wht" : txtp_types[txtp],
+   sz, sz, sub, bit_depth)) {
 int eob;
 
+randomize_buffers();
+ftx(coef, tx, txtp, sz, bit_depth);
+
 if (sub < sz) {
 eob = copy_subcoefs(subcoef0, coef, tx, txtp,
 sz, sub, bit_depth);
@@ -357,8 +362,9 @@ static void check_itxfm(void)
 !iszero(subcoef0, sz * sz * SIZEOF_COEF) ||
 !iszero(subcoef1, sz * sz * SIZEOF_COEF))
 fail();
+
+bench_new(dst, sz * SIZEOF_PIXEL, coef, eob);
 }
-bench_new(dst, sz * SIZEOF_PIXEL, coef, sz * sz);
 }
 }
 }

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] wmavoice: disable bitstream checking.

2016-12-27 Thread Ronald S. Bultje

ffmpeg | branch: master | Ronald S. Bultje  | Tue Dec 20 
17:14:26 2016 -0500| [3deb4b54a24f8cddce463d9f5751b01efeb976af] | committer: 
Ronald S. Bultje

wmavoice: disable bitstream checking.

The checked bitstream reader does that already. To allow parsing of
superframes split over a packet boundary, we always decode the last
superframe in each packet at the start of the next packet, even if
theoretically we could have decoded it. The last superframe in the
last packet is decoded using AV_CODEC_CAP_DELAY.

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=3deb4b54a24f8cddce463d9f5751b01efeb976af
---

 libavcodec/wmavoice.c | 144 ++
 1 file changed, 29 insertions(+), 115 deletions(-)

diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index 4b3ab43..ae100fb 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c
@@ -251,6 +251,7 @@ typedef struct WMAVoiceContext {
 
 int frame_cntr;   ///< current frame index [0 - 0xFFFE]; is
   ///< only used for comfort noise in #pRNG()
+int nb_superframes;   ///< number of superframes in current packet
 float gain_pred_err[6];   ///< cache for gain prediction
 float excitation_history[MAX_SIGNAL_HISTORY];
   ///< cache of the signal of previous
@@ -875,7 +876,6 @@ static void dequant_lsps(double *lsps, int num,
 /**
  * @name LSP dequantization routines
  * LSP dequantization routines, for 10/16LSPs and independent/residual coding.
- * @note we assume enough bits are available, caller should check.
  * lsp10i() consumes 24 bits; lsp10r() consumes an additional 24 bits;
  * lsp16i() consumes 34 bits; lsp16r() consumes an additional 26 bits.
  * @{
@@ -1419,7 +1419,6 @@ static void synth_block_fcb_acb(WMAVoiceContext *s, 
GetBitContext *gb,
 
 /**
  * Parse data in a single block.
- * @note we assume enough bits are available, caller should check.
  *
  * @param s WMA Voice decoding context private data
  * @param gb bit I/O context
@@ -1463,7 +1462,6 @@ static void synth_block(WMAVoiceContext *s, GetBitContext 
*gb,
 
 /**
  * Synthesize output samples for a single frame.
- * @note we assume enough bits are available, caller should check.
  *
  * @param ctx WMA Voice decoder context
  * @param gb bit I/O context (s->gb or one for cross-packet superframes)
@@ -1682,83 +1680,6 @@ static void stabilize_lsps(double *lsps, int num)
 }
 
 /**
- * Test if there's enough bits to read 1 superframe.
- *
- * @param orig_gb bit I/O context used for reading. This function
- *does not modify the state of the bitreader; it
- *only uses it to copy the current stream position
- * @param s WMA Voice decoding context private data
- * @return < 0 on error, 1 on not enough bits or 0 if OK.
- */
-static int check_bits_for_superframe(GetBitContext *orig_gb,
- WMAVoiceContext *s)
-{
-GetBitContext s_gb, *gb = &s_gb;
-int n, need_bits, bd_idx;
-const struct frame_type_desc *frame_desc;
-
-/* initialize a copy */
-init_get_bits(gb, orig_gb->buffer, orig_gb->size_in_bits);
-skip_bits_long(gb, get_bits_count(orig_gb));
-av_assert1(get_bits_left(gb) == get_bits_left(orig_gb));
-
-/* superframe header */
-if (get_bits_left(gb) < 14)
-return 1;
-if (!get_bits1(gb))
-return AVERROR(ENOSYS);   // WMAPro-in-WMAVoice superframe
-if (get_bits1(gb)) skip_bits(gb, 12); // number of  samples in superframe
-if (s->has_residual_lsps) {   // residual LSPs (for all frames)
-if (get_bits_left(gb) < s->sframe_lsp_bitsize)
-return 1;
-skip_bits_long(gb, s->sframe_lsp_bitsize);
-}
-
-/* frames */
-for (n = 0; n < MAX_FRAMES; n++) {
-int aw_idx_is_ext = 0;
-
-if (!s->has_residual_lsps) { // independent LSPs (per-frame)
-   if (get_bits_left(gb) < s->frame_lsp_bitsize) return 1;
-   skip_bits_long(gb, s->frame_lsp_bitsize);
-}
-bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)];
-if (bd_idx < 0)
-return AVERROR_INVALIDDATA; // invalid frame type VLC code
-frame_desc = &frame_descs[bd_idx];
-if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
-if (get_bits_left(gb) < s->pitch_nbits)
-return 1;
-skip_bits_long(gb, s->pitch_nbits);
-}
-if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
-skip_bits(gb, 8);
-} else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
-int tmp = get_bits(gb, 6);
-if (tmp >= 0x36) {
-skip_bits(gb, 2);
-aw_idx_is_ext = 1;
-}
-}
-
-/* blocks */
-if (frame_desc->acb_type == ACB

[FFmpeg-cvslog] http: make length/offset-related variables unsigned.

2016-12-05 Thread Ronald S. Bultje

ffmpeg | branch: release/2.8 | Ronald S. Bultje  | Mon Dec  
5 15:55:26 2016 -0500| [606b21353df7d08ea203193f3026281737c696a2] | committer: 
Ronald S. Bultje

http: make length/offset-related variables unsigned.

Fixes #5992, reported and found by Paul Cher.

(cherry picked from commit 2a05c8f813de6f2278827734bf8102291e7484aa)

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=606b21353df7d08ea203193f3026281737c696a2
---

 libavformat/http.c | 66 +-
 1 file changed, 36 insertions(+), 30 deletions(-)

diff --git a/libavformat/http.c b/libavformat/http.c
index 20e942e..04c4ed6 100644
--- a/libavformat/http.c
+++ b/libavformat/http.c
@@ -61,8 +61,8 @@ typedef struct HTTPContext {
 int line_count;
 int http_code;
 /* Used if "Transfer-Encoding: chunked" otherwise -1. */
-int64_t chunksize;
-int64_t off, end_off, filesize;
+uint64_t chunksize;
+uint64_t off, end_off, filesize;
 char *location;
 HTTPAuthState auth_state;
 HTTPAuthState proxy_auth_state;
@@ -90,9 +90,9 @@ typedef struct HTTPContext {
 AVDictionary *cookie_dict;
 int icy;
 /* how much data was read since the last ICY metadata packet */
-int icy_data_read;
+uint64_t icy_data_read;
 /* after how many bytes of read data a new metadata packet will be found */
-int icy_metaint;
+uint64_t icy_metaint;
 char *icy_metadata_headers;
 char *icy_metadata_packet;
 AVDictionary *metadata;
@@ -467,7 +467,7 @@ static int http_open(URLContext *h, const char *uri, int 
flags,
 else
 h->is_streamed = 1;
 
-s->filesize = -1;
+s->filesize = UINT64_MAX;
 s->location = av_strdup(uri);
 if (!s->location)
 return AVERROR(ENOMEM);
@@ -594,9 +594,9 @@ static void parse_content_range(URLContext *h, const char 
*p)
 
 if (!strncmp(p, "bytes ", 6)) {
 p += 6;
-s->off = strtoll(p, NULL, 10);
+s->off = strtoull(p, NULL, 10);
 if ((slash = strchr(p, '/')) && strlen(slash) > 0)
-s->filesize = strtoll(slash + 1, NULL, 10);
+s->filesize = strtoull(slash + 1, NULL, 10);
 }
 if (s->seekable == -1 && (!s->is_akamai || s->filesize != 2147483647))
 h->is_streamed = 0; /* we _can_ in fact seek */
@@ -786,8 +786,9 @@ static int process_line(URLContext *h, char *line, int 
line_count,
 if ((ret = parse_location(s, p)) < 0)
 return ret;
 *new_location = 1;
-} else if (!av_strcasecmp(tag, "Content-Length") && s->filesize == -1) 
{
-s->filesize = strtoll(p, NULL, 10);
+} else if (!av_strcasecmp(tag, "Content-Length") &&
+   s->filesize == UINT64_MAX) {
+s->filesize = strtoull(p, NULL, 10);
 } else if (!av_strcasecmp(tag, "Content-Range")) {
 parse_content_range(h, p);
 } else if (!av_strcasecmp(tag, "Accept-Ranges") &&
@@ -796,7 +797,7 @@ static int process_line(URLContext *h, char *line, int 
line_count,
 h->is_streamed = 0;
 } else if (!av_strcasecmp(tag, "Transfer-Encoding") &&
!av_strncasecmp(p, "chunked", 7)) {
-s->filesize  = -1;
+s->filesize  = UINT64_MAX;
 s->chunksize = 0;
 } else if (!av_strcasecmp(tag, "WWW-Authenticate")) {
 ff_http_auth_handle_header(&s->auth_state, tag, p);
@@ -820,7 +821,7 @@ static int process_line(URLContext *h, char *line, int 
line_count,
 if (parse_cookie(s, p, &s->cookie_dict))
 av_log(h, AV_LOG_WARNING, "Unable to parse '%s'\n", p);
 } else if (!av_strcasecmp(tag, "Icy-MetaInt")) {
-s->icy_metaint = strtoll(p, NULL, 10);
+s->icy_metaint = strtoull(p, NULL, 10);
 } else if (!av_strncasecmp(tag, "Icy-", 4)) {
 if ((ret = parse_icy(s, tag, p)) < 0)
 return ret;
@@ -950,7 +951,7 @@ static int http_read_header(URLContext *h, int 
*new_location)
 char line[MAX_URL_SIZE];
 int err = 0;
 
-s->chunksize = -1;
+s->chunksize = UINT64_MAX;
 
 for (;;) {
 if ((err = http_get_line(s, line, sizeof(line))) < 0)
@@ -984,7 +985,7 @@ static int http_connect(URLContext *h, const char *path, 
const char *local_path,
 int post, err;
 char headers[HTTP_HEADERS_SIZE] = "";
 char *authstr = NULL, *proxyauthstr = NULL;
-int64_t off = s->off;
+uint64_t off = s->off;
 int len = 0;
 const char *method;
 int send_expect_100 = 0;
@@ -1032,7 +1033,7 @@ static int http_connect(URLContext *h, const char *path, 
const char *local_p

[FFmpeg-cvslog] http: move chunk handling from http_read_stream() to http_buf_read().

2016-12-05 Thread Ronald S. Bultje

ffmpeg | branch: release/2.8 | Ronald S. Bultje  | Mon Dec  
5 10:18:10 2016 -0500| [d3fc5c17de03ffa69d97a5dfabb54d38967daf2d] | committer: 
Ronald S. Bultje

http: move chunk handling from http_read_stream() to http_buf_read().

(cherry picked from commit 845bb401781ef04e342bd558df16a8dbf5f800f9)

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=d3fc5c17de03ffa69d97a5dfabb54d38967daf2d
---

 libavformat/http.c | 57 +-
 1 file changed, 31 insertions(+), 26 deletions(-)

diff --git a/libavformat/http.c b/libavformat/http.c
index 04c4ed6..cdcf4cc 100644
--- a/libavformat/http.c
+++ b/libavformat/http.c
@@ -1140,6 +1140,34 @@ static int http_buf_read(URLContext *h, uint8_t *buf, 
int size)
 {
 HTTPContext *s = h->priv_data;
 int len;
+
+if (s->chunksize != UINT64_MAX) {
+if (!s->chunksize) {
+char line[32];
+int err;
+
+do {
+if ((err = http_get_line(s, line, sizeof(line))) < 0)
+return err;
+} while (!*line);/* skip CR LF from last chunk */
+
+s->chunksize = strtoull(line, NULL, 16);
+
+av_log(h, AV_LOG_TRACE,
+   "Chunked encoding data size: %"PRIu64"'\n",
+s->chunksize);
+
+if (!s->chunksize)
+return 0;
+else if (s->chunksize == UINT64_MAX) {
+av_log(h, AV_LOG_ERROR, "Invalid chunk size %"PRIu64"\n",
+   s->chunksize);
+return AVERROR(EINVAL);
+}
+}
+size = FFMIN(size, s->chunksize);
+}
+
 /* read bytes from input buffer first */
 len = s->buf_end - s->buf_ptr;
 if (len > 0) {
@@ -1161,8 +1189,10 @@ static int http_buf_read(URLContext *h, uint8_t *buf, 
int size)
 }
 if (len > 0) {
 s->off += len;
-if (s->chunksize > 0)
+if (s->chunksize > 0) {
+av_assert0(s->chunksize >= len);
 s->chunksize -= len;
+}
 }
 return len;
 }
@@ -1216,31 +1246,6 @@ static int http_read_stream(URLContext *h, uint8_t *buf, 
int size)
 return err;
 }
 
-if (s->chunksize != UINT64_MAX) {
-if (!s->chunksize) {
-char line[32];
-
-do {
-if ((err = http_get_line(s, line, sizeof(line))) < 0)
-return err;
-} while (!*line);/* skip CR LF from last chunk */
-
-s->chunksize = strtoull(line, NULL, 16);
-
-av_log(h, AV_LOG_TRACE,
-   "Chunked encoding data size: %"PRIu64"'\n",
-s->chunksize);
-
-if (!s->chunksize)
-return 0;
-else if (s->chunksize == UINT64_MAX) {
-av_log(h, AV_LOG_ERROR, "Invalid chunk size %"PRIu64"\n",
-   s->chunksize);
-return AVERROR(EINVAL);
-}
-}
-size = FFMIN(size, s->chunksize);
-}
 #if CONFIG_ZLIB
 if (s->compressed)
 return http_buf_read_compressed(h, buf, size);

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

[FFmpeg-cvslog] http: make length/offset-related variables unsigned.

2016-12-05 Thread Ronald S. Bultje

ffmpeg | branch: release/3.0 | Ronald S. Bultje  | Mon Dec  
5 08:02:33 2016 -0500| [2e3f0a1c6f39cf2a35bdda85e43970ffc6db797b] | committer: 
Ronald S. Bultje

http: make length/offset-related variables unsigned.

Fixes #5992, reported and found by Paul Cher.

(cherry picked from commit 2a05c8f813de6f2278827734bf8102291e7484aa)

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=2e3f0a1c6f39cf2a35bdda85e43970ffc6db797b
---

 libavformat/http.c | 70 +-
 1 file changed, 38 insertions(+), 32 deletions(-)

diff --git a/libavformat/http.c b/libavformat/http.c
index 3dad2ef..bd84aa0 100644
--- a/libavformat/http.c
+++ b/libavformat/http.c
@@ -62,8 +62,8 @@ typedef struct HTTPContext {
 int line_count;
 int http_code;
 /* Used if "Transfer-Encoding: chunked" otherwise -1. */
-int64_t chunksize;
-int64_t off, end_off, filesize;
+uint64_t chunksize;
+uint64_t off, end_off, filesize;
 char *location;
 HTTPAuthState auth_state;
 HTTPAuthState proxy_auth_state;
@@ -92,9 +92,9 @@ typedef struct HTTPContext {
 AVDictionary *cookie_dict;
 int icy;
 /* how much data was read since the last ICY metadata packet */
-int icy_data_read;
+uint64_t icy_data_read;
 /* after how many bytes of read data a new metadata packet will be found */
-int icy_metaint;
+uint64_t icy_metaint;
 char *icy_metadata_headers;
 char *icy_metadata_packet;
 AVDictionary *metadata;
@@ -480,7 +480,7 @@ static int http_open(URLContext *h, const char *uri, int 
flags,
 else
 h->is_streamed = 1;
 
-s->filesize = -1;
+s->filesize = UINT64_MAX;
 s->location = av_strdup(uri);
 if (!s->location)
 return AVERROR(ENOMEM);
@@ -607,9 +607,9 @@ static void parse_content_range(URLContext *h, const char 
*p)
 
 if (!strncmp(p, "bytes ", 6)) {
 p += 6;
-s->off = strtoll(p, NULL, 10);
+s->off = strtoull(p, NULL, 10);
 if ((slash = strchr(p, '/')) && strlen(slash) > 0)
-s->filesize = strtoll(slash + 1, NULL, 10);
+s->filesize = strtoull(slash + 1, NULL, 10);
 }
 if (s->seekable == -1 && (!s->is_akamai || s->filesize != 2147483647))
 h->is_streamed = 0; /* we _can_ in fact seek */
@@ -799,8 +799,9 @@ static int process_line(URLContext *h, char *line, int 
line_count,
 if ((ret = parse_location(s, p)) < 0)
 return ret;
 *new_location = 1;
-} else if (!av_strcasecmp(tag, "Content-Length") && s->filesize == -1) 
{
-s->filesize = strtoll(p, NULL, 10);
+} else if (!av_strcasecmp(tag, "Content-Length") &&
+   s->filesize == UINT64_MAX) {
+s->filesize = strtoull(p, NULL, 10);
 } else if (!av_strcasecmp(tag, "Content-Range")) {
 parse_content_range(h, p);
 } else if (!av_strcasecmp(tag, "Accept-Ranges") &&
@@ -809,7 +810,7 @@ static int process_line(URLContext *h, char *line, int 
line_count,
 h->is_streamed = 0;
 } else if (!av_strcasecmp(tag, "Transfer-Encoding") &&
!av_strncasecmp(p, "chunked", 7)) {
-s->filesize  = -1;
+s->filesize  = UINT64_MAX;
 s->chunksize = 0;
 } else if (!av_strcasecmp(tag, "WWW-Authenticate")) {
 ff_http_auth_handle_header(&s->auth_state, tag, p);
@@ -833,7 +834,7 @@ static int process_line(URLContext *h, char *line, int 
line_count,
 if (parse_cookie(s, p, &s->cookie_dict))
 av_log(h, AV_LOG_WARNING, "Unable to parse '%s'\n", p);
 } else if (!av_strcasecmp(tag, "Icy-MetaInt")) {
-s->icy_metaint = strtoll(p, NULL, 10);
+s->icy_metaint = strtoull(p, NULL, 10);
 } else if (!av_strncasecmp(tag, "Icy-", 4)) {
 if ((ret = parse_icy(s, tag, p)) < 0)
 return ret;
@@ -963,7 +964,7 @@ static int http_read_header(URLContext *h, int 
*new_location)
 char line[MAX_URL_SIZE];
 int err = 0;
 
-s->chunksize = -1;
+s->chunksize = UINT64_MAX;
 
 for (;;) {
 if ((err = http_get_line(s, line, sizeof(line))) < 0)
@@ -997,7 +998,7 @@ static int http_connect(URLContext *h, const char *path, 
const char *local_path,
 int post, err;
 char headers[HTTP_HEADERS_SIZE] = "";
 char *authstr = NULL, *proxyauthstr = NULL;
-int64_t off = s->off;
+uint64_t off = s->off;
 int len = 0;
 const char *method;
 int send_expect_100 = 0;
@@ -1045,7 +1046,7 @@ static int http_connect(URLContext *h, const char *path, 
const char *local_p

[FFmpeg-cvslog] http: move chunk handling from http_read_stream() to http_buf_read().

2016-12-05 Thread Ronald S. Bultje

ffmpeg | branch: release/3.0 | Ronald S. Bultje  | Mon Dec  
5 10:18:10 2016 -0500| [726faff0aa86fa040280c57f27eefde47a17ea1b] | committer: 
Ronald S. Bultje

http: move chunk handling from http_read_stream() to http_buf_read().

(cherry picked from commit 845bb401781ef04e342bd558df16a8dbf5f800f9)

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=726faff0aa86fa040280c57f27eefde47a17ea1b
---

 libavformat/http.c | 57 +-
 1 file changed, 31 insertions(+), 26 deletions(-)

diff --git a/libavformat/http.c b/libavformat/http.c
index bd84aa0..f027e03 100644
--- a/libavformat/http.c
+++ b/libavformat/http.c
@@ -1153,6 +1153,34 @@ static int http_buf_read(URLContext *h, uint8_t *buf, 
int size)
 {
 HTTPContext *s = h->priv_data;
 int len;
+
+if (s->chunksize != UINT64_MAX) {
+if (!s->chunksize) {
+char line[32];
+int err;
+
+do {
+if ((err = http_get_line(s, line, sizeof(line))) < 0)
+return err;
+} while (!*line);/* skip CR LF from last chunk */
+
+s->chunksize = strtoull(line, NULL, 16);
+
+av_log(h, AV_LOG_TRACE,
+   "Chunked encoding data size: %"PRIu64"'\n",
+s->chunksize);
+
+if (!s->chunksize)
+return 0;
+else if (s->chunksize == UINT64_MAX) {
+av_log(h, AV_LOG_ERROR, "Invalid chunk size %"PRIu64"\n",
+   s->chunksize);
+return AVERROR(EINVAL);
+}
+}
+size = FFMIN(size, s->chunksize);
+}
+
 /* read bytes from input buffer first */
 len = s->buf_end - s->buf_ptr;
 if (len > 0) {
@@ -1175,8 +1203,10 @@ static int http_buf_read(URLContext *h, uint8_t *buf, 
int size)
 }
 if (len > 0) {
 s->off += len;
-if (s->chunksize > 0)
+if (s->chunksize > 0) {
+av_assert0(s->chunksize >= len);
 s->chunksize -= len;
+}
 }
 return len;
 }
@@ -1230,31 +1260,6 @@ static int http_read_stream(URLContext *h, uint8_t *buf, 
int size)
 return err;
 }
 
-if (s->chunksize != UINT64_MAX) {
-if (!s->chunksize) {
-char line[32];
-
-do {
-if ((err = http_get_line(s, line, sizeof(line))) < 0)
-return err;
-} while (!*line);/* skip CR LF from last chunk */
-
-s->chunksize = strtoull(line, NULL, 16);
-
-av_log(h, AV_LOG_TRACE,
-   "Chunked encoding data size: %"PRIu64"'\n",
-s->chunksize);
-
-if (!s->chunksize)
-return 0;
-else if (s->chunksize == UINT64_MAX) {
-av_log(h, AV_LOG_ERROR, "Invalid chunk size %"PRIu64"\n",
-   s->chunksize);
-return AVERROR(EINVAL);
-}
-}
-size = FFMIN(size, s->chunksize);
-}
 #if CONFIG_ZLIB
 if (s->compressed)
 return http_buf_read_compressed(h, buf, size);

___
ffmpeg-cvslog mailing list
ffmpeg-cvslog@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-cvslog

1 2 3 4 >

1 - 100 of 361 matches

Mail list logo