[libav-devel] [PATCH 2/6] movenc: write hvcC tag for HEVC.

2014-03-03 Thread Tim Walker
---
 libavformat/Makefile |2 +-
 libavformat/hevc.c   | 1076 ++
 libavformat/hevc.h   |   50 +++
 libavformat/movenc.c |   13 +
 4 files changed, 1140 insertions(+), 1 deletion(-)
 create mode 100644 libavformat/hevc.c
 create mode 100644 libavformat/hevc.h

diff --git a/libavformat/Makefile b/libavformat/Makefile
index d491d43..a3cd504 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -179,7 +179,7 @@ OBJS-$(CONFIG_MM_DEMUXER)+= mm.o
 OBJS-$(CONFIG_MMF_DEMUXER)   += mmf.o pcm.o
 OBJS-$(CONFIG_MMF_MUXER) += mmf.o
 OBJS-$(CONFIG_MOV_DEMUXER)   += mov.o isom.o mov_chan.o
-OBJS-$(CONFIG_MOV_MUXER) += movenc.o isom.o avc.o \
+OBJS-$(CONFIG_MOV_MUXER) += movenc.o isom.o avc.o hevc.o \
 movenchint.o mov_chan.o
 OBJS-$(CONFIG_MP2_MUXER) += mp3enc.o rawenc.o id3v2enc.o
 OBJS-$(CONFIG_MP3_DEMUXER)   += mp3dec.o
diff --git a/libavformat/hevc.c b/libavformat/hevc.c
new file mode 100644
index 000..f359eb5
--- /dev/null
+++ b/libavformat/hevc.c
@@ -0,0 +1,1076 @@
+/*
+ * Copyright (c) 2014 Tim Walker tdskywal...@gmail.com
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include libavcodec/get_bits.h
+#include libavcodec/golomb.h
+#include libavcodec/hevc.h
+#include libavutil/intreadwrite.h
+#include avc.h
+#include avio.h
+#include hevc.h
+
+#define MAX_SPATIAL_SEGMENTATION 4096 // max. value of u(12) field
+
+typedef struct HVCCNALUnitArray {
+uint8_t  array_completeness;
+uint8_t  NAL_unit_type;
+uint16_t numNalus;
+uint16_t *nalUnitLength;
+uint8_t  **nalUnit;
+} HVCCNALUnitArray;
+
+typedef struct HEVCDecoderConfigurationRecord {
+uint8_t  configurationVersion;
+uint8_t  general_profile_space;
+uint8_t  general_tier_flag;
+uint8_t  general_profile_idc;
+uint32_t general_profile_compatibility_flags;
+uint64_t general_constraint_indicator_flags;
+uint8_t  general_level_idc;
+uint16_t min_spatial_segmentation_idc;
+uint8_t  parallelismType;
+uint8_t  chromaFormat;
+uint8_t  bitDepthLumaMinus8;
+uint8_t  bitDepthChromaMinus8;
+uint16_t avgFrameRate;
+uint8_t  constantFrameRate;
+uint8_t  numTemporalLayers;
+uint8_t  temporalIdNested;
+uint8_t  lengthSizeMinusOne;
+uint8_t  numOfArrays;
+HVCCNALUnitArray *array;
+} HEVCDecoderConfigurationRecord;
+
+typedef struct HVCCProfileTierLevel {
+uint8_t  profile_space;
+uint8_t  tier_flag;
+uint8_t  profile_idc;
+uint32_t profile_compatibility_flags;
+uint64_t constraint_indicator_flags;
+uint8_t  level_idc;
+} HVCCProfileTierLevel;
+
+static void hvcc_update_ptl(HEVCDecoderConfigurationRecord *hvcc,
+HVCCProfileTierLevel *ptl)
+{
+/*
+ * The value of general_profile_space in all the parameter sets must be
+ * identical.
+ */
+hvcc-general_profile_space = ptl-profile_space;
+
+/*
+ * The level indication general_level_idc must indicate a level of
+ * capability equal to or greater than the highest level indicated for the
+ * highest tier in all the parameter sets.
+ */
+if (hvcc-general_tier_flag  ptl-tier_flag)
+hvcc-general_level_idc = ptl-level_idc;
+else
+hvcc-general_level_idc = FFMAX(hvcc-general_level_idc, 
ptl-level_idc);
+
+/*
+ * The tier indication general_tier_flag must indicate a tier equal to or
+ * greater than the highest tier indicated in all the parameter sets.
+ */
+hvcc-general_tier_flag = FFMAX(hvcc-general_tier_flag, ptl-tier_flag);
+
+/*
+ * The profile indication general_profile_idc must indicate a profile to
+ * which the stream associated with this configuration record conforms.
+ *
+ * If the sequence parameter sets are marked with different profiles, then
+ * the stream may need examination to determine which profile, if any, the
+ * entire stream conforms to. If the entire stream is not examined, or the
+ * examination reveals that there is no profile to which the entire stream
+ * conforms, then the entire stream must be split into two or 

[libav-devel] [PATCH 3/6] movenc: enable Annex B to MP4 conversion for HEVC tracks.

2014-03-03 Thread Tim Walker
---

Note: this includes a facility for filtering parameter set NALUs out of the
bitstream and reporting how many were removed (so that we can e.g.
decide whether to generate a new MP4 sample entry with a new hvcC).

It is currently unused, as I'm not sure how the MP4 side of things would work.

 libavformat/hevc.c   | 101 +++
 libavformat/hevc.h   |  48 
 libavformat/movenc.c |   9 +
 3 files changed, 158 insertions(+)

diff --git a/libavformat/hevc.c b/libavformat/hevc.c
index f359eb5..152e28a 100644
--- a/libavformat/hevc.c
+++ b/libavformat/hevc.c
@@ -1014,6 +1014,107 @@ static int hvcc_write(AVIOContext *pb, 
HEVCDecoderConfigurationRecord *hvcc)
 return 0;
 }
 
+int ff_hevc_annexb2mp4(AVIOContext *pb, const uint8_t *buf_in,
+   int size, int filter_ps, int *ps_count)
+{
+int num_ps = 0, ret = 0;
+uint8_t *buf, *end, *start = NULL;
+
+if (!filter_ps) {
+ret = ff_avc_parse_nal_units(pb, buf_in, size);
+goto end;
+}
+
+ret = ff_avc_parse_nal_units_buf(buf_in, start, size);
+if (ret  0)
+goto end;
+
+ret = 0;
+buf = start;
+end = start + size;
+
+while (end - buf  4) {
+uint32_t len = FFMIN(AV_RB32(buf), end - buf - 4);
+uint8_t type = (buf[4]  1)  0x3f;
+
+buf += 4;
+
+switch (type) {
+case NAL_VPS:
+case NAL_SPS:
+case NAL_PPS:
+num_ps++;
+break;
+default:
+ret += 4 + len;
+avio_wb32 (pb, len);
+avio_write(pb, buf, len);
+break;
+}
+
+buf += len;
+}
+
+end:
+free(start);
+if (ps_count)
+*ps_count = num_ps;
+return ret;
+}
+
+int ff_hevc_annexb2mp4_buf(const uint8_t *buf_in, uint8_t **buf_out,
+   int *size, int filter_ps, int *ps_count)
+{
+AVIOContext *pb;
+int num_ps = 0, ret = 0;
+uint8_t *buf, *end, *start = NULL;
+
+if (!filter_ps) {
+ret = ff_avc_parse_nal_units_buf(buf_in, buf_out, size);
+goto end;
+}
+
+ret = avio_open_dyn_buf(pb);
+if (ret  0)
+goto end;
+
+ret = ff_avc_parse_nal_units_buf(buf_in, start, size);
+if (ret  0)
+goto end;
+
+buf = start;
+end = start + *size;
+
+while (end - buf  4) {
+uint32_t len = FFMIN(AV_RB32(buf), end - buf - 4);
+uint8_t type = (buf[4]  1)  0x3f;
+
+buf += 4;
+
+switch (type) {
+case NAL_VPS:
+case NAL_SPS:
+case NAL_PPS:
+num_ps++;
+break;
+default:
+avio_wb32 (pb, len);
+avio_write(pb, buf, len);
+break;
+}
+
+buf += len;
+}
+
+*size = avio_close_dyn_buf(pb, buf_out);
+
+end:
+free(start);
+if (ps_count)
+*ps_count = num_ps;
+return ret;
+}
+
 int ff_isom_write_hvcc(AVIOContext *pb, const uint8_t *data,
int size, int ps_array_completeness)
 {
diff --git a/libavformat/hevc.h b/libavformat/hevc.h
index 82525ac..f394342 100644
--- a/libavformat/hevc.h
+++ b/libavformat/hevc.h
@@ -30,6 +30,54 @@
 #include avio.h
 
 /**
+ * Writes Annex B formatted HEVC NAL units to the provided AVIOContext.
+ *
+ * The NAL units are converted to an MP4-compatible format (start code prefixes
+ * are replaced by 4-byte size fields, as per ISO/IEC 14496-15).
+ *
+ * If filter_ps is non-zero, any HEVC parameter sets found in the input will be
+ * discarded, and *ps_count will be set to the number of discarded PS NAL units.
+ *
+ * @param pb address of the AVIOContext where the data shall be written
+ * @param buf_in address of the buffer holding the input data
+ * @param size size (in bytes) of the input buffer
+ * @param filter_ps whether to write parameter set NAL units to the output (0)
+ *or to discard them (non-zero)
+ * @param ps_count address of the variable where the number of discarded
+ *parameter set NAL units shall be written, may be NULL
+ * @return the amount (in bytes) of data written in case of success, a negative
+ * value corresponding to an AVERROR code in case of failure
+ */
+int ff_hevc_annexb2mp4(AVIOContext *pb, const uint8_t *buf_in,
+   int size, int filter_ps, int *ps_count);
+
+/**
+ * Writes Annex B formatted HEVC NAL units to a data buffer.
+ *
+ * The NAL units are converted to an MP4-compatible format (start code prefixes
+ * are replaced by 4-byte size fields, as per ISO/IEC 14496-15).
+ *
+ * If filter_ps is non-zero, any HEVC parameter sets found in the input will be
+ * discarded, and *ps_count will be set to the number of discarded PS NAL units.
+ *
+ * On output, *size holds the size (in bytes) of the output data buffer.
+ *
+ * @param buf_in address of the buffer holding the input data
+ * @param size address of the variable holding the size (in bytes) of the input
+ *

[libav-devel] [PATCH 1/6] movenc: use 'hev1' tag for HEVC in MODE_MOV.

2014-03-03 Thread Tim Walker
'hvc1' requires that parameter set NAL units be
present only in the sample entry, and not in the
samples themselves, requiring that additional
parameter sets, if present, be filtered out of the
samples and placed in new, additional sample entries
if they override or otherwise conflict with the
parameter sets present in the first sample entry.
We do not have any way of doing this at present, so
the files we produce can only comply with the
restrictions set for the 'hev1' sample entry name in
ISO/IEC 14496-15.
---

Note: unlike avplay, VLC does not support hev1 for some reason.

 libavformat/isom.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/isom.c b/libavformat/isom.c
index 9b32b7d..a6197ab 100644
--- a/libavformat/isom.c
+++ b/libavformat/isom.c
@@ -136,8 +136,8 @@ const AVCodecTag ff_codec_movvideo_tags[] = {
 
 { AV_CODEC_ID_RAWVIDEO, MKTAG('W', 'R', 'A', 'W') },
 
-{ AV_CODEC_ID_HEVC, MKTAG('h', 'v', 'c', '1') }, /* HEVC/H.265 which 
indicates parameter sets shall not be in ES */
 { AV_CODEC_ID_HEVC, MKTAG('h', 'e', 'v', '1') }, /* HEVC/H.265 which 
indicates parameter sets may be in ES */
+{ AV_CODEC_ID_HEVC, MKTAG('h', 'v', 'c', '1') }, /* HEVC/H.265 which 
indicates parameter sets shall not be in ES */
 
 { AV_CODEC_ID_H264, MKTAG('a', 'v', 'c', '1') }, /* AVC-1/H.264 */
 { AV_CODEC_ID_H264, MKTAG('a', 'i', '5', 'p') }, /* AVC-Intra  50M 
720p24/30/60 */
-- 
1.8.3.4 (Apple Git-47)

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 5/6] matroskaenc: write private data in hvcC format for HEVC.

2014-03-03 Thread Tim Walker
---
 libavformat/Makefile  | 2 +-
 libavformat/matroskaenc.c | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/libavformat/Makefile b/libavformat/Makefile
index a3cd504..5694314 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -168,7 +168,7 @@ OBJS-$(CONFIG_M4V_MUXER) += rawenc.o
 OBJS-$(CONFIG_MATROSKA_DEMUXER)  += matroskadec.o matroska.o  \
 isom.o rmsipr.o
 OBJS-$(CONFIG_MATROSKA_MUXER)+= matroskaenc.o matroska.o \
-isom.o avc.o \
+isom.o avc.o hevc.o \
 flacenc_header.o avlanguage.o wv.o
 OBJS-$(CONFIG_MD5_MUXER) += md5enc.o
 OBJS-$(CONFIG_MJPEG_DEMUXER) += rawdec.o
diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
index cc645a5..8fece1f 100644
--- a/libavformat/matroskaenc.c
+++ b/libavformat/matroskaenc.c
@@ -22,6 +22,7 @@
 #include stdint.h
 
 #include avc.h
+#include hevc.h
 #include avformat.h
 #include avlanguage.h
 #include flacenc.h
@@ -500,6 +501,8 @@ static int mkv_write_codecprivate(AVFormatContext *s, 
AVIOContext *pb, AVCodecCo
 ret = put_wv_codecpriv(dyn_cp, codec);
 else if (codec-codec_id == AV_CODEC_ID_H264)
 ret = ff_isom_write_avcc(dyn_cp, codec-extradata, 
codec-extradata_size);
+else if (codec-codec_id == AV_CODEC_ID_HEVC)
+ret = ff_isom_write_hvcc(dyn_cp, codec-extradata, 
codec-extradata_size, 0);
 else if (codec-codec_id == AV_CODEC_ID_ALAC) {
 if (codec-extradata_size  36) {
 av_log(s, AV_LOG_ERROR,
-- 
1.8.3.4 (Apple Git-47)

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 4/6] movenc: allow muxing HEVC in MODE_MP4.

2014-03-03 Thread Tim Walker
---

Uses 'hev1' for the same reasons as in MODE_MOV.

 libavformat/isom.c   | 1 +
 libavformat/movenc.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/libavformat/isom.c b/libavformat/isom.c
index a6197ab..76c455b 100644
--- a/libavformat/isom.c
+++ b/libavformat/isom.c
@@ -33,6 +33,7 @@ const AVCodecTag ff_mp4_obj_type[] = {
 { AV_CODEC_ID_MOV_TEXT, 0x08 },
 { AV_CODEC_ID_MPEG4   , 0x20 },
 { AV_CODEC_ID_H264, 0x21 },
+{ AV_CODEC_ID_HEVC, 0x23 },
 { AV_CODEC_ID_AAC , 0x40 },
 { AV_CODEC_ID_MP4ALS  , 0x40 }, /* 14496-3 ALS */
 { AV_CODEC_ID_MPEG2VIDEO  , 0x61 }, /* MPEG2 Main */
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 9fa9d7e..7930aa9 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -752,6 +752,7 @@ static int mp4_get_codec_tag(AVFormatContext *s, MOVTrack 
*track)
 return 0;
 
 if  (track-enc-codec_id == AV_CODEC_ID_H264)  tag = 
MKTAG('a','v','c','1');
+else if (track-enc-codec_id == AV_CODEC_ID_HEVC)  tag = 
MKTAG('h','e','v','1');
 else if (track-enc-codec_id == AV_CODEC_ID_AC3)   tag = 
MKTAG('a','c','-','3');
 else if (track-enc-codec_id == AV_CODEC_ID_DIRAC) tag = 
MKTAG('d','r','a','c');
 else if (track-enc-codec_id == AV_CODEC_ID_MOV_TEXT)  tag = 
MKTAG('t','x','3','g');
-- 
1.8.3.4 (Apple Git-47)

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 6/6] matroskaenc: enable Annex B to MP4 conversion for HEVC tracks.

2014-03-03 Thread Tim Walker
---
 libavformat/matroskaenc.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
index 8fece1f..08f5552 100644
--- a/libavformat/matroskaenc.c
+++ b/libavformat/matroskaenc.c
@@ -1163,6 +1163,10 @@ static void mkv_write_block(AVFormatContext *s, 
AVIOContext *pb,
 if (codec-codec_id == AV_CODEC_ID_H264  codec-extradata_size  0 
 (AV_RB24(codec-extradata) == 1 || AV_RB32(codec-extradata) == 1))
 ff_avc_parse_nal_units_buf(pkt-data, data, size);
+else if (codec-codec_id == AV_CODEC_ID_HEVC  codec-extradata_size  6 

+ (AV_RB24(codec-extradata) == 1 || AV_RB32(codec-extradata) == 
1))
+/* extradata is Annex B, assume the bitstream is too and convert it */
+ff_hevc_annexb2mp4_buf(pkt-data, data, size, 0, NULL);
 else if (codec-codec_id == AV_CODEC_ID_WAVPACK) {
 int ret = mkv_strip_wavpack(pkt-data, data, size);
 if (ret  0) {
-- 
1.8.3.4 (Apple Git-47)

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 1/2] matroskaenc: allow override of writing application tag

2014-03-03 Thread John Stebbins
---
 libavformat/matroskaenc.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c
index cc645a5..3ab3139 100644
--- a/libavformat/matroskaenc.c
+++ b/libavformat/matroskaenc.c
@@ -805,7 +805,8 @@ static int mkv_write_tag(AVFormatContext *s, AVDictionary 
*m, unsigned int eleme
 end_ebml_master(s-pb, targets);
 
 while ((t = av_dict_get(m, , t, AV_DICT_IGNORE_SUFFIX)))
-if (av_strcasecmp(t-key, title))
+if (av_strcasecmp(t-key, title) 
+av_strcasecmp(t-key, encoding_tool))
 mkv_write_simpletag(s-pb, t);
 
 end_ebml_master(s-pb, tag);
@@ -965,7 +966,10 @@ static int mkv_write_header(AVFormatContext *s)
 segment_uid[i] = av_lfg_get(lfg);
 
 put_ebml_string(pb, MATROSKA_ID_MUXINGAPP , LIBAVFORMAT_IDENT);
-put_ebml_string(pb, MATROSKA_ID_WRITINGAPP, LIBAVFORMAT_IDENT);
+if ((tag = av_dict_get(s-metadata, encoding_tool, NULL, 0)))
+put_ebml_string(pb, MATROSKA_ID_WRITINGAPP, tag-value);
+else
+put_ebml_string(pb, MATROSKA_ID_WRITINGAPP, LIBAVFORMAT_IDENT);
 put_ebml_binary(pb, MATROSKA_ID_SEGMENTUID, segment_uid, 16);
 }
 
-- 
1.8.5.3

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 2/2] movenc: allow override of writing application tag

2014-03-03 Thread John Stebbins
---
 libavformat/movenc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 762cfef..6344e38 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -1858,7 +1858,8 @@ static int mov_write_ilst_tag(AVIOContext *pb, 
MOVMuxContext *mov,
 mov_write_string_metadata(s, pb, \251wrt, composer , 1);
 mov_write_string_metadata(s, pb, \251alb, album, 1);
 mov_write_string_metadata(s, pb, \251day, date , 1);
-mov_write_string_tag(pb, \251too, LIBAVFORMAT_IDENT, 0, 1);
+if (!mov_write_string_metadata(s, pb, \251too, encoding_tool, 1))
+mov_write_string_tag(pb, \251too, LIBAVFORMAT_IDENT, 0, 1);
 mov_write_string_metadata(s, pb, \251cmt, comment  , 1);
 mov_write_string_metadata(s, pb, \251gen, genre, 1);
 mov_write_string_metadata(s, pb, \251cpy, copyright, 1);
-- 
1.8.5.3

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH 1/2] matroskaenc: allow override of writing application tag

2014-03-03 Thread Luca Barbato
On 03/03/14 22:20, John Stebbins wrote:
 ---
  libavformat/matroskaenc.c | 8 ++--
  1 file changed, 6 insertions(+), 2 deletions(-)
 

Probably ok.

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


Re: [libav-devel] [PATCH 2/2] movenc: allow override of writing application tag

2014-03-03 Thread Luca Barbato
On 03/03/14 22:20, John Stebbins wrote:
 ---
  libavformat/movenc.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)
 

Ok.

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 1/4] timer: use mach_absolute_time as high resolution clock on darwin

2014-03-03 Thread Janne Grunau
From: Janne Grunau j...@jannau.net

The resolution is not guaranteed to be nanoseconds. On iOS 7 the duration
of one tick is 125/3 ns, which is still more than an order of magnitude
better than microseconds.

Replace "decicycles" with the neutral "UNITS" in the timer output.
"Decicycles" is strange enough already, but labelling tenths of a nanosecond,
or unspecified deci-ticks from mach_absolute_time, as decicycles is just silly.
---
 configure |  4 
 libavutil/timer.h | 14 +++---
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/configure b/configure
index b143335..411b586 100755
--- a/configure
+++ b/configure
@@ -1383,6 +1383,8 @@ HAVE_LIST=
 local_aligned_8
 localtime_r
 loongson
+mach_absolute_time
+mach_mach_time_h
 machine_ioctl_bt848_h
 machine_ioctl_meteor_h
 machine_rw_barrier
@@ -3846,6 +3848,7 @@ check_struct sys/time.h sys/resource.h struct rusage 
ru_maxrss
 check_func  gettimeofday
 check_func  isatty
 check_func  localtime_r
+check_func  mach_absolute_time
 check_func  ${malloc_prefix}memalign enable memalign
 check_func  mkstemp
 check_func  mmap
@@ -3881,6 +3884,7 @@ check_header dlfcn.h
 check_header dxva.h
 check_header dxva2api.h
 check_header io.h
+check_header mach/mach_time.h
 check_header malloc.h
 check_header poll.h
 check_header sys/mman.h
diff --git a/libavutil/timer.h b/libavutil/timer.h
index d2c5001..0d93d7c 100644
--- a/libavutil/timer.h
+++ b/libavutil/timer.h
@@ -32,6 +32,10 @@
 
 #include config.h
 
+#if HAVE_MACH_MACH_TIME_H
+#include mach/mach_time.h
+#endif
+
 #include log.h
 
 #if   ARCH_ARM
@@ -44,8 +48,12 @@
 #   include x86/timer.h
 #endif
 
-#if !defined(AV_READ_TIME)  HAVE_GETHRTIME
-#   define AV_READ_TIME gethrtime
+#if !defined(AV_READ_TIME)
+#   if HAVE_GETHRTIME
+#   define AV_READ_TIME gethrtime
+#   elif HAVE_MACH_ABSOLUTE_TIME
+#   define AV_READ_TIME mach_absolute_time
+#   endif
 #endif
 
 #ifdef AV_READ_TIME
@@ -68,7 +76,7 @@
 tskip_count++;\
 if (((tcount + tskip_count)  (tcount + tskip_count - 1)) == 0) { \
 av_log(NULL, AV_LOG_ERROR,\
-   %PRIu64 decicycles in %s, %d runs, %d skips\n, \
+   %PRIu64 UNITS in %s, %d runs, %d skips\n,  \
tsum * 10 / tcount, id, tcount, tskip_count);  \
 } \
 }
-- 
1.9.0

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 4/4] aarch64: float_dsp NEON assembler

2014-03-03 Thread Janne Grunau
Ported from the ARM NEON implementation, with vector_dmul_scalar added.

The functions are between 1.5 and 5 times faster than the C implementations
when built with Apple's clang-503.0.19 on an A7.
---
 libavutil/aarch64/Makefile |   5 +-
 libavutil/aarch64/float_dsp_init_aarch64.c |  69 ++
 libavutil/aarch64/float_dsp_neon.S | 202 +
 libavutil/float_dsp.c  |  15 ++-
 libavutil/float_dsp.h  |   1 +
 5 files changed, 284 insertions(+), 8 deletions(-)
 create mode 100644 libavutil/aarch64/float_dsp_init_aarch64.c
 create mode 100644 libavutil/aarch64/float_dsp_neon.S

diff --git a/libavutil/aarch64/Makefile b/libavutil/aarch64/Makefile
index 13d26a1..b4500fd 100644
--- a/libavutil/aarch64/Makefile
+++ b/libavutil/aarch64/Makefile
@@ -1 +1,4 @@
-OBJS += aarch64/cpu.o
+OBJS += aarch64/cpu.o \
+aarch64/float_dsp_init_aarch64.o  \
+
+NEON-OBJS += aarch64/float_dsp_neon.o
diff --git a/libavutil/aarch64/float_dsp_init_aarch64.c 
b/libavutil/aarch64/float_dsp_init_aarch64.c
new file mode 100644
index 000..37d34c9
--- /dev/null
+++ b/libavutil/aarch64/float_dsp_init_aarch64.c
@@ -0,0 +1,69 @@
+/*
+ * ARM NEON optimised Float DSP functions
+ * Copyright (c) 2008 Mans Rullgard m...@mansr.com
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include stdint.h
+
+#include libavutil/attributes.h
+#include libavutil/cpu.h
+#include libavutil/float_dsp.h
+#include cpu.h
+
+void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1,
+ int len);
+
+void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul,
+int len);
+
+void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul,
+int len);
+
+void ff_vector_dmul_scalar_neon(double *dst, const double *src, double mul,
+int len);
+
+void ff_vector_fmul_window_neon(float *dst, const float *src0,
+const float *src1, const float *win, int len);
+
+void ff_vector_fmul_add_neon(float *dst, const float *src0, const float *src1,
+ const float *src2, int len);
+
+void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
+ const float *src1, int len);
+
+void ff_butterflies_float_neon(float *v1, float *v2, int len);
+
+float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len);
+
+av_cold void ff_float_dsp_init_aarch64(AVFloatDSPContext *fdsp)
+{
+int cpu_flags = av_get_cpu_flags();
+
+if (have_neon(cpu_flags)) {
+fdsp-butterflies_float   = ff_butterflies_float_neon;
+fdsp-scalarproduct_float = ff_scalarproduct_float_neon;
+fdsp-vector_dmul_scalar  = ff_vector_dmul_scalar_neon;
+fdsp-vector_fmul = ff_vector_fmul_neon;
+fdsp-vector_fmac_scalar  = ff_vector_fmac_scalar_neon;
+fdsp-vector_fmul_add = ff_vector_fmul_add_neon;
+fdsp-vector_fmul_reverse = ff_vector_fmul_reverse_neon;
+fdsp-vector_fmul_scalar  = ff_vector_fmul_scalar_neon;
+fdsp-vector_fmul_window  = ff_vector_fmul_window_neon;
+}
+}
diff --git a/libavutil/aarch64/float_dsp_neon.S 
b/libavutil/aarch64/float_dsp_neon.S
new file mode 100644
index 000..776542c
--- /dev/null
+++ b/libavutil/aarch64/float_dsp_neon.S
@@ -0,0 +1,202 @@
+/*
+ * ARM NEON optimised Float DSP functions
+ * Copyright (c) 2008 Mans Rullgard m...@mansr.com
+ * Copyright (c) 2014 Janne Grunau janne-li...@jannau.net
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public

[libav-devel] [PATCH 2/4] float_dsp: fix errors in documentation

2014-03-03 Thread Janne Grunau
---
 libavutil/float_dsp.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h
index b215dad..0eb02f8 100644
--- a/libavutil/float_dsp.h
+++ b/libavutil/float_dsp.h
@@ -113,7 +113,7 @@ typedef struct AVFloatDSPContext {
  * constraints: 32-byte aligned
  * @param src1 second input vector
  * constraints: 32-byte aligned
- * @param src1 third input vector
+ * @param src2 third input vector
  * constraints: 32-byte aligned
  * @param len  number of elements in the input
  * constraints: multiple of 16
@@ -132,8 +132,6 @@ typedef struct AVFloatDSPContext {
  * constraints: 32-byte aligned
  * @param src1 second input vector
  * constraints: 32-byte aligned
- * @param src1 third input vector
- * constraints: 32-byte aligned
  * @param len  number of elements in the input
  * constraints: multiple of 16
  */
-- 
1.9.0

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 3/4] float_dsp: add test program for float dsp

2014-03-03 Thread Janne Grunau
Not hooked up to FATE for fear of random failures caused by floating-point
instability.

float_dsp-test: use 16 * 3 * 5 as length
---
 libavutil/Makefile|   1 +
 libavutil/float_dsp.c | 274 ++
 2 files changed, 275 insertions(+)

diff --git a/libavutil/Makefile b/libavutil/Makefile
index f663f18..5869e67 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -122,6 +122,7 @@ TESTPROGS = adler32 
\
 des \
 eval\
 fifo\
+float_dsp   \
 hmac\
 lfg \
 lls \
diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c
index 3707e06..22c3c15 100644
--- a/libavutil/float_dsp.c
+++ b/libavutil/float_dsp.c
@@ -132,3 +132,277 @@ av_cold void avpriv_float_dsp_init(AVFloatDSPContext 
*fdsp, int bit_exact)
 ff_float_dsp_init_x86(fdsp);
 #endif
 }
+
+#ifdef TEST
+
+#include float.h
+#include math.h
+#include stdint.h
+#include string.h
+
+#include cpu.h
+#include lfg.h
+#include log.h
+#include mem.h
+#include random_seed.h
+
+#define LEN 240
+
+static void fill_float_array(AVLFG *lfg, float *a, int len)
+{
+int i;
+double bmg[2], stddev = 10.0, mean = 0.0;
+
+for (i = 0; i  len; i += 2) {
+av_bmg_get(lfg, bmg);
+a[i] = bmg[0] * stddev + mean;
+a[i + 1] = bmg[1] * stddev + mean;
+}
+}
+static int compare_floats(const float *a, const float *b, int len,
+  float max_diff)
+{
+int i;
+for (i = 0; i  len; i++) {
+if (fabsf(a[i] - b[i])  max_diff) {
+av_log(NULL, AV_LOG_ERROR, %d: %- .12f - %- .12f = % .12g\n,
+   i, a[i], b[i], a[i] - b[i]);
+return -1;
+}
+}
+return 0;
+}
+
+static void fill_double_array(AVLFG *lfg, double *a, int len)
+{
+int i;
+double bmg[2], stddev = 10.0, mean = 0.0;
+
+for (i = 0; i  len; i += 2) {
+av_bmg_get(lfg, bmg);
+a[i] = bmg[0] * stddev + mean;
+a[i + 1] = bmg[1] * stddev + mean;
+}
+}
+
+static int compare_doubles(const double *a, const double *b, int len,
+   double max_diff)
+{
+int i;
+
+for (i = 0; i  len; i++) {
+if (fabs(a[i] - b[i])  max_diff) {
+av_log(NULL, AV_LOG_ERROR, %d: %- .12f - %- .12f = % .12g\n,
+   i, a[i], b[i], a[i] - b[i]);
+return -1;
+}
+}
+return 0;
+}
+
+static int test_vector_fmul(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp,
+const float *v1, const float *v2)
+{
+DECLARE_ALIGNED(32, float, cdst)[LEN];
+DECLARE_ALIGNED(32, float, odst)[LEN];
+int ret;
+
+cdsp-vector_fmul(cdst, v1, v2, LEN);
+fdsp-vector_fmul(odst, v1, v2, LEN);
+
+if (ret = compare_floats(cdst, odst, LEN, FLT_EPSILON))
+av_log(NULL, AV_LOG_ERROR, %s failed\n, __func__);
+
+return ret;
+}
+
+static int test_vector_fmac_scalar(AVFloatDSPContext *fdsp, AVFloatDSPContext 
*cdsp,
+   const float *v1, const float *src0, float 
scale)
+{
+DECLARE_ALIGNED(32, float, cdst)[LEN];
+DECLARE_ALIGNED(32, float, odst)[LEN];
+int ret;
+
+memcpy(cdst, v1, LEN * sizeof(*v1));
+memcpy(odst, v1, LEN * sizeof(*v1));
+
+cdsp-vector_fmac_scalar(cdst, src0, scale, LEN);
+fdsp-vector_fmac_scalar(odst, src0, scale, LEN);
+
+if (ret = compare_floats(cdst, odst, LEN, 0.005))
+av_log(NULL, AV_LOG_ERROR, %s failed\n, __func__);
+
+return ret;
+}
+
+static int test_vector_fmul_scalar(AVFloatDSPContext *fdsp, AVFloatDSPContext 
*cdsp,
+   const float *v1, float scale)
+{
+DECLARE_ALIGNED(32, float, cdst)[LEN];
+DECLARE_ALIGNED(32, float, odst)[LEN];
+int ret;
+
+cdsp-vector_fmul_scalar(cdst, v1, scale, LEN);
+fdsp-vector_fmul_scalar(odst, v1, scale, LEN);
+
+if (ret = compare_floats(cdst, odst, LEN, FLT_EPSILON))
+av_log(NULL, AV_LOG_ERROR, %s failed\n, __func__);
+
+return ret;
+}
+
+static int test_vector_dmul_scalar(AVFloatDSPContext *fdsp, AVFloatDSPContext 
*cdsp,
+   const double *v1, double scale)
+{
+DECLARE_ALIGNED(32, double, cdst)[LEN];
+DECLARE_ALIGNED(32, double, odst)[LEN];
+int ret;
+
+cdsp-vector_dmul_scalar(cdst, v1, scale, LEN);
+fdsp-vector_dmul_scalar(odst, v1, scale, LEN);
+
+if (ret = compare_doubles(cdst, odst, LEN, DBL_EPSILON))
+av_log(NULL, 

[libav-devel] [PATCH 1/3] x86/synth_filter: add synth_filter_sse

2014-03-03 Thread James Almer
Build only on x86_32 targets.

Signed-off-by: James Almer jamr...@gmail.com
---
 libavcodec/x86/dcadsp.asm| 55 +---
 libavcodec/x86/dcadsp_init.c | 44 +--
 2 files changed, 69 insertions(+), 30 deletions(-)

diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm
index 56039ba..970ec3d 100644
--- a/libavcodec/x86/dcadsp.asm
+++ b/libavcodec/x86/dcadsp.asm
@@ -199,15 +199,31 @@ INIT_XMM sse
 DCA_LFE_FIR 0
 DCA_LFE_FIR 1
 
-INIT_XMM sse2
+%macro SETZERO 1
+%if cpuflag(sse2)
+pxor  %1, %1
+%else
+xorps %1, %1, %1
+%endif
+%endmacro
+
+%macro SHUF 2
+%if cpuflag(sse2)
+pshufd%1, %2, q0123
+%else
+mova  %1, %2
+shufps%1, %1, q0123
+%endif
+%endmacro
+
 %macro INNER_LOOP   1
 ; reading backwards:  ptr1 = synth_buf + j + i; ptr2 = synth_buf + j - i
 ;~ a += window[i + j]  * (-synth_buf[15 - i + j])
 ;~ b += window[i + j + 16] * (synth_buf[i + j])
-pshufdm5, [ptr2 + j + (15 - 3) * 4], q0123
+SHUF  m5, [ptr2 + j + (15 - 3) * 4]
 mova  m6, [ptr1 + j]
 %if ARCH_X86_64
-pshufd   m11, [ptr2 + j + (15 - 3) * 4 - mmsize], q0123
+SHUF m11, [ptr2 + j + (15 - 3) * 4 - mmsize]
 mova m12, [ptr1 + j + mmsize]
 %endif
 mulps m6, [win  + %1 + j + 16 * 4]
@@ -224,10 +240,10 @@ INIT_XMM sse2
 %endif
 ;~ c += window[i + j + 32] * (synth_buf[16 + i + j])
 ;~ d += window[i + j + 48] * (synth_buf[31 - i + j])
-pshufdm6, [ptr2 + j + (31 - 3) * 4], q0123
+SHUF  m6, [ptr2 + j + (31 - 3) * 4]
 mova  m5, [ptr1 + j + 16 * 4]
 %if ARCH_X86_64
-pshufd   m12, [ptr2 + j + (31 - 3) * 4 - mmsize], q0123
+SHUF m12, [ptr2 + j + (31 - 3) * 4 - mmsize]
 mova m11, [ptr1 + j + mmsize + 16 * 4]
 %endif
 mulps m5, [win  + %1 + j + 32 * 4]
@@ -245,20 +261,25 @@ INIT_XMM sse2
 subj, 64 * 4
 %endmacro
 
-; void ff_synth_filter_inner_sse2(float *synth_buf, float synth_buf2[32],
-; const float window[512], float out[32],
-; intptr_t offset, float scale)
+; void ff_synth_filter_inner_opt(float *synth_buf, float synth_buf2[32],
+;  const float window[512], float out[32],
+;  intptr_t offset, float scale)
+%macro SYNTH_FILTER 0
 cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \
   synth_buf, synth_buf2, window, out, off, scale
 %define scale m0
 %if ARCH_X86_32 || WIN64
+%if cpuflag(sse2)
 movd   scale, scalem
+%else
+movss  scale, scalem
+%endif
 ; Make sure offset is in a register and not on the stack
 %define OFFQ  r4q
 %else
 %define OFFQ  offq
 %endif
-pshufdm0, m0, 0
+SPLATDm0
 ; prepare inner counter limit 1
 mov  r5q, 480
 sub  r5q, offmp
@@ -274,8 +295,8 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * 
ARCH_X86_64, \
 %endif
 .mainloop
 ; m1 = a  m2 = b  m3 = c  m4 = d
-pxor  m3, m3
-pxor  m4, m4
+SETZERO   m3
+SETZERO   m4
 mova  m1, [buf2 + i]
 mova  m2, [buf2 + i + 16 * 4]
 %if ARCH_X86_32
@@ -292,8 +313,8 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * 
ARCH_X86_64, \
 %define ptr2 r7q ; must be loaded
 %define win  r8q
 %define jr9q
-pxor  m9, m9
-pxor m10, m10
+SETZERO   m9
+SETZERO  m10
 mova  m7, [buf2 + i + mmsize]
 mova  m8, [buf2 + i + mmsize + 16 * 4]
 lea  win, [windowq + i]
@@ -350,3 +371,11 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 
* ARCH_X86_64, \
 subi, (ARCH_X86_64 + 1) * mmsize
 jge.mainloop
 RET
+%endmacro
+
+%if ARCH_X86_32
+INIT_XMM sse
+SYNTH_FILTER
+%endif
+INIT_XMM sse2
+SYNTH_FILTER
diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c
index 3821892..f8dd9b1 100644
--- a/libavcodec/x86/dcadsp_init.c
+++ b/libavcodec/x86/dcadsp_init.c
@@ -56,29 +56,39 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s)
 }
 }
 
-void ff_synth_filter_inner_sse2(float *synth_buf_ptr, float synth_buf2[32],
-const float window[512],
-float out[32], intptr_t offset, float scale);
+#define SYNTH_FILTER_FUNC(opt) 
\
+void ff_synth_filter_inner_##opt(float *synth_buf_ptr, float synth_buf2[32],   
\
+ const float window[512],  
\
+ float out[32], intptr_t offset, float scale); 
\
+static void synth_filter_##opt(FFTContext *imdct,  
\
+   float *synth_buf_ptr, int 

[libav-devel] [PATCH 0/3] synth filter float ASM

2014-03-03 Thread James Almer
Here are some extra implementations that extend Christophe's work.

The first one (SSE) is only for x86_32 targets as x86_64 guarantees SSE2 is 
available.

Second patch is an AVX implementation using ymm registers.
In my tests it was about 30 cycles faster than SSE2 on a Sandy Bridge CPU.

I don't have proper numbers for the third patch since I could only test on an
AMD rig, where functions using ymm registers tend to have subpar performance.
It still beat the AVX version by a decent margin, though, so Haswell should
see a nice boost with it.

I could add an FMA4 version using xmm registers, which would benefit AMD users 
unlike these AVX/FMA3 ymm ones. Thoughts?

James Almer (3):
  x86/synth_filter: add synth_filter_fma3
  x86/synth_filter: add synth_filter_sse
  x86/synth_filter: add synth_filter_avx

 libavcodec/x86/dcadsp.asm| 109 ---
 libavcodec/x86/dcadsp_init.c |  52 ++---
 2 files changed, 107 insertions(+), 54 deletions(-)

-- 
1.8.3.2

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 2/3] x86/synth_filter: add synth_filter_avx

2014-03-03 Thread James Almer
Sandy Bridge Win64:
180 cycles in ff_synth_filter_inner_sse2
150 cycles in ff_synth_filter_inner_avx

Also switch to a three operand format for some instructions to avoid 
assembly errors with Yasm 1.1.0 or older.

Signed-off-by: James Almer jamr...@gmail.com
---
 libavcodec/x86/dcadsp.asm| 76 +---
 libavcodec/x86/dcadsp_init.c |  4 +++
 2 files changed, 48 insertions(+), 32 deletions(-)

diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm
index 970ec3d..0d7c86e 100644
--- a/libavcodec/x86/dcadsp.asm
+++ b/libavcodec/x86/dcadsp.asm
@@ -200,18 +200,22 @@ DCA_LFE_FIR 0
 DCA_LFE_FIR 1
 
 %macro SETZERO 1
-%if cpuflag(sse2)
+%if cpuflag(sse2) && notcpuflag(avx)
 pxor  %1, %1
 %else
 xorps %1, %1, %1
 %endif
 %endmacro
 
-%macro SHUF 2
-%if cpuflag(sse2)
-pshufd%1, %2, q0123
+%macro SHUF 3
+%if cpuflag(avx)
+mova  %3, [%2 - 16]
+vperm2f128%1, %3, %3, 1
+vshufps   %1, %1, %1, q0123
+%elif cpuflag(sse2)
+pshufd%1, [%2], q0123
 %else
-mova  %1, %2
+mova  %1, [%2]
 shufps%1, %1, q0123
 %endif
 %endmacro
@@ -220,43 +224,43 @@ DCA_LFE_FIR 1
 ; reading backwards:  ptr1 = synth_buf + j + i; ptr2 = synth_buf + j - i
 ;~ a += window[i + j]  * (-synth_buf[15 - i + j])
 ;~ b += window[i + j + 16] * (synth_buf[i + j])
-SHUF  m5, [ptr2 + j + (15 - 3) * 4]
+SHUF  m5,  ptr2 + j + (15 - 3) * 4, m6
 mova  m6, [ptr1 + j]
 %if ARCH_X86_64
-SHUF m11, [ptr2 + j + (15 - 3) * 4 - mmsize]
+SHUF m11,  ptr2 + j + (15 - 3) * 4 - mmsize, m12
 mova m12, [ptr1 + j + mmsize]
 %endif
-mulps m6, [win  + %1 + j + 16 * 4]
-mulps m5, [win  + %1 + j]
+mulps m6, m6,  [win + %1 + j + 16 * 4]
+mulps m5, m5,  [win + %1 + j]
 %if ARCH_X86_64
-mulpsm12, [win  + %1 + j + mmsize + 16 * 4]
-mulpsm11, [win  + %1 + j + mmsize]
+mulpsm12, m12, [win + %1 + j + mmsize + 16 * 4]
+mulpsm11, m11, [win + %1 + j + mmsize]
 %endif
-addps m2, m6
-subps m1, m5
+addps m2, m2, m6
+subps m1, m1, m5
 %if ARCH_X86_64
-addps m8, m12
-subps m7, m11
+addps m8, m8, m12
+subps m7, m7, m11
 %endif
 ;~ c += window[i + j + 32] * (synth_buf[16 + i + j])
 ;~ d += window[i + j + 48] * (synth_buf[31 - i + j])
-SHUF  m6, [ptr2 + j + (31 - 3) * 4]
+SHUF  m6,  ptr2 + j + (31 - 3) * 4, m5
 mova  m5, [ptr1 + j + 16 * 4]
 %if ARCH_X86_64
-SHUF m12, [ptr2 + j + (31 - 3) * 4 - mmsize]
+SHUF m12,  ptr2 + j + (31 - 3) * 4 - mmsize, m11
 mova m11, [ptr1 + j + mmsize + 16 * 4]
 %endif
-mulps m5, [win  + %1 + j + 32 * 4]
-mulps m6, [win  + %1 + j + 48 * 4]
+mulps m5, m5,  [win + %1 + j + 32 * 4]
+mulps m6, m6,  [win + %1 + j + 48 * 4]
 %if ARCH_X86_64
-mulpsm11, [win  + %1 + j + mmsize + 32 * 4]
-mulpsm12, [win  + %1 + j + mmsize + 48 * 4]
+mulpsm11, m11, [win + %1 + j + mmsize + 32 * 4]
+mulpsm12, m12, [win + %1 + j + mmsize + 48 * 4]
 %endif
-addps m3, m5
-addps m4, m6
+addps m3, m3, m5
+addps m4, m4, m6
 %if ARCH_X86_64
-addps m9, m11
-addpsm10, m12
+addps m9, m9, m11
+addpsm10, m10, m12
 %endif
 subj, 64 * 4
 %endmacro
@@ -269,17 +273,21 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 
* ARCH_X86_64, \
   synth_buf, synth_buf2, window, out, off, scale
 %define scale m0
 %if ARCH_X86_32 || WIN64
-%if cpuflag(sse2)
+%if cpuflag(sse2) && notcpuflag(avx)
 movd   scale, scalem
+SPLATDm0
 %else
-movss  scale, scalem
+VBROADCASTSS  m0, scalem
 %endif
 ; Make sure offset is in a register and not on the stack
 %define OFFQ  r4q
 %else
+SPLATD  xmm0
+%if cpuflag(avx)
+vinsertf128   m0, m0, xmm0, 1
+%endif
 %define OFFQ  offq
 %endif
-SPLATDm0
 ; prepare inner counter limit 1
 mov  r5q, 480
 sub  r5q, offmp
@@ -346,11 +354,11 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 
* ARCH_X86_64, \
 %endif
 ;~ out[i]  = a * scale;
 ;~ out[i + 16] = b * scale;
-mulps m1, scale
-mulps m2, scale
+mulps m1, m1, scale
+mulps m2, m2, scale
 %if ARCH_X86_64
-mulps m7, scale
-mulps m8, scale
+mulps m7, m7, scale
+mulps m8, m8, scale
 %endif
 ;~ synth_buf2[i]  = c;
 ;~ synth_buf2[i + 16] = d;
@@ -379,3 +387,7 @@ SYNTH_FILTER
 %endif
 INIT_XMM sse2
 SYNTH_FILTER
+%if HAVE_AVX_EXTERNAL
+INIT_YMM avx
+SYNTH_FILTER
+%endif
diff --git 

[libav-devel] [PATCH 3/3] x86/synth_filter: add synth_filter_fma3

2014-03-03 Thread James Almer
Signed-off-by: James Almer jamr...@gmail.com
---
 libavcodec/x86/dcadsp.asm| 28 +++-
 libavcodec/x86/dcadsp_init.c |  4 
 2 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm
index 0d7c86e..e1842ef 100644
--- a/libavcodec/x86/dcadsp.asm
+++ b/libavcodec/x86/dcadsp.asm
@@ -230,16 +230,12 @@ DCA_LFE_FIR 1
 SHUF m11,  ptr2 + j + (15 - 3) * 4 - mmsize, m12
 mova m12, [ptr1 + j + mmsize]
 %endif
-mulps m6, m6,  [win + %1 + j + 16 * 4]
+FMULADD_PSm2, m6,  [win + %1 + j + 16 * 4], m2, m6
 mulps m5, m5,  [win + %1 + j]
-%if ARCH_X86_64
-mulpsm12, m12, [win + %1 + j + mmsize + 16 * 4]
-mulpsm11, m11, [win + %1 + j + mmsize]
-%endif
-addps m2, m2, m6
 subps m1, m1, m5
 %if ARCH_X86_64
-addps m8, m8, m12
+FMULADD_PSm8, m12, [win + %1 + j + mmsize + 16 * 4], m8, m12
+mulpsm11, m11, [win + %1 + j + mmsize]
 subps m7, m7, m11
 %endif
 ;~ c += window[i + j + 32] * (synth_buf[16 + i + j])
@@ -250,17 +246,11 @@ DCA_LFE_FIR 1
 SHUF m12,  ptr2 + j + (31 - 3) * 4 - mmsize, m11
 mova m11, [ptr1 + j + mmsize + 16 * 4]
 %endif
-mulps m5, m5,  [win + %1 + j + 32 * 4]
-mulps m6, m6,  [win + %1 + j + 48 * 4]
+FMULADD_PSm3, m5,  [win + %1 + j + 32 * 4], m3, m5
+FMULADD_PSm4, m6,  [win + %1 + j + 48 * 4], m4, m6
 %if ARCH_X86_64
-mulpsm11, m11, [win + %1 + j + mmsize + 32 * 4]
-mulpsm12, m12, [win + %1 + j + mmsize + 48 * 4]
-%endif
-addps m3, m3, m5
-addps m4, m4, m6
-%if ARCH_X86_64
-addps m9, m9, m11
-addpsm10, m10, m12
+FMULADD_PSm9, m11, [win + %1 + j + mmsize + 32 * 4], m9, m11
+FMULADD_PS   m10, m12, [win + %1 + j + mmsize + 48 * 4], m10, m12
 %endif
 subj, 64 * 4
 %endmacro
@@ -391,3 +381,7 @@ SYNTH_FILTER
 INIT_YMM avx
 SYNTH_FILTER
 %endif
+%if HAVE_FMA3_EXTERNAL
+INIT_YMM fma3
+SYNTH_FILTER
+%endif
diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c
index ab20635..132f75e 100644
--- a/libavcodec/x86/dcadsp_init.c
+++ b/libavcodec/x86/dcadsp_init.c
@@ -80,6 +80,7 @@ SYNTH_FILTER_FUNC(sse)
 #endif
 SYNTH_FILTER_FUNC(sse2)
 SYNTH_FILTER_FUNC(avx)
+SYNTH_FILTER_FUNC(fma3)
 
 av_cold void ff_synth_filter_init_x86(SynthFilterContext *s)
 {
@@ -96,4 +97,7 @@ av_cold void ff_synth_filter_init_x86(SynthFilterContext *s)
 if (EXTERNAL_AVX(cpu_flags)) {
 s-synth_filter_float = synth_filter_avx;
 }
+if (EXTERNAL_FMA3(cpu_flags)) {
+s-synth_filter_float = synth_filter_fma3;
+}
 }
-- 
1.8.3.2

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 1/2] af_channelmap: fix ONE_STR mapping mode

2014-03-03 Thread Anton Khirnov
get_channel() returns 0 on success

CC:libav-sta...@libav.org
---
 libavfilter/af_channelmap.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavfilter/af_channelmap.c b/libavfilter/af_channelmap.c
index 71d51e7..6bc8c3a 100644
--- a/libavfilter/af_channelmap.c
+++ b/libavfilter/af_channelmap.c
@@ -188,7 +188,7 @@ static av_cold int channelmap_init(AVFilterContext *ctx)
 s-map[i].out_channel_idx = i;
 break;
 case MAP_ONE_STR:
-if (!get_channel(mapping, in_ch, separator)) {
+if (get_channel(mapping, in_ch, separator) < 0) {
 av_log(ctx, AV_LOG_ERROR, err);
 return AVERROR(EINVAL);
 }
-- 
1.7.10.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel


[libav-devel] [PATCH 2/2] FATE: add a test for the ONE_STR mapping mode of the channelmap filter

2014-03-03 Thread Anton Khirnov
---
 tests/fate/filter-audio.mak|   30 +++-
 .../{channelmap = channelmap_one_int} |0
 tests/filtergraphs/channelmap_one_str  |1 +
 3 files changed, 24 insertions(+), 7 deletions(-)
 rename tests/filtergraphs/{channelmap = channelmap_one_int} (100%)
 create mode 100644 tests/filtergraphs/channelmap_one_str

diff --git a/tests/fate/filter-audio.mak b/tests/fate/filter-audio.mak
index 9bf148c..1b3dbfa 100644
--- a/tests/fate/filter-audio.mak
+++ b/tests/fate/filter-audio.mak
@@ -41,13 +41,29 @@ $(FATE_ATRIM): SRC = 
$(TARGET_PATH)/tests/data/asynth-44100-2.wav
 
 FATE_FILTER-$(call FILTERDEMDECENCMUX, ATRIM, WAV, PCM_S16LE, PCM_S16LE, WAV) 
+= $(FATE_ATRIM)
 
-FATE_AFILTER-$(call FILTERDEMDECENCMUX, CHANNELMAP, WAV, PCM_S16LE, PCM_S16LE, 
WAV) += fate-filter-channelmap
-fate-filter-channelmap: tests/data/filtergraphs/channelmap
-fate-filter-channelmap: SRC = $(TARGET_PATH)/tests/data/asynth-44100-6.wav
-fate-filter-channelmap: tests/data/asynth-44100-6.wav
-fate-filter-channelmap: CMD = md5 -i $(SRC) -filter_complex_script 
$(TARGET_PATH)/tests/data/filtergraphs/channelmap -f wav -flags +bitexact
-fate-filter-channelmap: CMP = oneline
-fate-filter-channelmap: REF = 21f1977c4f9705e2057083f84764e685
+FATE_AFILTER-$(call FILTERDEMDECENCMUX, CHANNELMAP, WAV, PCM_S16LE, PCM_S16LE, 
WAV) += fate-filter-channelmap-one-int
+fate-filter-channelmap-one-int: tests/data/filtergraphs/channelmap_one_int
+fate-filter-channelmap-one-int: SRC = 
$(TARGET_PATH)/tests/data/asynth-44100-6.wav
+fate-filter-channelmap-one-int: tests/data/asynth-44100-6.wav
+fate-filter-channelmap-one-int: CMD = md5 -i $(SRC) -filter_complex_script 
$(TARGET_PATH)/tests/data/filtergraphs/channelmap_one_int -f wav -flags 
+bitexact
+fate-filter-channelmap-one-int: CMP = oneline
+fate-filter-channelmap-one-int: REF = 21f1977c4f9705e2057083f84764e685
+
+FATE_AFILTER-$(call FILTERDEMDECENCMUX, CHANNELMAP, WAV, PCM_S16LE, PCM_S16LE, 
WAV) += fate-filter-channelmap-one-str
+fate-filter-channelmap-one-str: tests/data/filtergraphs/channelmap_one_str
+fate-filter-channelmap-one-str: SRC = 
$(TARGET_PATH)/tests/data/asynth-44100-2.wav
+fate-filter-channelmap-one-str: tests/data/asynth-44100-2.wav
+fate-filter-channelmap-one-str: CMD = md5 -i $(SRC) -filter_complex_script 
$(TARGET_PATH)/tests/data/filtergraphs/channelmap_one_str -f wav -flags 
+bitexact
+fate-filter-channelmap-one-str: CMP = oneline
+fate-filter-channelmap-one-str: REF = 9fe9bc452282dfd94fd80e9491607a0c
+
+FATE_AFILTER-$(call FILTERDEMDECENCMUX, CHANNELMAP, WAV, PCM_S16LE, PCM_S16LE, 
WAV) += fate-filter-channelmap-one-str
+fate-filter-channelmap-one-str: tests/data/filtergraphs/channelmap_one_str
+fate-filter-channelmap-one-str: SRC = 
$(TARGET_PATH)/tests/data/asynth-44100-2.wav
+fate-filter-channelmap-one-str: tests/data/asynth-44100-2.wav
+fate-filter-channelmap-one-str: CMD = md5 -i $(SRC) -filter_complex_script 
$(TARGET_PATH)/tests/data/filtergraphs/channelmap_one_str -f wav -flags 
+bitexact
+fate-filter-channelmap-one-str: CMP = oneline
+fate-filter-channelmap-one-str: REF = 9fe9bc452282dfd94fd80e9491607a0c
 
 FATE_AFILTER-$(call FILTERDEMDECENCMUX, CHANNELSPLIT, WAV, PCM_S16LE, 
PCM_S16LE, PCM_S16LE) += fate-filter-channelsplit
 fate-filter-channelsplit: SRC = $(TARGET_PATH)/tests/data/asynth-44100-2.wav
diff --git a/tests/filtergraphs/channelmap 
b/tests/filtergraphs/channelmap_one_int
similarity index 100%
rename from tests/filtergraphs/channelmap
rename to tests/filtergraphs/channelmap_one_int
diff --git a/tests/filtergraphs/channelmap_one_str 
b/tests/filtergraphs/channelmap_one_str
new file mode 100644
index 000..3c658fc
--- /dev/null
+++ b/tests/filtergraphs/channelmap_one_str
@@ -0,0 +1 @@
+channelmap=map=FR|FL:channel_layout=stereo
-- 
1.7.10.4

___
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel