[libav-devel] [PATCH 2/6] movenc: write hvcC tag for HEVC.
--- libavformat/Makefile |2 +- libavformat/hevc.c | 1076 ++ libavformat/hevc.h | 50 +++ libavformat/movenc.c | 13 + 4 files changed, 1140 insertions(+), 1 deletion(-) create mode 100644 libavformat/hevc.c create mode 100644 libavformat/hevc.h diff --git a/libavformat/Makefile b/libavformat/Makefile index d491d43..a3cd504 100644 --- a/libavformat/Makefile +++ b/libavformat/Makefile @@ -179,7 +179,7 @@ OBJS-$(CONFIG_MM_DEMUXER)+= mm.o OBJS-$(CONFIG_MMF_DEMUXER) += mmf.o pcm.o OBJS-$(CONFIG_MMF_MUXER) += mmf.o OBJS-$(CONFIG_MOV_DEMUXER) += mov.o isom.o mov_chan.o -OBJS-$(CONFIG_MOV_MUXER) += movenc.o isom.o avc.o \ +OBJS-$(CONFIG_MOV_MUXER) += movenc.o isom.o avc.o hevc.o \ movenchint.o mov_chan.o OBJS-$(CONFIG_MP2_MUXER) += mp3enc.o rawenc.o id3v2enc.o OBJS-$(CONFIG_MP3_DEMUXER) += mp3dec.o diff --git a/libavformat/hevc.c b/libavformat/hevc.c new file mode 100644 index 000..f359eb5 --- /dev/null +++ b/libavformat/hevc.c @@ -0,0 +1,1076 @@ +/* + * Copyright (c) 2014 Tim Walker tdskywal...@gmail.com + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include libavcodec/get_bits.h +#include libavcodec/golomb.h +#include libavcodec/hevc.h +#include libavutil/intreadwrite.h +#include avc.h +#include avio.h +#include hevc.h + +#define MAX_SPATIAL_SEGMENTATION 4096 // max. value of u(12) field + +typedef struct HVCCNALUnitArray { +uint8_t array_completeness; +uint8_t NAL_unit_type; +uint16_t numNalus; +uint16_t *nalUnitLength; +uint8_t **nalUnit; +} HVCCNALUnitArray; + +typedef struct HEVCDecoderConfigurationRecord { +uint8_t configurationVersion; +uint8_t general_profile_space; +uint8_t general_tier_flag; +uint8_t general_profile_idc; +uint32_t general_profile_compatibility_flags; +uint64_t general_constraint_indicator_flags; +uint8_t general_level_idc; +uint16_t min_spatial_segmentation_idc; +uint8_t parallelismType; +uint8_t chromaFormat; +uint8_t bitDepthLumaMinus8; +uint8_t bitDepthChromaMinus8; +uint16_t avgFrameRate; +uint8_t constantFrameRate; +uint8_t numTemporalLayers; +uint8_t temporalIdNested; +uint8_t lengthSizeMinusOne; +uint8_t numOfArrays; +HVCCNALUnitArray *array; +} HEVCDecoderConfigurationRecord; + +typedef struct HVCCProfileTierLevel { +uint8_t profile_space; +uint8_t tier_flag; +uint8_t profile_idc; +uint32_t profile_compatibility_flags; +uint64_t constraint_indicator_flags; +uint8_t level_idc; +} HVCCProfileTierLevel; + +static void hvcc_update_ptl(HEVCDecoderConfigurationRecord *hvcc, +HVCCProfileTierLevel *ptl) +{ +/* + * The value of general_profile_space in all the parameter sets must be + * identical. + */ +hvcc-general_profile_space = ptl-profile_space; + +/* + * The level indication general_level_idc must indicate a level of + * capability equal to or greater than the highest level indicated for the + * highest tier in all the parameter sets. 
+ */ +if (hvcc-general_tier_flag ptl-tier_flag) +hvcc-general_level_idc = ptl-level_idc; +else +hvcc-general_level_idc = FFMAX(hvcc-general_level_idc, ptl-level_idc); + +/* + * The tier indication general_tier_flag must indicate a tier equal to or + * greater than the highest tier indicated in all the parameter sets. + */ +hvcc-general_tier_flag = FFMAX(hvcc-general_tier_flag, ptl-tier_flag); + +/* + * The profile indication general_profile_idc must indicate a profile to + * which the stream associated with this configuration record conforms. + * + * If the sequence parameter sets are marked with different profiles, then + * the stream may need examination to determine which profile, if any, the + * entire stream conforms to. If the entire stream is not examined, or the + * examination reveals that there is no profile to which the entire stream + * conforms, then the entire stream must be split into two or
[libav-devel] [PATCH 3/6] movenc: enable Annex B to MP4 conversion for HEVC tracks.
--- Note: this includes a facility for filtering parameter set NALUs from the bitstream and knowing whether any NALUs were extracted (so that we can e.g. decide whether to generate a new MP4 sample entry with a new hvcC). It's unused as I'm not sure how the MP4 side of things would work. libavformat/hevc.c | 101 +++ libavformat/hevc.h | 48 libavformat/movenc.c | 9 + 3 files changed, 158 insertions(+) diff --git a/libavformat/hevc.c b/libavformat/hevc.c index f359eb5..152e28a 100644 --- a/libavformat/hevc.c +++ b/libavformat/hevc.c @@ -1014,6 +1014,107 @@ static int hvcc_write(AVIOContext *pb, HEVCDecoderConfigurationRecord *hvcc) return 0; } +int ff_hevc_annexb2mp4(AVIOContext *pb, const uint8_t *buf_in, + int size, int filter_ps, int *ps_count) +{ +int num_ps = 0, ret = 0; +uint8_t *buf, *end, *start = NULL; + +if (!filter_ps) { +ret = ff_avc_parse_nal_units(pb, buf_in, size); +goto end; +} + +ret = ff_avc_parse_nal_units_buf(buf_in, start, size); +if (ret 0) +goto end; + +ret = 0; +buf = start; +end = start + size; + +while (end - buf 4) { +uint32_t len = FFMIN(AV_RB32(buf), end - buf - 4); +uint8_t type = (buf[4] 1) 0x3f; + +buf += 4; + +switch (type) { +case NAL_VPS: +case NAL_SPS: +case NAL_PPS: +num_ps++; +break; +default: +ret += 4 + len; +avio_wb32 (pb, len); +avio_write(pb, buf, len); +break; +} + +buf += len; +} + +end: +free(start); +if (ps_count) +*ps_count = num_ps; +return ret; +} + +int ff_hevc_annexb2mp4_buf(const uint8_t *buf_in, uint8_t **buf_out, + int *size, int filter_ps, int *ps_count) +{ +AVIOContext *pb; +int num_ps = 0, ret = 0; +uint8_t *buf, *end, *start = NULL; + +if (!filter_ps) { +ret = ff_avc_parse_nal_units_buf(buf_in, buf_out, size); +goto end; +} + +ret = avio_open_dyn_buf(pb); +if (ret 0) +goto end; + +ret = ff_avc_parse_nal_units_buf(buf_in, start, size); +if (ret 0) +goto end; + +buf = start; +end = start + *size; + +while (end - buf 4) { +uint32_t len = FFMIN(AV_RB32(buf), end - buf - 4); +uint8_t type = (buf[4] 1) 0x3f; + +buf 
+= 4; + +switch (type) { +case NAL_VPS: +case NAL_SPS: +case NAL_PPS: +num_ps++; +break; +default: +avio_wb32 (pb, len); +avio_write(pb, buf, len); +break; +} + +buf += len; +} + +*size = avio_close_dyn_buf(pb, buf_out); + +end: +free(start); +if (ps_count) +*ps_count = num_ps; +return ret; +} + int ff_isom_write_hvcc(AVIOContext *pb, const uint8_t *data, int size, int ps_array_completeness) { diff --git a/libavformat/hevc.h b/libavformat/hevc.h index 82525ac..f394342 100644 --- a/libavformat/hevc.h +++ b/libavformat/hevc.h @@ -30,6 +30,54 @@ #include avio.h /** + * Writes Annex B formatted HEVC NAL units to the provided AVIOContext. + * + * The NAL units are converted to an MP4-compatible format (start code prefixes + * are replaced by 4-byte size fields, as per ISO/IEC 14496-15). + * + * If filter_ps is non-zero, any HEVC parameter sets found in the input will be + * discarded, and *ps_count will be set to the number of discarded PS NAL units. + * + * @param pb address of the AVIOContext where the data shall be written + * @param buf_in address of the buffer holding the input data + * @param size size (in bytes) of the input buffer + * @param filter_ps whether to write parameter set NAL units to the output (0) + *or to discard them (non-zero) + * @param ps_count address of the variable where the number of discarded + *parameter set NAL units shall be written, may be NULL + * @return the amount (in bytes) of data written in case of success, a negative + * value corresponding to an AVERROR code in case of failure + */ +int ff_hevc_annexb2mp4(AVIOContext *pb, const uint8_t *buf_in, + int size, int filter_ps, int *ps_count); + +/** + * Writes Annex B formatted HEVC NAL units to a data buffer. + * + * The NAL units are converted to an MP4-compatible format (start code prefixes + * are replaced by 4-byte size fields, as per ISO/IEC 14496-15). 
+ * + * If filter_ps is non-zero, any HEVC parameter sets found in the input will be + * discarded, and *ps_count will be set to the number of discarded PS NAL units. + * + * On output, *size holds the size (in bytes) of the output data buffer. + * + * @param buf_in address of the buffer holding the input data + * @param size address of the variable holding the size (in bytes) of the input + *
[libav-devel] [PATCH 1/6] movenc: use 'hev1' tag for HEVC in MODE_MOV.
'hvc1' requires that parameter set NAL units be present only in the sample entry, but not in the samples themselves, requiring that additional parameter sets, if present, be filtered out of the samples and placed in new, additional sample entries if they override or otherwise conflict with the parameter sets present in the first sample entry. We do not have any way of doing this at present, so the files we produce can only comply with the restrictions set for the 'hev1' sample entry name in ISO/IEC 14496-15. --- Note: unlike avplay, VLC does not support hev1 for some reason. libavformat/isom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavformat/isom.c b/libavformat/isom.c index 9b32b7d..a6197ab 100644 --- a/libavformat/isom.c +++ b/libavformat/isom.c @@ -136,8 +136,8 @@ const AVCodecTag ff_codec_movvideo_tags[] = { { AV_CODEC_ID_RAWVIDEO, MKTAG('W', 'R', 'A', 'W') }, -{ AV_CODEC_ID_HEVC, MKTAG('h', 'v', 'c', '1') }, /* HEVC/H.265 which indicates parameter sets shall not be in ES */ { AV_CODEC_ID_HEVC, MKTAG('h', 'e', 'v', '1') }, /* HEVC/H.265 which indicates parameter sets may be in ES */ +{ AV_CODEC_ID_HEVC, MKTAG('h', 'v', 'c', '1') }, /* HEVC/H.265 which indicates parameter sets shall not be in ES */ { AV_CODEC_ID_H264, MKTAG('a', 'v', 'c', '1') }, /* AVC-1/H.264 */ { AV_CODEC_ID_H264, MKTAG('a', 'i', '5', 'p') }, /* AVC-Intra 50M 720p24/30/60 */ -- 1.8.3.4 (Apple Git-47) ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 5/6] matroskaenc: write private data in hvcC format for HEVC.
--- libavformat/Makefile | 2 +- libavformat/matroskaenc.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/libavformat/Makefile b/libavformat/Makefile index a3cd504..5694314 100644 --- a/libavformat/Makefile +++ b/libavformat/Makefile @@ -168,7 +168,7 @@ OBJS-$(CONFIG_M4V_MUXER) += rawenc.o OBJS-$(CONFIG_MATROSKA_DEMUXER) += matroskadec.o matroska.o \ isom.o rmsipr.o OBJS-$(CONFIG_MATROSKA_MUXER)+= matroskaenc.o matroska.o \ -isom.o avc.o \ +isom.o avc.o hevc.o \ flacenc_header.o avlanguage.o wv.o OBJS-$(CONFIG_MD5_MUXER) += md5enc.o OBJS-$(CONFIG_MJPEG_DEMUXER) += rawdec.o diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c index cc645a5..8fece1f 100644 --- a/libavformat/matroskaenc.c +++ b/libavformat/matroskaenc.c @@ -22,6 +22,7 @@ #include stdint.h #include avc.h +#include hevc.h #include avformat.h #include avlanguage.h #include flacenc.h @@ -500,6 +501,8 @@ static int mkv_write_codecprivate(AVFormatContext *s, AVIOContext *pb, AVCodecCo ret = put_wv_codecpriv(dyn_cp, codec); else if (codec-codec_id == AV_CODEC_ID_H264) ret = ff_isom_write_avcc(dyn_cp, codec-extradata, codec-extradata_size); +else if (codec-codec_id == AV_CODEC_ID_HEVC) +ret = ff_isom_write_hvcc(dyn_cp, codec-extradata, codec-extradata_size, 0); else if (codec-codec_id == AV_CODEC_ID_ALAC) { if (codec-extradata_size 36) { av_log(s, AV_LOG_ERROR, -- 1.8.3.4 (Apple Git-47) ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 4/6] movenc: allow muxing HEVC in MODE_MP4.
--- Uses 'hev1' for the same reasons as in MODE_MOV. libavformat/isom.c | 1 + libavformat/movenc.c | 1 + 2 files changed, 2 insertions(+) diff --git a/libavformat/isom.c b/libavformat/isom.c index a6197ab..76c455b 100644 --- a/libavformat/isom.c +++ b/libavformat/isom.c @@ -33,6 +33,7 @@ const AVCodecTag ff_mp4_obj_type[] = { { AV_CODEC_ID_MOV_TEXT, 0x08 }, { AV_CODEC_ID_MPEG4 , 0x20 }, { AV_CODEC_ID_H264, 0x21 }, +{ AV_CODEC_ID_HEVC, 0x23 }, { AV_CODEC_ID_AAC , 0x40 }, { AV_CODEC_ID_MP4ALS , 0x40 }, /* 14496-3 ALS */ { AV_CODEC_ID_MPEG2VIDEO , 0x61 }, /* MPEG2 Main */ diff --git a/libavformat/movenc.c b/libavformat/movenc.c index 9fa9d7e..7930aa9 100644 --- a/libavformat/movenc.c +++ b/libavformat/movenc.c @@ -752,6 +752,7 @@ static int mp4_get_codec_tag(AVFormatContext *s, MOVTrack *track) return 0; if (track-enc-codec_id == AV_CODEC_ID_H264) tag = MKTAG('a','v','c','1'); +else if (track-enc-codec_id == AV_CODEC_ID_HEVC) tag = MKTAG('h','e','v','1'); else if (track-enc-codec_id == AV_CODEC_ID_AC3) tag = MKTAG('a','c','-','3'); else if (track-enc-codec_id == AV_CODEC_ID_DIRAC) tag = MKTAG('d','r','a','c'); else if (track-enc-codec_id == AV_CODEC_ID_MOV_TEXT) tag = MKTAG('t','x','3','g'); -- 1.8.3.4 (Apple Git-47) ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 6/6] matroskaenc: enable Annex B to MP4 conversion for HEVC tracks.
--- libavformat/matroskaenc.c | 4 1 file changed, 4 insertions(+) diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c index 8fece1f..08f5552 100644 --- a/libavformat/matroskaenc.c +++ b/libavformat/matroskaenc.c @@ -1163,6 +1163,10 @@ static void mkv_write_block(AVFormatContext *s, AVIOContext *pb, if (codec-codec_id == AV_CODEC_ID_H264 codec-extradata_size 0 (AV_RB24(codec-extradata) == 1 || AV_RB32(codec-extradata) == 1)) ff_avc_parse_nal_units_buf(pkt-data, data, size); +else if (codec-codec_id == AV_CODEC_ID_HEVC codec-extradata_size 6 + (AV_RB24(codec-extradata) == 1 || AV_RB32(codec-extradata) == 1)) +/* extradata is Annex B, assume the bitstream is too and convert it */ +ff_hevc_annexb2mp4_buf(pkt-data, data, size, 0, NULL); else if (codec-codec_id == AV_CODEC_ID_WAVPACK) { int ret = mkv_strip_wavpack(pkt-data, data, size); if (ret 0) { -- 1.8.3.4 (Apple Git-47) ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 1/2] matroskaenc: allow override of writing application tag
--- libavformat/matroskaenc.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/libavformat/matroskaenc.c b/libavformat/matroskaenc.c index cc645a5..3ab3139 100644 --- a/libavformat/matroskaenc.c +++ b/libavformat/matroskaenc.c @@ -805,7 +805,8 @@ static int mkv_write_tag(AVFormatContext *s, AVDictionary *m, unsigned int eleme end_ebml_master(s-pb, targets); while ((t = av_dict_get(m, , t, AV_DICT_IGNORE_SUFFIX))) -if (av_strcasecmp(t-key, title)) +if (av_strcasecmp(t-key, title) +av_strcasecmp(t-key, encoding_tool)) mkv_write_simpletag(s-pb, t); end_ebml_master(s-pb, tag); @@ -965,7 +966,10 @@ static int mkv_write_header(AVFormatContext *s) segment_uid[i] = av_lfg_get(lfg); put_ebml_string(pb, MATROSKA_ID_MUXINGAPP , LIBAVFORMAT_IDENT); -put_ebml_string(pb, MATROSKA_ID_WRITINGAPP, LIBAVFORMAT_IDENT); +if ((tag = av_dict_get(s-metadata, encoding_tool, NULL, 0))) +put_ebml_string(pb, MATROSKA_ID_WRITINGAPP, tag-value); +else +put_ebml_string(pb, MATROSKA_ID_WRITINGAPP, LIBAVFORMAT_IDENT); put_ebml_binary(pb, MATROSKA_ID_SEGMENTUID, segment_uid, 16); } -- 1.8.5.3 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 2/2] movenc: allow override of writing application tag
--- libavformat/movenc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libavformat/movenc.c b/libavformat/movenc.c index 762cfef..6344e38 100644 --- a/libavformat/movenc.c +++ b/libavformat/movenc.c @@ -1858,7 +1858,8 @@ static int mov_write_ilst_tag(AVIOContext *pb, MOVMuxContext *mov, mov_write_string_metadata(s, pb, \251wrt, composer , 1); mov_write_string_metadata(s, pb, \251alb, album, 1); mov_write_string_metadata(s, pb, \251day, date , 1); -mov_write_string_tag(pb, \251too, LIBAVFORMAT_IDENT, 0, 1); +if (!mov_write_string_metadata(s, pb, \251too, encoding_tool, 1)) +mov_write_string_tag(pb, \251too, LIBAVFORMAT_IDENT, 0, 1); mov_write_string_metadata(s, pb, \251cmt, comment , 1); mov_write_string_metadata(s, pb, \251gen, genre, 1); mov_write_string_metadata(s, pb, \251cpy, copyright, 1); -- 1.8.5.3 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 1/2] matroskaenc: allow override of writing application tag
On 03/03/14 22:20, John Stebbins wrote: --- libavformat/matroskaenc.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) Probably ok. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
Re: [libav-devel] [PATCH 2/2] movenc: allow override of writing application tag
On 03/03/14 22:20, John Stebbins wrote: --- libavformat/movenc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) Ok. ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 1/4] timer: use mach_absolute_time as high resolution clock on darwin
From: Janne Grunau j...@jannau.net Not guaranteed to be in nanosecond resolution. On iOS 7 the duration of one tick is 125/3 ns which is still more than an order of magnitude better than microseconds. Replace decicycles with the neutral UNITS. Decicycles is strange but tenths of a nanosecond and unspecific deci-ticks for mach_absolute_time is just silly. --- configure | 4 libavutil/timer.h | 14 +++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/configure b/configure index b143335..411b586 100755 --- a/configure +++ b/configure @@ -1383,6 +1383,8 @@ HAVE_LIST= local_aligned_8 localtime_r loongson +mach_absolute_time +mach_mach_time_h machine_ioctl_bt848_h machine_ioctl_meteor_h machine_rw_barrier @@ -3846,6 +3848,7 @@ check_struct sys/time.h sys/resource.h struct rusage ru_maxrss check_func gettimeofday check_func isatty check_func localtime_r +check_func mach_absolute_time check_func ${malloc_prefix}memalign enable memalign check_func mkstemp check_func mmap @@ -3881,6 +3884,7 @@ check_header dlfcn.h check_header dxva.h check_header dxva2api.h check_header io.h +check_header mach/mach_time.h check_header malloc.h check_header poll.h check_header sys/mman.h diff --git a/libavutil/timer.h b/libavutil/timer.h index d2c5001..0d93d7c 100644 --- a/libavutil/timer.h +++ b/libavutil/timer.h @@ -32,6 +32,10 @@ #include config.h +#if HAVE_MACH_MACH_TIME_H +#include mach/mach_time.h +#endif + #include log.h #if ARCH_ARM @@ -44,8 +48,12 @@ # include x86/timer.h #endif -#if !defined(AV_READ_TIME) HAVE_GETHRTIME -# define AV_READ_TIME gethrtime +#if !defined(AV_READ_TIME) +# if HAVE_GETHRTIME +# define AV_READ_TIME gethrtime +# elif HAVE_MACH_ABSOLUTE_TIME +# define AV_READ_TIME mach_absolute_time +# endif #endif #ifdef AV_READ_TIME @@ -68,7 +76,7 @@ tskip_count++;\ if (((tcount + tskip_count) (tcount + tskip_count - 1)) == 0) { \ av_log(NULL, AV_LOG_ERROR,\ - %PRIu64 decicycles in %s, %d runs, %d skips\n, \ + %PRIu64 UNITS in %s, %d runs, %d skips\n, \ tsum *
10 / tcount, id, tcount, tskip_count); \ } \ } -- 1.9.0 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 4/4] aarch64: float_dsp NEON assembler
Ported from arm NEON and added vector_dmul_scalar. Functions between 1.5 and 5 times faster than the C implementations using Apple's clang-503.0.19 on A7. --- libavutil/aarch64/Makefile | 5 +- libavutil/aarch64/float_dsp_init_aarch64.c | 69 ++ libavutil/aarch64/float_dsp_neon.S | 202 + libavutil/float_dsp.c | 15 ++- libavutil/float_dsp.h | 1 + 5 files changed, 284 insertions(+), 8 deletions(-) create mode 100644 libavutil/aarch64/float_dsp_init_aarch64.c create mode 100644 libavutil/aarch64/float_dsp_neon.S diff --git a/libavutil/aarch64/Makefile b/libavutil/aarch64/Makefile index 13d26a1..b4500fd 100644 --- a/libavutil/aarch64/Makefile +++ b/libavutil/aarch64/Makefile @@ -1 +1,4 @@ -OBJS += aarch64/cpu.o +OBJS += aarch64/cpu.o \ +aarch64/float_dsp_init_aarch64.o \ + +NEON-OBJS += aarch64/float_dsp_neon.o diff --git a/libavutil/aarch64/float_dsp_init_aarch64.c b/libavutil/aarch64/float_dsp_init_aarch64.c new file mode 100644 index 000..37d34c9 --- /dev/null +++ b/libavutil/aarch64/float_dsp_init_aarch64.c @@ -0,0 +1,69 @@ +/* + * ARM NEON optimised Float DSP functions + * Copyright (c) 2008 Mans Rullgard m...@mansr.com + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with Libav; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include stdint.h + +#include libavutil/attributes.h +#include libavutil/cpu.h +#include libavutil/float_dsp.h +#include cpu.h + +void ff_vector_fmul_neon(float *dst, const float *src0, const float *src1, + int len); + +void ff_vector_fmac_scalar_neon(float *dst, const float *src, float mul, +int len); + +void ff_vector_fmul_scalar_neon(float *dst, const float *src, float mul, +int len); + +void ff_vector_dmul_scalar_neon(double *dst, const double *src, double mul, +int len); + +void ff_vector_fmul_window_neon(float *dst, const float *src0, +const float *src1, const float *win, int len); + +void ff_vector_fmul_add_neon(float *dst, const float *src0, const float *src1, + const float *src2, int len); + +void ff_vector_fmul_reverse_neon(float *dst, const float *src0, + const float *src1, int len); + +void ff_butterflies_float_neon(float *v1, float *v2, int len); + +float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len); + +av_cold void ff_float_dsp_init_aarch64(AVFloatDSPContext *fdsp) +{ +int cpu_flags = av_get_cpu_flags(); + +if (have_neon(cpu_flags)) { +fdsp-butterflies_float = ff_butterflies_float_neon; +fdsp-scalarproduct_float = ff_scalarproduct_float_neon; +fdsp-vector_dmul_scalar = ff_vector_dmul_scalar_neon; +fdsp-vector_fmul = ff_vector_fmul_neon; +fdsp-vector_fmac_scalar = ff_vector_fmac_scalar_neon; +fdsp-vector_fmul_add = ff_vector_fmul_add_neon; +fdsp-vector_fmul_reverse = ff_vector_fmul_reverse_neon; +fdsp-vector_fmul_scalar = ff_vector_fmul_scalar_neon; +fdsp-vector_fmul_window = ff_vector_fmul_window_neon; +} +} diff --git a/libavutil/aarch64/float_dsp_neon.S b/libavutil/aarch64/float_dsp_neon.S new file mode 100644 index 000..776542c --- /dev/null +++ b/libavutil/aarch64/float_dsp_neon.S @@ -0,0 +1,202 @@ 
+/* + * ARM NEON optimised Float DSP functions + * Copyright (c) 2008 Mans Rullgard m...@mansr.com + * Copyright (c) 2014 Janne Grunau janne-li...@jannau.net + * + * This file is part of Libav. + * + * Libav is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * Libav is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public
[libav-devel] [PATCH 2/4] float_dsp: fix errors in documentation
--- libavutil/float_dsp.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/libavutil/float_dsp.h b/libavutil/float_dsp.h index b215dad..0eb02f8 100644 --- a/libavutil/float_dsp.h +++ b/libavutil/float_dsp.h @@ -113,7 +113,7 @@ typedef struct AVFloatDSPContext { * constraints: 32-byte aligned * @param src1 second input vector * constraints: 32-byte aligned - * @param src1 third input vector + * @param src2 third input vector * constraints: 32-byte aligned * @param len number of elements in the input * constraints: multiple of 16 @@ -132,8 +132,6 @@ typedef struct AVFloatDSPContext { * constraints: 32-byte aligned * @param src1 second input vector * constraints: 32-byte aligned - * @param src1 third input vector - * constraints: 32-byte aligned * @param len number of elements in the input * constraints: multiple of 16 */ -- 1.9.0 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 3/4] float_dsp: add test program for float dsp
Not hooked up to FATE due to fear of random failures due to float instability. float_dsp-test: use 16 * 3 * 5 as length --- libavutil/Makefile| 1 + libavutil/float_dsp.c | 274 ++ 2 files changed, 275 insertions(+) diff --git a/libavutil/Makefile b/libavutil/Makefile index f663f18..5869e67 100644 --- a/libavutil/Makefile +++ b/libavutil/Makefile @@ -122,6 +122,7 @@ TESTPROGS = adler32 \ des \ eval\ fifo\ +float_dsp \ hmac\ lfg \ lls \ diff --git a/libavutil/float_dsp.c b/libavutil/float_dsp.c index 3707e06..22c3c15 100644 --- a/libavutil/float_dsp.c +++ b/libavutil/float_dsp.c @@ -132,3 +132,277 @@ av_cold void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact) ff_float_dsp_init_x86(fdsp); #endif } + +#ifdef TEST + +#include float.h +#include math.h +#include stdint.h +#include string.h + +#include cpu.h +#include lfg.h +#include log.h +#include mem.h +#include random_seed.h + +#define LEN 240 + +static void fill_float_array(AVLFG *lfg, float *a, int len) +{ +int i; +double bmg[2], stddev = 10.0, mean = 0.0; + +for (i = 0; i len; i += 2) { +av_bmg_get(lfg, bmg); +a[i] = bmg[0] * stddev + mean; +a[i + 1] = bmg[1] * stddev + mean; +} +} +static int compare_floats(const float *a, const float *b, int len, + float max_diff) +{ +int i; +for (i = 0; i len; i++) { +if (fabsf(a[i] - b[i]) max_diff) { +av_log(NULL, AV_LOG_ERROR, %d: %- .12f - %- .12f = % .12g\n, + i, a[i], b[i], a[i] - b[i]); +return -1; +} +} +return 0; +} + +static void fill_double_array(AVLFG *lfg, double *a, int len) +{ +int i; +double bmg[2], stddev = 10.0, mean = 0.0; + +for (i = 0; i len; i += 2) { +av_bmg_get(lfg, bmg); +a[i] = bmg[0] * stddev + mean; +a[i + 1] = bmg[1] * stddev + mean; +} +} + +static int compare_doubles(const double *a, const double *b, int len, + double max_diff) +{ +int i; + +for (i = 0; i len; i++) { +if (fabs(a[i] - b[i]) max_diff) { +av_log(NULL, AV_LOG_ERROR, %d: %- .12f - %- .12f = % .12g\n, + i, a[i], b[i], a[i] - b[i]); +return -1; +} +} +return 0; +} + +static 
int test_vector_fmul(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp, +const float *v1, const float *v2) +{ +DECLARE_ALIGNED(32, float, cdst)[LEN]; +DECLARE_ALIGNED(32, float, odst)[LEN]; +int ret; + +cdsp-vector_fmul(cdst, v1, v2, LEN); +fdsp-vector_fmul(odst, v1, v2, LEN); + +if (ret = compare_floats(cdst, odst, LEN, FLT_EPSILON)) +av_log(NULL, AV_LOG_ERROR, %s failed\n, __func__); + +return ret; +} + +static int test_vector_fmac_scalar(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp, + const float *v1, const float *src0, float scale) +{ +DECLARE_ALIGNED(32, float, cdst)[LEN]; +DECLARE_ALIGNED(32, float, odst)[LEN]; +int ret; + +memcpy(cdst, v1, LEN * sizeof(*v1)); +memcpy(odst, v1, LEN * sizeof(*v1)); + +cdsp-vector_fmac_scalar(cdst, src0, scale, LEN); +fdsp-vector_fmac_scalar(odst, src0, scale, LEN); + +if (ret = compare_floats(cdst, odst, LEN, 0.005)) +av_log(NULL, AV_LOG_ERROR, %s failed\n, __func__); + +return ret; +} + +static int test_vector_fmul_scalar(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp, + const float *v1, float scale) +{ +DECLARE_ALIGNED(32, float, cdst)[LEN]; +DECLARE_ALIGNED(32, float, odst)[LEN]; +int ret; + +cdsp-vector_fmul_scalar(cdst, v1, scale, LEN); +fdsp-vector_fmul_scalar(odst, v1, scale, LEN); + +if (ret = compare_floats(cdst, odst, LEN, FLT_EPSILON)) +av_log(NULL, AV_LOG_ERROR, %s failed\n, __func__); + +return ret; +} + +static int test_vector_dmul_scalar(AVFloatDSPContext *fdsp, AVFloatDSPContext *cdsp, + const double *v1, double scale) +{ +DECLARE_ALIGNED(32, double, cdst)[LEN]; +DECLARE_ALIGNED(32, double, odst)[LEN]; +int ret; + +cdsp-vector_dmul_scalar(cdst, v1, scale, LEN); +fdsp-vector_dmul_scalar(odst, v1, scale, LEN); + +if (ret = compare_doubles(cdst, odst, LEN, DBL_EPSILON)) +av_log(NULL,
[libav-devel] [PATCH 1/3] x86/synth_filter: add synth_filter_sse
Build only on x86_32 targets. Signed-off-by: James Almer jamr...@gmail.com --- libavcodec/x86/dcadsp.asm| 55 +--- libavcodec/x86/dcadsp_init.c | 44 +-- 2 files changed, 69 insertions(+), 30 deletions(-) diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm index 56039ba..970ec3d 100644 --- a/libavcodec/x86/dcadsp.asm +++ b/libavcodec/x86/dcadsp.asm @@ -199,15 +199,31 @@ INIT_XMM sse DCA_LFE_FIR 0 DCA_LFE_FIR 1 -INIT_XMM sse2 +%macro SETZERO 1 +%if cpuflag(sse2) +pxor %1, %1 +%else +xorps %1, %1, %1 +%endif +%endmacro + +%macro SHUF 2 +%if cpuflag(sse2) +pshufd%1, %2, q0123 +%else +mova %1, %2 +shufps%1, %1, q0123 +%endif +%endmacro + %macro INNER_LOOP 1 ; reading backwards: ptr1 = synth_buf + j + i; ptr2 = synth_buf + j - i ;~ a += window[i + j] * (-synth_buf[15 - i + j]) ;~ b += window[i + j + 16] * (synth_buf[i + j]) -pshufdm5, [ptr2 + j + (15 - 3) * 4], q0123 +SHUF m5, [ptr2 + j + (15 - 3) * 4] mova m6, [ptr1 + j] %if ARCH_X86_64 -pshufd m11, [ptr2 + j + (15 - 3) * 4 - mmsize], q0123 +SHUF m11, [ptr2 + j + (15 - 3) * 4 - mmsize] mova m12, [ptr1 + j + mmsize] %endif mulps m6, [win + %1 + j + 16 * 4] @@ -224,10 +240,10 @@ INIT_XMM sse2 %endif ;~ c += window[i + j + 32] * (synth_buf[16 + i + j]) ;~ d += window[i + j + 48] * (synth_buf[31 - i + j]) -pshufdm6, [ptr2 + j + (31 - 3) * 4], q0123 +SHUF m6, [ptr2 + j + (31 - 3) * 4] mova m5, [ptr1 + j + 16 * 4] %if ARCH_X86_64 -pshufd m12, [ptr2 + j + (31 - 3) * 4 - mmsize], q0123 +SHUF m12, [ptr2 + j + (31 - 3) * 4 - mmsize] mova m11, [ptr1 + j + mmsize + 16 * 4] %endif mulps m5, [win + %1 + j + 32 * 4] @@ -245,20 +261,25 @@ INIT_XMM sse2 subj, 64 * 4 %endmacro -; void ff_synth_filter_inner_sse2(float *synth_buf, float synth_buf2[32], -; const float window[512], float out[32], -; intptr_t offset, float scale) +; void ff_synth_filter_inner_opt(float *synth_buf, float synth_buf2[32], +; const float window[512], float out[32], +; intptr_t offset, float scale) +%macro SYNTH_FILTER 0 cglobal synth_filter_inner, 
0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ synth_buf, synth_buf2, window, out, off, scale %define scale m0 %if ARCH_X86_32 || WIN64 +%if cpuflag(sse2) movd scale, scalem +%else +movss scale, scalem +%endif ; Make sure offset is in a register and not on the stack %define OFFQ r4q %else %define OFFQ offq %endif -pshufdm0, m0, 0 +SPLATDm0 ; prepare inner counter limit 1 mov r5q, 480 sub r5q, offmp @@ -274,8 +295,8 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ %endif .mainloop ; m1 = a m2 = b m3 = c m4 = d -pxor m3, m3 -pxor m4, m4 +SETZERO m3 +SETZERO m4 mova m1, [buf2 + i] mova m2, [buf2 + i + 16 * 4] %if ARCH_X86_32 @@ -292,8 +313,8 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ %define ptr2 r7q ; must be loaded %define win r8q %define jr9q -pxor m9, m9 -pxor m10, m10 +SETZERO m9 +SETZERO m10 mova m7, [buf2 + i + mmsize] mova m8, [buf2 + i + mmsize + 16 * 4] lea win, [windowq + i] @@ -350,3 +371,11 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ subi, (ARCH_X86_64 + 1) * mmsize jge.mainloop RET +%endmacro + +%if ARCH_X86_32 +INIT_XMM sse +SYNTH_FILTER +%endif +INIT_XMM sse2 +SYNTH_FILTER diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c index 3821892..f8dd9b1 100644 --- a/libavcodec/x86/dcadsp_init.c +++ b/libavcodec/x86/dcadsp_init.c @@ -56,29 +56,39 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s) } } -void ff_synth_filter_inner_sse2(float *synth_buf_ptr, float synth_buf2[32], -const float window[512], -float out[32], intptr_t offset, float scale); +#define SYNTH_FILTER_FUNC(opt) \ +void ff_synth_filter_inner_##opt(float *synth_buf_ptr, float synth_buf2[32], \ + const float window[512], \ + float out[32], intptr_t offset, float scale); \ +static void synth_filter_##opt(FFTContext *imdct, \ + float *synth_buf_ptr, int
[libav-devel] [PATCH 0/3] synth filter float ASM
Here are some extra implementations that extend Christophe's work. The first one (SSE) is only for x86_32 targets as x86_64 guarantees SSE2 is available. The second patch is an AVX implementation using ymm registers. In my tests it was about 30 cycles faster than SSE2 on a Sandy Bridge CPU. I don't have proper numbers for the third patch since I could only test on an AMD rig, where functions using ymm registers tend to have subpar performance. It still beat the AVX version by a decent margin, though, so Haswell should see a nice boost with it. I could add an FMA4 version using xmm registers, which would benefit AMD users unlike these AVX/FMA3 ymm ones. Thoughts? James Almer (3): x86/synth_filter: add synth_filter_fma3 x86/synth_filter: add synth_filter_sse x86/synth_filter: add synth_filter_avx libavcodec/x86/dcadsp.asm| 109 --- libavcodec/x86/dcadsp_init.c | 52 ++--- 2 files changed, 107 insertions(+), 54 deletions(-) -- 1.8.3.2 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 2/3] x86/synth_filter: add synth_filter_avx
Sandy Bridge Win64: 180 cycles in ff_synth_filter_inner_sse2 150 cycles in ff_synth_filter_inner_avx Also switch to a three operand format for some instructions to avoid assembly errors with Yasm 1.1.0 or older. Signed-off-by: James Almer jamr...@gmail.com --- libavcodec/x86/dcadsp.asm| 76 +--- libavcodec/x86/dcadsp_init.c | 4 +++ 2 files changed, 48 insertions(+), 32 deletions(-) diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm index 970ec3d..0d7c86e 100644 --- a/libavcodec/x86/dcadsp.asm +++ b/libavcodec/x86/dcadsp.asm @@ -200,18 +200,22 @@ DCA_LFE_FIR 0 DCA_LFE_FIR 1 %macro SETZERO 1 -%if cpuflag(sse2) +%if cpuflag(sse2) notcpuflag(avx) pxor %1, %1 %else xorps %1, %1, %1 %endif %endmacro -%macro SHUF 2 -%if cpuflag(sse2) -pshufd%1, %2, q0123 +%macro SHUF 3 +%if cpuflag(avx) +mova %3, [%2 - 16] +vperm2f128%1, %3, %3, 1 +vshufps %1, %1, %1, q0123 +%elif cpuflag(sse2) +pshufd%1, [%2], q0123 %else -mova %1, %2 +mova %1, [%2] shufps%1, %1, q0123 %endif %endmacro @@ -220,43 +224,43 @@ DCA_LFE_FIR 1 ; reading backwards: ptr1 = synth_buf + j + i; ptr2 = synth_buf + j - i ;~ a += window[i + j] * (-synth_buf[15 - i + j]) ;~ b += window[i + j + 16] * (synth_buf[i + j]) -SHUF m5, [ptr2 + j + (15 - 3) * 4] +SHUF m5, ptr2 + j + (15 - 3) * 4, m6 mova m6, [ptr1 + j] %if ARCH_X86_64 -SHUF m11, [ptr2 + j + (15 - 3) * 4 - mmsize] +SHUF m11, ptr2 + j + (15 - 3) * 4 - mmsize, m12 mova m12, [ptr1 + j + mmsize] %endif -mulps m6, [win + %1 + j + 16 * 4] -mulps m5, [win + %1 + j] +mulps m6, m6, [win + %1 + j + 16 * 4] +mulps m5, m5, [win + %1 + j] %if ARCH_X86_64 -mulpsm12, [win + %1 + j + mmsize + 16 * 4] -mulpsm11, [win + %1 + j + mmsize] +mulpsm12, m12, [win + %1 + j + mmsize + 16 * 4] +mulpsm11, m11, [win + %1 + j + mmsize] %endif -addps m2, m6 -subps m1, m5 +addps m2, m2, m6 +subps m1, m1, m5 %if ARCH_X86_64 -addps m8, m12 -subps m7, m11 +addps m8, m8, m12 +subps m7, m7, m11 %endif ;~ c += window[i + j + 32] * (synth_buf[16 + i + j]) ;~ d += window[i + j + 48] * 
(synth_buf[31 - i + j]) -SHUF m6, [ptr2 + j + (31 - 3) * 4] +SHUF m6, ptr2 + j + (31 - 3) * 4, m5 mova m5, [ptr1 + j + 16 * 4] %if ARCH_X86_64 -SHUF m12, [ptr2 + j + (31 - 3) * 4 - mmsize] +SHUF m12, ptr2 + j + (31 - 3) * 4 - mmsize, m11 mova m11, [ptr1 + j + mmsize + 16 * 4] %endif -mulps m5, [win + %1 + j + 32 * 4] -mulps m6, [win + %1 + j + 48 * 4] +mulps m5, m5, [win + %1 + j + 32 * 4] +mulps m6, m6, [win + %1 + j + 48 * 4] %if ARCH_X86_64 -mulpsm11, [win + %1 + j + mmsize + 32 * 4] -mulpsm12, [win + %1 + j + mmsize + 48 * 4] +mulpsm11, m11, [win + %1 + j + mmsize + 32 * 4] +mulpsm12, m12, [win + %1 + j + mmsize + 48 * 4] %endif -addps m3, m5 -addps m4, m6 +addps m3, m3, m5 +addps m4, m4, m6 %if ARCH_X86_64 -addps m9, m11 -addpsm10, m12 +addps m9, m9, m11 +addpsm10, m10, m12 %endif subj, 64 * 4 %endmacro @@ -269,17 +273,21 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ synth_buf, synth_buf2, window, out, off, scale %define scale m0 %if ARCH_X86_32 || WIN64 -%if cpuflag(sse2) +%if cpuflag(sse2) notcpuflag(avx) movd scale, scalem +SPLATDm0 %else -movss scale, scalem +VBROADCASTSS m0, scalem %endif ; Make sure offset is in a register and not on the stack %define OFFQ r4q %else +SPLATD xmm0 +%if cpuflag(avx) +vinsertf128 m0, m0, xmm0, 1 +%endif %define OFFQ offq %endif -SPLATDm0 ; prepare inner counter limit 1 mov r5q, 480 sub r5q, offmp @@ -346,11 +354,11 @@ cglobal synth_filter_inner, 0, 6 + 4 * ARCH_X86_64, 7 + 6 * ARCH_X86_64, \ %endif ;~ out[i] = a * scale; ;~ out[i + 16] = b * scale; -mulps m1, scale -mulps m2, scale +mulps m1, m1, scale +mulps m2, m2, scale %if ARCH_X86_64 -mulps m7, scale -mulps m8, scale +mulps m7, m7, scale +mulps m8, m8, scale %endif ;~ synth_buf2[i] = c; ;~ synth_buf2[i + 16] = d; @@ -379,3 +387,7 @@ SYNTH_FILTER %endif INIT_XMM sse2 SYNTH_FILTER +%if HAVE_AVX_EXTERNAL +INIT_YMM avx +SYNTH_FILTER +%endif diff --git
[libav-devel] [PATCH 3/3] x86/synth_filter: add synth_filter_fma3
Signed-off-by: James Almer jamr...@gmail.com --- libavcodec/x86/dcadsp.asm| 28 +++- libavcodec/x86/dcadsp_init.c | 4 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/libavcodec/x86/dcadsp.asm b/libavcodec/x86/dcadsp.asm index 0d7c86e..e1842ef 100644 --- a/libavcodec/x86/dcadsp.asm +++ b/libavcodec/x86/dcadsp.asm @@ -230,16 +230,12 @@ DCA_LFE_FIR 1 SHUF m11, ptr2 + j + (15 - 3) * 4 - mmsize, m12 mova m12, [ptr1 + j + mmsize] %endif -mulps m6, m6, [win + %1 + j + 16 * 4] +FMULADD_PSm2, m6, [win + %1 + j + 16 * 4], m2, m6 mulps m5, m5, [win + %1 + j] -%if ARCH_X86_64 -mulpsm12, m12, [win + %1 + j + mmsize + 16 * 4] -mulpsm11, m11, [win + %1 + j + mmsize] -%endif -addps m2, m2, m6 subps m1, m1, m5 %if ARCH_X86_64 -addps m8, m8, m12 +FMULADD_PSm8, m12, [win + %1 + j + mmsize + 16 * 4], m8, m12 +mulpsm11, m11, [win + %1 + j + mmsize] subps m7, m7, m11 %endif ;~ c += window[i + j + 32] * (synth_buf[16 + i + j]) @@ -250,17 +246,11 @@ DCA_LFE_FIR 1 SHUF m12, ptr2 + j + (31 - 3) * 4 - mmsize, m11 mova m11, [ptr1 + j + mmsize + 16 * 4] %endif -mulps m5, m5, [win + %1 + j + 32 * 4] -mulps m6, m6, [win + %1 + j + 48 * 4] +FMULADD_PSm3, m5, [win + %1 + j + 32 * 4], m3, m5 +FMULADD_PSm4, m6, [win + %1 + j + 48 * 4], m4, m6 %if ARCH_X86_64 -mulpsm11, m11, [win + %1 + j + mmsize + 32 * 4] -mulpsm12, m12, [win + %1 + j + mmsize + 48 * 4] -%endif -addps m3, m3, m5 -addps m4, m4, m6 -%if ARCH_X86_64 -addps m9, m9, m11 -addpsm10, m10, m12 +FMULADD_PSm9, m11, [win + %1 + j + mmsize + 32 * 4], m9, m11 +FMULADD_PS m10, m12, [win + %1 + j + mmsize + 48 * 4], m10, m12 %endif subj, 64 * 4 %endmacro @@ -391,3 +381,7 @@ SYNTH_FILTER INIT_YMM avx SYNTH_FILTER %endif +%if HAVE_FMA3_EXTERNAL +INIT_YMM fma3 +SYNTH_FILTER +%endif diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c index ab20635..132f75e 100644 --- a/libavcodec/x86/dcadsp_init.c +++ b/libavcodec/x86/dcadsp_init.c @@ -80,6 +80,7 @@ SYNTH_FILTER_FUNC(sse) #endif SYNTH_FILTER_FUNC(sse2) 
SYNTH_FILTER_FUNC(avx) +SYNTH_FILTER_FUNC(fma3) av_cold void ff_synth_filter_init_x86(SynthFilterContext *s) { @@ -96,4 +97,7 @@ av_cold void ff_synth_filter_init_x86(SynthFilterContext *s) if (EXTERNAL_AVX(cpu_flags)) { s-synth_filter_float = synth_filter_avx; } +if (EXTERNAL_FMA3(cpu_flags)) { +s-synth_filter_float = synth_filter_fma3; +} } -- 1.8.3.2 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 1/2] af_channelmap: fix ONE_STR mapping mode
get_channel() returns 0 on success CC:libav-sta...@libav.org --- libavfilter/af_channelmap.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavfilter/af_channelmap.c b/libavfilter/af_channelmap.c index 71d51e7..6bc8c3a 100644 --- a/libavfilter/af_channelmap.c +++ b/libavfilter/af_channelmap.c @@ -188,7 +188,7 @@ static av_cold int channelmap_init(AVFilterContext *ctx) s->map[i].out_channel_idx = i; break; case MAP_ONE_STR: -if (!get_channel(&mapping, &in_ch, separator)) { +if (get_channel(&mapping, &in_ch, separator) < 0) { av_log(ctx, AV_LOG_ERROR, err); return AVERROR(EINVAL); } -- 1.7.10.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel
[libav-devel] [PATCH 2/2] FATE: add a test for the ONE_STR mapping mode of the channelmap filter
--- tests/fate/filter-audio.mak| 30 +++- .../{channelmap = channelmap_one_int} |0 tests/filtergraphs/channelmap_one_str |1 + 3 files changed, 24 insertions(+), 7 deletions(-) rename tests/filtergraphs/{channelmap = channelmap_one_int} (100%) create mode 100644 tests/filtergraphs/channelmap_one_str diff --git a/tests/fate/filter-audio.mak b/tests/fate/filter-audio.mak index 9bf148c..1b3dbfa 100644 --- a/tests/fate/filter-audio.mak +++ b/tests/fate/filter-audio.mak @@ -41,13 +41,29 @@ $(FATE_ATRIM): SRC = $(TARGET_PATH)/tests/data/asynth-44100-2.wav FATE_FILTER-$(call FILTERDEMDECENCMUX, ATRIM, WAV, PCM_S16LE, PCM_S16LE, WAV) += $(FATE_ATRIM) -FATE_AFILTER-$(call FILTERDEMDECENCMUX, CHANNELMAP, WAV, PCM_S16LE, PCM_S16LE, WAV) += fate-filter-channelmap -fate-filter-channelmap: tests/data/filtergraphs/channelmap -fate-filter-channelmap: SRC = $(TARGET_PATH)/tests/data/asynth-44100-6.wav -fate-filter-channelmap: tests/data/asynth-44100-6.wav -fate-filter-channelmap: CMD = md5 -i $(SRC) -filter_complex_script $(TARGET_PATH)/tests/data/filtergraphs/channelmap -f wav -flags +bitexact -fate-filter-channelmap: CMP = oneline -fate-filter-channelmap: REF = 21f1977c4f9705e2057083f84764e685 +FATE_AFILTER-$(call FILTERDEMDECENCMUX, CHANNELMAP, WAV, PCM_S16LE, PCM_S16LE, WAV) += fate-filter-channelmap-one-int +fate-filter-channelmap-one-int: tests/data/filtergraphs/channelmap_one_int +fate-filter-channelmap-one-int: SRC = $(TARGET_PATH)/tests/data/asynth-44100-6.wav +fate-filter-channelmap-one-int: tests/data/asynth-44100-6.wav +fate-filter-channelmap-one-int: CMD = md5 -i $(SRC) -filter_complex_script $(TARGET_PATH)/tests/data/filtergraphs/channelmap_one_int -f wav -flags +bitexact +fate-filter-channelmap-one-int: CMP = oneline +fate-filter-channelmap-one-int: REF = 21f1977c4f9705e2057083f84764e685 + +FATE_AFILTER-$(call FILTERDEMDECENCMUX, CHANNELMAP, WAV, PCM_S16LE, PCM_S16LE, WAV) += fate-filter-channelmap-one-str +fate-filter-channelmap-one-str: 
tests/data/filtergraphs/channelmap_one_str +fate-filter-channelmap-one-str: SRC = $(TARGET_PATH)/tests/data/asynth-44100-2.wav +fate-filter-channelmap-one-str: tests/data/asynth-44100-2.wav +fate-filter-channelmap-one-str: CMD = md5 -i $(SRC) -filter_complex_script $(TARGET_PATH)/tests/data/filtergraphs/channelmap_one_str -f wav -flags +bitexact +fate-filter-channelmap-one-str: CMP = oneline +fate-filter-channelmap-one-str: REF = 9fe9bc452282dfd94fd80e9491607a0c + +FATE_AFILTER-$(call FILTERDEMDECENCMUX, CHANNELMAP, WAV, PCM_S16LE, PCM_S16LE, WAV) += fate-filter-channelmap-one-str +fate-filter-channelmap-one-str: tests/data/filtergraphs/channelmap_one_str +fate-filter-channelmap-one-str: SRC = $(TARGET_PATH)/tests/data/asynth-44100-2.wav +fate-filter-channelmap-one-str: tests/data/asynth-44100-2.wav +fate-filter-channelmap-one-str: CMD = md5 -i $(SRC) -filter_complex_script $(TARGET_PATH)/tests/data/filtergraphs/channelmap_one_str -f wav -flags +bitexact +fate-filter-channelmap-one-str: CMP = oneline +fate-filter-channelmap-one-str: REF = 9fe9bc452282dfd94fd80e9491607a0c FATE_AFILTER-$(call FILTERDEMDECENCMUX, CHANNELSPLIT, WAV, PCM_S16LE, PCM_S16LE, PCM_S16LE) += fate-filter-channelsplit fate-filter-channelsplit: SRC = $(TARGET_PATH)/tests/data/asynth-44100-2.wav diff --git a/tests/filtergraphs/channelmap b/tests/filtergraphs/channelmap_one_int similarity index 100% rename from tests/filtergraphs/channelmap rename to tests/filtergraphs/channelmap_one_int diff --git a/tests/filtergraphs/channelmap_one_str b/tests/filtergraphs/channelmap_one_str new file mode 100644 index 000..3c658fc --- /dev/null +++ b/tests/filtergraphs/channelmap_one_str @@ -0,0 +1 @@ +channelmap=map=FR|FL:channel_layout=stereo -- 1.7.10.4 ___ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel