Re: [FFmpeg-devel] [PATCH 1/6] avcodec/vorbisenc: Add pre-echo detection
On Wed, Aug 23, 2017 at 10:11:50AM +0200, Tomas Härdin wrote: > On 2017-08-22 03:23, Tyler Jones wrote: > > + > > +/** > > + * Calculate the variance of a block of samples > > + * > > + * @param in Array of input samples > > + * @param length Number of input samples being analyzed > > + * @return The variance for the current block > > + */ > > +static float variance(const float *in, int length, AVFloatDSPContext *fdsp) > > +{ > > +int i; > > +float mean = 0.0f, square_sum = 0.0f; > > + > > +for (i = 0; i < length; i++) { > > +mean += in[i]; > > +} > > + > > +square_sum = fdsp->scalarproduct_float(in, in, length); > > + > > +mean /= length; > > +return (square_sum - length * mean * mean) / (length - 1); > > +} > > Isn't this method much more numerically unstable compared to the naïve > method? Might not matter too much when the source data is 16-bit, but > throwing it out there anyway This does have the possibility of being more unstable than the naive version. However, I have not been able to find a sample file where it is even close to influential. The epsilon constant added during comparison between variances has a much greater impact. A quick run of the same samples through python was able to verify this. > DSP methods for computing mean and variance could be a good project for > someone wanting to learn > > /Tomas I am unsure of how many codecs use direct calculation of statistical values. Perhaps someone with more experience than myself could comment on the usefulness of such methods. I appreciate your comments, Tyler Jones signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH 2/6] avcodec/vorbisenc: Apply dynamic frame lengths
On Wed, Aug 23, 2017 at 10:31:58AM +0200, Tomas Härdin wrote: > On 2017-08-22 03:23, Tyler Jones wrote: > > +static int create_residues(vorbis_enc_context *venc) > > +{ > > +int res, ret; > > +vorbis_enc_residue *rc; > > + > > +venc->nresidues = 2; > > +venc->residues = av_malloc(sizeof(vorbis_enc_residue) * > > venc->nresidues); > > av_malloc_array()? Applies to most av_malloc() in there I can change it, but I don't feel that it helps readability in this specific case above. As for the others that happen to show up in the diffs, I did not want to make any unnecessary and unrelated functional changes. However, I'll gladly switch these cases to `av_malloc_array()` in a separate commit if desired. > > -// single mapping > > -mc = &venc->mappings[0]; > > -mc->submaps = 1; > > -mc->mux = av_malloc(sizeof(int) * venc->channels); > > -if (!mc->mux) > > -return AVERROR(ENOMEM); > > -for (i = 0; i < venc->channels; i++) > > -mc->mux[i] = 0; > > -mc->floor = av_malloc(sizeof(int) * mc->submaps); > > -mc->residue = av_malloc(sizeof(int) * mc->submaps); > > -if (!mc->floor || !mc->residue) > > -return AVERROR(ENOMEM); > > -for (i = 0; i < mc->submaps; i++) { > > -mc->floor[i] = 0; > > -mc->residue[i] = 0; > > -} > > -mc->coupling_steps = venc->channels == 2 ? 
1 : 0; > > -mc->magnitude = av_malloc(sizeof(int) * mc->coupling_steps); > > -mc->angle = av_malloc(sizeof(int) * mc->coupling_steps); > > -if (!mc->magnitude || !mc->angle) > > -return AVERROR(ENOMEM); > > -if (mc->coupling_steps) { > > -mc->magnitude[0] = 0; > > -mc->angle[0] = 1; > > +for (map = 0; map < venc->nmappings; map++) { > > +mc = &venc->mappings[map]; > > +mc->submaps = 1; > > +mc->mux = av_malloc(sizeof(int) * venc->channels); > > +if (!mc->mux) > > +return AVERROR(ENOMEM); > > +for (i = 0; i < venc->channels; i++) > > +mc->mux[i] = 0; > > +mc->floor = av_malloc(sizeof(int) * mc->submaps); > > +mc->residue = av_malloc(sizeof(int) * mc->submaps); > > +if (!mc->floor || !mc->residue) > > +return AVERROR(ENOMEM); > > +for (i = 0; i < mc->submaps; i++) { > > +mc->floor[i] = map; > > +mc->residue[i] = map; > > +} > > +mc->coupling_steps = venc->channels == 2 ? 1 : 0; > > +mc->magnitude = av_malloc(sizeof(int) * mc->coupling_steps); > > +mc->angle = av_malloc(sizeof(int) * mc->coupling_steps); > > +if (!mc->magnitude || !mc->angle) > > +return AVERROR(ENOMEM); > > +if (mc->coupling_steps) { > > +mc->magnitude[0] = 0; > > +mc->angle[0] = 1; > > +} > > } > > Maybe nitpicking, but it would be clearer what the changes are if you put > the indentation change in a separate commit No, you're right, and it's a good suggestion. I'll move the indentation to a separate commit when enough other changes have been provided to warrant a new version. 
> > -move_audio(venc, avctx->frame_size); > > +if (venc->transient < 0) { > > +move_audio(venc, avctx->frame_size); > > -for (ch = 0; ch < venc->channels; ch++) { > > -float *scratch = venc->scratch + 2 * ch * frame_size + frame_size; > > +for (ch = 0; ch < venc->channels; ch++) { > > + float *scratch = venc->scratch + 2 * ch * long_win + long_win; > > -if (!ff_psy_vorbis_block_frame(&venc->vpctx, scratch, ch, > > - frame_size, block_size)) > > -curr_win = 0; > > +if (!ff_psy_vorbis_block_frame(&venc->vpctx, scratch, ch, > > + long_win, short_win)) > > +next_win = 0; > > +} > > } > > Same here > > /Tomas > I felt that separating this small number of lines would just clutter the git log history, but I'll move these along with the mapping indentations. Thanks for taking a look, Tyler Jones signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH] avcodec/vorbisenc: Improve documentation
Signed-off-by: Tyler Jones --- This patch applies cleanly onto "[PATCH 6/6] avcodec/vorbisenc: Add support for mono streams". libavcodec/vorbis_enc_data.h | 20 ++-- libavcodec/vorbisenc.c | 43 +-- libavcodec/vorbisenc.h | 20 ++-- 3 files changed, 53 insertions(+), 30 deletions(-) diff --git a/libavcodec/vorbis_enc_data.h b/libavcodec/vorbis_enc_data.h index 32750da803..cf94700350 100644 --- a/libavcodec/vorbis_enc_data.h +++ b/libavcodec/vorbis_enc_data.h @@ -27,14 +27,14 @@ #define RES_MAX_CLASSIF 10 typedef const struct { -int dim; -int len; -int real_len; -const uint8_t *clens; -int lookup; -float min; -float delta; -const uint8_t *quant; +int dim; ///< The number of elements per coded vector +int len; ///< The number of filled entries in the book +int real_len; ///< The expected number of entries, padded with 0 if len < real_len +const uint8_t *clens; ///< List of codeword lengths in bits +int lookup; ///< Flag if vector lookup is available with this book +float min;///< The minimum value encoded by this book +float delta; ///< The distance between encoded points +const uint8_t *quant; ///< Pointer to a (entries)^(1/dim) column map if lookup is set } codebook_setup; typedef const struct { @@ -817,8 +817,8 @@ static codebook_setup res_mono_config[] = { }; static const struct { -int dim; -int subclass; +int dim;///< Dimensions of the class master book +int subclass; ///< Integer log base 2 of the number of subclass books int masterbook; const int nbooks[4]; } floor_classes[2][5] = { diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c index f4af2f4071..78b41b2b49 100644 --- a/libavcodec/vorbisenc.c +++ b/libavcodec/vorbisenc.c @@ -414,7 +414,7 @@ static void put_codebook_header(PutBitContext *pb, vorbis_enc_codebook *cb) int i; int ordered = 0; -put_bits(pb, 24, 0x564342); //magic +put_bits(pb, 24, 0x564342); // Signal the start of a codebook put_bits(pb, 16, cb->ndimensions); put_bits(pb, 24, cb->nentries); @@ -520,6 +520,7 @@ static void 
put_residue_header(PutBitContext *pb, vorbis_enc_residue *rc, put_bits(pb, 6, rc->classifications - 1); put_bits(pb, 8, book_offset + rc->classbook); +/* We must specify which partition classes are used in each pass */ for (i = 0; i < rc->classifications; i++) { int j, tmp = 0; for (j = 0; j < 8; j++) @@ -540,6 +541,13 @@ static void put_residue_header(PutBitContext *pb, vorbis_enc_residue *rc, } } +/** + * Output necessary information for all primary headers. + * + * @see Vorbis I spec "4.2. Header decode and decode setup" + * @param out Empty buffer on input, encoded headers on output + * @returnNumber of bits written to the buffer or error value + */ static int put_main_header(vorbis_enc_context *venc, uint8_t **out) { int i; @@ -552,15 +560,15 @@ static int put_main_header(vorbis_enc_context *venc, uint8_t **out) // identification header init_put_bits(&pb, p, buffer_len); -put_bits(&pb, 8, 1); //magic +put_bits(&pb, 8, 1); // Signal an ID header for (i = 0; "vorbis"[i]; i++) put_bits(&pb, 8, "vorbis"[i]); put_bits32(&pb, 0); // version put_bits(&pb, 8, venc->channels); put_bits32(&pb, venc->sample_rate); -put_bits32(&pb, 0); // bitrate -put_bits32(&pb, 0); // bitrate -put_bits32(&pb, 0); // bitrate +put_bits32(&pb, 0); // Maximum bitrate +put_bits32(&pb, 0); // Nominal bitrate +put_bits32(&pb, 0); // Minimum bitrate put_bits(&pb, 4, venc->log2_blocksize[0]); put_bits(&pb, 4, venc->log2_blocksize[1]); put_bits(&pb, 1, 1); // framing @@ -572,11 +580,11 @@ static int put_main_header(vorbis_enc_context *venc, uint8_t **out) // comment header init_put_bits(&pb, p, buffer_len); -put_bits(&pb, 8, 3); //magic +put_bits(&pb, 8, 3); // Signal a comment header for (i = 0; "vorbis"[i]; i++) put_bits(&pb, 8, "vorbis"[i]); put_bits32(&pb, 0); // vendor length TODO -put_bits32(&pb, 0); // amount of comments +put_bits32(&pb, 0); // amount of comments TODO put_bits(&pb, 1, 1); // framing flush_put_bits(&pb); @@ -586,7 +594,7 @@ static int put_main_header(vorbis_enc_context 
*venc, uint8_t **out) // setup header init_put_bits(&pb, p, buffer_len); -put_bits(&pb, 8, 5); //magic +put_bits(&pb, 8, 5); // Signal a setup header for (i = 0; "vorbis"[i]; i++) put_bits(&pb, 8, "
[FFmpeg-devel] [PATCH 4/6] avcodec/vorbisenc: Move encoder structures and macros to separate file
Encoder structures are moved to a separate header file to improve readability. Signed-off-by: Tyler Jones --- libavcodec/vorbisenc.c | 118 +--- libavcodec/vorbisenc.h | 145 + 2 files changed, 146 insertions(+), 117 deletions(-) create mode 100644 libavcodec/vorbisenc.h diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c index 73182c6356..fae90c4a30 100644 --- a/libavcodec/vorbisenc.c +++ b/libavcodec/vorbisenc.c @@ -25,18 +25,12 @@ */ #include -#include "libavutil/float_dsp.h" +#include "vorbisenc.h" #include "avcodec.h" #include "internal.h" -#include "fft.h" #include "mathops.h" -#include "vorbis.h" #include "vorbis_enc_data.h" -#include "vorbispsy.h" - -#include "audio_frame_queue.h" -#include "libavfilter/bufferqueue.h" #define BITSTREAM_WRITER_LE #include "put_bits.h" @@ -44,116 +38,6 @@ #undef NDEBUG #include -typedef struct vorbis_enc_codebook { -int nentries; -uint8_t *lens; -uint32_t *codewords; -int ndimensions; -float min; -float delta; -int seq_p; -int lookup; -int *quantlist; -float *dimensions; -float *pow2; -} vorbis_enc_codebook; - -typedef struct vorbis_enc_floor_class { -int dim; -int subclass; -int masterbook; -int *books; -} vorbis_enc_floor_class; - -typedef struct vorbis_enc_floor { -int partitions; -int *partition_to_class; -int nclasses; -vorbis_enc_floor_class *classes; -int multiplier; -int rangebits; -int values; -vorbis_floor1_entry *list; -} vorbis_enc_floor; - -typedef struct vorbis_enc_residue { -int type; -int begin; -int end; -int partition_size; -int classifications; -int classbook; -int8_t (*books)[8]; -float (*maxes)[2]; -} vorbis_enc_residue; - -typedef struct vorbis_enc_mapping { -int submaps; -int *mux; -int *floor; -int *residue; -int coupling_steps; -int *magnitude; -int *angle; -} vorbis_enc_mapping; - -typedef struct vorbis_enc_mode { -int blockflag; -int mapping; -} vorbis_enc_mode; - -typedef struct vorbis_enc_context { -int channels; -int sample_rate; -int log2_blocksize[2]; -int blockflags[3]; ///< Flags used 
for the previous, current, next windows -int transient; ///< Negative if a series of transients are not being encoded -int num_transient; ///< Number of short blocks for each frame -FFTContext mdct[2]; -const float *win[2]; -int have_saved; -float *saved; -float *samples; -float *floor; // also used for tmp values for mdct -float *coeffs; // also used for residue after floor -float *scratch; //< Used for temp values for psy model and window application -float quality; - -AudioFrameQueue afq; -struct FFBufQueue bufqueue; - -int ncodebooks; -vorbis_enc_codebook *codebooks; - -int nfloors; -vorbis_enc_floor *floors; - -int nresidues; -vorbis_enc_residue *residues; - -int nmappings; -vorbis_enc_mapping *mappings; - -int nmodes; -vorbis_enc_mode *modes; - -int64_t next_pts; - -AVFloatDSPContext *fdsp; -VorbisPsyContext vpctx; -} vorbis_enc_context; - -#define MAX_CHANNELS 2 -#define MAX_CODEBOOK_DIM 8 - -#define MAX_FLOOR_CLASS_DIM 4 -#define NUM_FLOOR_PARTITIONS 8 -#define MAX_FLOOR_VALUES (MAX_FLOOR_CLASS_DIM*NUM_FLOOR_PARTITIONS+2) - -#define RESIDUE_SIZE 1600 -#define RESIDUE_PART_SIZE 32 -#define NUM_RESIDUE_PARTITIONS (RESIDUE_SIZE/RESIDUE_PART_SIZE) - static inline int put_codeword(PutBitContext *pb, vorbis_enc_codebook *cb, int entry) { diff --git a/libavcodec/vorbisenc.h b/libavcodec/vorbisenc.h new file mode 100644 index 000000..15ee02dfa6 --- /dev/null +++ b/libavcodec/vorbisenc.h @@ -0,0 +1,145 @@ +/* + * Vorbis encoder + * Copyright (C) 2017 Tyler Jones + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVCODEC_VORBISENC_H +#define AVCODEC_VORBISENC_H + +#define MAX_CHANNELS 2 +#define MAX_CODEBOOK_DIM 8 + +#define MAX_FLOO
[FFmpeg-devel] [PATCH 2/6] avcodec/vorbisenc: Apply dynamic frame lengths
Additional codebooks are added for shorter 128-sample frames. Changes in codeword generation are made to handle valid values of 0 that prepend some codebooks, otherwise books are classified incorrectly and cause unreadable streams. A second residue, floor, and mapping is created for short window lengths so that values are partitioned correctly for transient frames. Signed-off-by: Tyler Jones --- V4: No changes V3: Switch 'bits[p] == 0' to '!bits[p]' in vlc gen V2: Fix double arithmetic in window scale libavcodec/vorbis.c | 10 +- libavcodec/vorbis_enc_data.h | 289 +++-- libavcodec/vorbisenc.c | 424 ++- tests/fate/vorbis.mak| 2 +- 4 files changed, 454 insertions(+), 271 deletions(-) diff --git a/libavcodec/vorbis.c b/libavcodec/vorbis.c index 399020eec5..d8c4b006e7 100644 --- a/libavcodec/vorbis.c +++ b/libavcodec/vorbis.c @@ -59,7 +59,7 @@ int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, unsigned num) unsigned i, j, p, code; for (p = 0; (bits[p] == 0) && (p < num); ++p) -; +codes[p] = 0; if (p == num) return 0; @@ -78,9 +78,11 @@ int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, unsigned num) for (; p < num; ++p) { if (bits[p] > 32) - return AVERROR_INVALIDDATA; -if (bits[p] == 0) - continue; +return AVERROR_INVALIDDATA; +if (!bits[p]) { +codes[p] = 0; +continue; +} // find corresponding exit(node which the tree can grow further from) for (i = bits[p]; i > 0; --i) if (exit_at_level[i]) diff --git a/libavcodec/vorbis_enc_data.h b/libavcodec/vorbis_enc_data.h index a51aaec978..eca43dfded 100644 --- a/libavcodec/vorbis_enc_data.h +++ b/libavcodec/vorbis_enc_data.h @@ -23,15 +23,78 @@ #include -static const uint8_t codebook0[] = { +static const uint8_t floor_128_c0[] = { +10, 7, 8, 13, 9, 6, 7, 11, 10, 8, 8, 12, 17, 17, 17, +17, 7, 5, 5, 9, 6, 4, 4, 8, 8, 5, 5, 8, 16, 14, +13, 16, 7, 5, 5, 7, 6, 3, 3, 5, 8, 5, 4, 7, 14, +12, 12, 15, 10, 7, 8, 9, 7, 5, 5, 6, 9, 6, 5, 5, +15, 12, 9, 10, +}; + +static const uint8_t floor_128_c1[] = { + 8, 13, 17, 17, 8, 11, 17, 
17, 11, 13, 17, 17, 17, 17, 17, +17, 6, 10, 16, 17, 6, 10, 15, 17, 8, 10, 16, 17, 17, 17, +17, 17, 9, 13, 15, 17, 8, 11, 17, 17, 10, 12, 17, 17, 17, +17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, +17, 17, 17, 17, 6, 11, 15, 17, 7, 10, 15, 17, 8, 10, 17, +17, 17, 15, 17, 17, 4, 8, 13, 17, 4, 7, 13, 17, 6, 8, +15, 17, 16, 15, 17, 17, 6, 11, 15, 17, 6, 9, 13, 17, 8, +10, 17, 17, 15, 17, 17, 17, 16, 17, 17, 17, 12, 14, 15, 17, +13, 14, 15, 17, 17, 17, 17, 17, 5, 10, 14, 17, 5, 9, 14, +17, 7, 9, 15, 17, 15, 15, 17, 17, 3, 7, 12, 17, 3, 6, +11, 17, 5, 7, 13, 17, 12, 12, 17, 17, 5, 9, 14, 17, 3, + 7, 11, 17, 5, 8, 13, 17, 13, 11, 16, 17, 12, 17, 17, 17, + 9, 14, 15, 17, 10, 11, 14, 17, 16, 14, 17, 17, 8, 12, 17, +17, 8, 12, 17, 17, 10, 12, 17, 17, 17, 17, 17, 17, 5, 10, +17, 17, 5, 9, 15, 17, 7, 9, 17, 17, 13, 13, 17, 17, 7, +11, 17, 17, 6, 10, 15, 17, 7, 9, 15, 17, 12, 11, 17, 17, +12, 15, 17, 17, 11, 14, 17, 17, 11, 10, 15, 17, 17, 16, 17, +17, +}; + +static const uint8_t floor_128_0sub1[] = { + 0, 3, 3, 3, 3, 3, 3, 3, 3, +}; + +static const uint8_t floor_128_0sub2[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 4, 4, 4, + 4, 5, 4, 5, 4, 5, 4, 6, 4, 6, +}; + +static const uint8_t floor_128_0sub3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 3, 5, 3, + 5, 4, 5, 4, 5, 5, 5, 5, 6, 5, 6, 5, 6, 5, 6, + 5, 6, 5, 7, 8, 9, 11, 13, 13, 13, 13, 13, 13, 13, 13, +13, 13, 13, 13, +}; + +static const uint8_t floor_128_1sub1[] = { + 0, 3, 3, 2, 3, 3, 4, 3, 4, +}; + +static const uint8_t floor_128_1sub2[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 3, 6, 3, 6, + 3, 6, 3, 7, 3, 8, 4, 9, 4, 9, +}; + +static const uint8_t floor_128_1sub3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 7, 2, 7, 3, + 8, 4, 9, 5, 9, 8, 10, 11, 11, 12, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +13, 13, 13, 13, +}; + +static const uint8_t floor_1024_c1[] = { 2, 10, 8, 14, 7, 12, 11, 
14, 1, 5, 3, 7, 4, 9, 7, 13, }; -static const uint8_t codebook1[] = { +static const uint8_t floor_1024_c2[] = { 1, 4, 2, 6, 3, 7, 5, 7, }; -static const uint8_t codebook2[] = { +static const uint8_t floor_1024_c3[] = { 1, 5, 7, 21, 5, 8, 9, 21, 10,
[FFmpeg-devel] [PATCH 1/6] avcodec/vorbisenc: Add pre-echo detection
The encoder will attempt to determine the existence of transient signals by applying a 4th order highpass filter to remove dominant low frequency waveforms. Frames are then split up into blocks where the variance is calculated and compared with blocks from the previous frame. A preecho is only likely to be noticeable when relatively quiet audio is followed by a loud transient signal. Signed-off-by: Tyler Jones --- V4: Use AVFloatDSPContext for variance calculation Correctly change quality factors to const Remove unnecessary malloc and free for VorbisPsyContext V3: Use normal float notation Don't check before freeing NULL pointers Remove unnecessary includes V2: Provide proper prefix for non-static function libavcodec/Makefile| 2 +- libavcodec/vorbisenc.c | 27 +++-- libavcodec/vorbispsy.c | 147 + libavcodec/vorbispsy.h | 82 +++ 4 files changed, 253 insertions(+), 5 deletions(-) create mode 100644 libavcodec/vorbispsy.c create mode 100644 libavcodec/vorbispsy.h diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 982d7f5179..315c403c9c 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -611,7 +611,7 @@ OBJS-$(CONFIG_VMNC_DECODER)+= vmnc.o OBJS-$(CONFIG_VORBIS_DECODER) += vorbisdec.o vorbisdsp.o vorbis.o \ vorbis_data.o OBJS-$(CONFIG_VORBIS_ENCODER) += vorbisenc.o vorbis.o \ - vorbis_data.o + vorbis_data.o vorbispsy.o OBJS-$(CONFIG_VP3_DECODER) += vp3.o OBJS-$(CONFIG_VP5_DECODER) += vp5.o vp56.o vp56data.o vp56rac.o OBJS-$(CONFIG_VP6_DECODER) += vp6.o vp56.o vp56data.o \ diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c index bf21a3b1ff..6da5f012c2 100644 --- a/libavcodec/vorbisenc.c +++ b/libavcodec/vorbisenc.c @@ -33,6 +33,7 @@ #include "mathops.h" #include "vorbis.h" #include "vorbis_enc_data.h" +#include "vorbispsy.h" #include "audio_frame_queue.h" #include "libavfilter/bufferqueue.h" @@ -136,6 +137,7 @@ typedef struct vorbis_enc_context { int64_t next_pts; AVFloatDSPContext *fdsp; +VorbisPsyContext vpctx; } vorbis_enc_context; 
#define MAX_CHANNELS 2 @@ -272,11 +274,12 @@ static int create_vorbis_context(vorbis_enc_context *venc, vorbis_enc_floor *fc; vorbis_enc_residue *rc; vorbis_enc_mapping *mc; -int i, book, ret; +int i, book, ret, blocks; venc->channels= avctx->channels; venc->sample_rate = avctx->sample_rate; -venc->log2_blocksize[0] = venc->log2_blocksize[1] = 11; +venc->log2_blocksize[0] = 8; +venc->log2_blocksize[1] = 11; venc->ncodebooks = FF_ARRAY_ELEMS(cvectors); venc->codebooks = av_malloc(sizeof(vorbis_enc_codebook) * venc->ncodebooks); @@ -464,6 +467,11 @@ static int create_vorbis_context(vorbis_enc_context *venc, if ((ret = dsp_init(avctx, venc)) < 0) return ret; +blocks = 1 << (venc->log2_blocksize[1] - venc->log2_blocksize[0]); +if ((ret = ff_psy_vorbis_init(&venc->vpctx, venc->sample_rate, + venc->channels, blocks, venc->fdsp)) < 0) +return ret; + return 0; } @@ -1078,15 +1086,17 @@ static void move_audio(vorbis_enc_context *venc, int sf_size) av_frame_free(&cur); } venc->have_saved = 1; -memcpy(venc->scratch, venc->samples, 2 * venc->channels * frame_size); +memcpy(venc->scratch, venc->samples, sizeof(float) * venc->channels * 2 * frame_size); } static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr) { vorbis_enc_context *venc = avctx->priv_data; -int i, ret, need_more; +int i, ret, need_more, ch; +int curr_win = 1; int frame_size = 1 << (venc->log2_blocksize[1] - 1); +int block_size = 1 << (venc->log2_blocksize[0] - 1); vorbis_enc_mode *mode; vorbis_enc_mapping *mapping; PutBitContext pb; @@ -1121,6 +1131,14 @@ static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, move_audio(venc, avctx->frame_size); +for (ch = 0; ch < venc->channels; ch++) { +float *scratch = venc->scratch + 2 * ch * frame_size + frame_size; + +if (!ff_psy_vorbis_block_frame(&venc->vpctx, scratch, ch, + frame_size, block_size)) +curr_win = 0; +} + if (!apply_window_and_mdct(venc)) return 0; @@ -1252,6 +1270,7 @@ static av_cold 
int vorbis_encode_close(AVCodecContext *avctx) ff_mdct_end(&venc->mdct[1]); ff_af_queue_close(&ven
[FFmpeg-devel] [PATCH 6/6] avcodec/vorbisenc: Add support for mono streams
Signed-off-by: Tyler Jones --- libavcodec/vorbis_enc_data.h | 245 ++- libavcodec/vorbisenc.c | 46 2 files changed, 267 insertions(+), 24 deletions(-) diff --git a/libavcodec/vorbis_enc_data.h b/libavcodec/vorbis_enc_data.h index 6f2b10feb9..32750da803 100644 --- a/libavcodec/vorbis_enc_data.h +++ b/libavcodec/vorbis_enc_data.h @@ -23,6 +23,9 @@ #include +#define RES_PASSES 8 +#define RES_MAX_CLASSIF 10 + typedef const struct { int dim; int len; @@ -35,12 +38,11 @@ typedef const struct { } codebook_setup; typedef const struct { -const int type; const int end[2]; const int classifications; const int nbooks; const codebook_setup *config; -const int8_t books[10][8]; +const int8_t books[RES_MAX_CLASSIF][RES_PASSES]; } res_setup; static const uint8_t floor_128_c0[] = { @@ -552,6 +554,209 @@ static const uint8_t res_stereo_p9_2[] = { 9, 10, 10, 10, }; +static const uint8_t res_mono_long_master[] = { + 6, 9, 13, 12, 14, 11, 10, 13, 8, 4, 5, 7, 8, 7, 8, +12, 11, 4, 3, 5, 5, 7, 9, 14, 11, 6, 5, 6, 6, 6, + 7, 13, 13, 7, 5, 6, 4, 5, 7, 14, 11, 7, 6, 6, 5, + 5, 6, 13, 9, 7, 8, 6, 7, 5, 3, 9, 9, 12, 13, 12, +14, 10, 6, 7, +}; + +static const uint8_t res_mono_short_master[] = { +14, 14, 14, 15, 13, 15, 12, 16, 10, 8, 7, 9, 9, 8, 12, +16, 10, 5, 4, 6, 5, 6, 9, 16, 14, 8, 6, 8, 7, 8, +10, 16, 14, 7, 4, 6, 3, 5, 8, 16, 15, 9, 5, 7, 4, + 4, 7, 16, 13, 10, 6, 7, 4, 3, 4, 13, 13, 12, 7, 9, + 5, 5, 6, 12, +}; + +static const uint8_t res_mono_p1_0[] = { + 1, 4, 4, 5, 8, 7, 5, 7, 8, 5, 8, 8, 8, 10, 11, + 8, 10, 11, 5, 8, 8, 8, 11, 10, 8, 11, 11, 4, 8, 8, + 8, 11, 11, 8, 11, 11, 8, 11, 11, 11, 13, 14, 11, 14, 14, + 8, 11, 11, 10, 14, 12, 11, 14, 14, 4, 8, 8, 8, 11, 11, + 8, 11, 11, 7, 11, 11, 11, 14, 14, 10, 12, 14, 8, 11, 11, +11, 14, 14, 11, 14, 13, +}; + +static const uint8_t res_mono_p2_0[] = { + 2, 5, 4, 5, 6, 6, 5, 6, 6, 5, 6, 6, 7, 8, 8, + 6, 8, 8, 5, 6, 6, 6, 8, 8, 7, 8, 8, 5, 7, 6, + 7, 8, 8, 6, 8, 8, 7, 8, 8, 8, 9, 10, 8, 10, 10, + 6, 8, 8, 8, 10, 8, 8, 10, 10, 5, 6, 6, 6, 8, 8, 
+ 7, 8, 8, 6, 8, 8, 8, 10, 10, 8, 8, 10, 7, 8, 8, + 8, 10, 10, 8, 10, 9, +}; + +static const uint8_t res_mono_p3_0[] = { + 2, 4, 4, 7, 7, 5, 7, 7, 9, 9, 5, 7, 7, 9, 9, + 8, 9, 9, 12, 12, 8, 9, 9, 11, 12, 5, 7, 7, 10, 10, + 7, 9, 9, 11, 11, 7, 9, 9, 10, 11, 10, 11, 11, 13, 13, + 9, 10, 11, 13, 13, 5, 7, 7, 10, 10, 7, 9, 9, 11, 10, + 7, 9, 9, 11, 11, 9, 11, 10, 13, 13, 10, 11, 11, 14, 13, + 8, 10, 10, 14, 13, 10, 11, 11, 15, 14, 9, 11, 11, 14, 14, +13, 14, 13, 16, 16, 12, 13, 13, 15, 15, 8, 10, 10, 13, 14, + 9, 11, 11, 14, 14, 10, 11, 11, 14, 15, 12, 13, 13, 15, 15, +13, 14, 14, 15, 16, 5, 7, 7, 10, 10, 7, 9, 9, 11, 11, + 7, 9, 9, 11, 12, 10, 11, 11, 14, 14, 10, 11, 11, 14, 14, + 7, 9, 9, 12, 12, 9, 11, 11, 13, 13, 9, 11, 11, 13, 13, +12, 12, 13, 15, 15, 11, 12, 13, 15, 16, 7, 9, 9, 11, 11, + 8, 11, 10, 13, 12, 9, 11, 11, 13, 13, 11, 13, 12, 15, 13, +11, 13, 13, 15, 16, 9, 12, 11, 15, 14, 11, 12, 13, 16, 15, +11, 13, 13, 15, 16, 14, 14, 15, 17, 16, 13, 15, 16, 0, 17, + 9, 11, 11, 15, 15, 10, 13, 12, 15, 15, 11, 13, 13, 15, 16, +13, 15, 13, 16, 15, 14, 16, 15, 0, 19, 5, 7, 7, 10, 10, + 7, 9, 9, 11, 11, 7, 9, 9, 11, 11, 10, 12, 11, 14, 14, +10, 11, 12, 14, 14, 7, 9, 9, 12, 12, 9, 11, 11, 14, 13, + 9, 10, 11, 12, 13, 11, 13, 13, 16, 16, 11, 12, 13, 13, 16, + 7, 9, 9, 12, 12, 9, 11, 11, 13, 13, 9, 11, 11, 13, 13, +11, 13, 13, 15, 15, 12, 13, 12, 15, 14, 9, 11, 11, 15, 14, +11, 13, 12, 16, 16, 10, 12, 12, 15, 15, 13, 15, 15, 17, 19, +13, 14, 15, 16, 17, 10, 12, 12, 15, 15, 11, 13, 13, 16, 16, +11, 13, 13, 15, 16, 13, 15, 15, 0, 0, 14, 15, 15, 16, 16, + 8, 10, 10, 14, 14, 10, 12, 12, 15, 15, 10, 12, 11, 15, 16, +14, 15, 15, 19, 20, 13, 14, 14, 18, 16, 9, 11, 11, 15, 15, +11, 13, 13, 17, 16, 11, 13, 13, 16, 16, 15, 17, 17, 20, 20, +14, 15, 16, 17, 20, 9, 11, 11, 15, 15, 10, 13, 12, 16, 15, +11, 13, 13, 15, 17, 14, 16, 15, 18, 0, 14, 16, 15, 18, 20, +12, 14, 14, 0, 0, 14, 14, 16, 0, 0, 13, 16, 15, 0, 0, +17, 17, 18, 0, 0, 16, 17, 19, 19, 0, 12, 14, 14, 18, 0, +12, 16, 14, 0, 
17, 13, 15, 15, 18, 0, 16, 18, 17, 0, 17, +16, 18, 17, 0, 0, 7, 10, 10, 14, 14, 10, 12, 11, 15, 15, +10, 12, 12, 16, 15, 13, 15, 15, 18, 0, 14, 15, 15, 17, 0, + 9, 11, 11, 15, 15, 11, 13, 13, 16, 16, 11, 12, 13, 16, 16, +14, 15, 16, 17, 17, 14, 16, 16, 16, 18, 9, 11, 12, 16, 16, +11, 13, 13, 17, 17, 11, 14, 13, 20, 17, 15, 16, 16, 19, 0, +15, 16, 17, 0
[FFmpeg-devel] [PATCH 5/6] avcodec/vorbisenc: Separate floor and residue configurations
The settings used for initializing the floor and residue codebooks are separated so that they aren't coupled for later changes for arbitrary channel configurations. Signed-off-by: Tyler Jones --- libavcodec/vorbis_enc_data.h | 112 ++--- libavcodec/vorbisenc.c | 167 --- 2 files changed, 166 insertions(+), 113 deletions(-) diff --git a/libavcodec/vorbis_enc_data.h b/libavcodec/vorbis_enc_data.h index eca43dfded..6f2b10feb9 100644 --- a/libavcodec/vorbis_enc_data.h +++ b/libavcodec/vorbis_enc_data.h @@ -23,6 +23,26 @@ #include +typedef const struct { +int dim; +int len; +int real_len; +const uint8_t *clens; +int lookup; +float min; +float delta; +const uint8_t *quant; +} codebook_setup; + +typedef const struct { +const int type; +const int end[2]; +const int classifications; +const int nbooks; +const codebook_setup *config; +const int8_t books[10][8]; +} res_setup; + static const uint8_t floor_128_c0[] = { 10, 7, 8, 13, 9, 6, 7, 11, 10, 8, 8, 12, 17, 17, 17, 17, 7, 5, 5, 9, 6, 4, 4, 8, 8, 5, 5, 8, 16, 14, @@ -219,7 +239,7 @@ static const uint8_t floor_1024_4sub3[] = { 11, 11, 10, 10, 10, 10, 10, 10, }; -static const uint8_t res_long_master[] = { +static const uint8_t res_stereo_long_master[] = { 5, 6, 11, 11, 11, 11, 10, 10, 12, 11, 5, 2, 11, 5, 6, 6, 7, 9, 11, 13, 13, 10, 7, 11, 6, 7, 8, 9, 10, 12, 11, 5, 11, 6, 8, 7, 9, 11, 14, 15, 11, 6, 6, 8, 4, @@ -229,7 +249,7 @@ static const uint8_t res_long_master[] = { 11, 13, 12, 15, 12, 11, 9, 8, 8, 8, }; -static const uint8_t res_short_master[] = { +static const uint8_t res_stereo_short_master[] = { 10, 9, 13, 11, 14, 10, 12, 13, 13, 14, 7, 2, 12, 5, 10, 5, 7, 10, 12, 14, 12, 6, 9, 8, 7, 7, 9, 11, 13, 16, 10, 4, 12, 5, 10, 6, 8, 12, 14, 16, 12, 6, 8, 7, 6, @@ -239,7 +259,7 @@ static const uint8_t res_short_master[] = { 15, 13, 11, 10, 6, 5, 6, 8, 9, 11, }; -static const uint8_t res_p1_0[] = { +static const uint8_t res_stereo_p1_0[] = { 2, 4, 4, 0, 0, 0, 0, 0, 0, 5, 6, 6, 0, 0, 0, 0, 0, 0, 5, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -352,7 +372,7 @@ static const uint8_t res_p1_0[] = { 0, 0, 0, 8, 9, 8, }; -static const uint8_t res_p2_0[] = { +static const uint8_t res_stereo_p2_0[] = { 2, 5, 5, 0, 0, 0, 5, 5, 0, 0, 0, 5, 5, 0, 0, 0, 7, 8, 0, 0, 0, 0, 0, 0, 0, 5, 6, 6, 0, 0, 0, 7, 7, 0, 0, 0, 7, 7, 0, 0, 0, 10, 10, 0, 0, @@ -385,7 +405,7 @@ static const uint8_t res_p2_0[] = { 0, 9, 9, 0, 0, 0, 10, 10, }; -static const uint8_t res_p3_0[] = { +static const uint8_t res_stereo_p3_0[] = { 2, 4, 3, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 6, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -395,7 +415,7 @@ static const uint8_t res_p3_0[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 7, 9, 9, }; -static const uint8_t res_p4_0[] = { +static const uint8_t res_stereo_p4_0[] = { 2, 3, 3, 6, 6, 0, 0, 0, 0, 0, 4, 4, 6, 6, 0, 0, 0, 0, 0, 4, 4, 6, 6, 0, 0, 0, 0, 0, 5, 5, 6, 6, 0, 0, 0, 0, 0, 0, 0, 6, 6, 0, 0, 0, 0, @@ -403,7 +423,7 @@ static const uint8_t res_p4_0[] = { 0, 0, 0, 0, 0, 0, 9, 9, }; -static const uint8_t res_p5_0[] = { +static const uint8_t res_stereo_p5_0[] = { 1, 3, 4, 6, 6, 7, 7, 9, 9, 0, 5, 5, 7, 7, 7, 8, 9, 9, 0, 5, 5, 7, 7, 8, 8, 9, 9, 0, 7, 7, 8, 8, 8, 8, 10, 10, 0, 0, 0, 8, 8, 8, 8, 10, 10, @@ -412,7 +432,7 @@ static const uint8_t res_p5_0[] = { 0, 0, 10, 10, 11, 11, }; -static const uint8_t res_p6_0[] = { +static const uint8_t res_stereo_p6_0[] = { 2, 3, 3, 6, 6, 7, 7, 8, 8, 8, 8, 9, 9, 10, 10, 11, 10, 0, 5, 5, 7, 7, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 0, 5, 5, 7, 7, 8, 8, 9, 9, 9, 9, @@ -435,7 +455,7 @@ static const uint8_t res_p6_0[] = { 13, 13, 13, 13, }; -static const uint8_t res_p7_0[] = { +static const uint8_t res_stereo_p7_0[] = { 1, 4, 4, 7, 6, 6, 7, 6, 6, 4, 7, 7, 10, 9, 9, 11, 9, 9, 4, 7, 7, 10, 9, 9, 11, 9, 9, 7, 10, 10, 11, 11, 10, 12, 11, 11, 6, 9, 9, 11, 10, 10, 11, 10, 10, @@ -444,7 +464,7 @@ static const uint8_t res_p7_0[] = { 11, 10, 10, 11, 10, 10, }; -static const uint8_t res_p7_1[] 
= { +static const uint8_t res_stereo_p7_1[] = { 2, 4, 4, 6, 6, 7, 7, 7, 7, 8, 8, 10, 5, 5, 6, 6, 7, 7, 8, 8, 8, 8, 10, 5, 5, 6, 6, 7, 7, 8, 8, 8, 8, 10, 6, 6, 7, 7, 8, 8, 8, 8, 8,
[FFmpeg-devel] [PATCH 3/6] avcodec/vorbisenc: Add clipping avoidance
Clipping is avoided by taking the maximum value of each frame before window application, and scaling down the entire frame by a scalar factor. Signed-off-by: Tyler Jones --- libavcodec/vorbisenc.c | 8 libavcodec/vorbispsy.c | 17 + libavcodec/vorbispsy.h | 10 ++ 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c index c968956794..73182c6356 100644 --- a/libavcodec/vorbisenc.c +++ b/libavcodec/vorbisenc.c @@ -1037,10 +1037,10 @@ static int residue_encode(vorbis_enc_context *venc, vorbis_enc_residue *rc, * See Vorbis I spec Fig. 2, 3 for examples. */ static void apply_window(vorbis_enc_context *venc, const int *blockflags, - float *out, float* in) + float *out, float* in, const float clip_factor) { int prev_size, curr_size, next_size, bound; -float scale = 1.0f / (float) (1 << (venc->log2_blocksize[blockflags[1]] - 2)); +float scale = clip_factor / (float) (1 << (venc->log2_blocksize[blockflags[1]] - 2)); const float *prev_win, *next_win; AVFloatDSPContext *fdsp = venc->fdsp; @@ -1098,9 +1098,9 @@ static int apply_window_and_mdct(vorbis_enc_context *venc, int next_type) for (channel = 0; channel < venc->channels; channel++) { float *out = venc->scratch; float *in = venc->samples + channel * 2 * long_len + transient_offset; +float clip_factor = ff_psy_vorbis_avoid_clip(in, curr_len, curr_type); -apply_window(venc, blockflags, out, in); - +apply_window(venc, blockflags, out, in, clip_factor); venc->mdct[curr_type].mdct_calc(&venc->mdct[curr_type], venc->coeffs + channel * curr_len, out); } diff --git a/libavcodec/vorbispsy.c b/libavcodec/vorbispsy.c index ab2d41f62f..56e23dea5e 100644 --- a/libavcodec/vorbispsy.c +++ b/libavcodec/vorbispsy.c @@ -140,6 +140,23 @@ int ff_psy_vorbis_block_frame(VorbisPsyContext *vpctx, float *audio, return block_flag; } +float ff_psy_vorbis_avoid_clip(float *audio, int window_len, int blockflag) +{ +int i; +float max = 0, clip = 1.0f; +/* Due to how the mdct scaling works in the 
vorbis encoder, short blocks are + * more likely to clip. This serves as more fine-grained control */ +const float avoidance_factor = blockflag ? 0.95f : 0.75f; + +for (i = 0; i < window_len; i++) +max = FFMAX(max, fabsf(audio[i])); + +if (max > avoidance_factor) +clip = avoidance_factor / max; + +return clip; +} + av_cold void ff_psy_vorbis_close(VorbisPsyContext *vpctx) { av_freep(&vpctx->filter_delay); diff --git a/libavcodec/vorbispsy.h b/libavcodec/vorbispsy.h index 93a03fd8ca..e632e8ad1d 100644 --- a/libavcodec/vorbispsy.h +++ b/libavcodec/vorbispsy.h @@ -75,6 +75,16 @@ av_cold int ff_psy_vorbis_init(VorbisPsyContext *vpctx, int sample_rate, */ int ff_psy_vorbis_block_frame(VorbisPsyContext *vpctx, float *audio, int ch, int frame_size, int block_size); + +/** + * Provide a scalar coefficient to avoid clipping. + * + * @param audio Raw audio sample input for one channel + * @param window_len Chosen window length for the given frame + * @return Coefficient to be applied alongside the window function + */ +float ff_psy_vorbis_avoid_clip(float *audio, int window_len, int blockflag); + /** * Closes and frees the memory used by the psychoacoustic model */ -- 2.14.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH 0/6] avcodec: Vorbis encoder improvements
Please see the following patches. The first two patches are identical to my latest pending changes sent to the mailing list on July 28. Changelogs are included regardless. The clipping avoidance patch should remove most clicking that had been noticed by atomnuker in previous versions. Thanks in advance. Tyler Jones (6): avcodec/vorbisenc: Add pre-echo detection V4: Use AVFloatDSPContext for variance calculation Correctly change quality factors to const Remove unnecessary malloc and free for VorbisPsyContext V3: Use normal float notation Don't check before freeing NULL pointers Remove unnecessary includes V2: Provide proper prefix for non-static function avcodec/vorbisenc: Apply dynamic frame lengths V4: No changes V3: Switch 'bits[p] == 0' to '!bits[p]' in vlc gen V2: Fix double arithmetic in window scale avcodec/vorbisenc: Add clipping avoidance avcodec/vorbisenc: Move encoder structures and macros to separate file avcodec/vorbisenc: Separate floor and residue configurations avcodec/vorbisenc: Add support for mono streams libavcodec/Makefile | 2 +- libavcodec/vorbis.c | 10 +- libavcodec/vorbis_enc_data.h | 576 ++--- libavcodec/vorbisenc.c | 666 ++- libavcodec/vorbisenc.h | 145 ++ libavcodec/vorbispsy.c | 164 +++ libavcodec/vorbispsy.h | 92 ++ tests/fate/vorbis.mak| 2 +- 8 files changed, 1220 insertions(+), 437 deletions(-) create mode 100644 libavcodec/vorbisenc.h create mode 100644 libavcodec/vorbispsy.c create mode 100644 libavcodec/vorbispsy.h -- 2.14.1 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH V4 2/2] avcodec/vorbisenc: Apply dynamic frame lengths
Additional codebooks are added for shorter 128-sample frames. Changes in codeword generation are made to handle valid values of 0 that prepend some codebooks, otherwise books are classified incorrectly and cause unreadable streams. A second residue, floor, and mapping is created for short window lengths so that values are partitioned correctly for transient frames. Signed-off-by: Tyler Jones --- V4: No changes V3: Switch 'bits[p] == 0' to '!bits[p]' in vlc gen V2: Fix double arithmetic in window scale libavcodec/vorbis.c | 10 +- libavcodec/vorbis_enc_data.h | 289 +++-- libavcodec/vorbisenc.c | 424 ++- tests/fate/vorbis.mak| 2 +- 4 files changed, 454 insertions(+), 271 deletions(-) diff --git a/libavcodec/vorbis.c b/libavcodec/vorbis.c index 399020eec5..d8c4b006e7 100644 --- a/libavcodec/vorbis.c +++ b/libavcodec/vorbis.c @@ -59,7 +59,7 @@ int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, unsigned num) unsigned i, j, p, code; for (p = 0; (bits[p] == 0) && (p < num); ++p) -; +codes[p] = 0; if (p == num) return 0; @@ -78,9 +78,11 @@ int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, unsigned num) for (; p < num; ++p) { if (bits[p] > 32) - return AVERROR_INVALIDDATA; -if (bits[p] == 0) - continue; +return AVERROR_INVALIDDATA; +if (!bits[p]) { +codes[p] = 0; +continue; +} // find corresponding exit(node which the tree can grow further from) for (i = bits[p]; i > 0; --i) if (exit_at_level[i]) diff --git a/libavcodec/vorbis_enc_data.h b/libavcodec/vorbis_enc_data.h index a51aaec978..eca43dfded 100644 --- a/libavcodec/vorbis_enc_data.h +++ b/libavcodec/vorbis_enc_data.h @@ -23,15 +23,78 @@ #include -static const uint8_t codebook0[] = { +static const uint8_t floor_128_c0[] = { +10, 7, 8, 13, 9, 6, 7, 11, 10, 8, 8, 12, 17, 17, 17, +17, 7, 5, 5, 9, 6, 4, 4, 8, 8, 5, 5, 8, 16, 14, +13, 16, 7, 5, 5, 7, 6, 3, 3, 5, 8, 5, 4, 7, 14, +12, 12, 15, 10, 7, 8, 9, 7, 5, 5, 6, 9, 6, 5, 5, +15, 12, 9, 10, +}; + +static const uint8_t floor_128_c1[] = { + 8, 13, 17, 17, 8, 11, 17, 
17, 11, 13, 17, 17, 17, 17, 17, +17, 6, 10, 16, 17, 6, 10, 15, 17, 8, 10, 16, 17, 17, 17, +17, 17, 9, 13, 15, 17, 8, 11, 17, 17, 10, 12, 17, 17, 17, +17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, +17, 17, 17, 17, 6, 11, 15, 17, 7, 10, 15, 17, 8, 10, 17, +17, 17, 15, 17, 17, 4, 8, 13, 17, 4, 7, 13, 17, 6, 8, +15, 17, 16, 15, 17, 17, 6, 11, 15, 17, 6, 9, 13, 17, 8, +10, 17, 17, 15, 17, 17, 17, 16, 17, 17, 17, 12, 14, 15, 17, +13, 14, 15, 17, 17, 17, 17, 17, 5, 10, 14, 17, 5, 9, 14, +17, 7, 9, 15, 17, 15, 15, 17, 17, 3, 7, 12, 17, 3, 6, +11, 17, 5, 7, 13, 17, 12, 12, 17, 17, 5, 9, 14, 17, 3, + 7, 11, 17, 5, 8, 13, 17, 13, 11, 16, 17, 12, 17, 17, 17, + 9, 14, 15, 17, 10, 11, 14, 17, 16, 14, 17, 17, 8, 12, 17, +17, 8, 12, 17, 17, 10, 12, 17, 17, 17, 17, 17, 17, 5, 10, +17, 17, 5, 9, 15, 17, 7, 9, 17, 17, 13, 13, 17, 17, 7, +11, 17, 17, 6, 10, 15, 17, 7, 9, 15, 17, 12, 11, 17, 17, +12, 15, 17, 17, 11, 14, 17, 17, 11, 10, 15, 17, 17, 16, 17, +17, +}; + +static const uint8_t floor_128_0sub1[] = { + 0, 3, 3, 3, 3, 3, 3, 3, 3, +}; + +static const uint8_t floor_128_0sub2[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 4, 4, 4, + 4, 5, 4, 5, 4, 5, 4, 6, 4, 6, +}; + +static const uint8_t floor_128_0sub3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 3, 5, 3, + 5, 4, 5, 4, 5, 5, 5, 5, 6, 5, 6, 5, 6, 5, 6, + 5, 6, 5, 7, 8, 9, 11, 13, 13, 13, 13, 13, 13, 13, 13, +13, 13, 13, 13, +}; + +static const uint8_t floor_128_1sub1[] = { + 0, 3, 3, 2, 3, 3, 4, 3, 4, +}; + +static const uint8_t floor_128_1sub2[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 3, 6, 3, 6, + 3, 6, 3, 7, 3, 8, 4, 9, 4, 9, +}; + +static const uint8_t floor_128_1sub3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 7, 2, 7, 3, + 8, 4, 9, 5, 9, 8, 10, 11, 11, 12, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +13, 13, 13, 13, +}; + +static const uint8_t floor_1024_c1[] = { 2, 10, 8, 14, 7, 12, 11, 
14, 1, 5, 3, 7, 4, 9, 7, 13, }; -static const uint8_t codebook1[] = { +static const uint8_t floor_1024_c2[] = { 1, 4, 2, 6, 3, 7, 5, 7, }; -static const uint8_t codebook2[] = { +static const uint8_t floor_1024_c3[] = { 1, 5, 7, 21, 5, 8, 9, 21, 10,
[FFmpeg-devel] [PATCH V4 1/2] avcodec/vorbisenc: Add pre-echo detection
The encoder will attempt to determine the existence of transient signals by applying a 4th order highpass filter to remove dominant low frequency waveforms. Frames are then split up into blocks where the variance is calculated and compared with blocks from the previous frame. A preecho is only likely to be noticeable when relatively quiet audio is followed by a loud transient signal. Signed-off-by: Tyler Jones --- V4: Use AVFloatDSPContext for variance calculation Correctly change quality factors to const Remove unnecessary malloc and free for VorbisPsyContext V3: Use normal float notation Don't check before freeing NULL pointers Remove unnecessary includes V2: Provide proper prefix for non-static function libavcodec/Makefile| 2 +- libavcodec/vorbisenc.c | 27 +++-- libavcodec/vorbispsy.c | 147 + libavcodec/vorbispsy.h | 82 +++ 4 files changed, 253 insertions(+), 5 deletions(-) create mode 100644 libavcodec/vorbispsy.c create mode 100644 libavcodec/vorbispsy.h diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 357fa1a361..08acbc723e 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -611,7 +611,7 @@ OBJS-$(CONFIG_VMNC_DECODER)+= vmnc.o OBJS-$(CONFIG_VORBIS_DECODER) += vorbisdec.o vorbisdsp.o vorbis.o \ vorbis_data.o OBJS-$(CONFIG_VORBIS_ENCODER) += vorbisenc.o vorbis.o \ - vorbis_data.o + vorbis_data.o vorbispsy.o OBJS-$(CONFIG_VP3_DECODER) += vp3.o OBJS-$(CONFIG_VP5_DECODER) += vp5.o vp56.o vp56data.o vp56rac.o OBJS-$(CONFIG_VP6_DECODER) += vp6.o vp56.o vp56data.o \ diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c index bf21a3b1ff..6da5f012c2 100644 --- a/libavcodec/vorbisenc.c +++ b/libavcodec/vorbisenc.c @@ -33,6 +33,7 @@ #include "mathops.h" #include "vorbis.h" #include "vorbis_enc_data.h" +#include "vorbispsy.h" #include "audio_frame_queue.h" #include "libavfilter/bufferqueue.h" @@ -136,6 +137,7 @@ typedef struct vorbis_enc_context { int64_t next_pts; AVFloatDSPContext *fdsp; +VorbisPsyContext vpctx; } vorbis_enc_context; 
#define MAX_CHANNELS 2 @@ -272,11 +274,12 @@ static int create_vorbis_context(vorbis_enc_context *venc, vorbis_enc_floor *fc; vorbis_enc_residue *rc; vorbis_enc_mapping *mc; -int i, book, ret; +int i, book, ret, blocks; venc->channels= avctx->channels; venc->sample_rate = avctx->sample_rate; -venc->log2_blocksize[0] = venc->log2_blocksize[1] = 11; +venc->log2_blocksize[0] = 8; +venc->log2_blocksize[1] = 11; venc->ncodebooks = FF_ARRAY_ELEMS(cvectors); venc->codebooks = av_malloc(sizeof(vorbis_enc_codebook) * venc->ncodebooks); @@ -464,6 +467,11 @@ static int create_vorbis_context(vorbis_enc_context *venc, if ((ret = dsp_init(avctx, venc)) < 0) return ret; +blocks = 1 << (venc->log2_blocksize[1] - venc->log2_blocksize[0]); +if ((ret = ff_psy_vorbis_init(&venc->vpctx, venc->sample_rate, + venc->channels, blocks, venc->fdsp)) < 0) +return ret; + return 0; } @@ -1078,15 +1086,17 @@ static void move_audio(vorbis_enc_context *venc, int sf_size) av_frame_free(&cur); } venc->have_saved = 1; -memcpy(venc->scratch, venc->samples, 2 * venc->channels * frame_size); +memcpy(venc->scratch, venc->samples, sizeof(float) * venc->channels * 2 * frame_size); } static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr) { vorbis_enc_context *venc = avctx->priv_data; -int i, ret, need_more; +int i, ret, need_more, ch; +int curr_win = 1; int frame_size = 1 << (venc->log2_blocksize[1] - 1); +int block_size = 1 << (venc->log2_blocksize[0] - 1); vorbis_enc_mode *mode; vorbis_enc_mapping *mapping; PutBitContext pb; @@ -1121,6 +1131,14 @@ static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, move_audio(venc, avctx->frame_size); +for (ch = 0; ch < venc->channels; ch++) { +float *scratch = venc->scratch + 2 * ch * frame_size + frame_size; + +if (!ff_psy_vorbis_block_frame(&venc->vpctx, scratch, ch, + frame_size, block_size)) +curr_win = 0; +} + if (!apply_window_and_mdct(venc)) return 0; @@ -1252,6 +1270,7 @@ static av_cold 
int vorbis_encode_close(AVCodecContext *avctx) ff_mdct_end(&venc->mdct[1]); ff_af_queue_close(&ven
Re: [FFmpeg-devel] [PATCH V3 1/2] avcodec/vorbisenc: Add pre-echo detection
> > --- a/libavcodec/vorbisenc.c > > +++ b/libavcodec/vorbisenc.c > > @@ -33,6 +33,7 @@ > > #include "mathops.h" > > #include "vorbis.h" > > #include "vorbis_enc_data.h" > > +#include "vorbispsy.h" > > > > #include "audio_frame_queue.h" > > #include "libavfilter/bufferqueue.h" > > @@ -136,6 +137,7 @@ typedef struct vorbis_enc_context { > > int64_t next_pts; > > > > AVFloatDSPContext *fdsp; > > +VorbisPsyContext *vpctx; > > Why a pointer? I don't see the benefit. It means an unnecessary malloc > and free call. You're probably right. It's changed now. > > @@ -1252,6 +1270,7 @@ static av_cold int vorbis_encode_close(AVCodecContext > > *avctx) > > ff_mdct_end(&venc->mdct[1]); > > ff_af_queue_close(&venc->afq); > > ff_bufqueue_discard_all(&venc->bufqueue); > > +ff_psy_vorbis_close(venc->vpctx); > > You should pass a pointer to venc->vpctx instead, regardless of what you > do with the comment above. I'm not sure I understand what you mean. It is passing a pointer to a VorbisPsyContext, please see the prototype: av_cold void ff_psy_vorbis_close(VorbisPsyContext *vpctx); > > +/** > > + * Calculate the variance of a block of samples > > + * > > + * @param in Array of input samples > > + * @param length Number of input samples being analyzed > > + * @return The variance for the current block > > + */ > > +static float variance(const float *in, int length) > > +{ > > +int i; > > +float mean = 0.0f, square_sum = 0.0f; > > + > > +for (i = 0; i < length; i++) { > > +mean += in[i]; > > +square_sum += in[i] * in[i]; > > Can't you use AVFloatDSPContext's scalarproduct_float for square_sum? > The constrains are lax. 16 byte alignment for in and length a multiple > of 4. You can pad the buffer if needed to achieve that. You are correct, it is switched over now. 
> > +} > > + > > +mean /= length; > > +return (square_sum - length * mean * mean) / (length - 1); > > +} > > + > > +av_cold int ff_psy_vorbis_init(VorbisPsyContext *vpctx, int sample_rate, > > + int channels, int blocks) > > +{ > > +int crit_freq; > > +float Q[2] = {.54, 1.31}; // Quality values for maximally flat > > cascaded filters > > const float Q[2] Fixed. Thank you for catching these mistakes and providing suggestions. A new version of this patch will be sent soon. Thanks again, Tyler Jones signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH V3 2/2] avcodec/vorbisenc: Apply dynamic frame lengths
Additional codebooks are added for shorter 128-sample frames. Changes in codeword generation are made to handle valid values of 0 that prepend some codebooks, otherwise books are classified incorrectly and cause unreadable streams. A second residue, floor, and mapping is created for short window lengths so that values are partitioned correctly for transient frames. Signed-off-by: Tyler Jones --- V3: Switch 'bits[p] == 0' to '!bits[p]' in vlc gen V2: Fix double arithmetic in window scale libavcodec/vorbis.c | 10 +- libavcodec/vorbis_enc_data.h | 289 +++-- libavcodec/vorbisenc.c | 422 ++- tests/fate/vorbis.mak| 2 +- 4 files changed, 453 insertions(+), 270 deletions(-) diff --git a/libavcodec/vorbis.c b/libavcodec/vorbis.c index 399020eec5..d8c4b006e7 100644 --- a/libavcodec/vorbis.c +++ b/libavcodec/vorbis.c @@ -59,7 +59,7 @@ int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, unsigned num) unsigned i, j, p, code; for (p = 0; (bits[p] == 0) && (p < num); ++p) -; +codes[p] = 0; if (p == num) return 0; @@ -78,9 +78,11 @@ int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, unsigned num) for (; p < num; ++p) { if (bits[p] > 32) - return AVERROR_INVALIDDATA; -if (bits[p] == 0) - continue; +return AVERROR_INVALIDDATA; +if (!bits[p]) { +codes[p] = 0; +continue; +} // find corresponding exit(node which the tree can grow further from) for (i = bits[p]; i > 0; --i) if (exit_at_level[i]) diff --git a/libavcodec/vorbis_enc_data.h b/libavcodec/vorbis_enc_data.h index a51aaec978..eca43dfded 100644 --- a/libavcodec/vorbis_enc_data.h +++ b/libavcodec/vorbis_enc_data.h @@ -23,15 +23,78 @@ #include -static const uint8_t codebook0[] = { +static const uint8_t floor_128_c0[] = { +10, 7, 8, 13, 9, 6, 7, 11, 10, 8, 8, 12, 17, 17, 17, +17, 7, 5, 5, 9, 6, 4, 4, 8, 8, 5, 5, 8, 16, 14, +13, 16, 7, 5, 5, 7, 6, 3, 3, 5, 8, 5, 4, 7, 14, +12, 12, 15, 10, 7, 8, 9, 7, 5, 5, 6, 9, 6, 5, 5, +15, 12, 9, 10, +}; + +static const uint8_t floor_128_c1[] = { + 8, 13, 17, 17, 8, 11, 17, 17, 11, 13, 17, 
17, 17, 17, 17, +17, 6, 10, 16, 17, 6, 10, 15, 17, 8, 10, 16, 17, 17, 17, +17, 17, 9, 13, 15, 17, 8, 11, 17, 17, 10, 12, 17, 17, 17, +17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, +17, 17, 17, 17, 6, 11, 15, 17, 7, 10, 15, 17, 8, 10, 17, +17, 17, 15, 17, 17, 4, 8, 13, 17, 4, 7, 13, 17, 6, 8, +15, 17, 16, 15, 17, 17, 6, 11, 15, 17, 6, 9, 13, 17, 8, +10, 17, 17, 15, 17, 17, 17, 16, 17, 17, 17, 12, 14, 15, 17, +13, 14, 15, 17, 17, 17, 17, 17, 5, 10, 14, 17, 5, 9, 14, +17, 7, 9, 15, 17, 15, 15, 17, 17, 3, 7, 12, 17, 3, 6, +11, 17, 5, 7, 13, 17, 12, 12, 17, 17, 5, 9, 14, 17, 3, + 7, 11, 17, 5, 8, 13, 17, 13, 11, 16, 17, 12, 17, 17, 17, + 9, 14, 15, 17, 10, 11, 14, 17, 16, 14, 17, 17, 8, 12, 17, +17, 8, 12, 17, 17, 10, 12, 17, 17, 17, 17, 17, 17, 5, 10, +17, 17, 5, 9, 15, 17, 7, 9, 17, 17, 13, 13, 17, 17, 7, +11, 17, 17, 6, 10, 15, 17, 7, 9, 15, 17, 12, 11, 17, 17, +12, 15, 17, 17, 11, 14, 17, 17, 11, 10, 15, 17, 17, 16, 17, +17, +}; + +static const uint8_t floor_128_0sub1[] = { + 0, 3, 3, 3, 3, 3, 3, 3, 3, +}; + +static const uint8_t floor_128_0sub2[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 4, 4, 4, + 4, 5, 4, 5, 4, 5, 4, 6, 4, 6, +}; + +static const uint8_t floor_128_0sub3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 3, 5, 3, + 5, 4, 5, 4, 5, 5, 5, 5, 6, 5, 6, 5, 6, 5, 6, + 5, 6, 5, 7, 8, 9, 11, 13, 13, 13, 13, 13, 13, 13, 13, +13, 13, 13, 13, +}; + +static const uint8_t floor_128_1sub1[] = { + 0, 3, 3, 2, 3, 3, 4, 3, 4, +}; + +static const uint8_t floor_128_1sub2[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 3, 6, 3, 6, + 3, 6, 3, 7, 3, 8, 4, 9, 4, 9, +}; + +static const uint8_t floor_128_1sub3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 7, 2, 7, 3, + 8, 4, 9, 5, 9, 8, 10, 11, 11, 12, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +13, 13, 13, 13, +}; + +static const uint8_t floor_1024_c1[] = { 2, 10, 8, 14, 7, 12, 11, 14, 1, 5, 3, 7, 
4, 9, 7, 13, }; -static const uint8_t codebook1[] = { +static const uint8_t floor_1024_c2[] = { 1, 4, 2, 6, 3, 7, 5, 7, }; -static const uint8_t codebook2[] = { +static const uint8_t floor_1024_c3[] = { 1, 5, 7, 21, 5, 8, 9, 21, 10, 9, 12, 20, 20,
[FFmpeg-devel] [PATCH V3 1/2] avcodec/vorbisenc: Add pre-echo detection
The encoder will attempt to determine the existence of transient signals by applying a 4th order highpass filter to remove dominant low frequency waveforms. Frames are then split up into blocks where the variance is calculated and compared with blocks from the previous frame. A preecho is only likely to be noticeable when relatively quiet audio is followed by a loud transient signal. Signed-off-by: Tyler Jones --- V3: Use normal float notation Don't check before freeing NULL pointers Remove unnecessary includes V2: Provide proper prefix for non-static function libavcodec/Makefile| 2 +- libavcodec/vorbisenc.c | 27 +++-- libavcodec/vorbispsy.c | 148 + libavcodec/vorbispsy.h | 79 ++ 4 files changed, 251 insertions(+), 5 deletions(-) create mode 100644 libavcodec/vorbispsy.c create mode 100644 libavcodec/vorbispsy.h diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 357fa1a361..08acbc723e 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -611,7 +611,7 @@ OBJS-$(CONFIG_VMNC_DECODER)+= vmnc.o OBJS-$(CONFIG_VORBIS_DECODER) += vorbisdec.o vorbisdsp.o vorbis.o \ vorbis_data.o OBJS-$(CONFIG_VORBIS_ENCODER) += vorbisenc.o vorbis.o \ - vorbis_data.o + vorbis_data.o vorbispsy.o OBJS-$(CONFIG_VP3_DECODER) += vp3.o OBJS-$(CONFIG_VP5_DECODER) += vp5.o vp56.o vp56data.o vp56rac.o OBJS-$(CONFIG_VP6_DECODER) += vp6.o vp56.o vp56data.o \ diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c index bf21a3b1ff..1330b1b376 100644 --- a/libavcodec/vorbisenc.c +++ b/libavcodec/vorbisenc.c @@ -33,6 +33,7 @@ #include "mathops.h" #include "vorbis.h" #include "vorbis_enc_data.h" +#include "vorbispsy.h" #include "audio_frame_queue.h" #include "libavfilter/bufferqueue.h" @@ -136,6 +137,7 @@ typedef struct vorbis_enc_context { int64_t next_pts; AVFloatDSPContext *fdsp; +VorbisPsyContext *vpctx; } vorbis_enc_context; #define MAX_CHANNELS 2 @@ -272,11 +274,12 @@ static int create_vorbis_context(vorbis_enc_context *venc, vorbis_enc_floor *fc; vorbis_enc_residue *rc; 
vorbis_enc_mapping *mc; -int i, book, ret; +int i, book, ret, blocks; venc->channels= avctx->channels; venc->sample_rate = avctx->sample_rate; -venc->log2_blocksize[0] = venc->log2_blocksize[1] = 11; +venc->log2_blocksize[0] = 8; +venc->log2_blocksize[1] = 11; venc->ncodebooks = FF_ARRAY_ELEMS(cvectors); venc->codebooks = av_malloc(sizeof(vorbis_enc_codebook) * venc->ncodebooks); @@ -464,6 +467,12 @@ static int create_vorbis_context(vorbis_enc_context *venc, if ((ret = dsp_init(avctx, venc)) < 0) return ret; +blocks = 1 << (venc->log2_blocksize[1] - venc->log2_blocksize[0]); +venc->vpctx = av_mallocz(sizeof(VorbisPsyContext)); +if (!venc->vpctx || (ret = ff_psy_vorbis_init(venc->vpctx, venc->sample_rate, + venc->channels, blocks)) < 0) +return AVERROR(ENOMEM); + return 0; } @@ -1078,15 +1087,17 @@ static void move_audio(vorbis_enc_context *venc, int sf_size) av_frame_free(&cur); } venc->have_saved = 1; -memcpy(venc->scratch, venc->samples, 2 * venc->channels * frame_size); +memcpy(venc->scratch, venc->samples, sizeof(float) * venc->channels * 2 * frame_size); } static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr) { vorbis_enc_context *venc = avctx->priv_data; -int i, ret, need_more; +int i, ret, need_more, ch; +int curr_win = 1; int frame_size = 1 << (venc->log2_blocksize[1] - 1); +int block_size = 1 << (venc->log2_blocksize[0] - 1); vorbis_enc_mode *mode; vorbis_enc_mapping *mapping; PutBitContext pb; @@ -1121,6 +1132,13 @@ static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, move_audio(venc, avctx->frame_size); +for (ch = 0; ch < venc->channels; ch++) { +float *scratch = venc->scratch + 2 * ch * frame_size + frame_size; + +if (!ff_psy_vorbis_block_frame(venc->vpctx, scratch, ch, frame_size, block_size)) +curr_win = 0; +} + if (!apply_window_and_mdct(venc)) return 0; @@ -1252,6 +1270,7 @@ static av_cold int vorbis_encode_close(AVCodecContext *avctx) ff_mdct_end(&venc->mdct[1]); 
ff_af_queue_close(&venc->afq); ff_bufqueue_discard_all(&venc->bufqueue); +ff_psy_vorbis_close(venc-&g
Re: [FFmpeg-devel] [PATCH V2 1/2] avcodec/vorbisenc: Add pre-echo detection
On Wed, Jul 26, 2017 at 12:51:31AM +0100, Rostislav Pehlivanov wrote: > On 17 July 2017 at 16:17, Tyler Jones wrote: > > > +float last_var; > > +const float eps = 1e-4; > > > > Use normal notation for floats and add an f at the end to inform the > compiler the constant is a float. Fixed. > > +{ > > +if (vpctx) { > > +if (vpctx->filter_delay) > > +av_freep(&vpctx->filter_delay); > > + > > +if (vpctx->variance) > > +av_freep(&vpctx->variance); > > + > > > > You can free NULL pointers, no need to check. Fixed. > > +#ifndef AVCODEC_VORBISPSY_H > > +#define AVCODEC_VORBISPSY_H > > + > > +#include "libavutil/attributes.h" > > + > > +/** > > + * Second order IIR Filter > > + */ > > +typedef struct IIRFilter { > > +float b[3]; ///< Normalized cofficients for numerator of transfer > > function > > +float a[3]; ///< Normalized coefficiets for denominator of transfer > > function > > +} IIRFilter; > > > > We already have an IIR filter (libavcodec/iirfilter.h), could you check it > out if it can be reused perhaps? This is where I had initially looked, and my issue was that it was not possible to generate a high-pass Butterworth or biquads that behaved like Butterworths when cascaded (controlling the quality factor). Manually initializing the structs and only using the filter function resulted in more complex code and more boilerplate than implementing it separately here. If necessary, I can switch to using iirfilter.h now, but I'd rather use this implementation temporarily and add the functionality needed in iirfilter.h in a separate patch if that is acceptable. I'd argue that this would result in cleaner code. From what I can see, psymodel.c is the only component that uses iirfilter.h, and the biquad filter is unused, so it should be a less difficult change to make. > Apart from those patch looks good and should be ready to merge once those > nits get fixed. > I can hear a noticeable positive difference at low rates, good job. 
Thanks for catching these mistakes and taking a look. Thanks again, Tyler Jones signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH 2/2] avcodec/vorbisenc: Apply dynamic frame lengths
On Wed, Jul 26, 2017 at 01:01:00AM +0100, Rostislav Pehlivanov wrote: > On 12 July 2017 at 23:18, Tyler Jones wrote: > > > > > diff --git a/libavcodec/vorbis.c b/libavcodec/vorbis.c > > index 399020e..8befab8 100644 > > --- a/libavcodec/vorbis.c > > +++ b/libavcodec/vorbis.c > > @@ -59,7 +59,7 @@ int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, > > unsigned num) > > unsigned i, j, p, code; > > > > for (p = 0; (bits[p] == 0) && (p < num); ++p) > > -; > > +codes[p] = 0; > > if (p == num) > > return 0; > > > > @@ -78,9 +78,11 @@ int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, > > unsigned num) > > > > for (; p < num; ++p) { > > if (bits[p] > 32) > > - return AVERROR_INVALIDDATA; > > -if (bits[p] == 0) > > - continue; > > +return AVERROR_INVALIDDATA; > > +if (bits[p] == 0) { > > +codes[p] = 0; > > +continue; > > +} > > > > I prefer the if (!bits[p]) way of checking for 0. Most of the codebase does > so too. Agreed. I'll change this. > > diff --git a/libavcodec/vorbis_enc_data.h b/libavcodec/vorbis_enc_data.h > > index a51aaec..eca43df 100644 > > --- a/libavcodec/vorbis_enc_data.h > > +++ b/libavcodec/vorbis_enc_data.h > > @@ -23,15 +23,78 @@ > > > > #include > > > > > > > > Could you move the tables to vorbis_data.c and delete vorbis_enc_data.h? > Would be neater. My only hesitation is that these tables are only used in the encoder. I like the additional clarity from the name, but I can change this. > > Apart from those nits, patch would be fine for merging as well. > > It improves quality in some ways however it introduces some clicking which > I believe is due to the lack of stability. > I think you can improve this by tweaking the constants in the previous > patch and by reducing the fluctuations between > transient and non-transient frames. Its better to have a whole series of > transients rather than interrupting them with non-transients, and vice > versa. I'll see if I can introduce some sort of bias to discourage frequent switching. 
I appreciate you taking a look and providing feedback. Thanks as always, Tyler Jones signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH] avcodec/vorbisdec: Check for legal version, window and transform types
On Mon, Jul 24, 2017 at 02:54:01AM +0200, Carl Eugen Hoyos wrote: > 2017-07-24 2:46 GMT+02:00 Tyler Jones : > > On Mon, Jul 24, 2017 at 01:52:20AM +0200, Carl Eugen Hoyos wrote: > >> 2017-07-24 0:33 GMT+02:00 Tyler Jones : > >> > Vorbis I specification requires that the version number as well as the > >> > window and transform types in the setup header be equal to 0. > >> > > >> > Signed-off-by: Tyler Jones > >> > --- > >> > libavcodec/vorbisdec.c | 18 +++--- > >> > 1 file changed, 15 insertions(+), 3 deletions(-) > >> > > >> > diff --git a/libavcodec/vorbisdec.c b/libavcodec/vorbisdec.c > >> > index 2a4f482031..f9c3848c4e 100644 > >> > --- a/libavcodec/vorbisdec.c > >> > +++ b/libavcodec/vorbisdec.c > >> > @@ -898,8 +898,16 @@ static int > >> > vorbis_parse_setup_hdr_modes(vorbis_context *vc) > >> > vorbis_mode *mode_setup = &vc->modes[i]; > >> > > >> > mode_setup->blockflag = get_bits1(gb); > >> > -mode_setup->windowtype= get_bits(gb, 16); //FIXME check > >> > -mode_setup->transformtype = get_bits(gb, 16); //FIXME check > >> > +mode_setup->windowtype= get_bits(gb, 16); > >> > +if (mode_setup->windowtype) { > >> > +av_log(vc->avctx, AV_LOG_ERROR, "Invalid window type, > >> > must equal 0.\n"); > >> > +return AVERROR_INVALIDDATA; > >> > >> Does this fix anything? > >> > >> By default, FFmpeg decoders should not (and, more so, should not > >> suddenly start to) reject files that can be decoded without any > >> effort. > >> Or are such files already unplayable, the error message was > >> just missing? > >> > >> You can reject such files for strict conformance mode. > >> > >> Carl Eugen > > > > I'll defer to your judgement, but this is how the specifications defines it: > > > > (4.2.4 -- Modes) > > verify ranges; zero is the only legal value in Vorbis I for > > [vorbis_mode_windowtype] > > and [vorbis_mode_transformtype]. [vorbis_mode_mapping] must not be > > greater than the > > highest number mapping in use. Any illegal values render the stream > > undecodable. 
> > My mail was not meant to imply that the values you reject > are valid. My point was that the spec declares the stream undecodable, not to prove that they are invalid. I communicated that poorly. > > These values are unused in the decoder and otherwise ignored in the > > specification. > > I may misunderstand this but an unused or ignored value > should never be a reason to reject an input stream by > default. > > > What is even the value of storing these values beyond a temporary value? > > > I believed that it is important to check these values so that an encoder > > cannot produce > > a stream that comes out of sync and end up failing a later test anyways. > > I don't understand how this argument is related to a decoder > patch. > > In any case: Please add a check for > "avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT" > to make it possible to reject such "invalid" files without breaking > playback of such files for unexpecting users. I'll do that instead. > > In the interest of consistency, there are several identical cases where > > values > > have the potential to make the stream 'undecodable' even if they have no > > impact > > on the behavior of the decoder. In all of these other cases, the decoding > > quits > > immediately. Should these be reverted to only log an error message and not > > return error values? > > From a quick look at git log, these checks were not introduced lately or > am I wrong? You are correct, these cases have behaved the way they do for years. It was a genuine question however, would it be prefered to log these similar errors and quit decoding only when concerned about strict compliance? If so, I'll do that instead. I was just following the convention in the file and mistakenly believed it followed best practices, but I should've first checked against other decoders since it has been left alone for so long. 
Thanks, Tyler Jones signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH] avcodec/vorbisdec: Check for legal version, window and transform types
On Mon, Jul 24, 2017 at 01:52:20AM +0200, Carl Eugen Hoyos wrote: > 2017-07-24 0:33 GMT+02:00 Tyler Jones : > > Vorbis I specification requires that the version number as well as the > > window and transform types in the setup header be equal to 0. > > > > Signed-off-by: Tyler Jones > > --- > > libavcodec/vorbisdec.c | 18 +++--- > > 1 file changed, 15 insertions(+), 3 deletions(-) > > > > diff --git a/libavcodec/vorbisdec.c b/libavcodec/vorbisdec.c > > index 2a4f482031..f9c3848c4e 100644 > > --- a/libavcodec/vorbisdec.c > > +++ b/libavcodec/vorbisdec.c > > @@ -898,8 +898,16 @@ static int vorbis_parse_setup_hdr_modes(vorbis_context > > *vc) > > vorbis_mode *mode_setup = &vc->modes[i]; > > > > mode_setup->blockflag = get_bits1(gb); > > -mode_setup->windowtype= get_bits(gb, 16); //FIXME check > > -mode_setup->transformtype = get_bits(gb, 16); //FIXME check > > +mode_setup->windowtype= get_bits(gb, 16); > > +if (mode_setup->windowtype) { > > +av_log(vc->avctx, AV_LOG_ERROR, "Invalid window type, > > must equal 0.\n"); > > +return AVERROR_INVALIDDATA; > > Does this fix anything? > > By default, FFmpeg decoders should not (and, more so, should not > suddenly start to) reject files that can be decoded without any > effort. > Or are such files already unplayable, the error message was > just missing? > > You can reject such files for strict conformance mode. > > Carl Eugen I'll defer to your judgement, but this is how the specification defines it: (4.2.4 -- Modes) verify ranges; zero is the only legal value in Vorbis I for [vorbis_mode_windowtype] and [vorbis_mode_transformtype]. [vorbis_mode_mapping] must not be greater than the highest number mapping in use. Any illegal values render the stream undecodable. These values are unused in the decoder and otherwise ignored in the specification. What is even the value of storing these values beyond a temporary value? 
I believed that it is important to check these values so that an encoder cannot produce a stream that comes out of sync and ends up failing a later test anyway. In the interest of consistency, there are several identical cases where values have the potential to make the stream 'undecodable' even if they have no impact on the behavior of the decoder. In all of these other cases, the decoding quits immediately. Should these be reverted to only log an error message and not return error values? Thanks, Tyler Jones signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH] avcodec/vorbisdec: Check for legal version, window and transform types
Vorbis I specification requires that the version number as well as the window and transform types in the setup header be equal to 0. Signed-off-by: Tyler Jones --- libavcodec/vorbisdec.c | 18 +++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/libavcodec/vorbisdec.c b/libavcodec/vorbisdec.c index 2a4f482031..f9c3848c4e 100644 --- a/libavcodec/vorbisdec.c +++ b/libavcodec/vorbisdec.c @@ -898,8 +898,16 @@ static int vorbis_parse_setup_hdr_modes(vorbis_context *vc) vorbis_mode *mode_setup = &vc->modes[i]; mode_setup->blockflag = get_bits1(gb); -mode_setup->windowtype= get_bits(gb, 16); //FIXME check -mode_setup->transformtype = get_bits(gb, 16); //FIXME check +mode_setup->windowtype= get_bits(gb, 16); +if (mode_setup->windowtype) { +av_log(vc->avctx, AV_LOG_ERROR, "Invalid window type, must equal 0.\n"); +return AVERROR_INVALIDDATA; +} +mode_setup->transformtype = get_bits(gb, 16); +if (mode_setup->transformtype) { +av_log(vc->avctx, AV_LOG_ERROR, "Invalid transform type, must equal 0.\n"); +return AVERROR_INVALIDDATA; +} GET_VALIDATED_INDEX(mode_setup->mapping, 8, vc->mapping_count); ff_dlog(NULL, " %u mode: blockflag %d, windowtype %d, transformtype %d, mapping %d\n", @@ -969,7 +977,11 @@ static int vorbis_parse_id_hdr(vorbis_context *vc) return AVERROR_INVALIDDATA; } -vc->version= get_bits_long(gb, 32);//FIXME check 0 +vc->version= get_bits_long(gb, 32); +if (vc->version) { +av_log(vc->avctx, AV_LOG_ERROR, "Invalid version number\n"); +return AVERROR_INVALIDDATA; +} vc->audio_channels = get_bits(gb, 8); if (vc->audio_channels <= 0) { av_log(vc->avctx, AV_LOG_ERROR, "Invalid number of channels\n"); -- 2.13.3 signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH V2 1/2] avcodec/vorbisenc: Add pre-echo detection
On Mon, Jul 17, 2017 at 09:17:09AM -0600, Tyler Jones wrote: > The encoder will attempt to determine the existence of transient > signals by applying a 4th order highpass filter to remove dominant > low frequency waveforms. Frames are then split up into blocks > where the variance is calculated and compared with blocks from > the previous frame. A preecho is only likely to be noticeable when > relatively quiet audio is followed by a loud transient signal. > > Signed-off-by: Tyler Jones > --- > V2 - Properly prefix non-static functions with "ff_" > > libavcodec/Makefile| 2 +- > libavcodec/vorbisenc.c | 28 +++-- > libavcodec/vorbispsy.c | 153 > + > libavcodec/vorbispsy.h | 79 + > 4 files changed, 256 insertions(+), 6 deletions(-) Ping for set Tyler Jones signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH V2 1/2] avcodec/vorbisenc: Add pre-echo detection
The encoder will attempt to determine the existence of transient signals by applying a 4th order highpass filter to remove dominant low frequency waveforms. Frames are then split up into blocks where the variance is calculated and compared with blocks from the previous frame. A preecho is only likely to be noticeable when relatively quiet audio is followed by a loud transient signal. Signed-off-by: Tyler Jones --- V2 - Properly prefix non-static functions with "ff_" libavcodec/Makefile| 2 +- libavcodec/vorbisenc.c | 28 +++-- libavcodec/vorbispsy.c | 153 + libavcodec/vorbispsy.h | 79 + 4 files changed, 256 insertions(+), 6 deletions(-) create mode 100644 libavcodec/vorbispsy.c create mode 100644 libavcodec/vorbispsy.h diff --git a/libavcodec/Makefile b/libavcodec/Makefile index 59029a853c..8c2beb3315 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -610,7 +610,7 @@ OBJS-$(CONFIG_VMNC_DECODER)+= vmnc.o OBJS-$(CONFIG_VORBIS_DECODER) += vorbisdec.o vorbisdsp.o vorbis.o \ vorbis_data.o OBJS-$(CONFIG_VORBIS_ENCODER) += vorbisenc.o vorbis.o \ - vorbis_data.o + vorbis_data.o vorbispsy.o OBJS-$(CONFIG_VP3_DECODER) += vp3.o OBJS-$(CONFIG_VP5_DECODER) += vp5.o vp56.o vp56data.o vp56rac.o OBJS-$(CONFIG_VP6_DECODER) += vp6.o vp56.o vp56data.o \ diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c index bf21a3b1ff..5dc803aabb 100644 --- a/libavcodec/vorbisenc.c +++ b/libavcodec/vorbisenc.c @@ -33,6 +33,7 @@ #include "mathops.h" #include "vorbis.h" #include "vorbis_enc_data.h" +#include "vorbispsy.h" #include "audio_frame_queue.h" #include "libavfilter/bufferqueue.h" @@ -136,6 +137,7 @@ typedef struct vorbis_enc_context { int64_t next_pts; AVFloatDSPContext *fdsp; +VorbisPsyContext *vpctx; } vorbis_enc_context; #define MAX_CHANNELS 2 @@ -272,11 +274,12 @@ static int create_vorbis_context(vorbis_enc_context *venc, vorbis_enc_floor *fc; vorbis_enc_residue *rc; vorbis_enc_mapping *mc; -int i, book, ret; +int i, book, ret, blocks; venc->channels= avctx->channels; 
venc->sample_rate = avctx->sample_rate; -venc->log2_blocksize[0] = venc->log2_blocksize[1] = 11; +venc->log2_blocksize[0] = 8; +venc->log2_blocksize[1] = 11; venc->ncodebooks = FF_ARRAY_ELEMS(cvectors); venc->codebooks = av_malloc(sizeof(vorbis_enc_codebook) * venc->ncodebooks); @@ -464,6 +467,12 @@ static int create_vorbis_context(vorbis_enc_context *venc, if ((ret = dsp_init(avctx, venc)) < 0) return ret; +blocks = 1 << (venc->log2_blocksize[1] - venc->log2_blocksize[0]); +venc->vpctx = av_mallocz(sizeof(VorbisPsyContext)); +if (!venc->vpctx || (ret = ff_psy_vorbis_init(venc->vpctx, venc->sample_rate, + venc->channels, blocks)) < 0) +return AVERROR(ENOMEM); + return 0; } @@ -1071,22 +1080,23 @@ static void move_audio(vorbis_enc_context *venc, int sf_size) float *save = venc->saved + ch * frame_size; const float *input = (float *) cur->extended_data[ch]; const size_t len = cur->nb_samples * sizeof(float); - memcpy(offset + sf*sf_size, input, len); memcpy(save + sf*sf_size, input, len); // Move samples for next frame } av_frame_free(&cur); } venc->have_saved = 1; -memcpy(venc->scratch, venc->samples, 2 * venc->channels * frame_size); +memcpy(venc->scratch, venc->samples, sizeof(float) * venc->channels * 2 * frame_size); } static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr) { vorbis_enc_context *venc = avctx->priv_data; -int i, ret, need_more; +int i, ret, need_more, ch; +int curr_win = 1; int frame_size = 1 << (venc->log2_blocksize[1] - 1); +int block_size = 1 << (venc->log2_blocksize[0] - 1); vorbis_enc_mode *mode; vorbis_enc_mapping *mapping; PutBitContext pb; @@ -1121,6 +1131,13 @@ static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, move_audio(venc, avctx->frame_size); +for (ch = 0; ch < venc->channels; ch++) { +float *scratch = venc->scratch + 2 * ch * frame_size + frame_size; + +if (!ff_psy_vorbis_block_frame(venc->vpctx, scratch, ch, frame_size, block_size)) +curr_win = 0; +} + 
if (!apply_window_and_mdct(venc)) retur
[FFmpeg-devel] [PATCH V2 2/2] avcodec/vorbisenc: Apply dynamic frame lengths
Additional codebooks are added for shorter 128-sample frames. Changes in codeword generation are made to handle valid values of 0 that prepend some codebooks, otherwise books are classified incorrectly and cause unreadable streams. A second residue, floor, and mapping is created for short window lengths so that values are partitioned correctly for transient frames. Signed-off-by: Tyler Jones --- V2 -- Remove double arithmetic in window scale constant libavcodec/vorbis.c | 10 +- libavcodec/vorbis_enc_data.h | 289 +++-- libavcodec/vorbisenc.c | 422 ++- tests/fate/vorbis.mak| 2 +- 4 files changed, 453 insertions(+), 270 deletions(-) diff --git a/libavcodec/vorbis.c b/libavcodec/vorbis.c index 399020eec5..8befab8338 100644 --- a/libavcodec/vorbis.c +++ b/libavcodec/vorbis.c @@ -59,7 +59,7 @@ int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, unsigned num) unsigned i, j, p, code; for (p = 0; (bits[p] == 0) && (p < num); ++p) -; +codes[p] = 0; if (p == num) return 0; @@ -78,9 +78,11 @@ int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, unsigned num) for (; p < num; ++p) { if (bits[p] > 32) - return AVERROR_INVALIDDATA; -if (bits[p] == 0) - continue; +return AVERROR_INVALIDDATA; +if (bits[p] == 0) { +codes[p] = 0; +continue; +} // find corresponding exit(node which the tree can grow further from) for (i = bits[p]; i > 0; --i) if (exit_at_level[i]) diff --git a/libavcodec/vorbis_enc_data.h b/libavcodec/vorbis_enc_data.h index a51aaec978..eca43dfded 100644 --- a/libavcodec/vorbis_enc_data.h +++ b/libavcodec/vorbis_enc_data.h @@ -23,15 +23,78 @@ #include -static const uint8_t codebook0[] = { +static const uint8_t floor_128_c0[] = { +10, 7, 8, 13, 9, 6, 7, 11, 10, 8, 8, 12, 17, 17, 17, +17, 7, 5, 5, 9, 6, 4, 4, 8, 8, 5, 5, 8, 16, 14, +13, 16, 7, 5, 5, 7, 6, 3, 3, 5, 8, 5, 4, 7, 14, +12, 12, 15, 10, 7, 8, 9, 7, 5, 5, 6, 9, 6, 5, 5, +15, 12, 9, 10, +}; + +static const uint8_t floor_128_c1[] = { + 8, 13, 17, 17, 8, 11, 17, 17, 11, 13, 17, 17, 17, 17, 17, +17, 6, 10, 16, 
17, 6, 10, 15, 17, 8, 10, 16, 17, 17, 17, +17, 17, 9, 13, 15, 17, 8, 11, 17, 17, 10, 12, 17, 17, 17, +17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, +17, 17, 17, 17, 6, 11, 15, 17, 7, 10, 15, 17, 8, 10, 17, +17, 17, 15, 17, 17, 4, 8, 13, 17, 4, 7, 13, 17, 6, 8, +15, 17, 16, 15, 17, 17, 6, 11, 15, 17, 6, 9, 13, 17, 8, +10, 17, 17, 15, 17, 17, 17, 16, 17, 17, 17, 12, 14, 15, 17, +13, 14, 15, 17, 17, 17, 17, 17, 5, 10, 14, 17, 5, 9, 14, +17, 7, 9, 15, 17, 15, 15, 17, 17, 3, 7, 12, 17, 3, 6, +11, 17, 5, 7, 13, 17, 12, 12, 17, 17, 5, 9, 14, 17, 3, + 7, 11, 17, 5, 8, 13, 17, 13, 11, 16, 17, 12, 17, 17, 17, + 9, 14, 15, 17, 10, 11, 14, 17, 16, 14, 17, 17, 8, 12, 17, +17, 8, 12, 17, 17, 10, 12, 17, 17, 17, 17, 17, 17, 5, 10, +17, 17, 5, 9, 15, 17, 7, 9, 17, 17, 13, 13, 17, 17, 7, +11, 17, 17, 6, 10, 15, 17, 7, 9, 15, 17, 12, 11, 17, 17, +12, 15, 17, 17, 11, 14, 17, 17, 11, 10, 15, 17, 17, 16, 17, +17, +}; + +static const uint8_t floor_128_0sub1[] = { + 0, 3, 3, 3, 3, 3, 3, 3, 3, +}; + +static const uint8_t floor_128_0sub2[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 4, 4, 4, + 4, 5, 4, 5, 4, 5, 4, 6, 4, 6, +}; + +static const uint8_t floor_128_0sub3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 3, 5, 3, + 5, 4, 5, 4, 5, 5, 5, 5, 6, 5, 6, 5, 6, 5, 6, + 5, 6, 5, 7, 8, 9, 11, 13, 13, 13, 13, 13, 13, 13, 13, +13, 13, 13, 13, +}; + +static const uint8_t floor_128_1sub1[] = { + 0, 3, 3, 2, 3, 3, 4, 3, 4, +}; + +static const uint8_t floor_128_1sub2[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 3, 6, 3, 6, + 3, 6, 3, 7, 3, 8, 4, 9, 4, 9, +}; + +static const uint8_t floor_128_1sub3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 7, 2, 7, 3, + 8, 4, 9, 5, 9, 8, 10, 11, 11, 12, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +13, 13, 13, 13, +}; + +static const uint8_t floor_1024_c1[] = { 2, 10, 8, 14, 7, 12, 11, 14, 1, 5, 3, 7, 4, 9, 7, 13, }; -static const 
uint8_t codebook1[] = { +static const uint8_t floor_1024_c2[] = { 1, 4, 2, 6, 3, 7, 5, 7, }; -static const uint8_t codebook2[] = { +static const uint8_t floor_1024_c3[] = { 1, 5, 7, 21, 5, 8, 9, 21, 10, 9, 12, 20, 20, 16, 20, 20, 4, 8, 9, 20, 6, 8, 9, 20, 11
Re: [FFmpeg-devel] [PATCH 1/2] avcodec/vorbisenc: Add pre-echo detection
On Fri, Jul 14, 2017 at 12:43:26AM +0200, Michael Niedermayer wrote: > On Wed, Jul 12, 2017 at 04:18:06PM -0600, Tyler Jones wrote: > [...] > > > > +av_cold int psy_vorbis_init(VorbisPsyContext *vpctx, int sample_rate, > > +int channels, int blocks) > > non-static functions need a prefix > in this case ff_ > > > [...] > > -- > Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB > > Does the universe only have a finite lifespan? No, it's going to go on > forever, it's just that you won't like living in it. -- Hiranya Peiri Noted, thank you for noticing. This is changed for the next revision. Tyler Jones signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
Re: [FFmpeg-devel] [PATCH 2/2] avcodec/vorbisenc: Apply dynamic frame lengths
On Thu, Jul 13, 2017 at 03:50:13PM +0200, Moritz Barsnick wrote: > On Wed, Jul 12, 2017 at 16:18:20 -0600, Tyler Jones wrote: > > -int window_len = 1 << (venc->log2_blocksize[1] - 1); > > -float n = (float)(1 << venc->log2_blocksize[1]) / 4.0; > > +int prev_size, curr_size, next_size, bound; > > +float scale = 1. / (float) (1 << venc->log2_blocksize[blockflags[1]] - > > 2); > > The "1." is a double, which promotes the calculation to a double > precision operation, which is most likely not intended. Please restrict > it to a float operation by using "1.0f". (I realize the original code > had the same issue.) > > Moritz You're correct, I'll change that for the next revision. Thank you for catching my mistake. Tyler Jones signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH 2/2] avcodec/vorbisenc: Apply dynamic frame lengths
Additional codebooks are added for shorter 128-sample frames. Changes in codeword generation are made to handle valid values of 0 that prepend some codebooks, otherwise books are classified incorrectly and cause unreadable streams. A second residue, floor, and mapping is created for short window lengths so that values are partitioned correctly for transient frames. Signed-off-by: Tyler Jones --- libavcodec/vorbis.c | 10 +- libavcodec/vorbis_enc_data.h | 289 +++-- libavcodec/vorbisenc.c | 422 ++- tests/fate/vorbis.mak| 2 +- 4 files changed, 453 insertions(+), 270 deletions(-) diff --git a/libavcodec/vorbis.c b/libavcodec/vorbis.c index 399020e..8befab8 100644 --- a/libavcodec/vorbis.c +++ b/libavcodec/vorbis.c @@ -59,7 +59,7 @@ int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, unsigned num) unsigned i, j, p, code; for (p = 0; (bits[p] == 0) && (p < num); ++p) -; +codes[p] = 0; if (p == num) return 0; @@ -78,9 +78,11 @@ int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, unsigned num) for (; p < num; ++p) { if (bits[p] > 32) - return AVERROR_INVALIDDATA; -if (bits[p] == 0) - continue; +return AVERROR_INVALIDDATA; +if (bits[p] == 0) { +codes[p] = 0; +continue; +} // find corresponding exit(node which the tree can grow further from) for (i = bits[p]; i > 0; --i) if (exit_at_level[i]) diff --git a/libavcodec/vorbis_enc_data.h b/libavcodec/vorbis_enc_data.h index a51aaec..eca43df 100644 --- a/libavcodec/vorbis_enc_data.h +++ b/libavcodec/vorbis_enc_data.h @@ -23,15 +23,78 @@ #include -static const uint8_t codebook0[] = { +static const uint8_t floor_128_c0[] = { +10, 7, 8, 13, 9, 6, 7, 11, 10, 8, 8, 12, 17, 17, 17, +17, 7, 5, 5, 9, 6, 4, 4, 8, 8, 5, 5, 8, 16, 14, +13, 16, 7, 5, 5, 7, 6, 3, 3, 5, 8, 5, 4, 7, 14, +12, 12, 15, 10, 7, 8, 9, 7, 5, 5, 6, 9, 6, 5, 5, +15, 12, 9, 10, +}; + +static const uint8_t floor_128_c1[] = { + 8, 13, 17, 17, 8, 11, 17, 17, 11, 13, 17, 17, 17, 17, 17, +17, 6, 10, 16, 17, 6, 10, 15, 17, 8, 10, 16, 17, 17, 17, +17, 17, 9, 13, 15, 17, 8, 
11, 17, 17, 10, 12, 17, 17, 17, +17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, +17, 17, 17, 17, 6, 11, 15, 17, 7, 10, 15, 17, 8, 10, 17, +17, 17, 15, 17, 17, 4, 8, 13, 17, 4, 7, 13, 17, 6, 8, +15, 17, 16, 15, 17, 17, 6, 11, 15, 17, 6, 9, 13, 17, 8, +10, 17, 17, 15, 17, 17, 17, 16, 17, 17, 17, 12, 14, 15, 17, +13, 14, 15, 17, 17, 17, 17, 17, 5, 10, 14, 17, 5, 9, 14, +17, 7, 9, 15, 17, 15, 15, 17, 17, 3, 7, 12, 17, 3, 6, +11, 17, 5, 7, 13, 17, 12, 12, 17, 17, 5, 9, 14, 17, 3, + 7, 11, 17, 5, 8, 13, 17, 13, 11, 16, 17, 12, 17, 17, 17, + 9, 14, 15, 17, 10, 11, 14, 17, 16, 14, 17, 17, 8, 12, 17, +17, 8, 12, 17, 17, 10, 12, 17, 17, 17, 17, 17, 17, 5, 10, +17, 17, 5, 9, 15, 17, 7, 9, 17, 17, 13, 13, 17, 17, 7, +11, 17, 17, 6, 10, 15, 17, 7, 9, 15, 17, 12, 11, 17, 17, +12, 15, 17, 17, 11, 14, 17, 17, 11, 10, 15, 17, 17, 16, 17, +17, +}; + +static const uint8_t floor_128_0sub1[] = { + 0, 3, 3, 3, 3, 3, 3, 3, 3, +}; + +static const uint8_t floor_128_0sub2[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 4, 4, 4, + 4, 5, 4, 5, 4, 5, 4, 6, 4, 6, +}; + +static const uint8_t floor_128_0sub3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 3, 5, 3, + 5, 4, 5, 4, 5, 5, 5, 5, 6, 5, 6, 5, 6, 5, 6, + 5, 6, 5, 7, 8, 9, 11, 13, 13, 13, 13, 13, 13, 13, 13, +13, 13, 13, 13, +}; + +static const uint8_t floor_128_1sub1[] = { + 0, 3, 3, 2, 3, 3, 4, 3, 4, +}; + +static const uint8_t floor_128_1sub2[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 3, 6, 3, 6, + 3, 6, 3, 7, 3, 8, 4, 9, 4, 9, +}; + +static const uint8_t floor_128_1sub3[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 7, 2, 7, 3, + 8, 4, 9, 5, 9, 8, 10, 11, 11, 12, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +13, 13, 13, 13, +}; + +static const uint8_t floor_1024_c1[] = { 2, 10, 8, 14, 7, 12, 11, 14, 1, 5, 3, 7, 4, 9, 7, 13, }; -static const uint8_t codebook1[] = { +static const uint8_t floor_1024_c2[] = { 1, 4, 
2, 6, 3, 7, 5, 7, }; -static const uint8_t codebook2[] = { +static const uint8_t floor_1024_c3[] = { 1, 5, 7, 21, 5, 8, 9, 21, 10, 9, 12, 20, 20, 16, 20, 20, 4, 8, 9, 20, 6, 8, 9, 20, 11, 11, 13, 20, 20, 15, 17, 20, 9, 11, 14, 20, 8, 10, 15, 20
[FFmpeg-devel] [PATCH 1/2] avcodec/vorbisenc: Add pre-echo detection
The encoder will attempt to determine the existence of transient signals by applying a 4th order highpass filter to remove dominant low frequency waveforms. Frames are then split up into blocks where the variance is calculated and compared with blocks from the previous frame. A preecho is only likely to be noticeable when relatively quiet audio is followed by a loud transient signal. Signed-off-by: Tyler Jones --- libavcodec/Makefile| 2 +- libavcodec/vorbisenc.c | 28 +++-- libavcodec/vorbispsy.c | 153 + libavcodec/vorbispsy.h | 79 + 4 files changed, 256 insertions(+), 6 deletions(-) create mode 100644 libavcodec/vorbispsy.c create mode 100644 libavcodec/vorbispsy.h diff --git a/libavcodec/Makefile b/libavcodec/Makefile index b440a00..2db6727 100644 --- a/libavcodec/Makefile +++ b/libavcodec/Makefile @@ -611,7 +611,7 @@ OBJS-$(CONFIG_VMNC_DECODER)+= vmnc.o OBJS-$(CONFIG_VORBIS_DECODER) += vorbisdec.o vorbisdsp.o vorbis.o \ vorbis_data.o OBJS-$(CONFIG_VORBIS_ENCODER) += vorbisenc.o vorbis.o \ - vorbis_data.o + vorbis_data.o vorbispsy.o OBJS-$(CONFIG_VP3_DECODER) += vp3.o OBJS-$(CONFIG_VP5_DECODER) += vp5.o vp56.o vp56data.o vp56rac.o OBJS-$(CONFIG_VP6_DECODER) += vp6.o vp56.o vp56data.o \ diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c index bf21a3b..3482cf0 100644 --- a/libavcodec/vorbisenc.c +++ b/libavcodec/vorbisenc.c @@ -33,6 +33,7 @@ #include "mathops.h" #include "vorbis.h" #include "vorbis_enc_data.h" +#include "vorbispsy.h" #include "audio_frame_queue.h" #include "libavfilter/bufferqueue.h" @@ -136,6 +137,7 @@ typedef struct vorbis_enc_context { int64_t next_pts; AVFloatDSPContext *fdsp; +VorbisPsyContext *vpctx; } vorbis_enc_context; #define MAX_CHANNELS 2 @@ -272,11 +274,12 @@ static int create_vorbis_context(vorbis_enc_context *venc, vorbis_enc_floor *fc; vorbis_enc_residue *rc; vorbis_enc_mapping *mc; -int i, book, ret; +int i, book, ret, blocks; venc->channels= avctx->channels; venc->sample_rate = avctx->sample_rate; -venc->log2_blocksize[0] 
= venc->log2_blocksize[1] = 11; +venc->log2_blocksize[0] = 8; +venc->log2_blocksize[1] = 11; venc->ncodebooks = FF_ARRAY_ELEMS(cvectors); venc->codebooks = av_malloc(sizeof(vorbis_enc_codebook) * venc->ncodebooks); @@ -464,6 +467,12 @@ static int create_vorbis_context(vorbis_enc_context *venc, if ((ret = dsp_init(avctx, venc)) < 0) return ret; +blocks = 1 << (venc->log2_blocksize[1] - venc->log2_blocksize[0]); +venc->vpctx = av_mallocz(sizeof(VorbisPsyContext)); +if (!venc->vpctx || (ret = psy_vorbis_init(venc->vpctx, venc->sample_rate, + venc->channels, blocks)) < 0) +return AVERROR(ENOMEM); + return 0; } @@ -1071,22 +1080,23 @@ static void move_audio(vorbis_enc_context *venc, int sf_size) float *save = venc->saved + ch * frame_size; const float *input = (float *) cur->extended_data[ch]; const size_t len = cur->nb_samples * sizeof(float); - memcpy(offset + sf*sf_size, input, len); memcpy(save + sf*sf_size, input, len); // Move samples for next frame } av_frame_free(&cur); } venc->have_saved = 1; -memcpy(venc->scratch, venc->samples, 2 * venc->channels * frame_size); +memcpy(venc->scratch, venc->samples, sizeof(float) * venc->channels * 2 * frame_size); } static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr) { vorbis_enc_context *venc = avctx->priv_data; -int i, ret, need_more; +int i, ret, need_more, ch; +int curr_win = 1; int frame_size = 1 << (venc->log2_blocksize[1] - 1); +int block_size = 1 << (venc->log2_blocksize[0] - 1); vorbis_enc_mode *mode; vorbis_enc_mapping *mapping; PutBitContext pb; @@ -1121,6 +1131,13 @@ static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, move_audio(venc, avctx->frame_size); +for (ch = 0; ch < venc->channels; ch++) { +float *scratch = venc->scratch + 2 * ch * frame_size + frame_size; + +if (!psy_vorbis_block_frame(venc->vpctx, scratch, ch, frame_size, block_size)) +curr_win = 0; +} + if (!apply_window_and_mdct(venc)) return 0; @@ -1252,6 +1269,7 @@ static 
av_cold int vorbis_encode_close(AVCodecContext *avctx) ff_mdct_end(&
[FFmpeg-devel] [PATCH 3/3] avcodec/vorbisenc: Stop tracking number of samples per frame
Each frame is now padded with 0 values if not enough samples are present, and all frames are guaranteed to have exactly 1 << (venc->log2_blocksize[1] - 1) samples. Signed-off-by: Tyler Jones --- libavcodec/vorbisenc.c | 33 - 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c index 14de803..bf21a3b 100644 --- a/libavcodec/vorbisenc.c +++ b/libavcodec/vorbisenc.c @@ -997,7 +997,7 @@ static int residue_encode(vorbis_enc_context *venc, vorbis_enc_residue *rc, return 0; } -static int apply_window_and_mdct(vorbis_enc_context *venc, int samples) +static int apply_window_and_mdct(vorbis_enc_context *venc) { int channel; const float * win = venc->win[1]; @@ -1008,13 +1008,13 @@ static int apply_window_and_mdct(vorbis_enc_context *venc, int samples) for (channel = 0; channel < venc->channels; channel++) { float *offset = venc->samples + channel * window_len * 2; -fdsp->vector_fmul(offset, offset, win, samples); -fdsp->vector_fmul_scalar(offset, offset, 1/n, samples); +fdsp->vector_fmul(offset, offset, win, window_len); +fdsp->vector_fmul_scalar(offset, offset, 1/n, window_len); offset += window_len; -fdsp->vector_fmul_reverse(offset, offset, win, samples); -fdsp->vector_fmul_scalar(offset, offset, 1/n, samples); +fdsp->vector_fmul_reverse(offset, offset, win, window_len); +fdsp->vector_fmul_scalar(offset, offset, 1/n, window_len); venc->mdct[1].mdct_calc(&venc->mdct[1], venc->coeffs + channel * window_len, venc->samples + channel * window_len * 2); @@ -1047,7 +1047,7 @@ static AVFrame *spawn_empty_frame(AVCodecContext *avctx, int channels) } /* Set up audio samples for psy analysis and window/mdct */ -static void move_audio(vorbis_enc_context *venc, int *samples, int sf_size) +static void move_audio(vorbis_enc_context *venc, int sf_size) { AVFrame *cur = NULL; int frame_size = 1 << (venc->log2_blocksize[1] - 1); @@ -1065,7 +1065,6 @@ static void move_audio(vorbis_enc_context *venc, int *samples, int sf_size) for (sf 
= 0; sf < subframes; sf++) { cur = ff_bufqueue_get(&venc->bufqueue); -*samples += cur->nb_samples; for (ch = 0; ch < venc->channels; ch++) { float *offset = venc->samples + 2 * ch * frame_size + frame_size; @@ -1087,7 +1086,7 @@ static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, { vorbis_enc_context *venc = avctx->priv_data; int i, ret, need_more; -int samples = 0, frame_size = 1 << (venc->log2_blocksize[1] - 1); +int frame_size = 1 << (venc->log2_blocksize[1] - 1); vorbis_enc_mode *mode; vorbis_enc_mapping *mapping; PutBitContext pb; @@ -1120,9 +1119,9 @@ static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, } } -move_audio(venc, &samples, avctx->frame_size); +move_audio(venc, avctx->frame_size); -if (!apply_window_and_mdct(venc, samples)) +if (!apply_window_and_mdct(venc)) return 0; if ((ret = ff_alloc_packet2(avctx, avpkt, 8192, 0)) < 0) @@ -1149,21 +1148,21 @@ static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, for (i = 0; i < venc->channels; i++) { vorbis_enc_floor *fc = &venc->floors[mapping->floor[mapping->mux[i]]]; uint16_t posts[MAX_FLOOR_VALUES]; -floor_fit(venc, fc, &venc->coeffs[i * samples], posts, samples); -if (floor_encode(venc, fc, &pb, posts, &venc->floor[i * samples], samples)) { +floor_fit(venc, fc, &venc->coeffs[i * frame_size], posts, frame_size); +if (floor_encode(venc, fc, &pb, posts, &venc->floor[i * frame_size], frame_size)) { av_log(avctx, AV_LOG_ERROR, "output buffer is too small\n"); return AVERROR(EINVAL); } } -for (i = 0; i < venc->channels * samples; i++) +for (i = 0; i < venc->channels * frame_size; i++) venc->coeffs[i] /= venc->floor[i]; for (i = 0; i < mapping->coupling_steps; i++) { -float *mag = venc->coeffs + mapping->magnitude[i] * samples; -float *ang = venc->coeffs + mapping->angle[i] * samples; +float *mag = venc->coeffs + mapping->magnitude[i] * frame_size; +float *ang = venc->coeffs + mapping->angle[i] * frame_size; int j; -for (j = 0; j < samples; j++) { +for (j = 
0; j < frame_size; j++) { float a = ang[j]; ang[j] -= mag[j]; if (mag[j] > 0) @@ -1174,7 +1173,7 @@ static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, }
[FFmpeg-devel] [PATCH 2/3] avcodec/vorbisenc: Apply and output correct length window and mdct
Usage of blocksize, window, mode, and mdct indexes are switched from default 0 to a default of 1 to better align with specs. A flag of 0 should correspond with short windows, a flag of 1 with long. Signed-off-by: Tyler Jones --- libavcodec/vorbisenc.c | 25 ++--- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c index 9b66d56..14de803 100644 --- a/libavcodec/vorbisenc.c +++ b/libavcodec/vorbisenc.c @@ -352,7 +352,7 @@ static int create_vorbis_context(vorbis_enc_context *venc, c->books[j] = floor_classes[i].nbooks[j]; } fc->multiplier = 2; -fc->rangebits = venc->log2_blocksize[0] - 1; +fc->rangebits = venc->log2_blocksize[1] - 1; fc->values = 2; for (i = 0; i < fc->partitions; i++) @@ -439,14 +439,17 @@ static int create_vorbis_context(vorbis_enc_context *venc, mc->angle[0] = 1; } -venc->nmodes = 1; +venc->nmodes = 2; venc->modes = av_malloc(sizeof(vorbis_enc_mode) * venc->nmodes); if (!venc->modes) return AVERROR(ENOMEM); -// single mode +// Short block venc->modes[0].blockflag = 0; venc->modes[0].mapping = 0; +// Long block +venc->modes[1].blockflag = 1; +venc->modes[1].mapping = 0; venc->have_saved = 0; venc->saved = av_malloc_array(sizeof(float) * venc->channels, (1 << venc->log2_blocksize[1]) / 2); @@ -997,9 +1000,9 @@ static int residue_encode(vorbis_enc_context *venc, vorbis_enc_residue *rc, static int apply_window_and_mdct(vorbis_enc_context *venc, int samples) { int channel; -const float * win = venc->win[0]; -int window_len = 1 << (venc->log2_blocksize[0] - 1); -float n = (float)(1 << venc->log2_blocksize[0]) / 4.0; +const float * win = venc->win[1]; +int window_len = 1 << (venc->log2_blocksize[1] - 1); +float n = (float)(1 << venc->log2_blocksize[1]) / 4.0; AVFloatDSPContext *fdsp = venc->fdsp; for (channel = 0; channel < venc->channels; channel++) { @@ -1013,7 +1016,7 @@ static int apply_window_and_mdct(vorbis_enc_context *venc, int samples) fdsp->vector_fmul_reverse(offset, offset, win, 
samples); fdsp->vector_fmul_scalar(offset, offset, 1/n, samples); -venc->mdct[0].mdct_calc(&venc->mdct[0], venc->coeffs + channel * window_len, +venc->mdct[1].mdct_calc(&venc->mdct[1], venc->coeffs + channel * window_len, venc->samples + channel * window_len * 2); } return 1; @@ -1134,13 +1137,13 @@ static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, put_bits(&pb, 1, 0); // magic bit -put_bits(&pb, ilog(venc->nmodes - 1), 0); // 0 bits, the mode +put_bits(&pb, ilog(venc->nmodes - 1), 1); // Mode for current frame -mode= &venc->modes[0]; +mode= &venc->modes[1]; mapping = &venc->mappings[mode->mapping]; if (mode->blockflag) { -put_bits(&pb, 1, 0); -put_bits(&pb, 1, 0); +put_bits(&pb, 1, 1); // Previous windowflag +put_bits(&pb, 1, 1); // Next windowflag } for (i = 0; i < venc->channels; i++) { -- 2.7.4 signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH 1/3] avcodec/vorbisenc: Separate copying audio samples from windowing
Audio samples are shifted around when copying from the frame queue so that analysis can be done without negatively impacting calculation of the MDCT. Window coefficients are applied to the current two overlapped windows simultaneously instead of applying overlap for the next frame ahead of time. This improves readability when applying windows of varying lengths. Signed-off-by: Tyler Jones --- libavcodec/vorbisenc.c | 76 +- 1 file changed, 32 insertions(+), 44 deletions(-) diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c index afded40..9b66d56 100644 --- a/libavcodec/vorbisenc.c +++ b/libavcodec/vorbisenc.c @@ -453,7 +453,7 @@ static int create_vorbis_context(vorbis_enc_context *venc, venc->samples= av_malloc_array(sizeof(float) * venc->channels, (1 << venc->log2_blocksize[1])); venc->floor = av_malloc_array(sizeof(float) * venc->channels, (1 << venc->log2_blocksize[1]) / 2); venc->coeffs = av_malloc_array(sizeof(float) * venc->channels, (1 << venc->log2_blocksize[1]) / 2); -venc->scratch= av_malloc_array(sizeof(float) * venc->channels, (1 << venc->log2_blocksize[1]) / 2); +venc->scratch= av_malloc_array(sizeof(float) * venc->channels, (1 << venc->log2_blocksize[1])); if (!venc->saved || !venc->samples || !venc->floor || !venc->coeffs || !venc->scratch) return AVERROR(ENOMEM); @@ -994,8 +994,7 @@ static int residue_encode(vorbis_enc_context *venc, vorbis_enc_residue *rc, return 0; } -static int apply_window_and_mdct(vorbis_enc_context *venc, - float *audio, int samples) +static int apply_window_and_mdct(vorbis_enc_context *venc, int samples) { int channel; const float * win = venc->win[0]; @@ -1003,46 +1002,19 @@ static int apply_window_and_mdct(vorbis_enc_context *venc, float n = (float)(1 << venc->log2_blocksize[0]) / 4.0; AVFloatDSPContext *fdsp = venc->fdsp; -if (!venc->have_saved && !samples) -return 0; +for (channel = 0; channel < venc->channels; channel++) { +float *offset = venc->samples + channel * window_len * 2; -if (venc->have_saved) { -for 
(channel = 0; channel < venc->channels; channel++) -memcpy(venc->samples + channel * window_len * 2, - venc->saved + channel * window_len, sizeof(float) * window_len); -} else { -for (channel = 0; channel < venc->channels; channel++) -memset(venc->samples + channel * window_len * 2, 0, - sizeof(float) * window_len); -} +fdsp->vector_fmul(offset, offset, win, samples); +fdsp->vector_fmul_scalar(offset, offset, 1/n, samples); -if (samples) { -for (channel = 0; channel < venc->channels; channel++) { -float *offset = venc->samples + channel * window_len * 2 + window_len; +offset += window_len; -fdsp->vector_fmul_reverse(offset, audio + channel * window_len, win, samples); -fdsp->vector_fmul_scalar(offset, offset, 1/n, samples); -} -} else { -for (channel = 0; channel < venc->channels; channel++) -memset(venc->samples + channel * window_len * 2 + window_len, - 0, sizeof(float) * window_len); -} +fdsp->vector_fmul_reverse(offset, offset, win, samples); +fdsp->vector_fmul_scalar(offset, offset, 1/n, samples); -for (channel = 0; channel < venc->channels; channel++) venc->mdct[0].mdct_calc(&venc->mdct[0], venc->coeffs + channel * window_len, venc->samples + channel * window_len * 2); - -if (samples) { -for (channel = 0; channel < venc->channels; channel++) { -float *offset = venc->saved + channel * window_len; - -fdsp->vector_fmul(offset, audio + channel * window_len, win, samples); -fdsp->vector_fmul_scalar(offset, offset, 1/n, samples); -} -venc->have_saved = 1; -} else { -venc->have_saved = 0; } return 1; } @@ -1071,24 +1043,40 @@ static AVFrame *spawn_empty_frame(AVCodecContext *avctx, int channels) return f; } -/* Concatenate audio frames into an appropriately sized array of samples */ -static void move_audio(vorbis_enc_context *venc, float *audio, int *samples, int sf_size) +/* Set up audio samples for psy analysis and window/mdct */ +static void move_audio(vorbis_enc_context *venc, int *samples, int sf_size) { AVFrame *cur = NULL; int frame_size = 1 << 
(venc->log2_blocksize[1] - 1); int subframes = frame_size / sf_size; +int sf, ch; -for (int sf = 0; sf < subframes; sf++) { +/* Copy samples from last frame into
[FFmpeg-devel] [PATCH] avcodec/vorbisenc: Fix memory leak on errors
Switches temporary samples for processing to be stored in the encoder's context, avoids memory leaks if any errors occur while encoding a frame. Fixes CID1412026 Signed-off-by: Tyler Jones --- libavcodec/vorbisenc.c | 49 - 1 file changed, 12 insertions(+), 37 deletions(-) diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c index 856f590..afded40 100644 --- a/libavcodec/vorbisenc.c +++ b/libavcodec/vorbisenc.c @@ -112,6 +112,7 @@ typedef struct vorbis_enc_context { float *samples; float *floor; // also used for tmp values for mdct float *coeffs; // also used for residue after floor +float *scratch; // used for tmp values for psy model float quality; AudioFrameQueue afq; @@ -452,7 +453,9 @@ static int create_vorbis_context(vorbis_enc_context *venc, venc->samples= av_malloc_array(sizeof(float) * venc->channels, (1 << venc->log2_blocksize[1])); venc->floor = av_malloc_array(sizeof(float) * venc->channels, (1 << venc->log2_blocksize[1]) / 2); venc->coeffs = av_malloc_array(sizeof(float) * venc->channels, (1 << venc->log2_blocksize[1]) / 2); -if (!venc->saved || !venc->samples || !venc->floor || !venc->coeffs) +venc->scratch= av_malloc_array(sizeof(float) * venc->channels, (1 << venc->log2_blocksize[1]) / 2); + +if (!venc->saved || !venc->samples || !venc->floor || !venc->coeffs || !venc->scratch) return AVERROR(ENOMEM); if ((ret = dsp_init(avctx, venc)) < 0) @@ -992,7 +995,7 @@ static int residue_encode(vorbis_enc_context *venc, vorbis_enc_residue *rc, } static int apply_window_and_mdct(vorbis_enc_context *venc, - float **audio, int samples) + float *audio, int samples) { int channel; const float * win = venc->win[0]; @@ -1017,7 +1020,7 @@ static int apply_window_and_mdct(vorbis_enc_context *venc, for (channel = 0; channel < venc->channels; channel++) { float *offset = venc->samples + channel * window_len * 2 + window_len; -fdsp->vector_fmul_reverse(offset, audio[channel], win, samples); +fdsp->vector_fmul_reverse(offset, audio + channel * window_len, win, 
samples); fdsp->vector_fmul_scalar(offset, offset, 1/n, samples); } } else { @@ -1034,7 +1037,7 @@ static int apply_window_and_mdct(vorbis_enc_context *venc, for (channel = 0; channel < venc->channels; channel++) { float *offset = venc->saved + channel * window_len; -fdsp->vector_fmul(offset, audio[channel], win, samples); +fdsp->vector_fmul(offset, audio + channel * window_len, win, samples); fdsp->vector_fmul_scalar(offset, offset, 1/n, samples); } venc->have_saved = 1; @@ -1068,28 +1071,8 @@ static AVFrame *spawn_empty_frame(AVCodecContext *avctx, int channels) return f; } -static float **alloc_audio_arrays(int channels, int frame_size) -{ -float **audio = av_mallocz_array(channels, sizeof(float *)); -if (!audio) -return NULL; - -for (int ch = 0; ch < channels; ch++) { -audio[ch] = av_mallocz_array(frame_size, sizeof(float)); -if (!audio[ch]) { -// alloc has failed, free everything allocated thus far -for (ch--; ch >= 0; ch--) -av_free(audio[ch]); -av_free(audio); -return NULL; -} -} - -return audio; -} - /* Concatenate audio frames into an appropriately sized array of samples */ -static void move_audio(vorbis_enc_context *venc, float **audio, int *samples, int sf_size) +static void move_audio(vorbis_enc_context *venc, float *audio, int *samples, int sf_size) { AVFrame *cur = NULL; int frame_size = 1 << (venc->log2_blocksize[1] - 1); @@ -1102,7 +1085,7 @@ static void move_audio(vorbis_enc_context *venc, float **audio, int *samples, in for (int ch = 0; ch < venc->channels; ch++) { const float *input = (float *) cur->extended_data[ch]; const size_t len = cur->nb_samples * sizeof(float); -memcpy(&audio[ch][sf*sf_size], input, len); +memcpy(audio + ch*frame_size + sf*sf_size, input, len); } av_frame_free(&cur); } @@ -1112,7 +1095,6 @@ static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr) { vorbis_enc_context *venc = avctx->priv_data; -float **audio = NULL; int i, ret, need_more; int samples = 0, frame_size 
= 1 << (venc->log2_blocksize[1] - 1); vorbis_enc_mode *mode; @@ -1132,10 +1114,6 @@ static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *a
[FFmpeg-devel] [PATCH 2/2] avcodec/vorbisenc: Use a bufqueue in encoding with smaller lengths
Switching the vorbis encoder to use a buffer queue for input frames allows saving lookahead samples more easily and safely for psychoacoustic systems, requiring less pointer arithmetic in the case of transient windows. --- libavcodec/vorbisenc.c | 120 +++-- 1 file changed, 106 insertions(+), 14 deletions(-) diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c index fdce864..ea2b7f5 100644 --- a/libavcodec/vorbisenc.c +++ b/libavcodec/vorbisenc.c @@ -1024,20 +1024,117 @@ static int apply_window_and_mdct(vorbis_enc_context *venc, return 1; } +/* Used for padding the last encoded packet */ +static AVFrame *spawn_empty_frame(AVCodecContext *avctx, int channels) +{ +AVFrame *f = av_frame_alloc(); + +if (!f) +return NULL; + +f->format = avctx->sample_fmt; +f->nb_samples = avctx->frame_size; +f->channel_layout = avctx->channel_layout; + +if (av_frame_get_buffer(f, 4)) { +av_frame_free(&f); +return NULL; +} + +for (int ch = 0; ch < channels; ch++) { +size_t bps = av_get_bytes_per_sample(f->format); +memset(f->extended_data[ch], 0, bps * f->nb_samples); +} +return f; +} + +static float **alloc_audio_arrays(int channels, int frame_size) +{ +float **audio = av_mallocz_array(channels, sizeof(float *)); +if (!audio) +return NULL; + +for (int ch = 0; ch < channels; ch++) { +audio[ch] = av_mallocz_array(frame_size, sizeof(float)); +if (!audio[ch]) { +// alloc has failed, free everything allocated thus far +for (ch--; ch >= 0; ch--) +av_free(audio[ch]); +av_free(audio); +return NULL; +} +} + +return audio; +} + +/* Concatenate audio frames into an appropriately sized array of samples */ +static void move_audio(vorbis_enc_context *venc, float **audio, int *samples, int sf_size) +{ +AVFrame *cur = NULL; +int frame_size = 1 << (venc->log2_blocksize[1] - 1); +int subframes = frame_size / sf_size; + +for (int sf = 0; sf < subframes; sf++) { +cur = ff_bufqueue_get(&venc->bufqueue); +*samples += cur->nb_samples; + +for (int ch = 0; ch < venc->channels; ch++) { +const float *input 
= (float *) cur->extended_data[ch]; +const size_t len = cur->nb_samples * sizeof(float); +memcpy(&audio[ch][sf*sf_size], input, len); +} +av_frame_free(&cur); +} +} + static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr) { vorbis_enc_context *venc = avctx->priv_data; -float **audio = frame ? (float **)frame->extended_data : NULL; -int samples = frame ? frame->nb_samples : 0; +float **audio = NULL; +int i, ret, need_more; +int samples = 0, frame_size = 1 << (venc->log2_blocksize[1] - 1); vorbis_enc_mode *mode; vorbis_enc_mapping *mapping; PutBitContext pb; -int i, ret; + +if (frame) { +if ((ret = ff_af_queue_add(&venc->afq, frame)) < 0) +return ret; +ff_bufqueue_add(avctx, &venc->bufqueue, av_frame_clone(frame)); +} else +if (!venc->afq.remaining_samples) +return 0; + +need_more = venc->bufqueue.available * avctx->frame_size < frame_size; +need_more = frame && need_more; +if (need_more) +return 0; + +audio = alloc_audio_arrays(venc->channels, frame_size); +if (!audio) +return AVERROR(ENOMEM); + +/* Pad the bufqueue with empty frames for encoding the last packet. 
*/ +if (!frame) { +if (venc->bufqueue.available * avctx->frame_size < frame_size) { +int frames_needed = (frame_size/avctx->frame_size) - venc->bufqueue.available; + +for (int i = 0; i < frames_needed; i++) { + AVFrame *empty = spawn_empty_frame(avctx, venc->channels); + if (!empty) + return AVERROR(ENOMEM); + + ff_bufqueue_add(avctx, &venc->bufqueue, empty); +} +} +} + +move_audio(venc, audio, &samples, avctx->frame_size); if (!apply_window_and_mdct(venc, audio, samples)) return 0; -samples = 1 << (venc->log2_blocksize[0] - 1); if ((ret = ff_alloc_packet2(avctx, avpkt, 8192, 0)) < 0) return ret; @@ -1096,16 +1193,11 @@ static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, flush_put_bits(&pb); avpkt->size = put_bits_count(&pb) >> 3; -avpkt->duration = ff_samples_to_time_base(avctx, avctx->frame_size); -if (frame) { -if (frame->pts != AV_NOPTS_VALUE) -avpkt->pts = ff_samples_to_time_base(avctx, frame->pts); -} else { -avpkt->pts = venc->next_pts; -} -if (avpkt->pts != AV_NOPTS_VALUE) -venc->next_pts = avpkt->pts + avpkt->duration; +for (int ch = 0; ch < venc->channels; c
[FFmpeg-devel] [PATCH 1/2] avcodec/vorbisenc: Include bufqueue and afqueue
--- libavcodec/vorbisenc.c | 10 ++ 1 file changed, 10 insertions(+) diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c index 2974ca2..fdce864 100644 --- a/libavcodec/vorbisenc.c +++ b/libavcodec/vorbisenc.c @@ -33,6 +33,9 @@ #include "vorbis.h" #include "vorbis_enc_data.h" +#include "audio_frame_queue.h" +#include "libavfilter/bufferqueue.h" + #define BITSTREAM_WRITER_LE #include "put_bits.h" @@ -110,6 +113,9 @@ typedef struct vorbis_enc_context { float *coeffs; // also used for residue after floor float quality; +AudioFrameQueue afq; +struct FFBufQueue bufqueue; + int ncodebooks; vorbis_enc_codebook *codebooks; @@ -1158,6 +1164,8 @@ static av_cold int vorbis_encode_close(AVCodecContext *avctx) ff_mdct_end(&venc->mdct[0]); ff_mdct_end(&venc->mdct[1]); +ff_af_queue_close(&venc->afq); +ff_bufqueue_discard_all(&venc->bufqueue); av_freep(&avctx->extradata); @@ -1190,6 +1198,8 @@ static av_cold int vorbis_encode_init(AVCodecContext *avctx) avctx->frame_size = 1 << (venc->log2_blocksize[0] - 1); +ff_af_queue_init(avctx, &venc->afq); + return 0; error: vorbis_encode_close(avctx); -- 2.7.4 signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH 1/2] avcodec/vorbisenc: Include fdsp
Signed-off-by: Tyler Jones --- libavcodec/vorbisenc.c | 31 +-- 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c index 2974ca2..7c3cd51 100644 --- a/libavcodec/vorbisenc.c +++ b/libavcodec/vorbisenc.c @@ -25,6 +25,7 @@ */ #include +#include "libavutil/float_dsp.h" #include "avcodec.h" #include "internal.h" @@ -126,6 +127,8 @@ typedef struct vorbis_enc_context { vorbis_enc_mode *modes; int64_t next_pts; + +AVFloatDSPContext *fdsp; } vorbis_enc_context; #define MAX_CHANNELS 2 @@ -236,6 +239,26 @@ static int ready_residue(vorbis_enc_residue *rc, vorbis_enc_context *venc) return 0; } +static av_cold int dsp_init(AVCodecContext *avctx, vorbis_enc_context *venc) +{ +int ret = 0; + +venc->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT); +if (!venc->fdsp) +return AVERROR(ENOMEM); + +// init windows +venc->win[0] = ff_vorbis_vwin[venc->log2_blocksize[0] - 6]; +venc->win[1] = ff_vorbis_vwin[venc->log2_blocksize[1] - 6]; + +if ((ret = ff_mdct_init(&venc->mdct[0], venc->log2_blocksize[0], 0, 1.0)) < 0) +return ret; +if ((ret = ff_mdct_init(&venc->mdct[1], venc->log2_blocksize[1], 0, 1.0)) < 0) +return ret; + +return 0; +} + static int create_vorbis_context(vorbis_enc_context *venc, AVCodecContext *avctx) { @@ -426,12 +449,7 @@ static int create_vorbis_context(vorbis_enc_context *venc, if (!venc->saved || !venc->samples || !venc->floor || !venc->coeffs) return AVERROR(ENOMEM); -venc->win[0] = ff_vorbis_vwin[venc->log2_blocksize[0] - 6]; -venc->win[1] = ff_vorbis_vwin[venc->log2_blocksize[1] - 6]; - -if ((ret = ff_mdct_init(&venc->mdct[0], venc->log2_blocksize[0], 0, 1.0)) < 0) -return ret; -if ((ret = ff_mdct_init(&venc->mdct[1], venc->log2_blocksize[1], 0, 1.0)) < 0) +if ((ret = dsp_init(avctx, venc)) < 0) return ret; return 0; @@ -1155,6 +1173,7 @@ static av_cold int vorbis_encode_close(AVCodecContext *avctx) av_freep(&venc->samples); av_freep(&venc->floor); av_freep(&venc->coeffs); 
+av_freep(&venc->fdsp); ff_mdct_end(&venc->mdct[0]); ff_mdct_end(&venc->mdct[1]); -- 2.7.4 signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH 2/2] avcodec/vorbisenc: Use fdsp for applying windows
Using fdsp improves readability and allows using architecture-specific optimizations. Signed-off-by: Tyler Jones --- libavcodec/vorbisenc.c | 16 +--- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c index 7c3cd51..1777a49 100644 --- a/libavcodec/vorbisenc.c +++ b/libavcodec/vorbisenc.c @@ -988,11 +988,11 @@ static int residue_encode(vorbis_enc_context *venc, vorbis_enc_residue *rc, static int apply_window_and_mdct(vorbis_enc_context *venc, float **audio, int samples) { -int i, channel; +int channel; const float * win = venc->win[0]; int window_len = 1 << (venc->log2_blocksize[0] - 1); float n = (float)(1 << venc->log2_blocksize[0]) / 4.0; -// FIXME use dsp +AVFloatDSPContext *fdsp = venc->fdsp; if (!venc->have_saved && !samples) return 0; @@ -1009,9 +1009,10 @@ static int apply_window_and_mdct(vorbis_enc_context *venc, if (samples) { for (channel = 0; channel < venc->channels; channel++) { -float * offset = venc->samples + channel*window_len*2 + window_len; -for (i = 0; i < samples; i++) -offset[i] = audio[channel][i] / n * win[window_len - i - 1]; +float *offset = venc->samples + channel * window_len * 2 + window_len; + +fdsp->vector_fmul_reverse(offset, audio[channel], win, samples); +fdsp->vector_fmul_scalar(offset, offset, 1/n, samples); } } else { for (channel = 0; channel < venc->channels; channel++) @@ -1026,8 +1027,9 @@ static int apply_window_and_mdct(vorbis_enc_context *venc, if (samples) { for (channel = 0; channel < venc->channels; channel++) { float *offset = venc->saved + channel * window_len; -for (i = 0; i < samples; i++) -offset[i] = audio[channel][i] / n * win[i]; + +fdsp->vector_fmul(offset, audio[channel], win, samples); +fdsp->vector_fmul_scalar(offset, offset, 1/n, samples); } venc->have_saved = 1; } else { -- 2.7.4 signature.asc Description: PGP signature ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH 1/2] avcodec/vorbis_enc_data: Include scalewidth band factors
Scale factors are copied from the AAC encoder tabulated data. Including the AAC tabulated header includes files that cause improper framing bits to be put in the header and an improper end of file. Providing this for the vorbis encoder is necessary to use the existing AAC psychoacoustic system. Window size is currently only assumed to be 1024 with the vorbis encoder. This is part of a GSoC qualification task. Signed-off-by: Tyler Jones --- libavcodec/vorbis_enc_data.h | 108 +++ 1 file changed, 108 insertions(+) diff --git a/libavcodec/vorbis_enc_data.h b/libavcodec/vorbis_enc_data.h index a51aaec..d65e7cc 100644 --- a/libavcodec/vorbis_enc_data.h +++ b/libavcodec/vorbis_enc_data.h @@ -501,4 +501,112 @@ static const struct { { 3, 2, 3, { -1, 12, 13, 14 } }, }; +static const uint8_t swb_size_128_96[] = { +4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36 +}; + +static const uint8_t swb_size_128_64[] = { +4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36 +}; + +static const uint8_t swb_size_128_48[] = { +4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16 +}; + +static const uint8_t swb_size_128_24[] = { +4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20 +}; + +static const uint8_t swb_size_128_16[] = { +4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20 +}; + +static const uint8_t swb_size_128_8[] = { +4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20 +}; + +static const uint8_t swb_size_1024_96[] = { +4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, +12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44, +64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64 +}; + +static const uint8_t swb_size_1024_64[] = { +4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, +12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36, +40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40 +}; + +static const uint8_t swb_size_1024_48[] = { +4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, +12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28, +32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, +96 +}; + 
+static const uint8_t swb_size_1024_32[] = { +4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, +12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28, +32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32 +}; + +static const uint8_t swb_size_1024_24[] = { +4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, +12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28, +32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64 +}; + +static const uint8_t swb_size_1024_16[] = { +8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, +12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28, +32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64 +}; + +static const uint8_t swb_size_1024_8[] = { +12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, +16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28, +32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80 +}; + +const uint8_t *ff_vorbis_swb_size_128[] = { +swb_size_128_96, swb_size_128_96, swb_size_128_64, +swb_size_128_48, swb_size_128_48, swb_size_128_48, +swb_size_128_24, swb_size_128_24, swb_size_128_16, +swb_size_128_16, swb_size_128_16, swb_size_128_8, +swb_size_128_8 +}; + +const uint8_t *ff_vorbis_swb_size_1024[] = { +swb_size_1024_96, swb_size_1024_96, swb_size_1024_64, +swb_size_1024_48, swb_size_1024_48, swb_size_1024_32, +swb_size_1024_24, swb_size_1024_24, swb_size_1024_16, +swb_size_1024_16, swb_size_1024_16, swb_size_1024_8, +swb_size_1024_8 +}; + +const int ff_vorbis_swb_size_128_len = FF_ARRAY_ELEMS(ff_vorbis_swb_size_128); +const int ff_vorbis_swb_size_1024_len = FF_ARRAY_ELEMS(ff_vorbis_swb_size_1024); + +/* duplicated from avpriv_mpeg4audio_sample_rates to avoid shared build + * failures */ +static const int mpeg4audio_sample_rates[16] = { +96000, 88200, 64000, 48000, 44100, 32000, +24000, 22050, 16000, 12000, 11025, 8000, 7350 +}; + +enum WindowSequence { +ONLY_LONG_SEQUENCE, +LONG_START_SEQUENCE, +EIGHT_SHORT_SEQUENCE, +LONG_STOP_SEQUENCE, +}; + +const uint8_t 
ff_vorbis_num_swb_1024[] = { +41, 41, 47, 49, 49, 51, 47, 47, 43, 43, 43, 40, 40 +}; + +const uint8_t ff_vorbis_num_swb_128[] = { +12, 12, 12, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15 +}; + #endif /* AVCODEC_VORBIS_ENC_DATA_H */ -- 2.7.4 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
[FFmpeg-devel] [PATCH 2/2] avcodec/vorbisenc: Implement transient detection in Vorbis encoder
The existing AAC psychoacoustic system is used to detect transients within the vorbis encoder. This is useful, in general, as an initial step in later utilizing a complex psychoacoustic model for the vorbis encoder, but more specifically allows the cancellation of pre-echo effects that frequently occur with this codec. Signed-off-by: Tyler Jones --- libavcodec/psymodel.c | 1 + libavcodec/vorbisenc.c | 60 ++ 2 files changed, 61 insertions(+) diff --git a/libavcodec/psymodel.c b/libavcodec/psymodel.c index 2b5f111..38831ce 100644 --- a/libavcodec/psymodel.c +++ b/libavcodec/psymodel.c @@ -62,6 +62,7 @@ av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens, switch (ctx->avctx->codec_id) { case AV_CODEC_ID_AAC: +case AV_CODEC_ID_VORBIS: ctx->model = &ff_aac_psy_model; break; } diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c index 2974ca2..e4ec822 100644 --- a/libavcodec/vorbisenc.c +++ b/libavcodec/vorbisenc.c @@ -33,6 +33,8 @@ #include "vorbis.h" #include "vorbis_enc_data.h" +#include "psymodel.h" + #define BITSTREAM_WRITER_LE #include "put_bits.h" @@ -126,6 +128,9 @@ typedef struct vorbis_enc_context { vorbis_enc_mode *modes; int64_t next_pts; + +FFPsyContext psy; +struct FFPsyPreprocessContext* psypp; } vorbis_enc_context; #define MAX_CHANNELS 2 @@ -1024,10 +1029,38 @@ static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, vorbis_enc_context *venc = avctx->priv_data; float **audio = frame ? (float **)frame->extended_data : NULL; int samples = frame ? 
frame->nb_samples : 0; +float *samples2, *la, *overlap; vorbis_enc_mode *mode; vorbis_enc_mapping *mapping; PutBitContext pb; int i, ret; +int start_ch, ch, chans, cur_channel; +FFPsyWindowInfo windows[MAX_CHANNELS]; +enum WindowSequence window_sequence[MAX_CHANNELS]; + +if (!avctx->frame_number) +return 0; + +if (venc->psypp) +ff_psy_preprocess(venc->psypp, audio, venc->channels); + +if (frame) { +start_ch = 0; +cur_channel = 0; +for (i = 0; i < venc->channels - 1; i++) { +FFPsyWindowInfo* wi = windows + start_ch; +chans = 2; +for (ch = 0; ch < 2; ch++) { +cur_channel = start_ch + ch; +overlap = &audio[cur_channel][0]; +samples2 = overlap + 1024; +la = samples2 + (448+64); +wi[ch] = venc->psy.model->window(&venc->psy, samples2, la, + cur_channel, window_sequence[0]); +} +start_ch += chans; +} +} if (!apply_window_and_mdct(venc, audio, samples)) return 0; @@ -1158,7 +1191,10 @@ static av_cold int vorbis_encode_close(AVCodecContext *avctx) ff_mdct_end(&venc->mdct[0]); ff_mdct_end(&venc->mdct[1]); +ff_psy_end(&venc->psy); +if (venc->psypp) +ff_psy_preprocess_end(venc->psypp); av_freep(&avctx->extradata); return 0 ; @@ -1168,6 +1204,10 @@ static av_cold int vorbis_encode_init(AVCodecContext *avctx) { vorbis_enc_context *venc = avctx->priv_data; int ret; +const uint8_t *sizes[MAX_CHANNELS]; +uint8_t grouping[MAX_CHANNELS]; +int lengths[MAX_CHANNELS]; +int samplerate_index; if (avctx->channels != 2) { av_log(avctx, AV_LOG_ERROR, "Current FFmpeg Vorbis encoder only supports 2 channels.\n"); @@ -1190,6 +1230,26 @@ static av_cold int vorbis_encode_init(AVCodecContext *avctx) avctx->frame_size = 1 << (venc->log2_blocksize[0] - 1); +for (samplerate_index = 0; samplerate_index < 16; samplerate_index++) +if (avctx->sample_rate == mpeg4audio_sample_rates[samplerate_index]) +break; +if (samplerate_index == 16 || +samplerate_index >= ff_vorbis_swb_size_1024_len || +samplerate_index >= ff_vorbis_swb_size_128_len) +av_log(avctx, AV_LOG_ERROR, "Unsupported sample rate %d\n", 
avctx->sample_rate); + +sizes[0] = ff_vorbis_swb_size_1024[samplerate_index]; +sizes[1] = ff_vorbis_swb_size_128[samplerate_index]; +lengths[0] = ff_vorbis_num_swb_1024[samplerate_index]; +lengths[1] = ff_vorbis_num_swb_128[samplerate_index]; +grouping[0] = 1; + +if ((ret = ff_psy_init(&venc->psy, avctx, 2, + sizes, lengths, + 1, grouping)) < 0) +goto error; +venc->psypp = ff_psy_preprocess_init(avctx); + return 0; error: vorbis_encode_close(avctx); -- 2.7.4 ___ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel