Clipping is avoided by finding the peak absolute sample value of each frame before the window is applied and, when that peak exceeds a block-type-dependent threshold, scaling the entire frame down by a single scalar factor.
Signed-off-by: Tyler Jones <tdjones...@gmail.com> --- libavcodec/vorbisenc.c | 8 ++++---- libavcodec/vorbispsy.c | 17 +++++++++++++++++ libavcodec/vorbispsy.h | 10 ++++++++++ 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c index c968956794..73182c6356 100644 --- a/libavcodec/vorbisenc.c +++ b/libavcodec/vorbisenc.c @@ -1037,10 +1037,10 @@ static int residue_encode(vorbis_enc_context *venc, vorbis_enc_residue *rc, * See Vorbis I spec Fig. 2, 3 for examples. */ static void apply_window(vorbis_enc_context *venc, const int *blockflags, - float *out, float* in) + float *out, float* in, const float clip_factor) { int prev_size, curr_size, next_size, bound; - float scale = 1.0f / (float) (1 << (venc->log2_blocksize[blockflags[1]] - 2)); + float scale = clip_factor / (float) (1 << (venc->log2_blocksize[blockflags[1]] - 2)); const float *prev_win, *next_win; AVFloatDSPContext *fdsp = venc->fdsp; @@ -1098,9 +1098,9 @@ static int apply_window_and_mdct(vorbis_enc_context *venc, int next_type) for (channel = 0; channel < venc->channels; channel++) { float *out = venc->scratch; float *in = venc->samples + channel * 2 * long_len + transient_offset; + float clip_factor = ff_psy_vorbis_avoid_clip(in, curr_len, curr_type); - apply_window(venc, blockflags, out, in); - + apply_window(venc, blockflags, out, in, clip_factor); venc->mdct[curr_type].mdct_calc(&venc->mdct[curr_type], venc->coeffs + channel * curr_len, out); } diff --git a/libavcodec/vorbispsy.c b/libavcodec/vorbispsy.c index ab2d41f62f..56e23dea5e 100644 --- a/libavcodec/vorbispsy.c +++ b/libavcodec/vorbispsy.c @@ -140,6 +140,23 @@ int ff_psy_vorbis_block_frame(VorbisPsyContext *vpctx, float *audio, return block_flag; } +float ff_psy_vorbis_avoid_clip(float *audio, int window_len, int blockflag) +{ + int i; + float max = 0, clip = 1.0f; + /* Due to how the mdct scaling works in the vorbis encoder, short blocks are + * more likely to clip. 
This serves as more fine-grained control */ + const float avoidance_factor = blockflag ? 0.95f : 0.75f; + + for (i = 0; i < window_len; i++) + max = FFMAX(max, fabsf(audio[i])); + + if (max > avoidance_factor) + clip = avoidance_factor / max; + + return clip; +} + av_cold void ff_psy_vorbis_close(VorbisPsyContext *vpctx) { av_freep(&vpctx->filter_delay); diff --git a/libavcodec/vorbispsy.h b/libavcodec/vorbispsy.h index 93a03fd8ca..e632e8ad1d 100644 --- a/libavcodec/vorbispsy.h +++ b/libavcodec/vorbispsy.h @@ -75,6 +75,16 @@ av_cold int ff_psy_vorbis_init(VorbisPsyContext *vpctx, int sample_rate, */ int ff_psy_vorbis_block_frame(VorbisPsyContext *vpctx, float *audio, int ch, int frame_size, int block_size); + +/** + * Provide a scalar coefficient to avoid clipping. + * + * @param audio Raw audio sample input for one channel + * @param window_len Chosen window length for the given frame + * @return Coefficient to be applied alongside the window function + */ +float ff_psy_vorbis_avoid_clip(float *audio, int window_len, int blockflag); + /** * Closes and frees the memory used by the psychoacoustic model */ -- 2.14.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel