Michael,

In the attached patch I've tried to make all of the changes you've pointed out. 
 I also renamed tone_hz to tone_frequency on Moritz Barsnick's suggestion.

Is there a good way to generate the tone while avoiding floating point 
operations?  If there is then don't bother reviewing this patch and I'll make 
that change once I know better how to do it.

I removed the unrelated changes.  The two parameters, tone_duration and 
tone_frequency, are integers now.  The tone_duration parameter is changed from 
seconds to milliseconds.  I have updated the documentation to reflect that.  I 
moved the tone generation to an initialization function and fill a buffer that 
exists for the duration of the filter instead of needlessly generating the tone 
on the fly.  

Thanks,
Greg

-- 
Greg Rowe
www.shoretel.com

    
From 41405e90cb2fb41441a6cf29c7a0d14362fd1b1f Mon Sep 17 00:00:00 2001
From: Greg Rowe <gr...@shoretel.com>
Date: Fri, 7 Oct 2016 13:39:58 -0400
Subject: [PATCH] avfilter/af_silenceremove: add optional tone when silence is
 removed

This commit adds two options to the af_silenceremove filter.  It adds
tone_duration and tone_hz making it possible to insert a tone when
silence is removed.  Tone insertion is disabled by default (by using a
tone_duration of 0.0 seconds).

Signed-off-by: Greg Rowe <gr...@shoretel.com>
---
 Changelog                      |   1 +
 doc/filters.texi               |  11 ++-
 libavfilter/af_silenceremove.c | 161 +++++++++++++++++++++++++++++++++++------
 libavfilter/version.h          |   2 +-
 4 files changed, 151 insertions(+), 24 deletions(-)

diff --git a/Changelog b/Changelog
index 0da009c..86e031c 100644
--- a/Changelog
+++ b/Changelog
@@ -2,6 +2,7 @@ Entries are sorted chronologically from oldest to youngest 
within each release,
 releases are sorted from youngest to oldest.
 
 version <next>:
+- Added optional tone insertion in af_silenceremove
 - libopenmpt demuxer
 - tee protocol
 - Changed metadata print option to accept general urls
diff --git a/doc/filters.texi b/doc/filters.texi
index 4b2f7bf..e09a303 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -3340,7 +3340,8 @@ ffmpeg -i silence.mp3 -af silencedetect=noise=0.0001 -f 
null -
 
 @section silenceremove
 
-Remove silence from the beginning, middle or end of the audio.
+Remove silence from the beginning, middle or end of the audio while
+optionally inserting a tone where silence was removed.
 
 The filter accepts the following options:
 
@@ -3401,6 +3402,14 @@ Default value is @code{rms}.
 @item window
 Set ratio used to calculate size of window for detecting silence.
 Default value is @code{0.02}. Allowed range is from @code{0} to @code{10}.
+
+@item tone_duration
+Set the duration of the tone inserted in the stream when silence is removed.  
A value of @code{0} disables tone insertion.
+Default value is @code{0.0}.
+
+@item tone_hz
+Set the frequency of the tone inserted in the stream when silence is removed.
+Default value is @code{1000.0}.
 @end table
 
 @subsection Examples
diff --git a/libavfilter/af_silenceremove.c b/libavfilter/af_silenceremove.c
index f156d18..07cf428 100644
--- a/libavfilter/af_silenceremove.c
+++ b/libavfilter/af_silenceremove.c
@@ -3,6 +3,7 @@
  * Copyright (c) 2001 Chris Bagwell
  * Copyright (c) 2003 Donnie Smith
  * Copyright (c) 2014 Paul B Mahol
+ * Copyright (c) 2016 Shoretel <gr...@shoretel.com>
  *
  * This file is part of FFmpeg.
  *
@@ -31,11 +32,20 @@
 #include "internal.h"
 
 enum SilenceMode {
-    SILENCE_TRIM,
+    SILENCE_TRIM = 0,
     SILENCE_TRIM_FLUSH,
     SILENCE_COPY,
     SILENCE_COPY_FLUSH,
-    SILENCE_STOP
+    SILENCE_STOP,
+    SILENCE_END_MARKER
+};
+
+static const char* SILENCE_MODE_NAMES[] = {
+    NULL_IF_CONFIG_SMALL("TRIM"),
+    NULL_IF_CONFIG_SMALL("TRIM_FLUSH"),
+    NULL_IF_CONFIG_SMALL("COPY"),
+    NULL_IF_CONFIG_SMALL("COPY_FLUSH"),
+    NULL_IF_CONFIG_SMALL("STOP")
 };
 
 typedef struct SilenceRemoveContext {
@@ -75,6 +85,10 @@ typedef struct SilenceRemoveContext {
     int detection;
     void (*update)(struct SilenceRemoveContext *s, double sample);
     double(*compute)(struct SilenceRemoveContext *s, double sample);
+
+    double last_pts_seconds;
+    double tone_duration;
+    double tone_hz;
 } SilenceRemoveContext;
 
 #define OFFSET(x) offsetof(SilenceRemoveContext, x)
@@ -91,11 +105,51 @@ static const AVOption silenceremove_options[] = {
     {   "peak",          0,    0,                       AV_OPT_TYPE_CONST,    
{.i64=0},     0,       0, FLAGS, "detection" },
     {   "rms",           0,    0,                       AV_OPT_TYPE_CONST,    
{.i64=1},     0,       0, FLAGS, "detection" },
     { "window",          NULL, OFFSET(window_ratio),    AV_OPT_TYPE_DOUBLE,   
{.dbl=0.02},  0,      10, FLAGS },
-    { NULL }
+    {
+        .name = "tone_duration",
+        .help = "length of tone inserted when silence is detected (0 to 
disable)",
+        .offset = OFFSET(tone_duration),
+        .type = AV_OPT_TYPE_DOUBLE,
+        .default_val = {.dbl=0.0},
+        .min = 0.0,
+        .max = DBL_MAX,
+        .flags = FLAGS,
+        .unit = "tone",
+    },
+    {
+        .name = "tone_hz",
+        .help = "frequency of tone inserted when silence is removed, 1 kHz 
default",
+        .offset = OFFSET(tone_hz),
+        .type = AV_OPT_TYPE_DOUBLE,
+        .default_val = {.dbl=1000.0},
+        .min = 0.0,
+        .max = DBL_MAX,
+        .flags = FLAGS,
+        .unit = "tone",
+    },
+    {NULL}
 };
 
 AVFILTER_DEFINE_CLASS(silenceremove);
 
+static const char* mode_to_string(enum SilenceMode mode)
+{
+    if (mode >= SILENCE_END_MARKER) {
+        return "";
+    }
+    /* This can be null if the config is small.  */
+    return SILENCE_MODE_NAMES[mode] ? SILENCE_MODE_NAMES[mode]:"";
+}
+
+
+static void set_mode(AVFilterContext *ctx, enum SilenceMode new)
+{
+    SilenceRemoveContext *s = ctx->priv;
+    av_log(ctx, AV_LOG_DEBUG, "changing state %s=>%s\n",
+           mode_to_string(s->mode), mode_to_string(new));
+    s->mode = new;
+}
+
 static double compute_peak(SilenceRemoveContext *s, double sample)
 {
     double new_sum;
@@ -209,14 +263,46 @@ static int config_input(AVFilterLink *inlink)
     s->stop_holdoff_end    = 0;
     s->stop_found_periods  = 0;
 
-    if (s->start_periods)
-        s->mode = SILENCE_TRIM;
-    else
-        s->mode = SILENCE_COPY;
+    set_mode(ctx, s->start_periods ? SILENCE_TRIM:SILENCE_COPY);
 
     return 0;
 }
 
+static int insert_tone(AVFilterLink *inlink,
+                       AVFilterLink *outlink,
+                       double tone_hz,
+                       double duration)
+{
+    AVFilterContext *ctx = inlink->dst;
+    int sample_count = duration * inlink->sample_rate;
+    double twopi = 2.0 * M_PI;
+    int i = 0;
+    AVFrame *out = NULL;
+    double *obuf = NULL;
+    double step = 0.0;
+    double s = 0.0;
+
+    out = ff_get_audio_buffer(inlink, sample_count / inlink->channels);
+    if (!out) {
+        return AVERROR(ENOMEM);
+    }
+    obuf = (double *)out->data[0];
+    step = tone_hz / (double)out->sample_rate;
+    s = step;
+
+    av_log(ctx, AV_LOG_DEBUG,
+           "insert beep tone=%fhz duration=%f seconds\n",
+           tone_hz, duration);
+
+
+    for (i=0; i<sample_count; ++i) {
+        *obuf++ = sin(twopi * s);
+        s += step;
+    }
+    return ff_filter_frame(outlink, out);
+}
+
+
 static void flush(AVFrame *out, AVFilterLink *outlink,
                   int *nb_samples_written, int *ret)
 {
@@ -229,6 +315,28 @@ static void flush(AVFrame *out, AVFilterLink *outlink,
     }
 }
 
+
+static int process_tone(AVFilterLink *inlink)
+{
+    int ret = 0;
+    double pts_seconds = 0.0;
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    SilenceRemoveContext *s = ctx->priv;
+    pts_seconds = (inlink->current_pts_us / 1000000.0) / AV_TIME_BASE;
+
+    /* Check to be certain that we don't flood the stream with
+     * annoying tones. */
+    if ((s->last_pts_seconds == 0.0)
+        || (pts_seconds - s->last_pts_seconds) > (s->tone_duration * 2.0)) {
+
+        ret = insert_tone(inlink, outlink, s->tone_hz, s->tone_duration);
+        s->last_pts_seconds = pts_seconds;
+    }
+
+    return ret;
+}
+
 static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 {
     AVFilterContext *ctx = inlink->dst;
@@ -243,7 +351,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
 
     switch (s->mode) {
     case SILENCE_TRIM:
-silence_trim:
+    silence_trim:
         nbs = in->nb_samples - nb_samples_read / inlink->channels;
         if (!nbs)
             break;
@@ -263,7 +371,7 @@ silence_trim:
 
                 if (s->start_holdoff_end >= s->start_duration * 
inlink->channels) {
                     if (++s->start_found_periods >= s->start_periods) {
-                        s->mode = SILENCE_TRIM_FLUSH;
+                        set_mode(ctx, SILENCE_TRIM_FLUSH);
                         goto silence_trim_flush;
                     }
 
@@ -283,7 +391,7 @@ silence_trim:
         break;
 
     case SILENCE_TRIM_FLUSH:
-silence_trim_flush:
+    silence_trim_flush:
         nbs  = s->start_holdoff_end - s->start_holdoff_offset;
         nbs -= nbs % inlink->channels;
         if (!nbs)
@@ -304,13 +412,13 @@ silence_trim_flush:
         if (s->start_holdoff_offset == s->start_holdoff_end) {
             s->start_holdoff_offset = 0;
             s->start_holdoff_end = 0;
-            s->mode = SILENCE_COPY;
+            set_mode(ctx, SILENCE_COPY);
             goto silence_copy;
         }
         break;
 
     case SILENCE_COPY:
-silence_copy:
+    silence_copy:
         nbs = in->nb_samples - nb_samples_read / inlink->channels;
         if (!nbs)
             break;
@@ -329,7 +437,7 @@ silence_copy:
                     threshold &= s->compute(s, ibuf[j]) > s->stop_threshold;
 
                 if (threshold && s->stop_holdoff_end && !s->leave_silence) {
-                    s->mode = SILENCE_COPY_FLUSH;
+                    set_mode(ctx, SILENCE_COPY_FLUSH);
                     flush(out, outlink, &nb_samples_written, &ret);
                     goto silence_copy_flush;
                 } else if (threshold) {
@@ -357,7 +465,7 @@ silence_copy:
                             s->stop_holdoff_end = 0;
 
                             if (!s->restart) {
-                                s->mode = SILENCE_STOP;
+                                set_mode(ctx, SILENCE_STOP);
                                 flush(out, outlink, &nb_samples_written, &ret);
                                 goto silence_stop;
                             } else {
@@ -366,12 +474,19 @@ silence_copy:
                                 s->start_holdoff_offset = 0;
                                 s->start_holdoff_end = 0;
                                 clear_window(s);
-                                s->mode = SILENCE_TRIM;
-                                flush(out, outlink, &nb_samples_written, &ret);
-                                goto silence_trim;
+                                set_mode(ctx, SILENCE_TRIM);
+
+                                if (s->tone_duration > 0.0) {
+                                    ret = process_tone(inlink);
+                                }
+                                if (!ret) {
+                                    flush(out, outlink,
+                                          &nb_samples_written, &ret);
+                                    goto silence_trim;
+                                }
                             }
                         }
-                        s->mode = SILENCE_COPY_FLUSH;
+                        set_mode(ctx, SILENCE_COPY_FLUSH);
                         flush(out, outlink, &nb_samples_written, &ret);
                         goto silence_copy_flush;
                     }
@@ -385,7 +500,7 @@ silence_copy:
         break;
 
     case SILENCE_COPY_FLUSH:
-silence_copy_flush:
+    silence_copy_flush:
         nbs  = s->stop_holdoff_end - s->stop_holdoff_offset;
         nbs -= nbs % inlink->channels;
         if (!nbs)
@@ -406,12 +521,12 @@ silence_copy_flush:
         if (s->stop_holdoff_offset == s->stop_holdoff_end) {
             s->stop_holdoff_offset = 0;
             s->stop_holdoff_end = 0;
-            s->mode = SILENCE_COPY;
+            set_mode(ctx, SILENCE_COPY);
             goto silence_copy;
         }
         break;
     case SILENCE_STOP:
-silence_stop:
+    silence_stop:
         break;
     }
 
@@ -427,6 +542,8 @@ static int request_frame(AVFilterLink *outlink)
     int ret;
 
     ret = ff_request_frame(ctx->inputs[0]);
+    /* If there is no more data but the holdoff buffer still has data
+     * then copy the holdoff buffer out */
     if (ret == AVERROR_EOF && (s->mode == SILENCE_COPY_FLUSH ||
                                s->mode == SILENCE_COPY)) {
         int nbs = s->stop_holdoff_end - s->stop_holdoff_offset;
@@ -441,7 +558,7 @@ static int request_frame(AVFilterLink *outlink)
                    nbs * sizeof(double));
             ret = ff_filter_frame(ctx->inputs[0], frame);
         }
-        s->mode = SILENCE_STOP;
+        set_mode(ctx, SILENCE_STOP);
     }
     return ret;
 }
diff --git a/libavfilter/version.h b/libavfilter/version.h
index 93d249b..4626ca4 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -31,7 +31,7 @@
 
 #define LIBAVFILTER_VERSION_MAJOR   6
 #define LIBAVFILTER_VERSION_MINOR  63
-#define LIBAVFILTER_VERSION_MICRO 100
+#define LIBAVFILTER_VERSION_MICRO 101
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
                                                LIBAVFILTER_VERSION_MINOR, \
-- 
1.9.1

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Reply via email to