[FFmpeg-devel] [PATCH] avfilter/af_silenceremove: add optional tone when silence is removed

Fri Oct 14 04:43:48 EEST 2016

On Thu, Oct 13, 2016 at 08:48:11PM +0000, Greg Rowe wrote:
> The attached patch adds two optional parameters to af_silenceremove that make it possible to insert a tone where silence was removed.  This alerts the user that silence has been trimmed from the original stream.  The parameters are tone_duration, which defaults to 0.0 (disabling the feature), and tone_hz, which specifies the frequency of the tone.
> 
> 
> Thanks,
> 
> Greg
> 
> 
> --
> 
> Greg Rowe
> 
> www.shoretel.com

> From 41405e90cb2fb41441a6cf29c7a0d14362fd1b1f Mon Sep 17 00:00:00 2001
> From: Greg Rowe <growe at shoretel.com>
> Date: Fri, 7 Oct 2016 13:39:58 -0400
> Subject: [PATCH] avfilter/af_silenceremove: add optional tone when silence is
>  removed
> 
> This commit adds two options to the af_silenceremove filter.  It adds
> tone_duration and tone_hz making it possible to insert a tone when
> silence is removed.  Tone insertion is disabled by default (by using a
> tone_duration of 0.0 seconds).
> 
> Signed-off-by: Greg Rowe <growe at shoretel.com>
> ---
>  Changelog                      |   1 +
>  doc/filters.texi               |  11 ++-
>  libavfilter/af_silenceremove.c | 161 +++++++++++++++++++++++++++++++++++------
>  libavfilter/version.h          |   2 +-
>  4 files changed, 151 insertions(+), 24 deletions(-)
> 
> diff --git a/Changelog b/Changelog
> index 0da009c..86e031c 100644
> --- a/Changelog
> +++ b/Changelog
> @@ -2,6 +2,7 @@ Entries are sorted chronologically from oldest to youngest within each release,
>  releases are sorted from youngest to oldest.
>  
>  version <next>:
> +- Added optional tone insertion in af_silenceremove
>  - libopenmpt demuxer
>  - tee protocol
>  - Changed metadata print option to accept general urls
> diff --git a/doc/filters.texi b/doc/filters.texi
> index 4b2f7bf..e09a303 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -3340,7 +3340,8 @@ ffmpeg -i silence.mp3 -af silencedetect=noise=0.0001 -f null -
>  
>  @section silenceremove
>  
> -Remove silence from the beginning, middle or end of the audio.
> +Remove silence from the beginning, middle or end of the audio while
> +optionally inserting a tone where silence was removed.
>  
>  The filter accepts the following options:
>  
> @@ -3401,6 +3402,14 @@ Default value is @code{rms}.
>  @item window
>  Set ratio used to calculate size of window for detecting silence.
>  Default value is @code{0.02}. Allowed range is from @code{0} to @code{10}.
> +
> +@item tone_duration
> +Set the duration of the tone inserted in the stream when silence is removed.  A value of @code{0} disables tone insertion.
> +Default value is @code{0.0}.
> +
> +@item tone_hz
> +Set the frequency of the tone inserted in the stream when silence is removed.
> +Default value is @code{1000.0}.
>  @end table
>  
>  @subsection Examples
> diff --git a/libavfilter/af_silenceremove.c b/libavfilter/af_silenceremove.c
> index f156d18..07cf428 100644
> --- a/libavfilter/af_silenceremove.c
> +++ b/libavfilter/af_silenceremove.c
> @@ -3,6 +3,7 @@
>   * Copyright (c) 2001 Chris Bagwell
>   * Copyright (c) 2003 Donnie Smith
>   * Copyright (c) 2014 Paul B Mahol
> + * Copyright (c) 2016 Shoretel <growe at shoretel.com>
>   *
>   * This file is part of FFmpeg.
>   *

> @@ -31,11 +32,20 @@
>  #include "internal.h"
>  
>  enum SilenceMode {
> -    SILENCE_TRIM,
> +    SILENCE_TRIM = 0,

unrelated change and thus should not be in this patch


>      SILENCE_TRIM_FLUSH,
>      SILENCE_COPY,
>      SILENCE_COPY_FLUSH,
> -    SILENCE_STOP
> +    SILENCE_STOP,
> +    SILENCE_END_MARKER
> +};
> +
> +static const char* SILENCE_MODE_NAMES[] = {
> +    NULL_IF_CONFIG_SMALL("TRIM"),
> +    NULL_IF_CONFIG_SMALL("TRIM_FLUSH"),
> +    NULL_IF_CONFIG_SMALL("COPY"),
> +    NULL_IF_CONFIG_SMALL("COPY_FLUSH"),
> +    NULL_IF_CONFIG_SMALL("STOP")
>  };
>  
>  typedef struct SilenceRemoveContext {
> @@ -75,6 +85,10 @@ typedef struct SilenceRemoveContext {
>      int detection;
>      void (*update)(struct SilenceRemoveContext *s, double sample);
>      double(*compute)(struct SilenceRemoveContext *s, double sample);
> +
> +    double last_pts_seconds;
> +    double tone_duration;
> +    double tone_hz;
>  } SilenceRemoveContext;
>  
>  #define OFFSET(x) offsetof(SilenceRemoveContext, x)
> @@ -91,11 +105,51 @@ static const AVOption silenceremove_options[] = {
>      {   "peak",          0,    0,                       AV_OPT_TYPE_CONST,    {.i64=0},     0,       0, FLAGS, "detection" },
>      {   "rms",           0,    0,                       AV_OPT_TYPE_CONST,    {.i64=1},     0,       0, FLAGS, "detection" },
>      { "window",          NULL, OFFSET(window_ratio),    AV_OPT_TYPE_DOUBLE,   {.dbl=0.02},  0,      10, FLAGS },
> +    {
> +        .name = "tone_duration",
> +        .help = "length of tone inserted when silence is detected (0 to disable)",
> +        .offset = OFFSET(tone_duration),
> +        .type = AV_OPT_TYPE_DOUBLE,
> +        .default_val = {.dbl=0.0},
> +        .min = 0.0,
> +        .max = DBL_MAX,
> +        .flags = FLAGS,
> +        .unit = "tone",
> +    },
> +    {
> +        .name = "tone_hz",
> +        .help = "frequency of tone inserted when silence is removed, 1 kHz default",
> +        .offset = OFFSET(tone_hz),
> +        .type = AV_OPT_TYPE_DOUBLE,
> +        .default_val = {.dbl=1000.0},
> +        .min = 0.0,
> +        .max = DBL_MAX,
> +        .flags = FLAGS,
> +        .unit = "tone",
> +    },

> -    { NULL }
> +    {NULL}

unrelated


>  };
>  
>  AVFILTER_DEFINE_CLASS(silenceremove);
>  
> +static const char* mode_to_string(enum SilenceMode mode)
> +{
> +    if (mode >= SILENCE_END_MARKER) {
> +        return "";
> +    }
> +    /* This can be null if the config is small.  */
> +    return SILENCE_MODE_NAMES[mode] ? SILENCE_MODE_NAMES[mode]:"";
> +}
> +
> +
> +static void set_mode(AVFilterContext *ctx, enum SilenceMode new)
> +{
> +    SilenceRemoveContext *s = ctx->priv;
> +    av_log(ctx, AV_LOG_DEBUG, "changing state %s=>%s\n",
> +           mode_to_string(s->mode), mode_to_string(new));
> +    s->mode = new;
> +}

looks unneeded


> +
>  static double compute_peak(SilenceRemoveContext *s, double sample)
>  {
>      double new_sum;
> @@ -209,14 +263,46 @@ static int config_input(AVFilterLink *inlink)
>      s->stop_holdoff_end    = 0;
>      s->stop_found_periods  = 0;
>  
> -    if (s->start_periods)
> -        s->mode = SILENCE_TRIM;
> -    else
> -        s->mode = SILENCE_COPY;
> +    set_mode(ctx, s->start_periods ? SILENCE_TRIM:SILENCE_COPY);

unrelated


>  
>      return 0;
>  }
>  
> +static int insert_tone(AVFilterLink *inlink,
> +                       AVFilterLink *outlink,
> +                       double tone_hz,
> +                       double duration)
> +{
> +    AVFilterContext *ctx = inlink->dst;
> +    int sample_count = duration * inlink->sample_rate;
> +    double twopi = 2.0 * M_PI;
> +    int i = 0;
> +    AVFrame *out = NULL;
> +    double *obuf = NULL;
> +    double step = 0.0;
> +    double s = 0.0;
> +
> +    out = ff_get_audio_buffer(inlink, sample_count / inlink->channels);
> +    if (!out) {
> +        return AVERROR(ENOMEM);
> +    }
> +    obuf = (double *)out->data[0];
> +    step = tone_hz / (double)out->sample_rate;
> +    s = step;
> +
> +    av_log(ctx, AV_LOG_DEBUG,
> +           "insert beep tone=%fhz duration=%f seconds\n",
> +           tone_hz, duration);
> +
> +
> +    for (i=0; i<sample_count; ++i) {
> +        *obuf++ = sin(twopi * s);
> +        s += step;
> +    }
> +    return ff_filter_frame(outlink, out);
> +}
> +
> +
>  static void flush(AVFrame *out, AVFilterLink *outlink,
>                    int *nb_samples_written, int *ret)
>  {

> @@ -229,6 +315,28 @@ static void flush(AVFrame *out, AVFilterLink *outlink,
>      }
>  }
>  
> +
> +static int process_tone(AVFilterLink *inlink)
> +{
> +    int ret = 0;
> +    double pts_seconds = 0.0;
> +    AVFilterContext *ctx = inlink->dst;
> +    AVFilterLink *outlink = ctx->outputs[0];
> +    SilenceRemoveContext *s = ctx->priv;
> +    pts_seconds = (inlink->current_pts_us / 1000000.0) / AV_TIME_BASE;

There is no need to use floating point here or in many other cases.
Using floating point makes regression tests harder, as results can
differ between platforms. It is also not accurate: double has too few
mantissa bits to accurately represent int64_t.


[...]

> +
> +    return ret;
> +}
> +
>  static int filter_frame(AVFilterLink *inlink, AVFrame *in)
>  {
>      AVFilterContext *ctx = inlink->dst;

> @@ -243,7 +351,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
>  
>      switch (s->mode) {
>      case SILENCE_TRIM:
> -silence_trim:
> +    silence_trim:

unrelated

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

If a bugfix only changes things apparently unrelated to the bug with no
further explanation, that is a good sign that the bugfix is wrong.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 181 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20161014/c6102551/attachment.sig>