From 41405e90cb2fb41441a6cf29c7a0d14362fd1b1f Mon Sep 17 00:00:00 2001 From: Greg Rowe Date: Fri, 7 Oct 2016 13:39:58 -0400 Subject: [PATCH] avfilter/af_silenceremove: add optional tone when silence is removed This commit adds two options to the af_silenceremove filter. It adds tone_duration and tone_hz making it possible to insert a tone when silence is removed. Tone insertion is disabled by default (by using a tone_duration of 0.0 seconds). Signed-off-by: Greg Rowe --- Changelog | 1 + doc/filters.texi | 11 ++- libavfilter/af_silenceremove.c | 161 +++++++++++++++++++++++++++++++++++------ libavfilter/version.h | 2 +- 4 files changed, 151 insertions(+), 24 deletions(-) diff --git a/Changelog b/Changelog index 0da009c..86e031c 100644 --- a/Changelog +++ b/Changelog @@ -2,6 +2,7 @@ Entries are sorted chronologically from oldest to youngest within each release, releases are sorted from youngest to oldest. version : +- Added optional tone insertion in af_silenceremove - libopenmpt demuxer - tee protocol - Changed metadata print option to accept general urls diff --git a/doc/filters.texi b/doc/filters.texi index 4b2f7bf..e09a303 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -3340,7 +3340,8 @@ ffmpeg -i silence.mp3 -af silencedetect=noise=0.0001 -f null - @section silenceremove -Remove silence from the beginning, middle or end of the audio. +Remove silence from the beginning, middle or end of the audio while +optionally inserting a tone where silence was removed. The filter accepts the following options: @@ -3401,6 +3402,14 @@ Default value is @code{rms}. @item window Set ratio used to calculate size of window for detecting silence. Default value is @code{0.02}. Allowed range is from @code{0} to @code{10}. + +@item tone_duration +Set the duration of the tone inserted in the stream when silence is removed. A value of @code{0} disables tone insertion. +Default value is @code{0.0}. + +@item tone_hz +Set the frequency of the tone inserted in the stream when silence is removed. +Default value is @code{1000.0}. @end table @subsection Examples diff --git a/libavfilter/af_silenceremove.c b/libavfilter/af_silenceremove.c index f156d18..07cf428 100644 --- a/libavfilter/af_silenceremove.c +++ b/libavfilter/af_silenceremove.c @@ -3,6 +3,7 @@ * Copyright (c) 2001 Chris Bagwell * Copyright (c) 2003 Donnie Smith * Copyright (c) 2014 Paul B Mahol + * Copyright (c) 2016 Shoretel * * This file is part of FFmpeg. * @@ -31,11 +32,20 @@ #include "internal.h" enum SilenceMode { - SILENCE_TRIM, + SILENCE_TRIM = 0, SILENCE_TRIM_FLUSH, SILENCE_COPY, SILENCE_COPY_FLUSH, - SILENCE_STOP + SILENCE_STOP, + SILENCE_END_MARKER +}; + +static const char* SILENCE_MODE_NAMES[] = { + NULL_IF_CONFIG_SMALL("TRIM"), + NULL_IF_CONFIG_SMALL("TRIM_FLUSH"), + NULL_IF_CONFIG_SMALL("COPY"), + NULL_IF_CONFIG_SMALL("COPY_FLUSH"), + NULL_IF_CONFIG_SMALL("STOP") }; typedef struct SilenceRemoveContext { @@ -75,6 +85,10 @@ typedef struct SilenceRemoveContext { int detection; void (*update)(struct SilenceRemoveContext *s, double sample); double(*compute)(struct SilenceRemoveContext *s, double sample); + + double last_pts_seconds; + double tone_duration; + double tone_hz; } SilenceRemoveContext; #define OFFSET(x) offsetof(SilenceRemoveContext, x) @@ -91,11 +105,51 @@ static const AVOption silenceremove_options[] = { { "peak", 0, 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, FLAGS, "detection" }, { "rms", 0, 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, FLAGS, "detection" }, { "window", NULL, OFFSET(window_ratio), AV_OPT_TYPE_DOUBLE, {.dbl=0.02}, 0, 10, FLAGS }, - { NULL } + { + .name = "tone_duration", + .help = "length of tone inserted when silence is detected (0 to disable)", + .offset = OFFSET(tone_duration), + .type = AV_OPT_TYPE_DOUBLE, + .default_val = {.dbl=0.0}, + .min = 0.0, + .max = DBL_MAX, + .flags = FLAGS, + .unit = "tone", + }, + { + .name = "tone_hz", + .help = "frequency of tone inserted when silence is removed, 1 kHz default", + .offset = OFFSET(tone_hz), + .type = AV_OPT_TYPE_DOUBLE, + .default_val = {.dbl=1000.0}, + .min = 0.0, + .max = DBL_MAX, + .flags = FLAGS, + .unit = "tone", + }, + {NULL} }; AVFILTER_DEFINE_CLASS(silenceremove); +static const char* mode_to_string(enum SilenceMode mode) +{ + if (mode >= SILENCE_END_MARKER) { + return ""; + } + /* This can be null if the config is small. */ + return SILENCE_MODE_NAMES[mode] ? SILENCE_MODE_NAMES[mode]:""; +} + + +static void set_mode(AVFilterContext *ctx, enum SilenceMode new) +{ + SilenceRemoveContext *s = ctx->priv; + av_log(ctx, AV_LOG_DEBUG, "changing state %s=>%s\n", + mode_to_string(s->mode), mode_to_string(new)); + s->mode = new; +} + static double compute_peak(SilenceRemoveContext *s, double sample) { double new_sum; @@ -209,14 +263,46 @@ static int config_input(AVFilterLink *inlink) s->stop_holdoff_end = 0; s->stop_found_periods = 0; - if (s->start_periods) - s->mode = SILENCE_TRIM; - else - s->mode = SILENCE_COPY; + set_mode(ctx, s->start_periods ? SILENCE_TRIM:SILENCE_COPY); return 0; } +static int insert_tone(AVFilterLink *inlink, + AVFilterLink *outlink, + double tone_hz, + double duration) +{ + AVFilterContext *ctx = inlink->dst; + int sample_count = duration * inlink->sample_rate; + double twopi = 2.0 * M_PI; + int i = 0; + AVFrame *out = NULL; + double *obuf = NULL; + double step = 0.0; + double s = 0.0; + + out = ff_get_audio_buffer(inlink, sample_count / inlink->channels); + if (!out) { + return AVERROR(ENOMEM); + } + obuf = (double *)out->data[0]; + step = tone_hz / (double)out->sample_rate; + s = step; + + av_log(ctx, AV_LOG_DEBUG, + "insert beep tone=%fhz duration=%f seconds\n", + tone_hz, duration); + + + for (i=0; idst; + AVFilterLink *outlink = ctx->outputs[0]; + SilenceRemoveContext *s = ctx->priv; + pts_seconds = (inlink->current_pts_us / 1000000.0) / AV_TIME_BASE; + + /* Check to be certain that we don't flood the stream with + * annoying tones. */ + if ((s->last_pts_seconds == 0.0) + || (pts_seconds - s->last_pts_seconds) > (s->tone_duration * 2.0)) { + + ret = insert_tone(inlink, outlink, s->tone_hz, s->tone_duration); + s->last_pts_seconds = pts_seconds; + } + + return ret; +} + static int filter_frame(AVFilterLink *inlink, AVFrame *in) { AVFilterContext *ctx = inlink->dst; @@ -243,7 +351,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) switch (s->mode) { case SILENCE_TRIM: -silence_trim: + silence_trim: nbs = in->nb_samples - nb_samples_read / inlink->channels; if (!nbs) break; @@ -263,7 +371,7 @@ silence_trim: if (s->start_holdoff_end >= s->start_duration * inlink->channels) { if (++s->start_found_periods >= s->start_periods) { - s->mode = SILENCE_TRIM_FLUSH; + set_mode(ctx, SILENCE_TRIM_FLUSH); goto silence_trim_flush; } @@ -283,7 +391,7 @@ silence_trim: break; case SILENCE_TRIM_FLUSH: -silence_trim_flush: + silence_trim_flush: nbs = s->start_holdoff_end - s->start_holdoff_offset; nbs -= nbs % inlink->channels; if (!nbs) @@ -304,13 +412,13 @@ silence_trim_flush: if (s->start_holdoff_offset == s->start_holdoff_end) { s->start_holdoff_offset = 0; s->start_holdoff_end = 0; - s->mode = SILENCE_COPY; + set_mode(ctx, SILENCE_COPY); goto silence_copy; } break; case SILENCE_COPY: -silence_copy: + silence_copy: nbs = in->nb_samples - nb_samples_read / inlink->channels; if (!nbs) break; @@ -329,7 +437,7 @@ silence_copy: threshold &= s->compute(s, ibuf[j]) > s->stop_threshold; if (threshold && s->stop_holdoff_end && !s->leave_silence) { - s->mode = SILENCE_COPY_FLUSH; + set_mode(ctx, SILENCE_COPY_FLUSH); flush(out, outlink, &nb_samples_written, &ret); goto silence_copy_flush; } else if (threshold) { @@ -357,7 +465,7 @@ silence_copy: s->stop_holdoff_end = 0; if (!s->restart) { - s->mode = SILENCE_STOP; + set_mode(ctx, SILENCE_STOP); flush(out, outlink, &nb_samples_written, &ret); goto silence_stop; } else { @@ -366,12 +474,19 @@ silence_copy: s->start_holdoff_offset = 0; s->start_holdoff_end = 0; clear_window(s); - s->mode = SILENCE_TRIM; - flush(out, outlink, &nb_samples_written, &ret); - goto silence_trim; + set_mode(ctx, SILENCE_TRIM); + + if (s->tone_duration > 0.0) { + ret = process_tone(inlink); + } + if (!ret) { + flush(out, outlink, + &nb_samples_written, &ret); + goto silence_trim; + } } } - s->mode = SILENCE_COPY_FLUSH; + set_mode(ctx, SILENCE_COPY_FLUSH); flush(out, outlink, &nb_samples_written, &ret); goto silence_copy_flush; } @@ -385,7 +500,7 @@ silence_copy: break; case SILENCE_COPY_FLUSH: -silence_copy_flush: + silence_copy_flush: nbs = s->stop_holdoff_end - s->stop_holdoff_offset; nbs -= nbs % inlink->channels; if (!nbs) @@ -406,12 +521,12 @@ silence_copy_flush: if (s->stop_holdoff_offset == s->stop_holdoff_end) { s->stop_holdoff_offset = 0; s->stop_holdoff_end = 0; - s->mode = SILENCE_COPY; + set_mode(ctx, SILENCE_COPY); goto silence_copy; } break; case SILENCE_STOP: -silence_stop: + silence_stop: break; } @@ -427,6 +542,8 @@ static int request_frame(AVFilterLink *outlink) int ret; ret = ff_request_frame(ctx->inputs[0]); + /* If there is no more data but the holdoff buffer still has data + * then copy the holdoff buffer out */ if (ret == AVERROR_EOF && (s->mode == SILENCE_COPY_FLUSH || s->mode == SILENCE_COPY)) { int nbs = s->stop_holdoff_end - s->stop_holdoff_offset; @@ -441,7 +558,7 @@ static int request_frame(AVFilterLink *outlink) nbs * sizeof(double)); ret = ff_filter_frame(ctx->inputs[0], frame); } - s->mode = SILENCE_STOP; + set_mode(ctx, SILENCE_STOP); } return ret; } diff --git a/libavfilter/version.h b/libavfilter/version.h index 93d249b..4626ca4 100644 --- a/libavfilter/version.h +++ b/libavfilter/version.h @@ -31,7 +31,7 @@ #define LIBAVFILTER_VERSION_MAJOR 6 #define LIBAVFILTER_VERSION_MINOR 63 -#define LIBAVFILTER_VERSION_MICRO 100 +#define LIBAVFILTER_VERSION_MICRO 101 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \ LIBAVFILTER_VERSION_MINOR, \ -- 1.9.1