[FFmpeg-devel] [PATCH 2/2] avfilter: add showspectrumpic filter

Paul B Mahol onemda at gmail.com
Fri Jan 1 10:00:31 CET 2016


Signed-off-by: Paul B Mahol <onemda at gmail.com>
---
 doc/filters.texi               | 106 +++++++++++++++++++
 libavfilter/Makefile           |   1 +
 libavfilter/allfilters.c       |   1 +
 libavfilter/avf_showspectrum.c | 233 ++++++++++++++++++++++++++++++++++++++---
 4 files changed, 326 insertions(+), 15 deletions(-)

diff --git a/doc/filters.texi b/doc/filters.texi
index 8aa3b47..224099d 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -14704,6 +14704,112 @@ ffplay -f lavfi 'amovie=input.mp3, asplit [a][out1];
 @end example
 @end itemize
 
+ at section showspectrumpic
+
+Convert input audio to a single video frame, representing the audio frequency
+spectrum.
+
+The filter accepts the following options:
+
+ at table @option
+ at item size, s
+Specify the video size for the output. For the syntax of this option, check the
+ at ref{video size syntax,,"Video size" section in the ffmpeg-utils manual,ffmpeg-utils}.
+Default value is @code{640x512}.
+
+ at item mode
+Specify display mode.
+
+It accepts the following values:
+ at table @samp
+ at item combined
+all channels are displayed in the same row
+ at item separate
+all channels are displayed in separate rows
+ at end table
+Default value is @samp{combined}.
+
+ at item color
+Specify display color mode.
+
+It accepts the following values:
+ at table @samp
+ at item channel
+each channel is displayed in a separate color
+ at item intensity
+each channel is displayed using the same color scheme
+ at item rainbow
+each channel is displayed using the rainbow color scheme
+ at item moreland
+each channel is displayed using the moreland color scheme
+ at item nebulae
+each channel is displayed using the nebulae color scheme
+ at item fire
+each channel is displayed using the fire color scheme
+ at end table
+Default value is @samp{channel}.
+
+ at item scale
+Specify scale used for calculating intensity color values.
+
+It accepts the following values:
+ at table @samp
+ at item lin
+linear
+ at item sqrt
+square root, default
+ at item cbrt
+cubic root
+ at item log
+logarithmic
+ at end table
+Default value is @samp{sqrt}.
+
+ at item saturation
+Set saturation modifier for displayed colors. Negative values provide
+alternative color scheme. @code{0} is no saturation at all.
+Saturation must be in [-10.0, 10.0] range.
+Default value is @code{1}.
+
+ at item win_func
+Set window function.
+
+It accepts the following values:
+ at table @samp
+ at item rect
+ at item bartlett
+ at item hann
+ at item hanning
+ at item hamming
+ at item blackman
+ at item welch
+ at item flattop
+ at item bharris
+ at item bnuttall
+ at item bhann
+ at item sine
+ at item nuttall
+ at item lanczos
+ at item gauss
+ at end table
+Default value is @code{hann}.
+
+ at item orientation
+Set orientation of time vs frequency axis. Can be @code{vertical} or
+ at code{horizontal}. Default is @code{vertical}.
+ at end table
+
+ at subsection Examples
+
+ at itemize
+ at item
+Extract an audio spectrogram of a whole audio track
+in a 1024x1024 picture using @command{ffmpeg}:
+ at example
+ffmpeg -i audio.flac -lavfi showspectrumpic=s=1024x1024 spectrogram.png
+ at end example
+ at end itemize
+
 @section showvolume
 
 Convert input audio volume to a video output.
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index e334016..689da73 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -286,6 +286,7 @@ OBJS-$(CONFIG_CONCAT_FILTER)                 += avf_concat.o
 OBJS-$(CONFIG_SHOWCQT_FILTER)                += avf_showcqt.o lswsutils.o lavfutils.o
 OBJS-$(CONFIG_SHOWFREQS_FILTER)              += avf_showfreqs.o window_func.o
 OBJS-$(CONFIG_SHOWSPECTRUM_FILTER)           += avf_showspectrum.o window_func.o
+OBJS-$(CONFIG_SHOWSPECTRUMPIC_FILTER)        += avf_showspectrum.o window_func.o
 OBJS-$(CONFIG_SHOWVOLUME_FILTER)             += avf_showvolume.o
 OBJS-$(CONFIG_SHOWWAVES_FILTER)              += avf_showwaves.o
 OBJS-$(CONFIG_SHOWWAVESPIC_FILTER)           += avf_showwaves.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index a039a39..2267e88 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -306,6 +306,7 @@ void avfilter_register_all(void)
     REGISTER_FILTER(SHOWCQT,        showcqt,        avf);
     REGISTER_FILTER(SHOWFREQS,      showfreqs,      avf);
     REGISTER_FILTER(SHOWSPECTRUM,   showspectrum,   avf);
+    REGISTER_FILTER(SHOWSPECTRUMPIC, showspectrumpic, avf);
     REGISTER_FILTER(SHOWVOLUME,     showvolume,     avf);
     REGISTER_FILTER(SHOWWAVES,      showwaves,      avf);
     REGISTER_FILTER(SHOWWAVESPIC,   showwavespic,   avf);
diff --git a/libavfilter/avf_showspectrum.c b/libavfilter/avf_showspectrum.c
index 0fa1be1..2992c2f 100644
--- a/libavfilter/avf_showspectrum.c
+++ b/libavfilter/avf_showspectrum.c
@@ -63,6 +63,7 @@ typedef struct {
     int rdft_bits;              ///< number of bits (RDFT window size = 1<<rdft_bits)
     FFTSample **rdft_data;      ///< bins holder for each (displayed) channels
     float *window_func_lut;     ///< Window function LUT
+    float **magnitudes;
     int win_func;
     int win_size;
     double win_scale;
@@ -186,6 +187,9 @@ static av_cold void uninit(AVFilterContext *ctx)
         av_freep(&s->rdft_data[i]);
     av_freep(&s->rdft_data);
     av_freep(&s->window_func_lut);
+    for (i = 0; i < s->nb_display_channels; i++)
+        av_freep(&s->magnitudes[i]);
+    av_freep(&s->magnitudes);
     av_frame_free(&s->outpicref);
     av_audio_fifo_free(s->fifo);
 }
@@ -267,6 +271,15 @@ static int config_output(AVFilterLink *outlink)
         av_freep(&s->rdft_data);
         s->nb_display_channels = inlink->channels;
 
+        s->magnitudes = av_calloc(s->nb_display_channels, sizeof(*s->magnitudes));
+        if (!s->magnitudes)
+            return AVERROR(ENOMEM);
+        for (i = 0; i < s->nb_display_channels; i++) {
+            s->magnitudes[i] = av_calloc(s->orientation == VERTICAL ? s->h: s->w, sizeof(**s->magnitudes));
+            if (!s->magnitudes[i])
+                return AVERROR(ENOMEM);
+        }
+
         s->rdft_data = av_calloc(s->nb_display_channels, sizeof(*s->rdft_data));
         if (!s->rdft_data)
             return AVERROR(ENOMEM);
@@ -370,23 +383,13 @@ static int request_frame(AVFilterLink *outlink)
     return ret;
 }
 
-static int plot_spectrum_column(AVFilterLink *inlink, AVFrame *insamples)
+static void run_rdft(ShowSpectrumContext *s, AVFrame *fin)
 {
-    int ret;
-    AVFilterContext *ctx = inlink->dst;
-    AVFilterLink *outlink = ctx->outputs[0];
-    ShowSpectrumContext *s = ctx->priv;
-    AVFrame *outpicref = s->outpicref;
-    const double w = s->win_scale;
-    int h = s->orientation == VERTICAL ? s->channel_height : s->channel_width;
-
-    int ch, plane, n, x, y;
-
-    av_assert0(insamples->nb_samples == s->win_size);
+    int ch, n;
 
     /* fill RDFT input with the number of samples available */
     for (ch = 0; ch < s->nb_display_channels; ch++) {
-        const int16_t *p = (int16_t *)insamples->extended_data[ch];
+        const int16_t *p = (int16_t *)fin->extended_data[ch];
 
         for (n = 0; n < s->win_size; n++)
             s->rdft_data[ch][n] = p[n] * s->window_func_lut[n];
@@ -395,12 +398,61 @@ static int plot_spectrum_column(AVFilterLink *inlink, AVFrame *insamples)
     /* run RDFT on each samples set */
     for (ch = 0; ch < s->nb_display_channels; ch++)
         av_rdft_calc(s->rdft, s->rdft_data[ch]);
+}
 
-    /* fill a new spectrum column */
 #define RE(y, ch) s->rdft_data[ch][2 * (y) + 0]
 #define IM(y, ch) s->rdft_data[ch][2 * (y) + 1]
 #define MAGNITUDE(y, ch) hypot(RE(y, ch), IM(y, ch))
 
+static void calc_magnitudes(ShowSpectrumContext *s)
+{
+    int ch, y, h = s->orientation == VERTICAL ? s->h: s->w;
+
+    for (ch = 0; ch < s->nb_display_channels; ch++) {
+        float *magnitudes = s->magnitudes[ch];
+
+        for (y = 0; y < h; y++)
+            magnitudes[y] = MAGNITUDE(y, ch);
+    }
+}
+
+static void acalc_magnitudes(ShowSpectrumContext *s)
+{
+    int ch, y, h = s->orientation == VERTICAL ? s->h: s->w;
+
+    for (ch = 0; ch < s->nb_display_channels; ch++) {
+        float *magnitudes = s->magnitudes[ch];
+
+        for (y = 0; y < h; y++)
+            magnitudes[y] += MAGNITUDE(y, ch);
+    }
+}
+
+static void scale_magnitudes(ShowSpectrumContext *s, float scale)
+{
+    int ch, y, h = s->orientation == VERTICAL ? s->h: s->w;
+
+    for (ch = 0; ch < s->nb_display_channels; ch++) {
+        float *magnitudes = s->magnitudes[ch];
+
+        for (y = 0; y < h; y++)
+            magnitudes[y] *= scale;
+    }
+}
+
+static int plot_spectrum_column(AVFilterLink *inlink, AVFrame *insamples)
+{
+    int ret;
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    ShowSpectrumContext *s = ctx->priv;
+    AVFrame *outpicref = s->outpicref;
+    const double w = s->win_scale;
+    int h = s->orientation == VERTICAL ? s->channel_height : s->channel_width;
+
+    int ch, plane, x, y;
+
+    /* fill a new spectrum column */
     /* initialize buffer for combining to black */
     if (s->orientation == VERTICAL) {
         for (y = 0; y < outlink->h; y++) {
@@ -417,6 +469,7 @@ static int plot_spectrum_column(AVFilterLink *inlink, AVFrame *insamples)
     }
 
     for (ch = 0; ch < s->nb_display_channels; ch++) {
+        float *magnitudes = s->magnitudes[ch];
         float yf, uf, vf;
 
         /* decide color range */
@@ -471,7 +524,7 @@ static int plot_spectrum_column(AVFilterLink *inlink, AVFrame *insamples)
             float *out = &s->combine_buffer[3 * row];
 
             /* get magnitude */
-            float a = w * MAGNITUDE(y, ch);
+            float a = w * magnitudes[y];
 
             /* apply scale */
             switch (s->scale) {
@@ -631,6 +684,11 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *insamples)
         if (ret < 0)
             goto fail;
 
+        av_assert0(fin->nb_samples == s->win_size);
+
+        run_rdft(s, fin);
+        calc_magnitudes(s);
+
         ret = plot_spectrum_column(inlink, fin);
         av_frame_free(&fin);
         av_audio_fifo_drain(s->fifo, s->skip_samples);
@@ -672,3 +730,148 @@ AVFilter ff_avf_showspectrum = {
     .outputs       = showspectrum_outputs,
     .priv_class    = &showspectrum_class,
 };
+
+static const AVOption showspectrumpic_options[] = {
+    { "size", "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str = "640x512"}, 0, 0, FLAGS },
+    { "s",    "set video size", OFFSET(w), AV_OPT_TYPE_IMAGE_SIZE, {.str = "640x512"}, 0, 0, FLAGS },
+    { "mode", "set channel display mode", OFFSET(mode), AV_OPT_TYPE_INT, {.i64=COMBINED}, COMBINED, NB_MODES-1, FLAGS, "mode" },
+        { "combined", "combined mode", 0, AV_OPT_TYPE_CONST, {.i64=COMBINED}, 0, 0, FLAGS, "mode" },
+        { "separate", "separate mode", 0, AV_OPT_TYPE_CONST, {.i64=SEPARATE}, 0, 0, FLAGS, "mode" },
+    { "color", "set channel coloring", OFFSET(color_mode), AV_OPT_TYPE_INT, {.i64=CHANNEL}, CHANNEL, NB_CLMODES-1, FLAGS, "color" },
+        { "channel",   "separate color for each channel", 0, AV_OPT_TYPE_CONST, {.i64=CHANNEL},   0, 0, FLAGS, "color" },
+        { "intensity", "intensity based coloring",        0, AV_OPT_TYPE_CONST, {.i64=INTENSITY}, 0, 0, FLAGS, "color" },
+        { "rainbow",   "rainbow based coloring",          0, AV_OPT_TYPE_CONST, {.i64=RAINBOW},   0, 0, FLAGS, "color" },
+        { "moreland",  "moreland based coloring",         0, AV_OPT_TYPE_CONST, {.i64=MORELAND},  0, 0, FLAGS, "color" },
+        { "nebulae",   "nebulae based coloring",          0, AV_OPT_TYPE_CONST, {.i64=NEBULAE},   0, 0, FLAGS, "color" },
+        { "fire",      "fire based coloring",             0, AV_OPT_TYPE_CONST, {.i64=FIRE},      0, 0, FLAGS, "color" },
+    { "scale", "set display scale", OFFSET(scale), AV_OPT_TYPE_INT, {.i64=SQRT}, LINEAR, NB_SCALES-1, FLAGS, "scale" },
+        { "sqrt", "square root", 0, AV_OPT_TYPE_CONST, {.i64=SQRT},   0, 0, FLAGS, "scale" },
+        { "cbrt", "cubic root",  0, AV_OPT_TYPE_CONST, {.i64=CBRT},   0, 0, FLAGS, "scale" },
+        { "log",  "logarithmic", 0, AV_OPT_TYPE_CONST, {.i64=LOG},    0, 0, FLAGS, "scale" },
+        { "lin",  "linear",      0, AV_OPT_TYPE_CONST, {.i64=LINEAR}, 0, 0, FLAGS, "scale" },
+    { "saturation", "color saturation multiplier", OFFSET(saturation), AV_OPT_TYPE_FLOAT, {.dbl = 1}, -10, 10, FLAGS },
+    { "win_func", "set window function", OFFSET(win_func), AV_OPT_TYPE_INT, {.i64 = WFUNC_HANNING}, 0, NB_WFUNC-1, FLAGS, "win_func" },
+        { "rect",     "Rectangular",      0, AV_OPT_TYPE_CONST, {.i64=WFUNC_RECT},     0, 0, FLAGS, "win_func" },
+        { "bartlett", "Bartlett",         0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BARTLETT}, 0, 0, FLAGS, "win_func" },
+        { "hann",     "Hann",             0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HANNING},  0, 0, FLAGS, "win_func" },
+        { "hanning",  "Hanning",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HANNING},  0, 0, FLAGS, "win_func" },
+        { "hamming",  "Hamming",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_HAMMING},  0, 0, FLAGS, "win_func" },
+        { "blackman", "Blackman",         0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BLACKMAN}, 0, 0, FLAGS, "win_func" },
+        { "welch",    "Welch",            0, AV_OPT_TYPE_CONST, {.i64=WFUNC_WELCH},    0, 0, FLAGS, "win_func" },
+        { "flattop",  "Flat-top",         0, AV_OPT_TYPE_CONST, {.i64=WFUNC_FLATTOP},  0, 0, FLAGS, "win_func" },
+        { "bharris",  "Blackman-Harris",  0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BHARRIS},  0, 0, FLAGS, "win_func" },
+        { "bnuttall", "Blackman-Nuttall", 0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BNUTTALL}, 0, 0, FLAGS, "win_func" },
+        { "bhann",    "Bartlett-Hann",    0, AV_OPT_TYPE_CONST, {.i64=WFUNC_BHANN},    0, 0, FLAGS, "win_func" },
+        { "sine",     "Sine",             0, AV_OPT_TYPE_CONST, {.i64=WFUNC_SINE},     0, 0, FLAGS, "win_func" },
+        { "nuttall",  "Nuttall",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_NUTTALL},  0, 0, FLAGS, "win_func" },
+        { "lanczos",  "Lanczos",          0, AV_OPT_TYPE_CONST, {.i64=WFUNC_LANCZOS},  0, 0, FLAGS, "win_func" },
+        { "gauss",    "Gauss",            0, AV_OPT_TYPE_CONST, {.i64=WFUNC_GAUSS},    0, 0, FLAGS, "win_func" },
+    { "orientation", "set orientation", OFFSET(orientation), AV_OPT_TYPE_INT, {.i64=VERTICAL}, 0, NB_ORIENTATIONS-1, FLAGS, "orientation" },
+        { "vertical",   NULL, 0, AV_OPT_TYPE_CONST, {.i64=VERTICAL},   0, 0, FLAGS, "orientation" },
+        { "horizontal", NULL, 0, AV_OPT_TYPE_CONST, {.i64=HORIZONTAL}, 0, 0, FLAGS, "orientation" },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(showspectrumpic);
+
+static int showspectrumpic_request_frame(AVFilterLink *outlink)
+{
+    ShowSpectrumContext *s = outlink->src->priv;
+    AVFilterLink *inlink = outlink->src->inputs[0];
+    int ret;
+
+    ret = ff_request_frame(inlink);
+    if (ret == AVERROR_EOF && s->outpicref) {
+        int samples = av_audio_fifo_size(s->fifo);
+        int consumed = 0;
+        int sz = s->orientation == VERTICAL ? s->w: s->h;
+        int ch, spf, spb;
+        AVFrame *fin;
+
+        if (samples / sz < s->win_size) {
+            spf = lrint(ceil(samples / (float)sz)) % s->win_size;
+            spb = samples / sz;
+        } else {
+            spf = samples % s->win_size;
+            spb = samples / sz;
+        }
+
+        if (!spf)
+            spf = s->win_size;
+        s->sliding = FULLFRAME;
+        fin = ff_get_audio_buffer(inlink, s->win_size);
+        if (!fin)
+            return AVERROR(ENOMEM);
+
+        while (av_audio_fifo_size(s->fifo) > 0) {
+            ret = av_audio_fifo_peek(s->fifo, (void **)fin->extended_data, s->win_size);
+            if (ret < 0) {
+                av_frame_free(&fin);
+                return ret;
+            }
+            av_audio_fifo_drain(s->fifo, spf);
+
+            run_rdft(s, fin);
+            acalc_magnitudes(s);
+
+            consumed += spf;
+            if (consumed >= spb) {
+                int h = s->orientation == VERTICAL ? s->h: s->w;
+
+                scale_magnitudes(s, 1. / (consumed / spf));
+                plot_spectrum_column(inlink, fin);
+                consumed = 0;
+                for (ch = 0; ch < s->nb_display_channels; ch++)
+                    memset(s->magnitudes[ch], 0, h * sizeof(float));
+            }
+        }
+
+        av_frame_free(&fin);
+        s->outpicref->pts = 0;
+        ret = ff_filter_frame(outlink, s->outpicref);
+        s->outpicref = NULL;
+    }
+
+    return ret;
+}
+
+static int showspectrumpic_filter_frame(AVFilterLink *inlink, AVFrame *insamples)
+{
+    AVFilterContext *ctx = inlink->dst;
+    ShowSpectrumContext *s = ctx->priv;
+    int ret;
+
+    ret = av_audio_fifo_write(s->fifo, (void **)insamples->extended_data, insamples->nb_samples);
+    av_frame_free(&insamples);
+    return ret;
+}
+
+static const AVFilterPad showspectrumpic_inputs[] = {
+    {
+        .name         = "default",
+        .type         = AVMEDIA_TYPE_AUDIO,
+        .filter_frame = showspectrumpic_filter_frame,
+    },
+    { NULL }
+};
+
+static const AVFilterPad showspectrumpic_outputs[] = {
+    {
+        .name          = "default",
+        .type          = AVMEDIA_TYPE_VIDEO,
+        .config_props  = config_output,
+        .request_frame = showspectrumpic_request_frame,
+    },
+    { NULL }
+};
+
+AVFilter ff_avf_showspectrumpic = {
+    .name          = "showspectrumpic",
+    .description   = NULL_IF_CONFIG_SMALL("Convert input audio to a spectrum video output single picture."),
+    .uninit        = uninit,
+    .query_formats = query_formats,
+    .priv_size     = sizeof(ShowSpectrumContext),
+    .inputs        = showspectrumpic_inputs,
+    .outputs       = showspectrumpic_outputs,
+    .priv_class    = &showspectrumpic_class,
+};
-- 
1.9.1



More information about the ffmpeg-devel mailing list