[FFmpeg-devel] [PATCH] avfilter: add normalize filter
Richard Ling
divetec at rling.com
Thu Sep 14 06:30:51 EEST 2017
Hi,
This patch adds a filter to normalize (contrast stretch) RGB video.
Comments welcome.
R.
>From f08f132ecd79718d0ce6fb07f99c84ab5dd52ee4 Mon Sep 17 00:00:00 2001
From: Richard Ling <divetec at rling.com>
Date: Thu, 14 Sep 2017 13:18:50 +1000
Subject: [PATCH] avfilter: add normalize filter
---
doc/filters.texi | 79 +++++++++
libavfilter/Makefile | 1 +
libavfilter/allfilters.c | 1 +
libavfilter/vf_normalize.c | 415
+++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 496 insertions(+)
create mode 100644 libavfilter/vf_normalize.c
diff --git a/doc/filters.texi b/doc/filters.texi
index 830de54..1e7712a 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -10808,6 +10808,85 @@ Add temporal and uniform noise to input video:
noise=alls=20:allf=t+u
@end example
+@section normalize
+
+Normalize RGB video (aka histogram stretching, contrast stretching).
+See: https://en.wikipedia.org/wiki/Normalization_(image_processing)
+
+For each channel of each frame, the filter computes the input range and maps
+it linearly to the user-specified output range. The output range defaults
+to the full dynamic range from pure black to pure white.
+
+Temporal smoothing can be used on the input range to reduce flickering (rapid
+changes in brightness) caused when small dark or bright objects enter or leave
+the scene. This is similar to the auto-exposure (automatic gain control) on a
+video camera, and, like a video camera, it may cause a period of over- or
+under-exposure of the video.
+
+The R,G,B channels can be normalized independently, which may cause some
+color shifting, or linked together as a single channel, which prevents
+color shifting. Linked normalization preserves hue. Independent normalization
+does not, so it can be used to remove some color casts. Independent and linked
+normalization can be combined in any ratio.
+
+The normalize filter accepts the following options:
+
+@table @option
+@item blackpt
+@item whitept
+Colors which define the output range. The minimum input value is mapped to
+the @var{blackpt}. The maximum input value is mapped to the @var{whitept}.
+The defaults are black and white respectively. Specifying white for
+@var{blackpt} and black for @var{whitept} will give color-inverted,
+normalized video. Shades of grey can be used to reduce the dynamic range
+(contrast). Specifying saturated colors here can create some interesting
+effects.
+
+@item smoothing
+The amount of temporal smoothing, expressed in seconds. The input range of
+each channel is smoothed using a rolling average over that many seconds of
+video. Defaults to 0.0 (no temporal smoothing). The maximum is 60 seconds.
+
+@item independence
+Controls the ratio of independent (color shifting) channel normalization to
+linked (color preserving) normalization. 0.0 is fully linked, 1.0 is fully
+independent. Defaults to fully independent.
+
+@item strength
+Overall strength of the filter. 1.0 is full strength. 0.0 is a rather
+expensive no-op. Defaults to 1.0 (full strength).
+
+@end table
+
+@subsection Examples
+
+Stretch video contrast to use the full dynamic range, with no temporal
+smoothing; may flicker depending on the source content:
+@example
+normalize=black:white:0
+@end example
+
+As above, but with 2 seconds of temporal smoothing; flicker should be
+reduced, depending on the source content:
+@example
+normalize=black:white:2
+@end example
+
+As above, but with hue-preserving linked channel normalization
+(@var{independence} set to 0):
+@example
+normalize=black:white:2:0
+@end example
+
+As above (linked normalization with 2 seconds of smoothing), but with half
+strength:
+@example
+normalize=black:white:2:0:0.5
+@end example
+
+Map the darkest input color to red, the brightest input color to cyan:
+@example
+normalize=red:cyan
+@end example
+
@section null
Pass the video source unchanged to the output.
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 8aa974e..31f8170 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -243,6 +243,7 @@ OBJS-$(CONFIG_NLMEANS_FILTER) +=
vf_nlmeans.o
OBJS-$(CONFIG_NNEDI_FILTER) += vf_nnedi.o
OBJS-$(CONFIG_NOFORMAT_FILTER) += vf_format.o
OBJS-$(CONFIG_NOISE_FILTER) += vf_noise.o
+OBJS-$(CONFIG_NORMALIZE_FILTER) += vf_normalize.o
OBJS-$(CONFIG_NULL_FILTER) += vf_null.o
OBJS-$(CONFIG_OCR_FILTER) += vf_ocr.o
OBJS-$(CONFIG_OCV_FILTER) += vf_libopencv.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 63e8672..af2287b 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -255,6 +255,7 @@ static void register_all(void)
REGISTER_FILTER(NNEDI, nnedi, vf);
REGISTER_FILTER(NOFORMAT, noformat, vf);
REGISTER_FILTER(NOISE, noise, vf);
+ REGISTER_FILTER(NORMALIZE, normalize, vf);
REGISTER_FILTER(NULL, null, vf);
REGISTER_FILTER(OCR, ocr, vf);
REGISTER_FILTER(OCV, ocv, vf);
diff --git a/libavfilter/vf_normalize.c b/libavfilter/vf_normalize.c
new file mode 100644
index 0000000..101651e
--- /dev/null
+++ b/libavfilter/vf_normalize.c
@@ -0,0 +1,415 @@
+/*
+ * Copyright (c) 2017 Richard Ling
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * Normalize RGB video (aka histogram stretching, contrast stretching).
+ * See: https://en.wikipedia.org/wiki/Normalization_(image_processing)
+ *
+ * For each channel of each frame, the filter computes the input range and maps
+ * it linearly to the user-specified output range. The output range defaults
+ * to the full dynamic range from pure black to pure white.
+ *
+ * Naively maximising the dynamic range of each frame of video in isolation
+ * may cause flickering (rapid changes in brightness of static objects in the
+ * scene) when small dark or bright objects enter or leave the scene. This
+ * filter can apply temporal smoothing to the input range to reduce flickering.
+ * Temporal smoothing is similar to the auto-exposure (automatic gain control)
+ * on a video camera, which performs the same function; and, like a video
+ * camera, it may cause a period of over- or under-exposure of the video.
+ *
+ * The filter can normalize the R,G,B channels independently, which may cause
+ * color shifting, or link them together as a single channel, which prevents
+ * color shifting. More precisely, linked normalization preserves hue (as it's
+ * defined in HSV/HSL color spaces) while independent normalization does not.
+ * Independent normalization can be used to remove color casts, such as the
+ * blue cast from underwater video, restoring more natural colors. The filter
+ * can also combine independent and linked normalization in any ratio.
+ *
+ * Finally the overall strength of the filter can be adjusted, from no effect
+ * to full normalization.
+ *
+ * The 5 AVOptions are:
+ *   blackpt,    Colors which define the output range. The minimum input value
+ *   whitept     is mapped to the blackpt. The maximum input value is mapped to
+ *               the whitept. The defaults are black and white respectively.
+ *               Specifying white for blackpt and black for whitept will give
+ *               color-inverted, normalized video. Shades of grey can be used
+ *               to reduce the dynamic range (contrast). Specifying saturated
+ *               colors here can create some interesting effects.
+ *
+ *   smoothing   The amount of temporal smoothing, expressed in seconds (0-60).
+ *               The minimum and maximum input values of each channel are
+ *               smoothed using a rolling average over that many seconds of
+ *               video. Defaults to 0.0 (no temporal smoothing).
+ *
+ *   independence
+ *               Controls the ratio of independent (color shifting) channel
+ *               normalization to linked (color preserving) normalization. 0.0
+ *               is fully linked, 1.0 is fully independent. Defaults to fully
+ *               independent.
+ *
+ *   strength    Overall strength of the filter. 1.0 is full strength. 0.0 is
+ *               a rather expensive no-op. Values in between can give a gentle
+ *               boost to low-contrast video without creating an artificial
+ *               over-processed look. The default is full strength.
+ */
+
+#include "libavutil/imgutils.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+#ifndef MIN
+#define MIN(x,y) ((x) < (y) ? (x) : (y)) // smaller of x and y; the selected argument is evaluated twice
+#endif
+#ifndef MAX
+#define MAX(x,y) ((x) > (y) ? (x) : (y)) // larger of x and y; the selected argument is evaluated twice
+#endif
+
+#define MAX_HISTORY_LEN 0x10000 // upper bound applied to history_len in config_input()
+
+// Private state of the normalize filter: the user-supplied options, the
+// pixel layout derived from the negotiated format, and the rolling history
+// of per-channel minima and maxima used for temporal smoothing.
+typedef struct NormalizeContext {
+    const AVClass *class;
+
+    // Storage for the corresponding AVOptions
+    uint8_t blackpt[4];     // Output color the darkest input value maps to
+    uint8_t whitept[4];     // Output color the brightest input value maps to
+    float smoothing;        // Temporal smoothing, in seconds
+    float independence;     // 0.0 = fully linked channels, 1.0 = fully independent
+    float strength;         // 0.0 = no effect, 1.0 = full normalization
+
+    int co[4];              // Offsets to R,G,B,A bytes respectively in each pixel
+    int num_components;     // Number of components in the pixel format
+    int history_len;        // Number of frames to average; based on smoothing factor
+    int frame_num;          // Frame counter used to index the history; starts at 0
+
+    // Per-extremum, per-channel history, for temporal smoothing.
+    struct {
+        uint8_t *history;       // History entries.
+        uint32_t history_sum;   // Sum of history entries.
+    } min[3], max[3];           // Min and max for each channel in {R,G,B}.
+    uint8_t *history_mem;       // Single allocation for above history entries
+
+} NormalizeContext;
+
+#define OFFSET(x) offsetof(NormalizeContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+
+// User-visible options. Each entry gives the option name, its help text, the
+// NormalizeContext field that stores it, its type, default, and allowed range.
+// The positional order on the command line follows the order of this table:
+// blackpt:whitept:smoothing:independence:strength.
+static const AVOption normalize_options[] = {
+    { "blackpt", "output color to which darkest input color is mapped", OFFSET(blackpt), AV_OPT_TYPE_COLOR, { .str = "black" }, CHAR_MIN, CHAR_MAX, FLAGS },
+    { "whitept", "output color to which brightest input color is mapped", OFFSET(whitept), AV_OPT_TYPE_COLOR, { .str = "white" }, CHAR_MIN, CHAR_MAX, FLAGS },
+    { "smoothing", "amount of temporal smoothing of the input range, to reduce flicker", OFFSET(smoothing), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, 60.0, FLAGS },
+    { "independence", "proportion of independent to linked channel normalization", OFFSET(independence), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, 1.0, FLAGS },
+    { "strength", "strength of filter, from no effect to full normalization", OFFSET(strength), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, 1.0, FLAGS },
+    { NULL }
+};
+
+AVFILTER_DEFINE_CLASS(normalize);
+
+
+// This function is the main guts of the filter. Normalizes the input frame
+// into the output frame. The frames are known to have the same dimensions
+// and pixel format.
+//
+//   s    filter state; its min/max histories and frame_num are updated.
+//   in   frame to read; only plane 0 (packed RGB) is accessed.
+//   out  frame to write; may be the same frame as 'in' (in-place filtering).
+static void normalize(NormalizeContext *s, AVFrame *in, AVFrame *out)
+{
+    // Per-extremum, per-channel local variables.
+    struct {
+        uint8_t in;     // Original input byte value for this frame.
+        float smoothed; // Smoothed input value [0,255].
+        float out;      // Output value [0,255].
+    } min[3], max[3];   // Min and max for each channel in {R,G,B}.
+
+    float rgb_min_smoothed; // Min input range for linked normalization
+    float rgb_max_smoothed; // Max input range for linked normalization
+    uint8_t lut[3][256];    // Lookup table
+    int x, y, c;
+
+    // First, scan the input frame to find, for each channel, the minimum
+    // (min.in) and maximum (max.in) values present in the channel. The
+    // extrema are seeded from the first pixel of the frame.
+    for (c = 0; c < 3; c++)
+        min[c].in = max[c].in = in->data[0][s->co[c]];
+    for (y = 0; y < in->height; ++y) {
+        const uint8_t *inp = in->data[0] + y * in->linesize[0];
+        for (x = 0; x < in->width; ++x) {
+            for (c = 0; c < 3; c++) {
+                uint8_t v = inp[s->co[c]];
+                min[c].in = MIN(min[c].in, v);
+                max[c].in = MAX(max[c].in, v);
+            }
+            inp += s->num_components;
+        }
+    }
+
+    // Next, for each channel, push min.in and max.in into their respective
+    // histories, to determine the min.smoothed and max.smoothed for this frame.
+    {
+        int history_idx = s->frame_num % s->history_len;
+        // Assume the history is not yet full; num_history_vals is the number
+        // of frames received so far including the current frame.
+        int num_history_vals = s->frame_num + 1;
+        if (s->frame_num >= s->history_len) {
+            // The history is full; drop oldest value and cap num_history_vals.
+            for (c = 0; c < 3; c++) {
+                s->min[c].history_sum -= s->min[c].history[history_idx];
+                s->max[c].history_sum -= s->max[c].history[history_idx];
+            }
+            num_history_vals = s->history_len;
+        }
+        // For each extremum, update history_sum and calculate smoothed value
+        // as the rolling average of the history entries.
+        for (c = 0; c < 3; c++) {
+            s->min[c].history_sum += (s->min[c].history[history_idx] = min[c].in);
+            min[c].smoothed = s->min[c].history_sum / (float)num_history_vals;
+            s->max[c].history_sum += (s->max[c].history[history_idx] = max[c].in);
+            max[c].smoothed = s->max[c].history_sum / (float)num_history_vals;
+        }
+    }
+
+    // Determine the input range for linked normalization. This is simply the
+    // minimum of the per-channel minimums, and the maximum of the per-channel
+    // maximums.
+    rgb_min_smoothed = min[0].smoothed;
+    rgb_max_smoothed = max[0].smoothed;
+    rgb_min_smoothed = MIN(rgb_min_smoothed, min[1].smoothed);
+    rgb_max_smoothed = MAX(rgb_max_smoothed, max[1].smoothed);
+    rgb_min_smoothed = MIN(rgb_min_smoothed, min[2].smoothed);
+    rgb_max_smoothed = MAX(rgb_max_smoothed, max[2].smoothed);
+
+    // Now, process each channel to determine the input and output range and
+    // build the lookup tables.
+    for (c = 0; c < 3; c++) {
+        int in_val;
+        // Adjust the input range for this channel [min.smoothed,max.smoothed]
+        // by mixing in the correct proportion of the linked normalization
+        // input range [rgb_min_smoothed,rgb_max_smoothed].
+        min[c].smoothed = (min[c].smoothed * s->independence)
+                        + (rgb_min_smoothed * (1.0 - s->independence));
+        max[c].smoothed = (max[c].smoothed * s->independence)
+                        + (rgb_max_smoothed * (1.0 - s->independence));
+
+        // Calculate the output range [min.out,max.out] as a ratio of the full-
+        // strength output range [blackpt,whitept] and the original input range
+        // [min.in,max.in], based on the user-specified filter strength.
+        min[c].out = (s->blackpt[c] * s->strength)
+                   + (min[c].in * (1.0 - s->strength));
+        max[c].out = (s->whitept[c] * s->strength)
+                   + (max[c].in * (1.0 - s->strength));
+
+        // Now, build a lookup table which linearly maps the adjusted input range
+        // [min.smoothed,max.smoothed] to the output range [min.out,max.out].
+        // Perform the linear interpolation for each x:
+        //     lut[x] = (int)(float(x - min.smoothed) * scale + min.out + 0.5)
+        // where scale = (max.out - min.out) / (max.smoothed - min.smoothed)
+        if (min[c].smoothed == max[c].smoothed) {
+            // There is no dynamic range to expand. No mapping for this channel.
+            for (in_val = min[c].in; in_val <= max[c].in; in_val++)
+                lut[c][in_val] = min[c].out;
+        } else {
+            // We must set lookup values for all values in the original input
+            // range [min.in,max.in]. Since the original input range may be
+            // larger than [min.smoothed,max.smoothed], some output values may
+            // fall outside the [0,255] dynamic range. We need to clamp them.
+            float scale = (max[c].out - min[c].out) / (max[c].smoothed - min[c].smoothed);
+            for (in_val = min[c].in; in_val <= max[c].in; in_val++) {
+                int out_val = (in_val - min[c].smoothed) * scale + min[c].out + 0.5;
+                out_val = MAX(out_val, 0);
+                out_val = MIN(out_val, 255);
+                lut[c][in_val] = out_val;
+            }
+        }
+    }
+
+    // Finally, process the pixels of the input frame using the lookup tables.
+    // Only entries in [min.in,max.in] were filled above, which covers every
+    // value that actually occurs in this frame.
+    for (y = 0; y < in->height; ++y) {
+        const uint8_t *inp = in->data[0] + y * in->linesize[0];
+        uint8_t *outp = out->data[0] + y * out->linesize[0];
+        for (x = 0; x < in->width; ++x) {
+            outp[s->co[0]] = lut[0][inp[s->co[0]]];
+            outp[s->co[1]] = lut[1][inp[s->co[1]]];
+            outp[s->co[2]] = lut[2][inp[s->co[2]]];
+            if (s->num_components == 4)
+                // Copy alpha as-is.
+                outp[s->co[3]] = inp[s->co[3]];
+            inp += s->num_components;
+            outp += s->num_components;
+        }
+    }
+
+    // Advance the frame counter. Once the history is full, only the value of
+    // frame_num % history_len and the fact that frame_num >= history_len
+    // matter, so fold the counter back into [history_len, 2*history_len).
+    // This keeps both properties while preventing signed overflow on very
+    // long streams.
+    s->frame_num++;
+    if (s->frame_num >= 2 * s->history_len)
+        s->frame_num -= s->history_len;
+}
+
+// Now we define all the functions accessible from the ff_vf_normalize class,
+// which is ffmpeg's interface to our filter. See doc/filter_design.txt and
+// doc/writing_filters.txt for descriptions of what these interface functions
+// are expected to do.
+
+// Filter init callback. Nothing needs setting up here yet, so it ignores ctx and always returns 0; kept for future use.
+static av_cold int init(AVFilterContext *ctx)
+{
+ return 0;
+}
+
+// Set the pixel formats that our filter supports. We should be able to process
+// any 8-bit RGB formats. 16-bit support might be useful one day.
+// Returns AVERROR(ENOMEM) if the format list cannot be allocated, otherwise
+// whatever ff_set_common_formats() returns.
+static int query_formats(AVFilterContext *ctx)
+{
+    static const enum AVPixelFormat pixel_fmts[] = {
+        AV_PIX_FMT_RGB24,
+        AV_PIX_FMT_BGR24,
+        AV_PIX_FMT_ARGB,
+        AV_PIX_FMT_RGBA,
+        AV_PIX_FMT_ABGR,
+        AV_PIX_FMT_BGRA,
+        AV_PIX_FMT_0RGB,
+        AV_PIX_FMT_RGB0,
+        AV_PIX_FMT_0BGR,
+        AV_PIX_FMT_BGR0,
+        AV_PIX_FMT_NONE
+    };
+    // According to filter_design.txt, using ff_set_common_formats() this way
+    // ensures the pixel formats of the input and output will be the same. That
+    // saves a bit of effort possibly needing to handle format conversions.
+    AVFilterFormats *formats = ff_make_format_list(pixel_fmts);
+    if (!formats)
+        return AVERROR(ENOMEM);
+    return ff_set_common_formats(ctx, formats);
+}
+
+// At this point we know the pixel format used for both input and output. We
+// can also access the frame rate of the input video and allocate some memory
+// appropriately.
+// Returns 0 on success, AVERROR_BUG if the negotiated pixel format has no
+// descriptor, or AVERROR(ENOMEM) if the history buffers cannot be allocated.
+static int config_input(AVFilterLink *inlink)
+{
+    int c;
+    double frames = 0.0; // smoothing window expressed in frames
+    NormalizeContext *s = inlink->dst->priv;
+    // Store offsets to R,G,B,A bytes respectively in each pixel
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
+    if (!desc)
+        return AVERROR_BUG;
+    for (c = 0; c < 4; ++c)
+        s->co[c] = desc->comp[c].offset;
+    s->num_components = desc->nb_components;
+
+    // Convert smoothing value (seconds) to a count of frames. Use the link's
+    // frame rate when it is known: the time base is only a timestamp unit
+    // (often much finer than one frame), so dividing by it would wildly
+    // overestimate the frame count. Fall back to the time base, as an upper
+    // bound on the frame rate, only when no frame rate is available.
+    if (inlink->frame_rate.num > 0 && inlink->frame_rate.den > 0)
+        frames = s->smoothing * av_q2d(inlink->frame_rate);
+    else if (inlink->time_base.num > 0 && inlink->time_base.den > 0)
+        frames = s->smoothing / av_q2d(inlink->time_base);
+
+    // history_len is the count of frames to average and must be at least 1.
+    // Cap it to MAX_HISTORY_LEN to avoid allocating stupid amounts of memory;
+    // the comparison is done in floating point so that the conversion to int
+    // below can never overflow.
+    if (frames + 1 > MAX_HISTORY_LEN) {
+        av_log(s, AV_LOG_WARNING, "history_len capped to %d from %.0f. "
+               "This could be due to unusually high frame rate.\n",
+               MAX_HISTORY_LEN, frames + 1);
+        s->history_len = MAX_HISTORY_LEN;
+    } else {
+        s->history_len = (int)frames + 1;
+    }
+
+    // Allocate the history buffers -- there are 6 -- one for each extrema.
+    // Release any buffer left over from a previous configuration first, and
+    // restart the rolling averages so they match the fresh buffers.
+    av_freep(&s->history_mem);
+    s->history_mem = av_malloc(s->history_len * 6);
+    if (s->history_mem == NULL)
+        return AVERROR(ENOMEM);
+    s->frame_num = 0;
+    for (c = 0; c < 3; c++) {
+        s->min[c].history = s->history_mem + (c*2) * s->history_len;
+        s->min[c].history_sum = 0;
+        s->max[c].history = s->history_mem + (c*2+1) * s->history_len;
+        s->max[c].history_sum = 0;
+    }
+    return 0;
+}
+
+// Free any memory allocations here. av_freep() accepts a NULL pointer and
+// resets history_mem to NULL afterwards, so no guard is needed and no
+// dangling pointer is left in the context.
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    NormalizeContext *s = ctx->priv;
+    av_freep(&s->history_mem);
+}
+
+// This function is pretty much standard from doc/writing_filters.txt. It
+// tries to do in-place filtering where possible, only allocating a new output
+// frame when absolutely necessary.
+// Takes ownership of 'in'. Returns the result of ff_filter_frame(), or a
+// negative error code if an output frame cannot be obtained or its
+// properties cannot be copied.
+static int filter_frame(AVFilterLink *inlink, AVFrame *in)
+{
+    AVFilterContext *ctx = inlink->dst;
+    AVFilterLink *outlink = ctx->outputs[0];
+    NormalizeContext *s = ctx->priv;
+
+    AVFrame *out;
+    // Set 'direct' if we can modify the input frame in-place. Otherwise we
+    // need to retrieve a new frame from the output link.
+    int direct = av_frame_is_writable(in) && !ctx->is_disabled;
+    if (direct) {
+        out = in;
+    } else {
+        int ret;
+        out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
+        if (!out) {
+            av_frame_free(&in);
+            return AVERROR(ENOMEM);
+        }
+        // Copying the frame properties can fail; don't pass on a frame with
+        // incomplete metadata.
+        ret = av_frame_copy_props(out, in);
+        if (ret < 0) {
+            av_frame_free(&out);
+            av_frame_free(&in);
+            return ret;
+        }
+    }
+
+    // Now we've got the input and output frames (which may be the same frame)
+    // perform the filtering with our custom function.
+    normalize(s, in, out);
+
+    // When the filter is disabled the processed copy is discarded and the
+    // untouched input is passed on; normalize() has still updated the history.
+    if (ctx->is_disabled) {
+        av_frame_free(&out);
+        return ff_filter_frame(outlink, in);
+    }
+
+    if (!direct)
+        av_frame_free(&in);
+
+    return ff_filter_frame(outlink, out);
+}
+
+// The structures below are standard filter plumbing
+
+static const AVFilterPad inputs[] = { // Input pads: a single video pad followed by a NULL terminator entry.
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .filter_frame = filter_frame, // per-frame processing entry point defined above
+ .config_props = config_input, // computes pixel offsets and allocates the history buffers
+ },
+ { NULL }
+};
+
+static const AVFilterPad outputs[] = { // Output pads: a single video pad with no callbacks, followed by a NULL terminator entry.
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ },
+ { NULL }
+};
+
+AVFilter ff_vf_normalize = { // Filter definition tying together the options class, callbacks and pads defined above.
+ .name = "normalize",
+ .description = NULL_IF_CONFIG_SMALL("Normalize RGB video."),
+ .priv_size = sizeof(NormalizeContext), // size of the per-instance private state
+ .priv_class = &normalize_class, // NOTE(review): presumably declared by AVFILTER_DEFINE_CLASS(normalize) — confirm
+ .init = init,
+ .uninit = uninit,
+ .query_formats = query_formats,
+ .inputs = inputs,
+ .outputs = outputs,
+};
--
2.9.0.windows.1
More information about the ffmpeg-devel
mailing list