[FFmpeg-cvslog] Merge commit '8a02a8031ef4f98faf5647f0e01a8922247bf748'
Derek Buitenhuis
git at videolan.org
Mon Apr 18 16:34:32 CEST 2016
ffmpeg | branch: master | Derek Buitenhuis <derek.buitenhuis at gmail.com> | Mon Apr 18 15:33:38 2016 +0100| [94e5f0922b72f0a40ed328b70572ed868571cb96] | committer: Derek Buitenhuis
Merge commit '8a02a8031ef4f98faf5647f0e01a8922247bf748'
* commit '8a02a8031ef4f98faf5647f0e01a8922247bf748':
lavfi: add an NVIDIA NPP-based scaling filter
Merged-by: Derek Buitenhuis <derek.buitenhuis at gmail.com>
> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=94e5f0922b72f0a40ed328b70572ed868571cb96
---
Changelog | 1 +
configure | 5 +
doc/filters.texi | 41 +++
libavfilter/Makefile | 1 +
libavfilter/allfilters.c | 1 +
libavfilter/version.h | 4 +-
libavfilter/vf_scale_npp.c | 660 ++++++++++++++++++++++++++++++++++++++++++++
7 files changed, 711 insertions(+), 2 deletions(-)
diff --git a/Changelog b/Changelog
index 827e3e2..58ac1d7 100644
--- a/Changelog
+++ b/Changelog
@@ -27,6 +27,7 @@ version <next>:
- hdcd filter
- readvitc filter
- VAAPI-accelerated format conversion and scaling
+- libnpp/CUDA-accelerated format conversion and scaling
version 3.0:
- Common Encryption (CENC) MP4 encoding and decoding support
diff --git a/configure b/configure
index cb3d304..97f374b 100755
--- a/configure
+++ b/configure
@@ -234,6 +234,7 @@ External library support:
--enable-libmp3lame enable MP3 encoding via libmp3lame [no]
--enable-libnut enable NUT (de)muxing via libnut,
native (de)muxer exists [no]
+ --enable-libnpp enable NVIDIA Performance Primitives-based code [no]
--enable-libopencore-amrnb enable AMR-NB de/encoding via libopencore-amrnb [no]
--enable-libopencore-amrwb enable AMR-WB decoding via libopencore-amrwb [no]
--enable-libopencv enable video filtering via libopencv [no]
@@ -1482,6 +1483,7 @@ EXTERNAL_LIBRARY_LIST="
libmodplug
libmp3lame
libnut
+ libnpp
libopencore_amrnb
libopencore_amrwb
libopencv
@@ -3002,6 +3004,7 @@ vidstabtransform_filter_deps="libvidstab"
zmq_filter_deps="libzmq"
zoompan_filter_deps="swscale"
zscale_filter_deps="libzimg"
+scale_npp_filter_deps="cuda libnpp"
scale_vaapi_filter_deps="vaapi VAProcPipelineParameterBuffer"
# examples
@@ -4959,6 +4962,7 @@ die_license_disabled gpl x11grab
die_license_disabled nonfree cuda
die_license_disabled nonfree libfaac
+die_license_disabled nonfree libnpp
die_license_disabled nonfree nvenc
enabled gpl && die_license_disabled_gpl nonfree libfdk_aac
enabled gpl && die_license_disabled_gpl nonfree openssl
@@ -5567,6 +5571,7 @@ enabled libmfx && require_pkg_config libmfx "mfx/mfxvideo.h" MFXInit
enabled libmodplug && require_pkg_config libmodplug libmodplug/modplug.h ModPlug_Load
enabled libmp3lame && require "libmp3lame >= 3.98.3" lame/lame.h lame_set_VBR_quality -lmp3lame
enabled libnut && require libnut libnut.h nut_demuxer_init -lnut
+enabled libnpp && require libnpp npp.h nppGetLibVersion -lnppi -lnppc
enabled libopencore_amrnb && require libopencore_amrnb opencore-amrnb/interf_dec.h Decoder_Interface_init -lopencore-amrnb
enabled libopencore_amrwb && require libopencore_amrwb opencore-amrwb/dec_if.h D_IF_init -lopencore-amrwb
enabled libopencv && { check_header opencv2/core/core_c.h &&
diff --git a/doc/filters.texi b/doc/filters.texi
index 7634b25..8fca52d 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -3395,6 +3395,47 @@ channels. Default is 0.3.
Set level of input signal of original channel. Default is 0.8.
@end table
+@section scale_npp
+
+Use the NVIDIA Performance Primitives (libnpp) to perform scaling and/or pixel
+format conversion on CUDA video frames. Setting the output width and height
+works in the same way as for the @var{scale} filter.
+
+The following additional options are accepted:
+@table @option
+@item format
+The pixel format of the output CUDA frames. If set to the string "same" (the
+default), the input format will be kept. Note that automatic format negotiation
+and conversion is not yet supported for hardware frames
+
+@item interp_algo
+The interpolation algorithm used for resizing. One of the following:
+@table @option
+@item nn
+Nearest neighbour.
+
+@item linear
+@item cubic
+@item cubic2p_bspline
+2-parameter cubic (B=1, C=0)
+
+@item cubic2p_catmullrom
+2-parameter cubic (B=0, C=1/2)
+
+@item cubic2p_b05c03
+2-parameter cubic (B=1/2, C=3/10)
+
+@item super
+Supersampling
+
+@item lanczos
+@end table
+
+@end table
+
+@section select
+Select frames to pass in output.
+
@section treble
Boost or cut treble (upper) frequencies of the audio using a two-pole
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index d71a17b..917049c 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -242,6 +242,7 @@ OBJS-$(CONFIG_SCALE2REF_FILTER) += vf_scale.o
OBJS-$(CONFIG_SELECT_FILTER) += f_select.o
OBJS-$(CONFIG_SELECTIVECOLOR_FILTER) += vf_selectivecolor.o
OBJS-$(CONFIG_SENDCMD_FILTER) += f_sendcmd.o
+OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o
OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o
OBJS-$(CONFIG_SETDAR_FILTER) += vf_aspect.o
OBJS-$(CONFIG_SETFIELD_FILTER) += vf_setfield.o
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index 7f58c7e..a972576 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -258,6 +258,7 @@ void avfilter_register_all(void)
REGISTER_FILTER(SAB, sab, vf);
REGISTER_FILTER(SCALE, scale, vf);
REGISTER_FILTER(SCALE2REF, scale2ref, vf);
+ REGISTER_FILTER(SCALE_NPP, scale_npp, vf);
REGISTER_FILTER(SCALE_VAAPI, scale_vaapi, vf);
REGISTER_FILTER(SELECT, select, vf);
REGISTER_FILTER(SELECTIVECOLOR, selectivecolor, vf);
diff --git a/libavfilter/version.h b/libavfilter/version.h
index 927ec27..d7f9c54 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -30,8 +30,8 @@
#include "libavutil/version.h"
#define LIBAVFILTER_VERSION_MAJOR 6
-#define LIBAVFILTER_VERSION_MINOR 43
-#define LIBAVFILTER_VERSION_MICRO 101
+#define LIBAVFILTER_VERSION_MINOR 44
+#define LIBAVFILTER_VERSION_MICRO 100
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
LIBAVFILTER_VERSION_MINOR, \
diff --git a/libavfilter/vf_scale_npp.c b/libavfilter/vf_scale_npp.c
new file mode 100644
index 0000000..7d2b5df
--- /dev/null
+++ b/libavfilter/vf_scale_npp.c
@@ -0,0 +1,660 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * scale video filter
+ */
+
+#include <nppi.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "libavutil/avstring.h"
+#include "libavutil/common.h"
+#include "libavutil/eval.h"
+#include "libavutil/hwcontext.h"
+#include "libavutil/hwcontext_cuda.h"
+#include "libavutil/internal.h"
+#include "libavutil/mathematics.h"
+#include "libavutil/opt.h"
+#include "libavutil/pixdesc.h"
+
+#include "avfilter.h"
+#include "formats.h"
+#include "internal.h"
+#include "video.h"
+
+static const enum AVPixelFormat supported_formats[] = {
+ AV_PIX_FMT_YUV420P,
+ AV_PIX_FMT_NV12,
+ AV_PIX_FMT_YUV444P,
+};
+
+static const enum AVPixelFormat deinterleaved_formats[][2] = {
+ { AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P },
+};
+
+static const char *const var_names[] = {
+ "PI",
+ "PHI",
+ "E",
+ "in_w", "iw",
+ "in_h", "ih",
+ "out_w", "ow",
+ "out_h", "oh",
+ "a", "dar",
+ "sar",
+ NULL
+};
+
+enum var_name {
+ VAR_PI,
+ VAR_PHI,
+ VAR_E,
+ VAR_IN_W, VAR_IW,
+ VAR_IN_H, VAR_IH,
+ VAR_OUT_W, VAR_OW,
+ VAR_OUT_H, VAR_OH,
+ VAR_A, VAR_DAR,
+ VAR_SAR,
+ VARS_NB
+};
+
+enum ScaleStage {
+ STAGE_DEINTERLEAVE,
+ STAGE_RESIZE,
+ STAGE_INTERLEAVE,
+ STAGE_NB,
+};
+
+typedef struct NPPScaleStageContext {
+ int stage_needed;
+ enum AVPixelFormat in_fmt;
+ enum AVPixelFormat out_fmt;
+
+ struct {
+ int width;
+ int height;
+ } planes_in[3], planes_out[3];
+
+ AVBufferRef *frames_ctx;
+ AVFrame *frame;
+} NPPScaleStageContext;
+
+typedef struct NPPScaleContext {
+ const AVClass *class;
+
+ NPPScaleStageContext stages[STAGE_NB];
+ AVFrame *tmp_frame;
+ int passthrough;
+
+ int shift_width, shift_height;
+
+ /**
+ * New dimensions. Special values are:
+ * 0 = original width/height
+ * -1 = keep original aspect
+ */
+ int w, h;
+
+ /**
+ * Output sw format. AV_PIX_FMT_NONE for no conversion.
+ */
+ enum AVPixelFormat format;
+
+ char *w_expr; ///< width expression string
+ char *h_expr; ///< height expression string
+ char *format_str;
+
+ int interp_algo;
+} NPPScaleContext;
+
+static int nppscale_init(AVFilterContext *ctx)
+{
+ NPPScaleContext *s = ctx->priv;
+ int i;
+
+ if (!strcmp(s->format_str, "same")) {
+ s->format = AV_PIX_FMT_NONE;
+ } else {
+ s->format = av_get_pix_fmt(s->format_str);
+ if (s->format == AV_PIX_FMT_NONE) {
+ av_log(ctx, AV_LOG_ERROR, "Unrecognized pixel format: %s\n", s->format_str);
+ return AVERROR(EINVAL);
+ }
+ }
+
+ for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) {
+ s->stages[i].frame = av_frame_alloc();
+ if (!s->stages[i].frame)
+ return AVERROR(ENOMEM);
+ }
+ s->tmp_frame = av_frame_alloc();
+ if (!s->tmp_frame)
+ return AVERROR(ENOMEM);
+
+ return 0;
+}
+
+static void nppscale_uninit(AVFilterContext *ctx)
+{
+ NPPScaleContext *s = ctx->priv;
+ int i;
+
+ for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) {
+ av_frame_free(&s->stages[i].frame);
+ av_buffer_unref(&s->stages[i].frames_ctx);
+ }
+ av_frame_free(&s->tmp_frame);
+}
+
+static int nppscale_query_formats(AVFilterContext *ctx)
+{
+ static const enum AVPixelFormat pixel_formats[] = {
+ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE,
+ };
+ AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats);
+
+ ff_set_common_formats(ctx, pix_fmts);
+
+ return 0;
+}
+
+static int init_stage(NPPScaleStageContext *stage, AVBufferRef *device_ctx)
+{
+ AVBufferRef *out_ref = NULL;
+ AVHWFramesContext *out_ctx;
+ int in_sw, in_sh, out_sw, out_sh;
+ int ret, i;
+
+ av_pix_fmt_get_chroma_sub_sample(stage->in_fmt, &in_sw, &in_sh);
+ av_pix_fmt_get_chroma_sub_sample(stage->out_fmt, &out_sw, &out_sh);
+ if (!stage->planes_out[0].width) {
+ stage->planes_out[0].width = stage->planes_in[0].width;
+ stage->planes_out[0].height = stage->planes_in[0].height;
+ }
+
+ for (i = 1; i < FF_ARRAY_ELEMS(stage->planes_in); i++) {
+ stage->planes_in[i].width = stage->planes_in[0].width >> in_sw;
+ stage->planes_in[i].height = stage->planes_in[0].height >> in_sh;
+ stage->planes_out[i].width = stage->planes_out[0].width >> out_sw;
+ stage->planes_out[i].height = stage->planes_out[0].height >> out_sh;
+ }
+
+ out_ref = av_hwframe_ctx_alloc(device_ctx);
+ if (!out_ref)
+ return AVERROR(ENOMEM);
+ out_ctx = (AVHWFramesContext*)out_ref->data;
+
+ out_ctx->format = AV_PIX_FMT_CUDA;
+ out_ctx->sw_format = stage->out_fmt;
+ out_ctx->width = FFALIGN(stage->planes_out[0].width, 32);
+ out_ctx->height = FFALIGN(stage->planes_out[0].height, 32);
+
+ ret = av_hwframe_ctx_init(out_ref);
+ if (ret < 0)
+ goto fail;
+
+ av_frame_unref(stage->frame);
+ ret = av_hwframe_get_buffer(out_ref, stage->frame, 0);
+ if (ret < 0)
+ goto fail;
+
+ stage->frame->width = stage->planes_out[0].width;
+ stage->frame->height = stage->planes_out[0].height;
+
+ av_buffer_unref(&stage->frames_ctx);
+ stage->frames_ctx = out_ref;
+
+ return 0;
+fail:
+ av_buffer_unref(&out_ref);
+ return ret;
+}
+
+static int format_is_supported(enum AVPixelFormat fmt)
+{
+ int i;
+
+ for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
+ if (supported_formats[i] == fmt)
+ return 1;
+ return 0;
+}
+
+static enum AVPixelFormat get_deinterleaved_format(enum AVPixelFormat fmt)
+{
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
+ int i, planes;
+
+ planes = av_pix_fmt_count_planes(fmt);
+ if (planes == desc->nb_components)
+ return fmt;
+ for (i = 0; i < FF_ARRAY_ELEMS(deinterleaved_formats); i++)
+ if (deinterleaved_formats[i][0] == fmt)
+ return deinterleaved_formats[i][1];
+ return AV_PIX_FMT_NONE;
+}
+
+static int init_processing_chain(AVFilterContext *ctx, int in_width, int in_height,
+ int out_width, int out_height)
+{
+ NPPScaleContext *s = ctx->priv;
+
+ AVHWFramesContext *in_frames_ctx;
+
+ enum AVPixelFormat in_format;
+ enum AVPixelFormat out_format;
+ enum AVPixelFormat in_deinterleaved_format;
+ enum AVPixelFormat out_deinterleaved_format;
+
+ int i, ret, last_stage = -1;
+
+ /* check that we have a hw context */
+ if (!ctx->inputs[0]->hw_frames_ctx) {
+ av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n");
+ return AVERROR(EINVAL);
+ }
+ in_frames_ctx = (AVHWFramesContext*)ctx->inputs[0]->hw_frames_ctx->data;
+ in_format = in_frames_ctx->sw_format;
+ out_format = (s->format == AV_PIX_FMT_NONE) ? in_format : s->format;
+
+ if (!format_is_supported(in_format)) {
+ av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n",
+ av_get_pix_fmt_name(in_format));
+ return AVERROR(ENOSYS);
+ }
+ if (!format_is_supported(out_format)) {
+ av_log(ctx, AV_LOG_ERROR, "Unsupported output format: %s\n",
+ av_get_pix_fmt_name(out_format));
+ return AVERROR(ENOSYS);
+ }
+
+ in_deinterleaved_format = get_deinterleaved_format(in_format);
+ out_deinterleaved_format = get_deinterleaved_format(out_format);
+ if (in_deinterleaved_format == AV_PIX_FMT_NONE ||
+ out_deinterleaved_format == AV_PIX_FMT_NONE)
+ return AVERROR_BUG;
+
+ /* figure out which stages need to be done */
+ if (in_width != out_width || in_height != out_height ||
+ in_deinterleaved_format != out_deinterleaved_format)
+ s->stages[STAGE_RESIZE].stage_needed = 1;
+
+ if (!s->stages[STAGE_RESIZE].stage_needed && in_format == out_format)
+ s->passthrough = 1;
+
+ if (!s->passthrough) {
+ if (in_format != in_deinterleaved_format)
+ s->stages[STAGE_DEINTERLEAVE].stage_needed = 1;
+ if (out_format != out_deinterleaved_format)
+ s->stages[STAGE_INTERLEAVE].stage_needed = 1;
+ }
+
+ s->stages[STAGE_DEINTERLEAVE].in_fmt = in_format;
+ s->stages[STAGE_DEINTERLEAVE].out_fmt = in_deinterleaved_format;
+ s->stages[STAGE_DEINTERLEAVE].planes_in[0].width = in_width;
+ s->stages[STAGE_DEINTERLEAVE].planes_in[0].height = in_height;
+
+ s->stages[STAGE_RESIZE].in_fmt = in_deinterleaved_format;
+ s->stages[STAGE_RESIZE].out_fmt = out_deinterleaved_format;
+ s->stages[STAGE_RESIZE].planes_in[0].width = in_width;
+ s->stages[STAGE_RESIZE].planes_in[0].height = in_height;
+ s->stages[STAGE_RESIZE].planes_out[0].width = out_width;
+ s->stages[STAGE_RESIZE].planes_out[0].height = out_height;
+
+ s->stages[STAGE_INTERLEAVE].in_fmt = out_deinterleaved_format;
+ s->stages[STAGE_INTERLEAVE].out_fmt = out_format;
+ s->stages[STAGE_INTERLEAVE].planes_in[0].width = out_width;
+ s->stages[STAGE_INTERLEAVE].planes_in[0].height = out_height;
+
+ /* init the hardware contexts */
+ for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) {
+ if (!s->stages[i].stage_needed)
+ continue;
+
+ ret = init_stage(&s->stages[i], in_frames_ctx->device_ref);
+ if (ret < 0)
+ return ret;
+
+ last_stage = i;
+ }
+
+ if (last_stage < 0)
+ return AVERROR_BUG;
+ ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(s->stages[last_stage].frames_ctx);
+ if (!ctx->outputs[0]->hw_frames_ctx)
+ return AVERROR(ENOMEM);
+
+ return 0;
+}
+
+static int nppscale_config_props(AVFilterLink *outlink)
+{
+ AVFilterContext *ctx = outlink->src;
+ AVFilterLink *inlink = outlink->src->inputs[0];
+ NPPScaleContext *s = ctx->priv;
+ int64_t w, h;
+ double var_values[VARS_NB], res;
+ char *expr;
+ int ret;
+
+ var_values[VAR_PI] = M_PI;
+ var_values[VAR_PHI] = M_PHI;
+ var_values[VAR_E] = M_E;
+ var_values[VAR_IN_W] = var_values[VAR_IW] = inlink->w;
+ var_values[VAR_IN_H] = var_values[VAR_IH] = inlink->h;
+ var_values[VAR_OUT_W] = var_values[VAR_OW] = NAN;
+ var_values[VAR_OUT_H] = var_values[VAR_OH] = NAN;
+ var_values[VAR_A] = (double) inlink->w / inlink->h;
+ var_values[VAR_SAR] = inlink->sample_aspect_ratio.num ?
+ (double) inlink->sample_aspect_ratio.num / inlink->sample_aspect_ratio.den : 1;
+ var_values[VAR_DAR] = var_values[VAR_A] * var_values[VAR_SAR];
+
+ /* evaluate width and height */
+ av_expr_parse_and_eval(&res, (expr = s->w_expr),
+ var_names, var_values,
+ NULL, NULL, NULL, NULL, NULL, 0, ctx);
+ s->w = var_values[VAR_OUT_W] = var_values[VAR_OW] = res;
+ if ((ret = av_expr_parse_and_eval(&res, (expr = s->h_expr),
+ var_names, var_values,
+ NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0)
+ goto fail;
+ s->h = var_values[VAR_OUT_H] = var_values[VAR_OH] = res;
+ /* evaluate again the width, as it may depend on the output height */
+ if ((ret = av_expr_parse_and_eval(&res, (expr = s->w_expr),
+ var_names, var_values,
+ NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0)
+ goto fail;
+ s->w = res;
+
+ w = s->w;
+ h = s->h;
+
+ /* sanity check params */
+ if (w < -1 || h < -1) {
+ av_log(ctx, AV_LOG_ERROR, "Size values less than -1 are not acceptable.\n");
+ return AVERROR(EINVAL);
+ }
+ if (w == -1 && h == -1)
+ s->w = s->h = 0;
+
+ if (!(w = s->w))
+ w = inlink->w;
+ if (!(h = s->h))
+ h = inlink->h;
+ if (w == -1)
+ w = av_rescale(h, inlink->w, inlink->h);
+ if (h == -1)
+ h = av_rescale(w, inlink->h, inlink->w);
+
+ if (w > INT_MAX || h > INT_MAX ||
+ (h * inlink->w) > INT_MAX ||
+ (w * inlink->h) > INT_MAX)
+ av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n");
+
+ outlink->w = w;
+ outlink->h = h;
+
+ ret = init_processing_chain(ctx, inlink->w, inlink->h, w, h);
+ if (ret < 0)
+ return ret;
+
+ av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d -> w:%d h:%d\n",
+ inlink->w, inlink->h, outlink->w, outlink->h);
+
+ if (inlink->sample_aspect_ratio.num)
+ outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h*inlink->w,
+ outlink->w*inlink->h},
+ inlink->sample_aspect_ratio);
+ else
+ outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
+
+ return 0;
+
+fail:
+ av_log(NULL, AV_LOG_ERROR,
+ "Error when evaluating the expression '%s'\n", expr);
+ return ret;
+}
+
+static int nppscale_deinterleave(AVFilterContext *ctx, NPPScaleStageContext *stage,
+ AVFrame *out, AVFrame *in)
+{
+ AVHWFramesContext *in_frames_ctx = (AVHWFramesContext*)in->hw_frames_ctx->data;
+ NppStatus err;
+
+ switch (in_frames_ctx->sw_format) {
+ case AV_PIX_FMT_NV12:
+ err = nppiYCbCr420_8u_P2P3R(in->data[0], in->linesize[0],
+ in->data[1], in->linesize[1],
+ out->data, out->linesize,
+ (NppiSize){ in->width, in->height });
+ break;
+ default:
+ return AVERROR_BUG;
+ }
+ if (err != NPP_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "NPP deinterleave error: %d\n", err);
+ return AVERROR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+static int nppscale_resize(AVFilterContext *ctx, NPPScaleStageContext *stage,
+ AVFrame *out, AVFrame *in)
+{
+ NPPScaleContext *s = ctx->priv;
+ NppStatus err;
+ int i;
+
+ for (i = 0; i < FF_ARRAY_ELEMS(in->data) && in->data[i]; i++) {
+ int iw = stage->planes_in[i].width;
+ int ih = stage->planes_in[i].height;
+ int ow = stage->planes_out[i].width;
+ int oh = stage->planes_out[i].height;
+
+ err = nppiResizeSqrPixel_8u_C1R(in->data[i], (NppiSize){ iw, ih },
+ in->linesize[i], (NppiRect){ 0, 0, iw, ih },
+ out->data[i], out->linesize[i],
+ (NppiRect){ 0, 0, ow, oh },
+ (double)ow / iw, (double)oh / ih,
+ 0.0, 0.0, s->interp_algo);
+ if (err != NPP_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "NPP resize error: %d\n", err);
+ return AVERROR_UNKNOWN;
+ }
+ }
+
+ return 0;
+}
+
+static int nppscale_interleave(AVFilterContext *ctx, NPPScaleStageContext *stage,
+ AVFrame *out, AVFrame *in)
+{
+ AVHWFramesContext *out_frames_ctx = (AVHWFramesContext*)out->hw_frames_ctx->data;
+ NppStatus err;
+
+ switch (out_frames_ctx->sw_format) {
+ case AV_PIX_FMT_NV12:
+ err = nppiYCbCr420_8u_P3P2R((const uint8_t**)in->data,
+ in->linesize,
+ out->data[0], out->linesize[0],
+ out->data[1], out->linesize[1],
+ (NppiSize){ in->width, in->height });
+ break;
+ default:
+ return AVERROR_BUG;
+ }
+ if (err != NPP_SUCCESS) {
+ av_log(ctx, AV_LOG_ERROR, "NPP deinterleave error: %d\n", err);
+ return AVERROR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+static int (*const nppscale_process[])(AVFilterContext *ctx, NPPScaleStageContext *stage,
+ AVFrame *out, AVFrame *in) = {
+ [STAGE_DEINTERLEAVE] = nppscale_deinterleave,
+ [STAGE_RESIZE] = nppscale_resize,
+ [STAGE_INTERLEAVE] = nppscale_interleave,
+};
+
+static int nppscale_scale(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
+{
+ NPPScaleContext *s = ctx->priv;
+ AVFrame *src = in;
+ int i, ret, last_stage = -1;
+
+ for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) {
+ if (!s->stages[i].stage_needed)
+ continue;
+
+ ret = nppscale_process[i](ctx, &s->stages[i], s->stages[i].frame, src);
+ if (ret < 0)
+ return ret;
+
+ src = s->stages[i].frame;
+ last_stage = i;
+ }
+
+ if (last_stage < 0)
+ return AVERROR_BUG;
+ ret = av_hwframe_get_buffer(src->hw_frames_ctx, s->tmp_frame, 0);
+ if (ret < 0)
+ return ret;
+
+ av_frame_move_ref(out, src);
+ av_frame_move_ref(src, s->tmp_frame);
+
+ ret = av_frame_copy_props(out, in);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static int nppscale_filter_frame(AVFilterLink *link, AVFrame *in)
+{
+ AVFilterContext *ctx = link->dst;
+ NPPScaleContext *s = ctx->priv;
+ AVFilterLink *outlink = ctx->outputs[0];
+ AVHWFramesContext *frames_ctx = (AVHWFramesContext*)outlink->hw_frames_ctx->data;
+ AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
+
+ AVFrame *out = NULL;
+ CUresult err;
+ CUcontext dummy;
+ int ret = 0;
+
+ if (s->passthrough)
+ return ff_filter_frame(outlink, in);
+
+ out = av_frame_alloc();
+ if (!out) {
+ ret = AVERROR(ENOMEM);
+ goto fail;
+ }
+
+ av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
+ (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
+ (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
+ INT_MAX);
+
+ err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
+ if (err != CUDA_SUCCESS) {
+ ret = AVERROR_UNKNOWN;
+ goto fail;
+ }
+
+ ret = nppscale_scale(ctx, out, in);
+
+ cuCtxPopCurrent(&dummy);
+ if (ret < 0)
+ goto fail;
+
+ av_frame_free(&in);
+ return ff_filter_frame(outlink, out);
+fail:
+ av_frame_free(&in);
+ av_frame_free(&out);
+ return ret;
+}
+
+#define OFFSET(x) offsetof(NPPScaleContext, x)
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM
+static const AVOption options[] = {
+ { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS },
+ { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS },
+ { "format", "Output pixel format", OFFSET(format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS },
+
+ { "interp_algo", "Interpolation algorithm used for resizing", OFFSET(interp_algo), AV_OPT_TYPE_INT, { .i64 = NPPI_INTER_CUBIC }, 0, INT_MAX, FLAGS, "interp_algo" },
+ { "nn", "nearest neighbour", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_NN }, 0, 0, FLAGS, "interp_algo" },
+ { "linear", "linear", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_LINEAR }, 0, 0, FLAGS, "interp_algo" },
+ { "cubic", "cubic", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_CUBIC }, 0, 0, FLAGS, "interp_algo" },
+ { "cubic2p_bspline", "2-parameter cubic (B=1, C=0)", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_CUBIC2P_BSPLINE }, 0, 0, FLAGS, "interp_algo" },
+ { "cubic2p_catmullrom", "2-parameter cubic (B=0, C=1/2)", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_CUBIC2P_CATMULLROM }, 0, 0, FLAGS, "interp_algo" },
+ { "cubic2p_b05c03", "2-parameter cubic (B=1/2, C=3/10)", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_CUBIC2P_B05C03 }, 0, 0, FLAGS, "interp_algo" },
+ { "super", "supersampling", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_SUPER }, 0, 0, FLAGS, "interp_algo" },
+ { "lanczos", "Lanczos", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_LANCZOS }, 0, 0, FLAGS, "interp_algo" },
+ { NULL },
+};
+
+static const AVClass nppscale_class = {
+ .class_name = "nppscale",
+ .item_name = av_default_item_name,
+ .option = options,
+ .version = LIBAVUTIL_VERSION_INT,
+};
+
+static const AVFilterPad nppscale_inputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .filter_frame = nppscale_filter_frame,
+ },
+ { NULL }
+};
+
+static const AVFilterPad nppscale_outputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .config_props = nppscale_config_props,
+ },
+ { NULL }
+};
+
+AVFilter ff_vf_scale_npp = {
+ .name = "scale_npp",
+ .description = NULL_IF_CONFIG_SMALL("NVIDIA Performance Primitives video "
+ "scaling and format conversion"),
+
+ .init = nppscale_init,
+ .uninit = nppscale_uninit,
+ .query_formats = nppscale_query_formats,
+
+ .priv_size = sizeof(NPPScaleContext),
+ .priv_class = &nppscale_class,
+
+ .inputs = nppscale_inputs,
+ .outputs = nppscale_outputs,
+};
======================================================================
diff --cc Changelog
index 827e3e2,692c865..58ac1d7
--- a/Changelog
+++ b/Changelog
@@@ -2,197 -2,17 +2,198 @@@ Entries are sorted chronologically fro
releases are sorted from youngest to oldest.
version <next>:
-- aliases and defaults for Ogg subtypes (opus, spx)
-- HEVC/H.265 RTP payload format (draft v6) packetizer and depacketizer
-- avplay now exits by default at the end of playback
-- XCB-based screen-grabber
-- creating DASH compatible fragmented MP4, MPEG-DASH segmenting muxer
-- H.261 RTP payload format (RFC 4587) depacketizer and experimental packetizer
+- DXVA2-accelerated HEVC Main10 decoding
+- fieldhint filter
+- loop video filter and aloop audio filter
+- Bob Weaver deinterlacing filter
+- firequalizer filter
+- datascope filter
+- bench and abench filters
+- ciescope filter
+- protocol blacklisting API
+- MediaCodec H264 decoding
+- VC-2 HQ RTP payload format (draft v1) depacketizer
+- AudioToolbox audio decoders
+- AudioToolbox audio encoders
+- coreimage filter (GPU based image filtering on OSX)
+- libdcadec removed
+- bitstream filter for extracting DTS core
+- ADPCM IMA DAT4 decoder
+- musx demuxer
+- aix demuxer
+- remap filter
+- hash and framehash muxers
+- colorspace filter
+- hdcd filter
+- readvitc filter
+- VAAPI-accelerated format conversion and scaling
++- libnpp/CUDA-accelerated format conversion and scaling
+
+version 3.0:
+- Common Encryption (CENC) MP4 encoding and decoding support
+- DXV decoding
+- extrastereo filter
+- ocr filter
+- alimiter filter
+- stereowiden filter
+- stereotools filter
+- rubberband filter
+- tremolo filter
+- agate filter
+- chromakey filter
+- maskedmerge filter
+- Screenpresso SPV1 decoding
+- chromaprint fingerprinting muxer
+- ffplay dynamic volume control
+- displace filter
+- selectivecolor filter
+- extensive native AAC encoder improvements and removal of experimental flag
+- ADPCM PSX decoder
+- 3dostr, dcstr, fsb, genh, vag, xvag, ads, msf, svag & vpk demuxer
+- zscale filter
+- wve demuxer
+- zero-copy Intel QSV transcoding in ffmpeg
+- shuffleframes filter
+- SDX2 DPCM decoder
+- vibrato filter
+- innoHeim/Rsupport Screen Capture Codec decoder
+- ADPCM AICA decoder
+- Interplay ACM demuxer and audio decoder
+- XMA1 & XMA2 decoder
+- realtime filter
+- anoisesrc audio filter source
+- IVR demuxer
+- compensationdelay filter
+- acompressor filter
+- support encoding 16-bit RLE SGI images
+- apulsator filter
+- sidechaingate audio filter
+- mipsdspr1 option has been renamed to mipsdsp
+- aemphasis filter
+- mips32r5 option has been removed
+- mips64r6 option has been removed
+- DXVA2-accelerated VP9 decoding
+- SOFAlizer: virtual binaural acoustics filter
+- VAAPI VP9 hwaccel
+- audio high-order multiband parametric equalizer
+- automatic bitstream filtering
+- showspectrumpic filter
+- libstagefright support removed
+- spectrumsynth filter
+- ahistogram filter
+- only seek with the right mouse button in ffplay
+- toggle full screen when double-clicking with the left mouse button in ffplay
+- afftfilt filter
+- convolution filter
+- libquvi support removed
+- support for dvaudio in wav and avi
+- libaacplus and libvo-aacenc support removed
+- Cineform HD decoder
+- new DCA decoder with full support for DTS-HD extensions
+- significant performance improvements in Windows Television (WTV) demuxer
+- nnedi deinterlacer
+- streamselect video and astreamselect audio filter
+- swaprect filter
+- metadata video and ametadata audio filter
+- SMPTE VC-2 HQ profile support for the Dirac decoder
+- SMPTE VC-2 native encoder supporting the HQ profile
+
+
+version 2.8:
+- colorkey video filter
+- BFSTM/BCSTM demuxer
+- little-endian ADPCM_THP decoder
+- Hap decoder and encoder
+- DirectDraw Surface image/texture decoder
+- ssim filter
+- optional new ASF demuxer
+- showvolume filter
+- Many improvements to the JPEG 2000 decoder
+- Go2Meeting decoding support
+- adrawgraph audio and drawgraph video filter
+- removegrain video filter
+- Intel QSV-accelerated MPEG-2 video and HEVC encoding
+- Intel QSV-accelerated MPEG-2 video and HEVC decoding
+- Intel QSV-accelerated VC-1 video decoding
+- libkvazaar HEVC encoder
+- erosion, dilation, deflate and inflate video filters
+- Dynamic Audio Normalizer as dynaudnorm filter
+- Reverse video and areverse audio filter
+- Random filter
+- deband filter
+- AAC fixed-point decoding
+- sidechaincompress audio filter
+- bitstream filter for converting HEVC from MP4 to Annex B
+- acrossfade audio filter
+- allyuv and allrgb video sources
+- atadenoise video filter
+- OS X VideoToolbox support
+- aphasemeter filter
+- showfreqs filter
+- vectorscope filter
+- waveform filter
+- hstack and vstack filter
+- Support DNx100 (1440x1080@8)
+- VAAPI hevc hwaccel
+- VDPAU hevc hwaccel
+- framerate filter
+- Switched default encoders for webm to VP9 and Opus
+- Removed experimental flag from the JPEG 2000 encoder
+
+
+version 2.7:
+- FFT video filter
+- TDSC decoder
+- DTS lossless extension (XLL) decoding (not lossless, disabled by default)
+- showwavespic filter
+- DTS decoding through libdcadec
+- Drop support for nvenc API before 5.0
+- nvenc HEVC encoder
+- Detelecine filter
+- Intel QSV-accelerated H.264 encoding
+- MMAL-accelerated H.264 decoding
+- basic APNG encoder and muxer with default extension "apng"
+- unpack DivX-style packed B-frames in MPEG-4 bitstream filter
+- WebM Live Chunk Muxer
+- nvenc level and tier options
+- chorus filter
+- Canopus HQ/HQA decoder
+- Automatically rotate videos based on metadata in ffmpeg
+- improved Quickdraw compatibility
+- VP9 high bit-depth and extended colorspaces decoding support
+- WebPAnimEncoder API when available for encoding and muxing WebP
+- Direct3D11-accelerated decoding
+- Support Secure Transport
+- Multipart JPEG demuxer
+
+
+version 2.6:
+- nvenc encoder
+- 10bit spp filter
+- colorlevels filter
+- RIFX format for *.wav files
- RTP/mpegts muxer
-- VP8 in Ogg demuxing
+- non continuous cache protocol support
+- tblend filter
+- cropdetect support for non 8bpp, absolute (if limit >= 1) and relative (if limit < 1.0) threshold
+- Camellia symmetric block cipher
- OpenH264 encoder wrapper
+- VOC seeking support
+- Closed caption Decoder
+- fspp, uspp, pp7 MPlayer postprocessing filters ported to native filters
+- showpalette filter
+- Twofish symmetric block cipher
- Support DNx100 (960x720@8)
-- Direct3D11-accelerated decoding
+- eq2 filter ported from libmpcodecs as eq filter
+- removed libmpcodecs
+- Changed default DNxHD colour range in QuickTime .mov derivatives to mpeg range
+- ported softpulldown filter from libmpcodecs as repeatfields filter
+- dcshift filter
+- RTP depacketizer for loss tolerant payload format for MP3 audio (RFC 5219)
+- RTP depacketizer for AC3 payload format (RFC 4184)
+- palettegen and paletteuse filters
+- VP9 RTP payload format (draft 0) experimental depacketizer
+- RTP depacketizer for DV (RFC 6469)
- DXVA2-accelerated HEVC decoding
- AAC ELD 480 decoding
- Intel QSV-accelerated H.264 decoding
diff --cc configure
index cb3d304,a68a5d7..97f374b
--- a/configure
+++ b/configure
@@@ -230,10 -194,8 +230,11 @@@ External library support
--enable-libilbc enable iLBC de/encoding via libilbc [no]
--enable-libkvazaar enable HEVC encoding via libkvazaar [no]
--enable-libmfx enable HW acceleration through libmfx
+ --enable-libmodplug enable ModPlug via libmodplug [no]
--enable-libmp3lame enable MP3 encoding via libmp3lame [no]
+ --enable-libnut enable NUT (de)muxing via libnut,
+ native (de)muxer exists [no]
+ --enable-libnpp enable NVIDIA Performance Primitives-based code [no]
--enable-libopencore-amrnb enable AMR-NB de/encoding via libopencore-amrnb [no]
--enable-libopencore-amrwb enable AMR-WB decoding via libopencore-amrwb [no]
--enable-libopencv enable video filtering via libopencv [no]
@@@ -1479,9 -1222,8 +1480,10 @@@ EXTERNAL_LIBRARY_LIST=
libilbc
libkvazaar
libmfx
+ libmodplug
libmp3lame
+ libnut
+ libnpp
libopencore_amrnb
libopencore_amrwb
libopencv
@@@ -2948,60 -2358,10 +2950,61 @@@ histeq_filter_deps="gpl
hqdn3d_filter_deps="gpl"
hwupload_cuda_filter_deps="cuda"
interlace_filter_deps="gpl"
+kerndeint_filter_deps="gpl"
+ladspa_filter_deps="ladspa dlopen"
+mcdeint_filter_deps="avcodec gpl"
+movie_filter_deps="avcodec avformat"
+mpdecimate_filter_deps="gpl"
+mpdecimate_filter_select="pixelutils"
+mptestsrc_filter_deps="gpl"
+negate_filter_deps="lut_filter"
+nnedi_filter_deps="gpl"
+ocr_filter_deps="libtesseract"
ocv_filter_deps="libopencv"
+owdenoise_filter_deps="gpl"
+pan_filter_deps="swresample"
+perspective_filter_deps="gpl"
+phase_filter_deps="gpl"
+pp7_filter_deps="gpl"
+pp_filter_deps="gpl postproc"
+pullup_filter_deps="gpl"
+removelogo_filter_deps="avcodec avformat swscale"
+repeatfields_filter_deps="gpl"
resample_filter_deps="avresample"
+rubberband_filter_deps="librubberband"
+sab_filter_deps="gpl swscale"
+scale2ref_filter_deps="swscale"
scale_filter_deps="swscale"
+select_filter_select="pixelutils"
+showcqt_filter_deps="avcodec avformat swscale"
+showcqt_filter_select="fft"
+showfreqs_filter_deps="avcodec"
+showfreqs_filter_select="fft"
+showspectrum_filter_deps="avcodec"
+showspectrum_filter_select="fft"
+showspectrumpic_filter_deps="avcodec"
+showspectrumpic_filter_select="fft"
+smartblur_filter_deps="gpl swscale"
+sofalizer_filter_deps="netcdf avcodec"
+sofalizer_filter_select="fft"
+spectrumsynth_filter_deps="avcodec"
+spectrumsynth_filter_select="fft"
+spp_filter_deps="gpl avcodec"
+spp_filter_select="fft idctdsp fdctdsp me_cmp pixblockdsp"
+stereo3d_filter_deps="gpl"
+subtitles_filter_deps="avformat avcodec libass"
+super2xsai_filter_deps="gpl"
+pixfmts_super2xsai_test_deps="super2xsai_filter"
+tinterlace_filter_deps="gpl"
+tinterlace_merge_test_deps="tinterlace_filter"
+tinterlace_pad_test_deps="tinterlace_filter"
+uspp_filter_deps="gpl avcodec"
+vidstabdetect_filter_deps="libvidstab"
+vidstabtransform_filter_deps="libvidstab"
+zmq_filter_deps="libzmq"
+zoompan_filter_deps="swscale"
+zscale_filter_deps="libzimg"
+ scale_npp_filter_deps="cuda libnpp"
scale_vaapi_filter_deps="vaapi VAProcPipelineParameterBuffer"
# examples
@@@ -4959,14 -4026,14 +4962,15 @@@ die_license_disabled gpl x11gra
die_license_disabled nonfree cuda
die_license_disabled nonfree libfaac
-die_license_disabled nonfree libfdk_aac
+ die_license_disabled nonfree libnpp
die_license_disabled nonfree nvenc
-die_license_disabled nonfree openssl
+enabled gpl && die_license_disabled_gpl nonfree libfdk_aac
+enabled gpl && die_license_disabled_gpl nonfree openssl
+die_license_disabled version3 gmp
die_license_disabled version3 libopencore_amrnb
die_license_disabled version3 libopencore_amrwb
-die_license_disabled version3 libvo_aacenc
+die_license_disabled version3 libsmbclient
die_license_disabled version3 libvo_amrwbenc
enabled version3 && { enabled gpl && enable gplv3 || enable lgplv3; }
@@@ -5564,64 -4523,41 +5568,65 @@@ enabled libgsm && { for gsm_
enabled libilbc && require libilbc ilbc.h WebRtcIlbcfix_InitDecode -lilbc
enabled libkvazaar && require_pkg_config "kvazaar >= 0.8.1" kvazaar.h kvz_api_get
enabled libmfx && require_pkg_config libmfx "mfx/mfxvideo.h" MFXInit
+enabled libmodplug && require_pkg_config libmodplug libmodplug/modplug.h ModPlug_Load
enabled libmp3lame && require "libmp3lame >= 3.98.3" lame/lame.h lame_set_VBR_quality -lmp3lame
+enabled libnut && require libnut libnut.h nut_demuxer_init -lnut
+ enabled libnpp && require libnpp npp.h nppGetLibVersion -lnppi -lnppc
enabled libopencore_amrnb && require libopencore_amrnb opencore-amrnb/interf_dec.h Decoder_Interface_init -lopencore-amrnb
enabled libopencore_amrwb && require libopencore_amrwb opencore-amrwb/dec_if.h D_IF_init -lopencore-amrwb
-enabled libopencv && require_pkg_config opencv opencv/cv.h cvCreateImageHeader
+enabled libopencv && { check_header opencv2/core/core_c.h &&
+ { use_pkg_config opencv opencv2/core/core_c.h cvCreateImageHeader ||
+ require opencv opencv2/core/core_c.h cvCreateImageHeader -lopencv_core -lopencv_imgproc; } ||
+ require_pkg_config opencv opencv/cxcore.h cvCreateImageHeader; }
enabled libopenh264 && require_pkg_config openh264 wels/codec_api.h WelsGetCodecVersion
-enabled libopenjpeg && { { check_header openjpeg.h && check_lib2 openjpeg.h opj_version -lopenjpeg -DOPJ_STATIC; } ||
- { require_pkg_config libopenjpeg1 openjpeg.h opj_version -DOPJ_STATIC; } }
+enabled libopenjpeg && { check_lib openjpeg-2.1/openjpeg.h opj_version -lopenjp2 -DOPJ_STATIC ||
+ check_lib openjpeg-2.0/openjpeg.h opj_version -lopenjp2 -DOPJ_STATIC ||
+ check_lib openjpeg-1.5/openjpeg.h opj_version -lopenjpeg -DOPJ_STATIC ||
+ check_lib openjpeg.h opj_version -lopenjpeg -DOPJ_STATIC ||
+ die "ERROR: libopenjpeg not found"; }
enabled libopus && require_pkg_config opus opus_multistream.h opus_multistream_decoder_create
-enabled libpulse && require_pkg_config libpulse-simple pulse/simple.h pa_simple_new
+enabled libpulse && require_pkg_config libpulse pulse/pulseaudio.h pa_context_new
enabled librtmp && require_pkg_config librtmp librtmp/rtmp.h RTMP_Socket
+enabled librubberband && require_pkg_config "rubberband >= 1.8.1" rubberband/rubberband-c.h rubberband_new
enabled libschroedinger && require_pkg_config schroedinger-1.0 schroedinger/schro.h schro_init
+enabled libshine && require_pkg_config shine shine/layer3.h shine_encode_buffer
+enabled libsmbclient && { use_pkg_config smbclient libsmbclient.h smbc_init ||
+ require smbclient libsmbclient.h smbc_init -lsmbclient; }
enabled libsnappy && require snappy snappy-c.h snappy_compress -lsnappy
+enabled libsoxr && require libsoxr soxr.h soxr_create -lsoxr && LIBSOXR="-lsoxr"
+enabled libssh && require_pkg_config libssh libssh/sftp.h sftp_init
enabled libspeex && require_pkg_config speex speex/speex.h speex_decoder_init -lspeex
+enabled libtesseract && require_pkg_config tesseract tesseract/capi.h TessBaseAPICreate
enabled libtheora && require libtheora theora/theoraenc.h th_info_init -ltheoraenc -ltheoradec -logg
-enabled libtwolame && require libtwolame twolame.h twolame_init -ltwolame
-enabled libvo_aacenc && require libvo_aacenc vo-aacenc/voAAC.h voGetAACEncAPI -lvo-aacenc
+enabled libtwolame && require libtwolame twolame.h twolame_init -ltwolame &&
+ { check_lib twolame.h twolame_encode_buffer_float32_interleaved -ltwolame ||
+ die "ERROR: libtwolame must be installed and version must be >= 0.3.10"; }
+enabled libutvideo && require_cpp utvideo "stdint.h stdlib.h utvideo/utvideo.h utvideo/Codec.h" 'CCodec*' -lutvideo -lstdc++
+enabled libv4l2 && require_pkg_config libv4l2 libv4l2.h v4l2_ioctl
+enabled libvidstab && require_pkg_config "vidstab >= 0.98" vid.stab/libvidstab.h vsMotionDetectInit
enabled libvo_amrwbenc && require libvo_amrwbenc vo-amrwbenc/enc_if.h E_IF_init -lvo-amrwbenc
enabled libvorbis && require libvorbis vorbis/vorbisenc.h vorbis_info_init -lvorbisenc -lvorbis -logg
-enabled libvpx && require_pkg_config "vpx >= 1.3.0" vpx/vpx_codec.h vpx_codec_version && {
+
+enabled libvpx && {
enabled libvpx_vp8_decoder && {
- check_pkg_config vpx "vpx/vpx_decoder.h vpx/vp8dx.h" vpx_codec_vp8_dx ||
- disable libvpx_vp8_decoder;
+ use_pkg_config "vpx >= 0.9.1" "vpx/vpx_decoder.h vpx/vp8dx.h" vpx_codec_vp8_dx ||
+ check_lib2 "vpx/vpx_decoder.h vpx/vp8dx.h" vpx_codec_dec_init_ver -lvpx ||
+ die "ERROR: libvpx decoder version must be >=0.9.1";
}
enabled libvpx_vp8_encoder && {
- check_pkg_config vpx "vpx/vpx_encoder.h vpx/vp8cx.h" vpx_codec_vp8_cx ||
- disable libvpx_vp8_encoder;
+ use_pkg_config "vpx >= 0.9.7" "vpx/vpx_encoder.h vpx/vp8cx.h" vpx_codec_vp8_cx ||
+ check_lib2 "vpx/vpx_encoder.h vpx/vp8cx.h" "vpx_codec_enc_init_ver VP8E_SET_MAX_INTRA_BITRATE_PCT" -lvpx ||
+ die "ERROR: libvpx encoder version must be >=0.9.7";
}
enabled libvpx_vp9_decoder && {
- check_pkg_config vpx "vpx/vpx_decoder.h vpx/vp8dx.h" vpx_codec_vp9_dx ||
- disable libvpx_vp9_decoder;
+ use_pkg_config "vpx >= 1.3.0" "vpx/vpx_decoder.h vpx/vp8dx.h" vpx_codec_vp9_dx ||
+ check_lib2 "vpx/vpx_decoder.h vpx/vp8dx.h" "vpx_codec_vp9_dx" -lvpx ||
+ disable libvpx_vp9_decoder;
}
enabled libvpx_vp9_encoder && {
- check_pkg_config vpx "vpx/vpx_encoder.h vpx/vp8cx.h" vpx_codec_vp9_cx ||
- disable libvpx_vp9_encoder;
+ use_pkg_config "vpx >= 1.3.0" "vpx/vpx_encoder.h vpx/vp8cx.h" vpx_codec_vp9_cx ||
+ check_lib2 "vpx/vpx_encoder.h vpx/vp8cx.h" "vpx_codec_vp9_cx VP9E_SET_AQ_MODE" -lvpx ||
+ disable libvpx_vp9_encoder;
}
if disabled_all libvpx_vp8_decoder libvpx_vp9_decoder libvpx_vp8_encoder libvpx_vp9_encoder; then
die "libvpx enabled but no supported decoders found"
diff --cc doc/filters.texi
index 7634b25,23e4286..8fca52d
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@@ -2637,954 -1724,583 +2637,995 @@@ Only used if plugin have zero inputs
@end table
-All expressions default to "val".
+ at subsection Examples
-Some examples:
+ at itemize
+ at item
+List all available plugins within amp (LADSPA example plugin) library:
@example
-# Negate input video
-lutrgb="r=maxval+minval-val:g=maxval+minval-val:b=maxval+minval-val"
-lutyuv="y=maxval+minval-val:u=maxval+minval-val:v=maxval+minval-val"
+ladspa=file=amp
+ at end example
-# The above is the same as
-lutrgb="r=negval:g=negval:b=negval"
-lutyuv="y=negval:u=negval:v=negval"
+ at item
+List all available controls and their valid ranges for @code{vcf_notch}
+plugin from @code{VCF} library:
+ at example
+ladspa=f=vcf:p=vcf_notch:c=help
+ at end example
-# Negate luminance
-lutyuv=negval
+ at item
+Simulate low quality audio equipment using @code{Computer Music Toolkit} (CMT)
+plugin library:
+ at example
+ladspa=file=cmt:plugin=lofi:controls=c0=22|c1=12|c2=12
+ at end example
-# Remove chroma components, turning the video into a graytone image
-lutyuv="u=128:v=128"
+ at item
+Add reverberation to the audio using TAP-plugins
+(Tom's Audio Processing plugins):
+ at example
+ladspa=file=tap_reverb:tap_reverb
+ at end example
-# Apply a luma burning effect
-lutyuv="y=2*val"
+ at item
+Generate white noise, with 0.2 amplitude:
+ at example
+ladspa=file=cmt:noise_source_white:c=c0=.2
+ at end example
-# Remove green and blue components
-lutrgb="g=0:b=0"
+ at item
+Generate 20 bpm clicks using plugin @code{C* Click - Metronome} from the
+ at code{C* Audio Plugin Suite} (CAPS) library:
+ at example
+ladspa=file=caps:Click:c=c1=20
+ at end example
-# Set a constant alpha channel value on input
-format=rgba,lutrgb=a="maxval-minval/2"
+ at item
+Apply @code{C* Eq10X2 - Stereo 10-band equaliser} effect:
+ at example
+ladspa=caps:Eq10X2:c=c0=-48|c9=-24|c3=12|c4=2
+ at end example
-# Correct luminance gamma by a factor of 0.5
-lutyuv=y=gammaval(0.5)
+ at item
+Increase volume by 20dB using fast lookahead limiter from Steve Harris
+ at code{SWH Plugins} collection:
+ at example
+ladspa=fast_lookahead_limiter_1913:fastLookaheadLimiter:20|0|2
@end example
- at section negate
+ at item
+Attenuate low frequencies using Multiband EQ from Steve Harris
+ at code{SWH Plugins} collection:
+ at example
+ladspa=mbeq_1197:mbeq:-24|-24|-24|0|0|0|0|0|0|0|0|0|0|0|0
+ at end example
+ at end itemize
-Negate input video.
+ at subsection Commands
-It accepts an integer in input; if non-zero it negates the
-alpha component (if available). The default value in input is 0.
+This filter supports the following commands:
+ at table @option
+ at item cN
+Modify the @var{N}-th control value.
- at section noformat
+If the specified value is not valid, it is ignored and prior one is kept.
+ at end table
-Force libavfilter not to use any of the specified pixel formats for the
-input to the next filter.
+ at section lowpass
+
+Apply a low-pass filter with 3dB point frequency.
+The filter can be either single-pole or double-pole (the default).
+The filter roll off at 6dB per pole per octave (20dB per pole per decade).
+
+The filter accepts the following options:
-It accepts the following parameters:
@table @option
+ at item frequency, f
+Set frequency in Hz. Default is 500.
- at item pix_fmts
-A '|'-separated list of pixel format names, such as
-apix_fmts=yuv420p|monow|rgb24".
+ at item poles, p
+Set number of poles. Default is 2.
+ at item width_type
+Set method to specify band-width of filter.
+ at table @option
+ at item h
+Hz
+ at item q
+Q-Factor
+ at item o
+octave
+ at item s
+slope
@end table
-Some examples:
- at example
-# Force libavfilter to use a format different from "yuv420p" for the
-# input to the vflip filter
-noformat=pix_fmts=yuv420p,vflip
+ at item width, w
+Specify the band-width of a filter in width_type units.
+Applies only to double-pole filter.
+The default is 0.707q and gives a Butterworth response.
+ at end table
-# Convert the input video to any of the formats not contained in the list
-noformat=yuv420p|yuv444p|yuv410p
- at end example
+ at anchor{pan}
+ at section pan
- at section null
+Mix channels with specific gain levels. The filter accepts the output
+channel layout followed by a set of channels definitions.
-Pass the video source unchanged to the output.
+This filter is also designed to efficiently remap the channels of an audio
+stream.
- at section ocv
+The filter accepts parameters of the form:
+"@var{l}|@var{outdef}|@var{outdef}|..."
-Apply a video transform using libopencv.
+ at table @option
+ at item l
+output channel layout or number of channels
-To enable this filter, install the libopencv library and headers and
-configure Libav with --enable-libopencv.
+ at item outdef
+output channel specification, of the form:
+"@var{out_name}=[@var{gain}*]@var{in_name}[+[@var{gain}*]@var{in_name}...]"
-It accepts the following parameters:
+ at item out_name
+output channel to define, either a channel name (FL, FR, etc.) or a channel
+number (c0, c1, etc.)
- at table @option
-
- at item filter_name
-The name of the libopencv filter to apply.
-
- at item filter_params
-The parameters to pass to the libopencv filter. If not specified, the default
-values are assumed.
+ at item gain
+multiplicative coefficient for the channel, 1 leaving the volume unchanged
+ at item in_name
+input channel to use, see out_name for details; it is not possible to mix
+named and numbered input channels
@end table
-Refer to the official libopencv documentation for more precise
-information:
- at url{http://opencv.willowgarage.com/documentation/c/image_filtering.html}
-
-Several libopencv filters are supported; see the following subsections.
+If the `=' in a channel specification is replaced by `<', then the gains for
+that specification will be renormalized so that the total is 1, thus
+avoiding clipping noise.
- at anchor{dilate}
- at subsection dilate
+ at subsection Mixing examples
-Dilate an image by using a specific structuring element.
-It corresponds to the libopencv function @code{cvDilate}.
+For example, if you want to down-mix from stereo to mono, but with a bigger
+factor for the left channel:
+ at example
+pan=1c|c0=0.9*c0+0.1*c1
+ at end example
-It accepts the parameters: @var{struct_el}|@var{nb_iterations}.
+A customized down-mix to stereo that works automatically for 3-, 4-, 5- and
+7-channels surround:
+ at example
+pan=stereo| FL < FL + 0.5*FC + 0.6*BL + 0.6*SL | FR < FR + 0.5*FC + 0.6*BR + 0.6*SR
+ at end example
- at var{struct_el} represents a structuring element, and has the syntax:
- at var{cols}x at var{rows}+ at var{anchor_x}x at var{anchor_y}/@var{shape}
+Note that @command{ffmpeg} integrates a default down-mix (and up-mix) system
+that should be preferred (see "-ac" option) unless you have very specific
+needs.
- at var{cols} and @var{rows} represent the number of columns and rows of
-the structuring element, @var{anchor_x} and @var{anchor_y} the anchor
-point, and @var{shape} the shape for the structuring element. @var{shape}
-must be "rect", "cross", "ellipse", or "custom".
+ at subsection Remapping examples
-If the value for @var{shape} is "custom", it must be followed by a
-string of the form "=@var{filename}". The file with name
- at var{filename} is assumed to represent a binary image, with each
-printable character corresponding to a bright pixel. When a custom
- at var{shape} is used, @var{cols} and @var{rows} are ignored, the number
-or columns and rows of the read file are assumed instead.
+The channel remapping will be effective if, and only if:
-The default value for @var{struct_el} is "3x3+0x0/rect".
+ at itemize
+ at item gain coefficients are zeroes or ones,
+ at item only one input per channel output,
+ at end itemize
- at var{nb_iterations} specifies the number of times the transform is
-applied to the image, and defaults to 1.
+If all these conditions are satisfied, the filter will notify the user ("Pure
+channel mapping detected"), and use an optimized and lossless method to do the
+remapping.
-Some examples:
+For example, if you have a 5.1 source and want a stereo audio stream by
+dropping the extra channels:
@example
-# Use the default values
-ocv=dilate
-
-# Dilate using a structuring element with a 5x5 cross, iterating two times
-ocv=filter_name=dilate:filter_params=5x5+2x2/cross|2
-
-# Read the shape from the file diamond.shape, iterating two times.
-# The file diamond.shape may contain a pattern of characters like this
-# *
-# ***
-# *****
-# ***
-# *
-# The specified columns and rows are ignored
-# but the anchor point coordinates are not
-ocv=dilate:0x0+2x2/custom=diamond.shape|2
+pan="stereo| c0=FL | c1=FR"
@end example
- at subsection erode
-
-Erode an image by using a specific structuring element.
-It corresponds to the libopencv function @code{cvErode}.
-
-It accepts the parameters: @var{struct_el}:@var{nb_iterations},
-with the same syntax and semantics as the @ref{dilate} filter.
+Given the same source, you can also switch front left and front right channels
+and keep the input channel layout:
+ at example
+pan="5.1| c0=c1 | c1=c0 | c2=c2 | c3=c3 | c4=c4 | c5=c5"
+ at end example
- at subsection smooth
+If the input is a stereo audio stream, you can mute the front left channel (and
+still keep the stereo channel layout) with:
+ at example
+pan="stereo|c1=c1"
+ at end example
-Smooth the input video.
+Still with a stereo audio stream input, you can copy the right channel in both
+front left and right:
+ at example
+pan="stereo| c0=FR | c1=FR"
+ at end example
-The filter takes the following parameters:
- at var{type}|@var{param1}|@var{param2}|@var{param3}|@var{param4}.
+ at section replaygain
- at var{type} is the type of smooth filter to apply, and must be one of
-the following values: "blur", "blur_no_scale", "median", "gaussian",
-or "bilateral". The default value is "gaussian".
+ReplayGain scanner filter. This filter takes an audio stream as an input and
+outputs it unchanged.
+At end of filtering it displays @code{track_gain} and @code{track_peak}.
-The meaning of @var{param1}, @var{param2}, @var{param3}, and @var{param4}
-depend on the smooth type. @var{param1} and
- at var{param2} accept integer positive values or 0. @var{param3} and
- at var{param4} accept floating point values.
+ at section resample
-The default value for @var{param1} is 3. The default value for the
-other parameters is 0.
+Convert the audio sample format, sample rate and channel layout. It is
+not meant to be used directly.
-These parameters correspond to the parameters assigned to the
-libopencv function @code{cvSmooth}.
+ at section rubberband
+Apply time-stretching and pitch-shifting with librubberband.
- at anchor{overlay}
- at section overlay
+The filter accepts the following options:
-Overlay one video on top of another.
+ at table @option
+ at item tempo
+Set tempo scale factor.
-It takes two inputs and has one output. The first input is the "main"
-video on which the second input is overlayed.
+ at item pitch
+Set pitch scale factor.
-It accepts the following parameters:
+ at item transients
+Set transients detector.
+Possible values are:
+ at table @var
+ at item crisp
+ at item mixed
+ at item smooth
+ at end table
- at table @option
+ at item detector
+Set detector.
+Possible values are:
+ at table @var
+ at item compound
+ at item percussive
+ at item soft
+ at end table
- at item x
-The horizontal position of the left edge of the overlaid video on the main video.
+ at item phase
+Set phase.
+Possible values are:
+ at table @var
+ at item laminar
+ at item independent
+ at end table
- at item y
-The vertical position of the top edge of the overlaid video on the main video.
+ at item window
+Set processing window size.
+Possible values are:
+ at table @var
+ at item standard
+ at item short
+ at item long
+ at end table
+ at item smoothing
+Set smoothing.
+Possible values are:
+ at table @var
+ at item off
+ at item on
@end table
-The parameters are expressions containing the following parameters:
+ at item formant
+Enable formant preservation when shift pitching.
+Possible values are:
+ at table @var
+ at item shifted
+ at item preserved
+ at end table
- at table @option
- at item main_w, main_h
-The main input width and height.
+ at item pitchq
+Set pitch quality.
+Possible values are:
+ at table @var
+ at item quality
+ at item speed
+ at item consistency
+ at end table
- at item W, H
-These are the same as @var{main_w} and @var{main_h}.
+ at item channels
+Set channels.
+Possible values are:
+ at table @var
+ at item apart
+ at item together
+ at end table
+ at end table
- at item overlay_w, overlay_h
-The overlay input width and height.
+ at section sidechaincompress
- at item w, h
-These are the same as @var{overlay_w} and @var{overlay_h}.
+This filter acts like normal compressor but has the ability to compress
+detected signal using second input signal.
+It needs two input streams and returns one output stream.
+First input stream will be processed depending on second stream signal.
+The filtered signal then can be filtered with other filters in later stages of
+processing. See @ref{pan} and @ref{amerge} filter.
- at item eof_action
-The action to take when EOF is encountered on the secondary input; it accepts
-one of the following values:
+The filter accepts the following options:
@table @option
- at item repeat
-Repeat the last frame (the default).
- at item endall
-End both streams.
- at item pass
-Pass the main input through.
- at end table
+ at item level_in
+Set input gain. Default is 1. Range is between 0.015625 and 64.
+ at item threshold
+If a signal of second stream raises above this level it will affect the gain
+reduction of first stream.
+By default is 0.125. Range is between 0.00097563 and 1.
+
+ at item ratio
+Set a ratio about which the signal is reduced. 1:2 means that if the level
+raised 4dB above the threshold, it will be only 2dB above after the reduction.
+Default is 2. Range is between 1 and 20.
+
+ at item attack
+Amount of milliseconds the signal has to rise above the threshold before gain
+reduction starts. Default is 20. Range is between 0.01 and 2000.
+
+ at item release
+Amount of milliseconds the signal has to fall below the threshold before
+reduction is decreased again. Default is 250. Range is between 0.01 and 9000.
+
+ at item makeup
+Set the amount by how much signal will be amplified after processing.
+Default is 2. Range is from 1 and 64.
+
+ at item knee
+Curve the sharp knee around the threshold to enter gain reduction more softly.
+Default is 2.82843. Range is between 1 and 8.
+
+ at item link
+Choose if the @code{average} level between all channels of side-chain stream
+or the louder(@code{maximum}) channel of side-chain stream affects the
+reduction. Default is @code{average}.
+
+ at item detection
+Should the exact signal be taken in case of @code{peak} or an RMS one in case
+of @code{rms}. Default is @code{rms} which is mainly smoother.
+
+ at item level_sc
+Set sidechain gain. Default is 1. Range is between 0.015625 and 64.
+
+ at item mix
+How much to use compressed signal in output. Default is 1.
+Range is between 0 and 1.
@end table
-Be aware that frames are taken from each input video in timestamp
-order, hence, if their initial timestamps differ, it is a a good idea
-to pass the two inputs through a @var{setpts=PTS-STARTPTS} filter to
-have them begin in the same zero timestamp, as the example for
-the @var{movie} filter does.
+ at subsection Examples
-Some examples:
+ at itemize
+ at item
+Full ffmpeg example taking 2 audio inputs, 1st input to be compressed
+depending on the signal of 2nd input and later compressed signal to be
+merged with 2nd input:
@example
-# Draw the overlay at 10 pixels from the bottom right
-# corner of the main video
-overlay=x=main_w-overlay_w-10:y=main_h-overlay_h-10
+ffmpeg -i main.flac -i sidechain.flac -filter_complex "[1:a]asplit=2[sc][mix];[0:a][sc]sidechaincompress[compr];[compr][mix]amerge"
+ at end example
+ at end itemize
-# Insert a transparent PNG logo in the bottom left corner of the input
-avconv -i input -i logo -filter_complex 'overlay=x=10:y=main_h-overlay_h-10' output
+ at section sidechaingate
-# Insert 2 different transparent PNG logos (second logo on bottom
-# right corner)
-avconv -i input -i logo1 -i logo2 -filter_complex
-'overlay=x=10:y=H-h-10,overlay=x=W-w-10:y=H-h-10' output
+A sidechain gate acts like a normal (wideband) gate but has the ability to
+filter the detected signal before sending it to the gain reduction stage.
+Normally a gate uses the full range signal to detect a level above the
+threshold.
+For example: If you cut all lower frequencies from your sidechain signal
+the gate will decrease the volume of your track only if not enough highs
+appear. With this technique you are able to reduce the resonation of a
+natural drum or remove "rumbling" of muted strokes from a heavily distorted
+guitar.
+It needs two input streams and returns one output stream.
+First input stream will be processed depending on second stream signal.
-# Add a transparent color layer on top of the main video;
-# WxH specifies the size of the main input to the overlay filter
-color=red at .3:WxH [over]; [in][over] overlay [out]
+The filter accepts the following options:
-# Mask 10-20 seconds of a video by applying the delogo filter to a section
-avconv -i test.avi -codec:v:0 wmv2 -ar 11025 -b:v 9000k
--vf '[in]split[split_main][split_delogo];[split_delogo]trim=start=360:end=371,delogo=0:0:640:480[delogoed];[split_main][delogoed]overlay=eof_action=pass[out]'
-masked.avi
- at end example
+ at table @option
+ at item level_in
+Set input level before filtering.
+Default is 1. Allowed range is from 0.015625 to 64.
-You can chain together more overlays but the efficiency of such
-approach is yet to be tested.
+ at item range
+Set the level of gain reduction when the signal is below the threshold.
+Default is 0.06125. Allowed range is from 0 to 1.
- at section pad
+ at item threshold
+If a signal rises above this level the gain reduction is released.
+Default is 0.125. Allowed range is from 0 to 1.
+
+ at item ratio
+Set a ratio about which the signal is reduced.
+Default is 2. Allowed range is from 1 to 9000.
+
+ at item attack
+Amount of milliseconds the signal has to rise above the threshold before gain
+reduction stops.
+Default is 20 milliseconds. Allowed range is from 0.01 to 9000.
+
+ at item release
+Amount of milliseconds the signal has to fall below the threshold before the
+reduction is increased again. Default is 250 milliseconds.
+Allowed range is from 0.01 to 9000.
+
+ at item makeup
+Set amount of amplification of signal after processing.
+Default is 1. Allowed range is from 1 to 64.
+
+ at item knee
+Curve the sharp knee around the threshold to enter gain reduction more softly.
+Default is 2.828427125. Allowed range is from 1 to 8.
+
+ at item detection
+Choose if exact signal should be taken for detection or an RMS like one.
+Default is rms. Can be peak or rms.
+
+ at item link
+Choose if the average level between all channels or the louder channel affects
+the reduction.
+Default is average. Can be average or maximum.
+
+ at item level_sc
+Set sidechain gain. Default is 1. Range is from 0.015625 to 64.
+ at end table
-Add paddings to the input image, and place the original input at the
-provided @var{x}, @var{y} coordinates.
+ at section silencedetect
-It accepts the following parameters:
+Detect silence in an audio stream.
- at table @option
- at item width, height
+This filter logs a message when it detects that the input audio volume is less
+or equal to a noise tolerance value for a duration greater or equal to the
+minimum detected noise duration.
-Specify the size of the output image with the paddings added. If the
-value for @var{width} or @var{height} is 0, the corresponding input size
-is used for the output.
+The printed times and duration are expressed in seconds.
-The @var{width} expression can reference the value set by the
- at var{height} expression, and vice versa.
+The filter accepts the following options:
-The default value of @var{width} and @var{height} is 0.
+ at table @option
+ at item duration, d
+Set silence duration until notification (default is 2 seconds).
- at item x, y
+ at item noise, n
+Set noise tolerance. Can be specified in dB (in case "dB" is appended to the
+specified value) or amplitude ratio. Default is -60dB, or 0.001.
+ at end table
-Specify the offsets to place the input image at within the padded area,
-with respect to the top/left border of the output image.
+ at subsection Examples
-The @var{x} expression can reference the value set by the @var{y}
-expression, and vice versa.
+ at itemize
+ at item
+Detect 5 seconds of silence with -50dB noise tolerance:
+ at example
+silencedetect=n=-50dB:d=5
+ at end example
-The default value of @var{x} and @var{y} is 0.
+ at item
+Complete example with @command{ffmpeg} to detect silence with 0.0001 noise
+tolerance in @file{silence.mp3}:
+ at example
+ffmpeg -i silence.mp3 -af silencedetect=noise=0.0001 -f null -
+ at end example
+ at end itemize
- at item color
+ at section silenceremove
-Specify the color of the padded area. It can be the name of a color
-(case insensitive match) or an 0xRRGGBB[AA] sequence.
+Remove silence from the beginning, middle or end of the audio.
-The default value of @var{color} is "black".
+The filter accepts the following options:
+ at table @option
+ at item start_periods
+This value is used to indicate if audio should be trimmed at beginning of
+the audio. A value of zero indicates no silence should be trimmed from the
+beginning. When specifying a non-zero value, it trims audio up until it
+finds non-silence. Normally, when trimming silence from beginning of audio
+the @var{start_periods} will be @code{1} but it can be increased to higher
+values to trim all audio up to specific count of non-silence periods.
+Default value is @code{0}.
+
+ at item start_duration
+Specify the amount of time that non-silence must be detected before it stops
+trimming audio. By increasing the duration, bursts of noises can be treated
+as silence and trimmed off. Default value is @code{0}.
+
+ at item start_threshold
+This indicates what sample value should be treated as silence. For digital
+audio, a value of @code{0} may be fine but for audio recorded from analog,
+you may wish to increase the value to account for background noise.
+Can be specified in dB (in case "dB" is appended to the specified value)
+or amplitude ratio. Default value is @code{0}.
+
+ at item stop_periods
+Set the count for trimming silence from the end of audio.
+To remove silence from the middle of a file, specify a @var{stop_periods}
+that is negative. This value is then treated as a positive value and is
+used to indicate the effect should restart processing as specified by
+ at var{start_periods}, making it suitable for removing periods of silence
+in the middle of the audio.
+Default value is @code{0}.
+
+ at item stop_duration
+Specify a duration of silence that must exist before audio is not copied any
+more. By specifying a higher duration, silence that is wanted can be left in
+the audio.
+Default value is @code{0}.
+
+ at item stop_threshold
+This is the same as @option{start_threshold} but for trimming silence from
+the end of audio.
+Can be specified in dB (in case "dB" is appended to the specified value)
+or amplitude ratio. Default value is @code{0}.
+
+ at item leave_silence
+This indicate that @var{stop_duration} length of audio should be left intact
+at the beginning of each period of silence.
+For example, if you want to remove long pauses between words but do not want
+to remove the pauses completely. Default value is @code{0}.
+
+ at item detection
+Set how is silence detected. Can be @code{rms} or @code{peak}. Second is faster
+and works better with digital silence which is exactly 0.
+Default value is @code{rms}.
+
+ at item window
+Set ratio used to calculate size of window for detecting silence.
+Default value is @code{0.02}. Allowed range is from @code{0} to @code{10}.
@end table
-The parameters @var{width}, @var{height}, @var{x}, and @var{y} are
-expressions containing the following constants:
+ at subsection Examples
- at table @option
- at item E, PI, PHI
-These are approximated values for the mathematical constants e
-(Euler's number), pi (Greek pi), and phi (the golden ratio).
+ at itemize
+ at item
+The following example shows how this filter can be used to start a recording
+that does not contain the delay at the start which usually occurs between
+pressing the record button and the start of the performance:
+ at example
+silenceremove=1:5:0.02
+ at end example
- at item in_w, in_h
-The input video width and height.
+ at item
+Trim all silence encountered from beginning to end where there is more than 1
+second of silence in audio:
+ at example
+silenceremove=0:0:0:-1:1:-90dB
+ at end example
+ at end itemize
- at item iw, ih
-These are the same as @var{in_w} and @var{in_h}.
+ at section sofalizer
- at item out_w, out_h
-The output width and height (the size of the padded area), as
-specified by the @var{width} and @var{height} expressions.
+SOFAlizer uses head-related transfer functions (HRTFs) to create virtual
+loudspeakers around the user for binaural listening via headphones (audio
+formats up to 9 channels supported).
+The HRTFs are stored in SOFA files (see @url{http://www.sofacoustics.org/} for a database).
+SOFAlizer is developed at the Acoustics Research Institute (ARI) of the
+Austrian Academy of Sciences.
- at item ow, oh
-These are the same as @var{out_w} and @var{out_h}.
+To enable compilation of this filter you need to configure FFmpeg with
+ at code{--enable-netcdf}.
- at item x, y
-The x and y offsets as specified by the @var{x} and @var{y}
-expressions, or NAN if not yet specified.
+The filter accepts the following options:
- at item a
-The input display aspect ratio, same as @var{iw} / @var{ih}.
+ at table @option
+ at item sofa
+Set the SOFA file used for rendering.
- at item hsub, vsub
-The horizontal and vertical chroma subsample values. For example for the
-pixel format "yuv422p" @var{hsub} is 2 and @var{vsub} is 1.
- at end table
+ at item gain
+Set gain applied to audio. Value is in dB. Default is 0.
-Some examples:
+ at item rotation
+Set rotation of virtual loudspeakers in deg. Default is 0.
- at example
-# Add paddings with the color "violet" to the input video. The output video
-# size is 640x480, and the top-left corner of the input video is placed at
-# column 0, row 40
-pad=width=640:height=480:x=0:y=40:color=violet
+ at item elevation
+Set elevation of virtual speakers in deg. Default is 0.
-# Pad the input to get an output with dimensions increased by 3/2,
-# and put the input video at the center of the padded area
-pad="3/2*iw:3/2*ih:(ow-iw)/2:(oh-ih)/2"
+ at item radius
+Set distance in meters between loudspeakers and the listener with near-field
+HRTFs. Default is 1.
-# Pad the input to get a squared output with size equal to the maximum
-# value between the input width and height, and put the input video at
-# the center of the padded area
-pad="max(iw\,ih):ow:(ow-iw)/2:(oh-ih)/2"
+ at item type
+Set processing type. Can be @var{time} or @var{freq}. @var{time} is
+processing audio in time domain which is slow.
+ at var{freq} is processing audio in frequency domain which is fast.
+Default is @var{freq}.
+
+ at item speakers
+Set custom positions of virtual loudspeakers. Syntax for this option is:
+<CH> <AZIM> <ELEV>[|<CH> <AZIM> <ELEV>|...].
+Each virtual loudspeaker is described with short channel name following with
+azimuth and elevation in degrees.
+Each virtual loudspeaker description is separated by '|'.
+For example to override front left and front right channel positions use:
+'speakers=FL 45 15|FR 345 15'.
+Descriptions with unrecognised channel names are ignored.
+ at end table
-# Pad the input to get a final w/h ratio of 16:9
-pad="ih*16/9:ih:(ow-iw)/2:(oh-ih)/2"
+ at subsection Examples
-# Double the output size and put the input video in the bottom-right
-# corner of the output padded area
-pad="2*iw:2*ih:ow-iw:oh-ih"
+ at itemize
+ at item
+Using ClubFritz6 sofa file:
+ at example
+sofalizer=sofa=/path/to/ClubFritz6.sofa:type=freq:radius=1
@end example
- at section pixdesctest
-
-Pixel format descriptor test filter, mainly useful for internal
-testing. The output video should be equal to the input video.
-
-For example:
+ at item
+Using ClubFritz12 sofa file and bigger radius with small rotation:
@example
-format=monow, pixdesctest
+sofalizer=sofa=/path/to/ClubFritz12.sofa:type=freq:radius=2:rotation=5
@end example
-can be used to test the monowhite pixel format descriptor definition.
+ at item
+Similar as above but with custom speaker positions for front left, front right, rear left and rear right
+and also with custom gain:
+ at example
+"sofalizer=sofa=/path/to/ClubFritz6.sofa:type=freq:radius=2:speakers=FL 45|FR 315|RL 135|RR 225:gain=28"
+ at end example
+ at end itemize
- at anchor{scale}
- at section scale
+ at section stereotools
-Scale the input video and/or convert the image format.
+This filter has some handy utilities to manage stereo signals, for converting
+M/S stereo recordings to L/R signal while having control over the parameters
+or spreading the stereo image of master track.
-It accepts the following parameters:
+The filter accepts the following options:
@table @option
+ at item level_in
+Set input level before filtering for both channels. Default is 1.
+Allowed range is from 0.015625 to 64.
- at item w
-The output video width.
+ at item level_out
+Set output level after filtering for both channels. Default is 1.
+Allowed range is from 0.015625 to 64.
- at item h
-The output video height.
+ at item balance_in
+Set input balance between both channels. Default is 0.
+Allowed range is from -1 to 1.
- at end table
+ at item balance_out
+Set output balance between both channels. Default is 0.
+Allowed range is from -1 to 1.
-The parameters @var{w} and @var{h} are expressions containing
-the following constants:
+ at item softclip
+Enable softclipping. Results in analog distortion instead of harsh digital 0dB
+clipping. Disabled by default.
- at table @option
- at item E, PI, PHI
-These are approximated values for the mathematical constants e
-(Euler's number), pi (Greek pi), and phi (the golden ratio).
+ at item mutel
+Mute the left channel. Disabled by default.
- at item in_w, in_h
-The input width and height.
+ at item muter
+Mute the right channel. Disabled by default.
- at item iw, ih
-These are the same as @var{in_w} and @var{in_h}.
+ at item phasel
+Change the phase of the left channel. Disabled by default.
- at item out_w, out_h
-The output (cropped) width and height.
+ at item phaser
+Change the phase of the right channel. Disabled by default.
- at item ow, oh
-These are the same as @var{out_w} and @var{out_h}.
+ at item mode
+Set stereo mode. Available values are:
- at item a
-This is the same as @var{iw} / @var{ih}.
+ at table @samp
+ at item lr>lr
+Left/Right to Left/Right, this is default.
- at item sar
-input sample aspect ratio
+ at item lr>ms
+Left/Right to Mid/Side.
- at item dar
-The input display aspect ratio; it is the same as
-(@var{iw} / @var{ih}) * @var{sar}.
+ at item ms>lr
+Mid/Side to Left/Right.
- at item hsub, vsub
-The horizontal and vertical chroma subsample values. For example, for the
-pixel format "yuv422p" @var{hsub} is 2 and @var{vsub} is 1.
+ at item lr>ll
+Left/Right to Left/Left.
+
+ at item lr>rr
+Left/Right to Right/Right.
+
+ at item lr>l+r
+Left/Right to Left + Right.
+
+ at item lr>rl
+Left/Right to Right/Left.
@end table
-If the input image format is different from the format requested by
-the next filter, the scale filter will convert the input to the
-requested format.
+ at item slev
+Set level of side signal. Default is 1.
+Allowed range is from 0.015625 to 64.
-If the value for @var{w} or @var{h} is 0, the respective input
-size is used for the output.
+ at item sbal
+Set balance of side signal. Default is 0.
+Allowed range is from -1 to 1.
-If the value for @var{w} or @var{h} is -1, the scale filter will use, for the
-respective output size, a value that maintains the aspect ratio of the input
-image.
+ at item mlev
+Set level of the middle signal. Default is 1.
+Allowed range is from 0.015625 to 64.
-The default value of @var{w} and @var{h} is 0.
+ at item mpan
+Set middle signal pan. Default is 0. Allowed range is from -1 to 1.
-Some examples:
+ at item base
+Set stereo base between mono and inversed channels. Default is 0.
+Allowed range is from -1 to 1.
+
+ at item delay
+Set delay in milliseconds how much to delay left from right channel and
+vice versa. Default is 0. Allowed range is from -20 to 20.
+
+ at item sclevel
+Set S/C level. Default is 1. Allowed range is from 1 to 100.
+
+ at item phase
+Set the stereo phase in degrees. Default is 0. Allowed range is from 0 to 360.
+ at end table
+
+ at subsection Examples
+
+ at itemize
+ at item
+Apply karaoke like effect:
@example
-# Scale the input video to a size of 200x100
-scale=w=200:h=100
+stereotools=mlev=0.015625
+ at end example
-# Scale the input to 2x
-scale=w=2*iw:h=2*ih
-# The above is the same as
-scale=2*in_w:2*in_h
+ at item
+Convert M/S signal to L/R:
+ at example
+"stereotools=mode=ms>lr"
+ at end example
+ at end itemize
-# Scale the input to half the original size
-scale=w=iw/2:h=ih/2
+ at section stereowiden
-# Increase the width, and set the height to the same size
-scale=3/2*iw:ow
+This filter enhances the stereo effect by suppressing signal common to both
+channels and by delaying the signal of left into right and vice versa,
+thereby widening the stereo effect.
-# Seek Greek harmony
-scale=iw:1/PHI*iw
-scale=ih*PHI:ih
+The filter accepts the following options:
-# Increase the height, and set the width to 3/2 of the height
-scale=w=3/2*oh:h=3/5*ih
+ at table @option
+ at item delay
+Time in milliseconds of the delay of left signal into right and vice versa.
+Default is 20 milliseconds.
-# Increase the size, making the size a multiple of the chroma
-scale="trunc(3/2*iw/hsub)*hsub:trunc(3/2*ih/vsub)*vsub"
+ at item feedback
+Amount of gain in delayed signal into right and vice versa. Gives a delay
+effect of left signal in right output and vice versa which gives widening
+effect. Default is 0.3.
-# Increase the width to a maximum of 500 pixels,
-# keeping the same aspect ratio as the input
-scale=w='min(500\, iw*3/2):h=-1'
- at end example
+ at item crossfeed
+Cross feed of left into right with inverted phase. This helps in suppressing
+the mono. If the value is 1 it will cancel all the signal common to both
+channels. Default is 0.3.
+
+ at item drymix
+Set level of input signal of original channel. Default is 0.8.
+ at end table
+ @section scale_npp
+
+ Use the NVIDIA Performance Primitives (libnpp) to perform scaling and/or pixel
+ format conversion on CUDA video frames. Setting the output width and height
+ works in the same way as for the @var{scale} filter.
+
+ The following additional options are accepted:
+ @table @option
+ @item format
+ The pixel format of the output CUDA frames. If set to the string "same" (the
+ default), the input format will be kept. Note that automatic format negotiation
+ and conversion is not yet supported for hardware frames.
+
+ @item interp_algo
+ The interpolation algorithm used for resizing. One of the following:
+ @table @option
+ @item nn
+ Nearest neighbour.
+
+ @item linear
+ @item cubic
+ @item cubic2p_bspline
+ 2-parameter cubic (B=1, C=0)
+
+ @item cubic2p_catmullrom
+ 2-parameter cubic (B=0, C=1/2)
+
+ @item cubic2p_b05c03
+ 2-parameter cubic (B=1/2, C=3/10)
+
+ @item super
+ Supersampling
+
+ @item lanczos
+ @end table
+
+ @end table
+
+ @section select
+ Select frames to pass in output.
+
-It accepts the following parameters:
+ at section treble
+
+Boost or cut treble (upper) frequencies of the audio using a two-pole
+shelving filter with a response similar to that of a standard
+hi-fi's tone-controls. This is also known as shelving equalisation (EQ).
+
+The filter accepts the following options:
@table @option
+ at item gain, g
+Give the gain at whichever is the lower of ~22 kHz and the
+Nyquist frequency. Its useful range is about -20 (for a large cut)
+to +20 (for a large boost). Beware of clipping when using a positive gain.
+
+ at item frequency, f
+Set the filter's central frequency and so can be used
+to extend or reduce the frequency range to be boosted or cut.
+The default value is @code{3000} Hz.
+
+ at item width_type
+Set method to specify band-width of filter.
+ at table @option
+ at item h
+Hz
+ at item q
+Q-Factor
+ at item o
+octave
+ at item s
+slope
+ at end table
- at item expr
-An expression, which is evaluated for each input frame. If the expression is
-evaluated to a non-zero value, the frame is selected and passed to the output,
-otherwise it is discarded.
+ at item width, w
+Determine how steep is the filter's shelf transition.
+ at end table
+ at section tremolo
+
+Sinusoidal amplitude modulation.
+
+The filter accepts the following options:
+
+ at table @option
+ at item f
+Modulation frequency in Hertz. Modulation frequencies in the subharmonic range
+(20 Hz or lower) will result in a tremolo effect.
+This filter may also be used as a ring modulator by specifying
+a modulation frequency higher than 20 Hz.
+Range is 0.1 - 20000.0. Default value is 5.0 Hz.
+
+ at item d
+Depth of modulation as a percentage. Range is 0.0 - 1.0.
+Default value is 0.5.
@end table
-The expression can contain the following constants:
+ at section vibrato
+
+Sinusoidal phase modulation.
+
+The filter accepts the following options:
@table @option
- at item E, PI, PHI
-These are approximated values for the mathematical constants e
-(Euler's number), pi (Greek pi), and phi (the golden ratio).
+ at item f
+Modulation frequency in Hertz.
+Range is 0.1 - 20000.0. Default value is 5.0 Hz.
- at item n
-The (sequential) number of the filtered frame, starting from 0.
+ at item d
+Depth of modulation as a percentage. Range is 0.0 - 1.0.
+Default value is 0.5.
+ at end table
- at item selected_n
-The (sequential) number of the selected frame, starting from 0.
+ at section volume
- at item prev_selected_n
-The sequential number of the last selected frame. It's NAN if undefined.
+Adjust the input audio volume.
- at item TB
-The timebase of the input timestamps.
+It accepts the following parameters:
+ at table @option
- at item pts
-The PTS (Presentation TimeStamp) of the filtered video frame,
-expressed in @var{TB} units. It's NAN if undefined.
+ at item volume
+Set audio volume expression.
- at item t
-The PTS of the filtered video frame,
-expressed in seconds. It's NAN if undefined.
+Output values are clipped to the maximum value.
- at item prev_pts
-The PTS of the previously filtered video frame. It's NAN if undefined.
+The output audio volume is given by the relation:
+ at example
+ at var{output_volume} = @var{volume} * @var{input_volume}
+ at end example
- at item prev_selected_pts
-The PTS of the last previously filtered video frame. It's NAN if undefined.
+The default value for @var{volume} is "1.0".
- at item prev_selected_t
-The PTS of the last previously selected video frame. It's NAN if undefined.
+ at item precision
+This parameter represents the mathematical precision.
- at item start_pts
-The PTS of the first video frame in the video. It's NAN if undefined.
+It determines which input sample formats will be allowed, which affects the
+precision of the volume scaling.
- at item start_t
-The time of the first video frame in the video. It's NAN if undefined.
+ at table @option
+ at item fixed
+8-bit fixed-point; this limits input sample format to U8, S16, and S32.
+ at item float
+32-bit floating-point; this limits input sample format to FLT. (default)
+ at item double
+64-bit floating-point; this limits input sample format to DBL.
+ at end table
+
+ at item replaygain
+Choose the behaviour on encountering ReplayGain side data in input frames.
- at item pict_type
-The type of the filtered frame. It can assume one of the following
-values:
@table @option
- at item I
- at item P
- at item B
- at item S
- at item SI
- at item SP
- at item BI
+ at item drop
+Remove ReplayGain side data, ignoring its contents (the default).
+
+ at item ignore
+Ignore ReplayGain side data, but leave it in the frame.
+
+ at item track
+Prefer the track gain, if present.
+
+ at item album
+Prefer the album gain, if present.
@end table
- at item interlace_type
-The frame interlace type. It can assume one of the following values:
+ at item replaygain_preamp
+Pre-amplification gain in dB to apply to the selected replaygain gain.
+
+Default value for @var{replaygain_preamp} is 0.0.
+
+ at item eval
+Set when the volume expression is evaluated.
+
+It accepts the following values:
+ at table @samp
+ at item once
+only evaluate expression once during the filter initialization, or
+when the @samp{volume} command is sent
+
+ at item frame
+evaluate expression for each incoming frame
+ at end table
+
+Default value is @samp{once}.
+ at end table
+
+The volume expression can contain the following parameters.
+
@table @option
- at item PROGRESSIVE
-The frame is progressive (not interlaced).
- at item TOPFIRST
-The frame is top-field-first.
- at item BOTTOMFIRST
-The frame is bottom-field-first.
+ at item n
+frame number (starting at zero)
+ at item nb_channels
+number of channels
+ at item nb_consumed_samples
+number of samples consumed by the filter
+ at item nb_samples
+number of samples in the current frame
+ at item pos
+original frame position in the file
+ at item pts
+frame PTS
+ at item sample_rate
+sample rate
+ at item startpts
+PTS at start of stream
+ at item startt
+time at start of stream
+ at item t
+frame time
+ at item tb
+timestamp timebase
+ at item volume
+last set volume value
@end table
- at item key
-This is 1 if the filtered frame is a key-frame, 0 otherwise.
+Note that when @option{eval} is set to @samp{once} only the
+ at var{sample_rate} and @var{tb} variables are available, all other
+variables will evaluate to NAN.
+
+ at subsection Commands
+
+This filter supports the following commands:
+ at table @option
+ at item volume
+Modify the volume expression.
+The command accepts the same syntax of the corresponding option.
+
+If the specified expression is not valid, it is kept at its current
+value.
+ at item replaygain_noclip
+Prevent clipping by limiting the gain applied.
+
+Default value for @var{replaygain_noclip} is 1.
@end table
diff --cc libavfilter/Makefile
index d71a17b,39e167f..917049c
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@@ -199,52 -63,18 +199,53 @@@ OBJS-$(CONFIG_LUT3D_FILTER
OBJS-$(CONFIG_LUT_FILTER) += vf_lut.o
OBJS-$(CONFIG_LUTRGB_FILTER) += vf_lut.o
OBJS-$(CONFIG_LUTYUV_FILTER) += vf_lut.o
+OBJS-$(CONFIG_MASKEDMERGE_FILTER) += vf_maskedmerge.o framesync.o
+OBJS-$(CONFIG_MCDEINT_FILTER) += vf_mcdeint.o
+OBJS-$(CONFIG_MERGEPLANES_FILTER) += vf_mergeplanes.o framesync.o
+OBJS-$(CONFIG_METADATA_FILTER) += f_metadata.o
+OBJS-$(CONFIG_MPDECIMATE_FILTER) += vf_mpdecimate.o
OBJS-$(CONFIG_NEGATE_FILTER) += vf_lut.o
+OBJS-$(CONFIG_NNEDI_FILTER) += vf_nnedi.o
OBJS-$(CONFIG_NOFORMAT_FILTER) += vf_format.o
+OBJS-$(CONFIG_NOISE_FILTER) += vf_noise.o
OBJS-$(CONFIG_NULL_FILTER) += vf_null.o
+OBJS-$(CONFIG_OCR_FILTER) += vf_ocr.o
OBJS-$(CONFIG_OCV_FILTER) += vf_libopencv.o
-OBJS-$(CONFIG_OVERLAY_FILTER) += vf_overlay.o
+OBJS-$(CONFIG_OPENCL) += deshake_opencl.o unsharp_opencl.o
+OBJS-$(CONFIG_OVERLAY_FILTER) += vf_overlay.o dualinput.o framesync.o
+OBJS-$(CONFIG_OWDENOISE_FILTER) += vf_owdenoise.o
OBJS-$(CONFIG_PAD_FILTER) += vf_pad.o
+OBJS-$(CONFIG_PALETTEGEN_FILTER) += vf_palettegen.o
+OBJS-$(CONFIG_PALETTEUSE_FILTER) += vf_paletteuse.o dualinput.o framesync.o
+OBJS-$(CONFIG_PERMS_FILTER) += f_perms.o
+OBJS-$(CONFIG_PERSPECTIVE_FILTER) += vf_perspective.o
+OBJS-$(CONFIG_PHASE_FILTER) += vf_phase.o
OBJS-$(CONFIG_PIXDESCTEST_FILTER) += vf_pixdesctest.o
+OBJS-$(CONFIG_PP_FILTER) += vf_pp.o
+OBJS-$(CONFIG_PP7_FILTER) += vf_pp7.o
+OBJS-$(CONFIG_PSNR_FILTER) += vf_psnr.o dualinput.o framesync.o
+OBJS-$(CONFIG_PULLUP_FILTER) += vf_pullup.o
+OBJS-$(CONFIG_QP_FILTER) += vf_qp.o
+OBJS-$(CONFIG_RANDOM_FILTER) += vf_random.o
+OBJS-$(CONFIG_READVITC_FILTER) += vf_readvitc.o
+OBJS-$(CONFIG_REALTIME_FILTER) += f_realtime.o
+OBJS-$(CONFIG_REMAP_FILTER) += vf_remap.o framesync.o
+OBJS-$(CONFIG_REMOVEGRAIN_FILTER) += vf_removegrain.o
+OBJS-$(CONFIG_REMOVELOGO_FILTER) += bbox.o lswsutils.o lavfutils.o vf_removelogo.o
+OBJS-$(CONFIG_REPEATFIELDS_FILTER) += vf_repeatfields.o
+OBJS-$(CONFIG_REVERSE_FILTER) += f_reverse.o
+OBJS-$(CONFIG_ROTATE_FILTER) += vf_rotate.o
+OBJS-$(CONFIG_SEPARATEFIELDS_FILTER) += vf_separatefields.o
+OBJS-$(CONFIG_SAB_FILTER) += vf_sab.o
OBJS-$(CONFIG_SCALE_FILTER) += vf_scale.o
+OBJS-$(CONFIG_SCALE2REF_FILTER) += vf_scale.o
+OBJS-$(CONFIG_SELECT_FILTER) += f_select.o
+OBJS-$(CONFIG_SELECTIVECOLOR_FILTER) += vf_selectivecolor.o
+OBJS-$(CONFIG_SENDCMD_FILTER) += f_sendcmd.o
+ OBJS-$(CONFIG_SCALE_NPP_FILTER) += vf_scale_npp.o
OBJS-$(CONFIG_SCALE_VAAPI_FILTER) += vf_scale_vaapi.o
-OBJS-$(CONFIG_SELECT_FILTER) += vf_select.o
OBJS-$(CONFIG_SETDAR_FILTER) += vf_aspect.o
+OBJS-$(CONFIG_SETFIELD_FILTER) += vf_setfield.o
OBJS-$(CONFIG_SETPTS_FILTER) += setpts.o
OBJS-$(CONFIG_SETSAR_FILTER) += vf_aspect.o
OBJS-$(CONFIG_SETTB_FILTER) += settb.o
diff --cc libavfilter/allfilters.c
index 7f58c7e,c6eeb1f..a972576
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@@ -220,51 -89,18 +220,52 @@@ void avfilter_register_all(void
REGISTER_FILTER(LUT, lut, vf);
REGISTER_FILTER(LUTRGB, lutrgb, vf);
REGISTER_FILTER(LUTYUV, lutyuv, vf);
+ REGISTER_FILTER(MASKEDMERGE, maskedmerge, vf);
+ REGISTER_FILTER(MCDEINT, mcdeint, vf);
+ REGISTER_FILTER(MERGEPLANES, mergeplanes, vf);
+ REGISTER_FILTER(METADATA, metadata, vf);
+ REGISTER_FILTER(MPDECIMATE, mpdecimate, vf);
REGISTER_FILTER(NEGATE, negate, vf);
+ REGISTER_FILTER(NNEDI, nnedi, vf);
REGISTER_FILTER(NOFORMAT, noformat, vf);
+ REGISTER_FILTER(NOISE, noise, vf);
REGISTER_FILTER(NULL, null, vf);
+ REGISTER_FILTER(OCR, ocr, vf);
REGISTER_FILTER(OCV, ocv, vf);
REGISTER_FILTER(OVERLAY, overlay, vf);
+ REGISTER_FILTER(OWDENOISE, owdenoise, vf);
REGISTER_FILTER(PAD, pad, vf);
+ REGISTER_FILTER(PALETTEGEN, palettegen, vf);
+ REGISTER_FILTER(PALETTEUSE, paletteuse, vf);
+ REGISTER_FILTER(PERMS, perms, vf);
+ REGISTER_FILTER(PERSPECTIVE, perspective, vf);
+ REGISTER_FILTER(PHASE, phase, vf);
REGISTER_FILTER(PIXDESCTEST, pixdesctest, vf);
+ REGISTER_FILTER(PP, pp, vf);
+ REGISTER_FILTER(PP7, pp7, vf);
+ REGISTER_FILTER(PSNR, psnr, vf);
+ REGISTER_FILTER(PULLUP, pullup, vf);
+ REGISTER_FILTER(QP, qp, vf);
+ REGISTER_FILTER(RANDOM, random, vf);
+ REGISTER_FILTER(READVITC, readvitc, vf);
+ REGISTER_FILTER(REALTIME, realtime, vf);
+ REGISTER_FILTER(REMAP, remap, vf);
+ REGISTER_FILTER(REMOVEGRAIN, removegrain, vf);
+ REGISTER_FILTER(REMOVELOGO, removelogo, vf);
+ REGISTER_FILTER(REPEATFIELDS, repeatfields, vf);
+ REGISTER_FILTER(REVERSE, reverse, vf);
+ REGISTER_FILTER(ROTATE, rotate, vf);
+ REGISTER_FILTER(SAB, sab, vf);
REGISTER_FILTER(SCALE, scale, vf);
+ REGISTER_FILTER(SCALE2REF, scale2ref, vf);
+ REGISTER_FILTER(SCALE_NPP, scale_npp, vf);
REGISTER_FILTER(SCALE_VAAPI, scale_vaapi, vf);
REGISTER_FILTER(SELECT, select, vf);
+ REGISTER_FILTER(SELECTIVECOLOR, selectivecolor, vf);
+ REGISTER_FILTER(SENDCMD, sendcmd, vf);
+ REGISTER_FILTER(SEPARATEFIELDS, separatefields, vf);
REGISTER_FILTER(SETDAR, setdar, vf);
+ REGISTER_FILTER(SETFIELD, setfield, vf);
REGISTER_FILTER(SETPTS, setpts, vf);
REGISTER_FILTER(SETSAR, setsar, vf);
REGISTER_FILTER(SETTB, settb, vf);
diff --cc libavfilter/version.h
index 927ec27,c8102dd..d7f9c54
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@@ -29,9 -29,9 +29,9 @@@
#include "libavutil/version.h"
-#define LIBAVFILTER_VERSION_MAJOR 6
-#define LIBAVFILTER_VERSION_MINOR 4
-#define LIBAVFILTER_VERSION_MICRO 0
+#define LIBAVFILTER_VERSION_MAJOR 6
- #define LIBAVFILTER_VERSION_MINOR 43
- #define LIBAVFILTER_VERSION_MICRO 101
++#define LIBAVFILTER_VERSION_MINOR 44
++#define LIBAVFILTER_VERSION_MICRO 100
#define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
LIBAVFILTER_VERSION_MINOR, \
diff --cc libavfilter/vf_scale_npp.c
index 0000000,247baf1..7d2b5df
mode 000000,100644..100644
--- a/libavfilter/vf_scale_npp.c
+++ b/libavfilter/vf_scale_npp.c
@@@ -1,0 -1,660 +1,660 @@@
+ /*
- * This file is part of Libav.
++ * This file is part of FFmpeg.
+ *
- * Libav is free software; you can redistribute it and/or
++ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
- * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
++ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+ /**
+ * @file
+ * scale video filter
+ */
+
+ #include <nppi.h>
+ #include <stdio.h>
+ #include <string.h>
+
+ #include "libavutil/avstring.h"
+ #include "libavutil/common.h"
+ #include "libavutil/eval.h"
+ #include "libavutil/hwcontext.h"
+ #include "libavutil/hwcontext_cuda.h"
+ #include "libavutil/internal.h"
+ #include "libavutil/mathematics.h"
+ #include "libavutil/opt.h"
+ #include "libavutil/pixdesc.h"
+
+ #include "avfilter.h"
+ #include "formats.h"
+ #include "internal.h"
+ #include "video.h"
+
+ static const enum AVPixelFormat supported_formats[] = {
+ AV_PIX_FMT_YUV420P,
+ AV_PIX_FMT_NV12,
+ AV_PIX_FMT_YUV444P,
+ };
+
+ static const enum AVPixelFormat deinterleaved_formats[][2] = {
+ { AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P },
+ };
+
+ static const char *const var_names[] = {
+ "PI",
+ "PHI",
+ "E",
+ "in_w", "iw",
+ "in_h", "ih",
+ "out_w", "ow",
+ "out_h", "oh",
+ "a", "dar",
+ "sar",
+ NULL
+ };
+
+ enum var_name {
+ VAR_PI,
+ VAR_PHI,
+ VAR_E,
+ VAR_IN_W, VAR_IW,
+ VAR_IN_H, VAR_IH,
+ VAR_OUT_W, VAR_OW,
+ VAR_OUT_H, VAR_OH,
+ VAR_A, VAR_DAR,
+ VAR_SAR,
+ VARS_NB
+ };
+
+ enum ScaleStage {
+ STAGE_DEINTERLEAVE,
+ STAGE_RESIZE,
+ STAGE_INTERLEAVE,
+ STAGE_NB,
+ };
+
+ typedef struct NPPScaleStageContext {
+ int stage_needed;
+ enum AVPixelFormat in_fmt;
+ enum AVPixelFormat out_fmt;
+
+ struct {
+ int width;
+ int height;
+ } planes_in[3], planes_out[3];
+
+ AVBufferRef *frames_ctx;
+ AVFrame *frame;
+ } NPPScaleStageContext;
+
+ typedef struct NPPScaleContext {
+ const AVClass *class;
+
+ NPPScaleStageContext stages[STAGE_NB];
+ AVFrame *tmp_frame;
+ int passthrough;
+
+ int shift_width, shift_height;
+
+ /**
+ * New dimensions. Special values are:
+ * 0 = original width/height
+ * -1 = keep original aspect
+ */
+ int w, h;
+
+ /**
+ * Output sw format. AV_PIX_FMT_NONE for no conversion.
+ */
+ enum AVPixelFormat format;
+
+ char *w_expr; ///< width expression string
+ char *h_expr; ///< height expression string
+ char *format_str;
+
+ int interp_algo;
+ } NPPScaleContext;
+
+ static int nppscale_init(AVFilterContext *ctx)
+ {
+ NPPScaleContext *s = ctx->priv;
+ int i;
+
+ if (!strcmp(s->format_str, "same")) {
+ s->format = AV_PIX_FMT_NONE;
+ } else {
+ s->format = av_get_pix_fmt(s->format_str);
+ if (s->format == AV_PIX_FMT_NONE) {
+ av_log(ctx, AV_LOG_ERROR, "Unrecognized pixel format: %s\n", s->format_str);
+ return AVERROR(EINVAL);
+ }
+ }
+
+ for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) {
+ s->stages[i].frame = av_frame_alloc();
+ if (!s->stages[i].frame)
+ return AVERROR(ENOMEM);
+ }
+ s->tmp_frame = av_frame_alloc();
+ if (!s->tmp_frame)
+ return AVERROR(ENOMEM);
+
+ return 0;
+ }
+
+ static void nppscale_uninit(AVFilterContext *ctx)
+ {
+ NPPScaleContext *s = ctx->priv;
+ int i;
+
+ for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) {
+ av_frame_free(&s->stages[i].frame);
+ av_buffer_unref(&s->stages[i].frames_ctx);
+ }
+ av_frame_free(&s->tmp_frame);
+ }
+
+ static int nppscale_query_formats(AVFilterContext *ctx)
+ {
+ static const enum AVPixelFormat pixel_formats[] = {
+ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE,
+ };
+ AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats);
+
+ ff_set_common_formats(ctx, pix_fmts);
+
+ return 0;
+ }
+
+ static int init_stage(NPPScaleStageContext *stage, AVBufferRef *device_ctx)
+ {
+ AVBufferRef *out_ref = NULL;
+ AVHWFramesContext *out_ctx;
+ int in_sw, in_sh, out_sw, out_sh;
+ int ret, i;
+
+ av_pix_fmt_get_chroma_sub_sample(stage->in_fmt, &in_sw, &in_sh);
+ av_pix_fmt_get_chroma_sub_sample(stage->out_fmt, &out_sw, &out_sh);
+ if (!stage->planes_out[0].width) {
+ stage->planes_out[0].width = stage->planes_in[0].width;
+ stage->planes_out[0].height = stage->planes_in[0].height;
+ }
+
+ for (i = 1; i < FF_ARRAY_ELEMS(stage->planes_in); i++) {
+ stage->planes_in[i].width = stage->planes_in[0].width >> in_sw;
+ stage->planes_in[i].height = stage->planes_in[0].height >> in_sh;
+ stage->planes_out[i].width = stage->planes_out[0].width >> out_sw;
+ stage->planes_out[i].height = stage->planes_out[0].height >> out_sh;
+ }
+
+ out_ref = av_hwframe_ctx_alloc(device_ctx);
+ if (!out_ref)
+ return AVERROR(ENOMEM);
+ out_ctx = (AVHWFramesContext*)out_ref->data;
+
+ out_ctx->format = AV_PIX_FMT_CUDA;
+ out_ctx->sw_format = stage->out_fmt;
+ out_ctx->width = FFALIGN(stage->planes_out[0].width, 32);
+ out_ctx->height = FFALIGN(stage->planes_out[0].height, 32);
+
+ ret = av_hwframe_ctx_init(out_ref);
+ if (ret < 0)
+ goto fail;
+
+ av_frame_unref(stage->frame);
+ ret = av_hwframe_get_buffer(out_ref, stage->frame, 0);
+ if (ret < 0)
+ goto fail;
+
+ stage->frame->width = stage->planes_out[0].width;
+ stage->frame->height = stage->planes_out[0].height;
+
+ av_buffer_unref(&stage->frames_ctx);
+ stage->frames_ctx = out_ref;
+
+ return 0;
+ fail:
+ av_buffer_unref(&out_ref);
+ return ret;
+ }
+
+ static int format_is_supported(enum AVPixelFormat fmt)
+ {
+ int i;
+
+ for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
+ if (supported_formats[i] == fmt)
+ return 1;
+ return 0;
+ }
+
+ static enum AVPixelFormat get_deinterleaved_format(enum AVPixelFormat fmt)
+ {
+ const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(fmt);
+ int i, planes;
+
+ planes = av_pix_fmt_count_planes(fmt);
+ if (planes == desc->nb_components)
+ return fmt;
+ for (i = 0; i < FF_ARRAY_ELEMS(deinterleaved_formats); i++)
+ if (deinterleaved_formats[i][0] == fmt)
+ return deinterleaved_formats[i][1];
+ return AV_PIX_FMT_NONE;
+ }
+
+ /* Decide which of the three stages (deinterleave, resize, interleave)
+  * are needed for the requested conversion, record each stage's formats
+  * and entry-plane geometry, and initialize the HW frame pools of every
+  * active stage.  Sets s->passthrough when no processing is required.
+  * Returns 0 on success or a negative AVERROR code. */
+ static int init_processing_chain(AVFilterContext *ctx, int in_width, int in_height,
+ int out_width, int out_height)
+ {
+ NPPScaleContext *s = ctx->priv;
+
+ AVHWFramesContext *in_frames_ctx;
+
+ enum AVPixelFormat in_format;
+ enum AVPixelFormat out_format;
+ enum AVPixelFormat in_deinterleaved_format;
+ enum AVPixelFormat out_deinterleaved_format;
+
+ int i, ret, last_stage = -1;
+
+ /* check that we have a hw context */
+ if (!ctx->inputs[0]->hw_frames_ctx) {
+ av_log(ctx, AV_LOG_ERROR, "No hw context provided on input\n");
+ return AVERROR(EINVAL);
+ }
+ in_frames_ctx = (AVHWFramesContext*)ctx->inputs[0]->hw_frames_ctx->data;
+ in_format = in_frames_ctx->sw_format;
+ /* "format=same" leaves s->format at AV_PIX_FMT_NONE: keep the input format */
+ out_format = (s->format == AV_PIX_FMT_NONE) ? in_format : s->format;
+
+ if (!format_is_supported(in_format)) {
+ av_log(ctx, AV_LOG_ERROR, "Unsupported input format: %s\n",
+ av_get_pix_fmt_name(in_format));
+ return AVERROR(ENOSYS);
+ }
+ if (!format_is_supported(out_format)) {
+ av_log(ctx, AV_LOG_ERROR, "Unsupported output format: %s\n",
+ av_get_pix_fmt_name(out_format));
+ return AVERROR(ENOSYS);
+ }
+
+ /* the resize stage works on fully planar data, so semi-planar formats
+  * are bracketed by deinterleave/interleave stages below */
+ in_deinterleaved_format = get_deinterleaved_format(in_format);
+ out_deinterleaved_format = get_deinterleaved_format(out_format);
+ if (in_deinterleaved_format == AV_PIX_FMT_NONE ||
+ out_deinterleaved_format == AV_PIX_FMT_NONE)
+ return AVERROR_BUG;
+
+ /* figure out which stages need to be done */
+ if (in_width != out_width || in_height != out_height ||
+ in_deinterleaved_format != out_deinterleaved_format)
+ s->stages[STAGE_RESIZE].stage_needed = 1;
+
+ /* same size and same format: frames can be forwarded untouched */
+ if (!s->stages[STAGE_RESIZE].stage_needed && in_format == out_format)
+ s->passthrough = 1;
+
+ if (!s->passthrough) {
+ if (in_format != in_deinterleaved_format)
+ s->stages[STAGE_DEINTERLEAVE].stage_needed = 1;
+ if (out_format != out_deinterleaved_format)
+ s->stages[STAGE_INTERLEAVE].stage_needed = 1;
+ }
+
+ /* stage formats/geometry are filled in unconditionally; only the
+  * stages marked stage_needed above are actually initialized.
+  * NOTE(review): output geometry of the deinterleave/interleave stages
+  * and per-plane chroma sizes are presumably derived in init_stage —
+  * confirm against the part of the file above this chunk */
+ s->stages[STAGE_DEINTERLEAVE].in_fmt = in_format;
+ s->stages[STAGE_DEINTERLEAVE].out_fmt = in_deinterleaved_format;
+ s->stages[STAGE_DEINTERLEAVE].planes_in[0].width = in_width;
+ s->stages[STAGE_DEINTERLEAVE].planes_in[0].height = in_height;
+
+ s->stages[STAGE_RESIZE].in_fmt = in_deinterleaved_format;
+ s->stages[STAGE_RESIZE].out_fmt = out_deinterleaved_format;
+ s->stages[STAGE_RESIZE].planes_in[0].width = in_width;
+ s->stages[STAGE_RESIZE].planes_in[0].height = in_height;
+ s->stages[STAGE_RESIZE].planes_out[0].width = out_width;
+ s->stages[STAGE_RESIZE].planes_out[0].height = out_height;
+
+ s->stages[STAGE_INTERLEAVE].in_fmt = out_deinterleaved_format;
+ s->stages[STAGE_INTERLEAVE].out_fmt = out_format;
+ s->stages[STAGE_INTERLEAVE].planes_in[0].width = out_width;
+ s->stages[STAGE_INTERLEAVE].planes_in[0].height = out_height;
+
+ /* init the hardware contexts */
+ for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) {
+ if (!s->stages[i].stage_needed)
+ continue;
+
+ ret = init_stage(&s->stages[i], in_frames_ctx->device_ref);
+ if (ret < 0)
+ return ret;
+
+ last_stage = i;
+ }
+
+ /* the chain is only built when !passthrough, so at least one stage
+  * must have been enabled */
+ if (last_stage < 0)
+ return AVERROR_BUG;
+ /* the output link delivers frames from the last stage's pool */
+ ctx->outputs[0]->hw_frames_ctx = av_buffer_ref(s->stages[last_stage].frames_ctx);
+ if (!ctx->outputs[0]->hw_frames_ctx)
+ return AVERROR(ENOMEM);
+
+ return 0;
+ }
+
+ /* Configure the output link: evaluate the w/h option expressions with
+  * the usual scale-filter variables (in_w/in_h, a, sar, dar, ...), apply
+  * the 0 = "same as input" and -1 = "keep aspect ratio" semantics, build
+  * the processing chain and derive the output sample aspect ratio.
+  * Returns 0 on success or a negative AVERROR code. */
+ static int nppscale_config_props(AVFilterLink *outlink)
+ {
+     AVFilterContext *ctx = outlink->src;
+     AVFilterLink *inlink = outlink->src->inputs[0];
+     NPPScaleContext *s = ctx->priv;
+     int64_t w, h;
+     double var_values[VARS_NB], res;
+     char *expr;
+     int ret;
+
+     var_values[VAR_PI]    = M_PI;
+     var_values[VAR_PHI]   = M_PHI;
+     var_values[VAR_E]     = M_E;
+     var_values[VAR_IN_W]  = var_values[VAR_IW] = inlink->w;
+     var_values[VAR_IN_H]  = var_values[VAR_IH] = inlink->h;
+     var_values[VAR_OUT_W] = var_values[VAR_OW] = NAN;
+     var_values[VAR_OUT_H] = var_values[VAR_OH] = NAN;
+     var_values[VAR_A]     = (double) inlink->w / inlink->h;
+     var_values[VAR_SAR]   = inlink->sample_aspect_ratio.num ?
+         (double) inlink->sample_aspect_ratio.num / inlink->sample_aspect_ratio.den : 1;
+     var_values[VAR_DAR]   = var_values[VAR_A] * var_values[VAR_SAR];
+
+     /* evaluate width and height
+      * bug fix: the first evaluation's return value was ignored, so an
+      * invalid width expression was only reported when the height was
+      * evaluated, with a misleading error message */
+     if ((ret = av_expr_parse_and_eval(&res, (expr = s->w_expr),
+                                       var_names, var_values,
+                                       NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0)
+         goto fail;
+     s->w = var_values[VAR_OUT_W] = var_values[VAR_OW] = res;
+     if ((ret = av_expr_parse_and_eval(&res, (expr = s->h_expr),
+                                       var_names, var_values,
+                                       NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0)
+         goto fail;
+     s->h = var_values[VAR_OUT_H] = var_values[VAR_OH] = res;
+     /* evaluate again the width, as it may depend on the output height */
+     if ((ret = av_expr_parse_and_eval(&res, (expr = s->w_expr),
+                                       var_names, var_values,
+                                       NULL, NULL, NULL, NULL, NULL, 0, ctx)) < 0)
+         goto fail;
+     s->w = res;
+
+     w = s->w;
+     h = s->h;
+
+     /* sanity check params */
+     if (w < -1 || h < -1) {
+         av_log(ctx, AV_LOG_ERROR, "Size values less than -1 are not acceptable.\n");
+         return AVERROR(EINVAL);
+     }
+     if (w == -1 && h == -1)
+         s->w = s->h = 0;
+
+     /* 0 means "same as input"; -1 derives one side from the other,
+      * preserving the input aspect ratio */
+     if (!(w = s->w))
+         w = inlink->w;
+     if (!(h = s->h))
+         h = inlink->h;
+     if (w == -1)
+         w = av_rescale(h, inlink->w, inlink->h);
+     if (h == -1)
+         h = av_rescale(w, inlink->h, inlink->w);
+
+     /* NOTE(review): this only warns and does not fail, matching the
+      * software scale filter — confirm this is intentional */
+     if (w > INT_MAX || h > INT_MAX ||
+         (h * inlink->w) > INT_MAX ||
+         (w * inlink->h) > INT_MAX)
+         av_log(ctx, AV_LOG_ERROR, "Rescaled value for width or height is too big.\n");
+
+     outlink->w = w;
+     outlink->h = h;
+
+     ret = init_processing_chain(ctx, inlink->w, inlink->h, w, h);
+     if (ret < 0)
+         return ret;
+
+     av_log(ctx, AV_LOG_VERBOSE, "w:%d h:%d -> w:%d h:%d\n",
+            inlink->w, inlink->h, outlink->w, outlink->h);
+
+     /* rescale the SAR so the display aspect ratio stays unchanged */
+     if (inlink->sample_aspect_ratio.num)
+         outlink->sample_aspect_ratio = av_mul_q((AVRational){outlink->h*inlink->w,
+                                                              outlink->w*inlink->h},
+                                                 inlink->sample_aspect_ratio);
+     else
+         outlink->sample_aspect_ratio = inlink->sample_aspect_ratio;
+
+     return 0;
+
+ fail:
+     av_log(NULL, AV_LOG_ERROR,
+            "Error when evaluating the expression '%s'\n", expr);
+     return ret;
+ }
+
+ /* Split the interleaved chroma plane of a semi-planar frame into
+  * separate planes.  Only NV12 input is implemented; anything else is a
+  * chain-construction bug. */
+ static int nppscale_deinterleave(AVFilterContext *ctx, NPPScaleStageContext *stage,
+                                  AVFrame *out, AVFrame *in)
+ {
+     AVHWFramesContext *in_frames_ctx = (AVHWFramesContext*)in->hw_frames_ctx->data;
+     NppStatus err;
+
+     if (in_frames_ctx->sw_format != AV_PIX_FMT_NV12)
+         return AVERROR_BUG;
+
+     err = nppiYCbCr420_8u_P2P3R(in->data[0], in->linesize[0],
+                                 in->data[1], in->linesize[1],
+                                 out->data, out->linesize,
+                                 (NppiSize){ in->width, in->height });
+     if (err != NPP_SUCCESS) {
+         av_log(ctx, AV_LOG_ERROR, "NPP deinterleave error: %d\n", err);
+         return AVERROR_UNKNOWN;
+     }
+
+     return 0;
+ }
+
+ /* Resize every valid plane of "in" into "out" with
+  * nppiResizeSqrPixel_8u_C1R, using the per-plane geometry recorded in
+  * the stage and the user-selected interpolation algorithm. */
+ static int nppscale_resize(AVFilterContext *ctx, NPPScaleStageContext *stage,
+                            AVFrame *out, AVFrame *in)
+ {
+     NPPScaleContext *s = ctx->priv;
+     NppStatus err;
+     int plane;
+
+     for (plane = 0; plane < FF_ARRAY_ELEMS(in->data); plane++) {
+         int src_w, src_h, dst_w, dst_h;
+
+         if (!in->data[plane])
+             break;
+
+         src_w = stage->planes_in[plane].width;
+         src_h = stage->planes_in[plane].height;
+         dst_w = stage->planes_out[plane].width;
+         dst_h = stage->planes_out[plane].height;
+
+         err = nppiResizeSqrPixel_8u_C1R(in->data[plane], (NppiSize){ src_w, src_h },
+                                         in->linesize[plane],
+                                         (NppiRect){ 0, 0, src_w, src_h },
+                                         out->data[plane], out->linesize[plane],
+                                         (NppiRect){ 0, 0, dst_w, dst_h },
+                                         (double)dst_w / src_w, (double)dst_h / src_h,
+                                         0.0, 0.0, s->interp_algo);
+         if (err != NPP_SUCCESS) {
+             av_log(ctx, AV_LOG_ERROR, "NPP resize error: %d\n", err);
+             return AVERROR_UNKNOWN;
+         }
+     }
+
+     return 0;
+ }
+
+ /* Merge separate chroma planes back into the semi-planar layout of the
+  * output frame.  Only NV12 output is implemented; anything else is a
+  * chain-construction bug. */
+ static int nppscale_interleave(AVFilterContext *ctx, NPPScaleStageContext *stage,
+                                AVFrame *out, AVFrame *in)
+ {
+     AVHWFramesContext *out_frames_ctx = (AVHWFramesContext*)out->hw_frames_ctx->data;
+     NppStatus err;
+
+     switch (out_frames_ctx->sw_format) {
+     case AV_PIX_FMT_NV12:
+         err = nppiYCbCr420_8u_P3P2R((const uint8_t**)in->data,
+                                     in->linesize,
+                                     out->data[0], out->linesize[0],
+                                     out->data[1], out->linesize[1],
+                                     (NppiSize){ in->width, in->height });
+         break;
+     default:
+         return AVERROR_BUG;
+     }
+     if (err != NPP_SUCCESS) {
+         /* bug fix: this message said "deinterleave" (copy-paste from
+          * nppscale_deinterleave), mislabeling the failing stage */
+         av_log(ctx, AV_LOG_ERROR, "NPP interleave error: %d\n", err);
+         return AVERROR_UNKNOWN;
+     }
+
+     return 0;
+ }
+
+ /* Per-stage worker functions, indexed by the stage enum; dispatched by
+  * nppscale_scale() for each enabled stage. */
+ static int (*const nppscale_process[])(AVFilterContext *ctx, NPPScaleStageContext *stage,
+ AVFrame *out, AVFrame *in) = {
+ [STAGE_DEINTERLEAVE] = nppscale_deinterleave,
+ [STAGE_RESIZE] = nppscale_resize,
+ [STAGE_INTERLEAVE] = nppscale_interleave,
+ };
+
+ /* Run the enabled stages in order (deinterleave -> resize ->
+  * interleave), each stage consuming the previous stage's output frame.
+  * The final stage's frame is moved into "out" and immediately replaced
+  * by a fresh buffer from the same pool, so the stage is ready for the
+  * next input frame.  Returns 0 on success or a negative AVERROR code. */
+ static int nppscale_scale(AVFilterContext *ctx, AVFrame *out, AVFrame *in)
+ {
+ NPPScaleContext *s = ctx->priv;
+ AVFrame *src = in;
+ int i, ret, last_stage = -1;
+
+ for (i = 0; i < FF_ARRAY_ELEMS(s->stages); i++) {
+ if (!s->stages[i].stage_needed)
+ continue;
+
+ ret = nppscale_process[i](ctx, &s->stages[i], s->stages[i].frame, src);
+ if (ret < 0)
+ return ret;
+
+ src = s->stages[i].frame;
+ last_stage = i;
+ }
+
+ /* this path is only taken when !passthrough, so at least one stage
+  * must have run */
+ if (last_stage < 0)
+ return AVERROR_BUG;
+ /* get a replacement buffer before handing the stage's frame away */
+ ret = av_hwframe_get_buffer(src->hw_frames_ctx, s->tmp_frame, 0);
+ if (ret < 0)
+ return ret;
+
+ /* hand the result to "out" and restock the last stage's frame slot */
+ av_frame_move_ref(out, src);
+ av_frame_move_ref(src, s->tmp_frame);
+
+ /* carry over timestamps and other metadata from the input frame */
+ ret = av_frame_copy_props(out, in);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+ }
+
+ /* Input-pad filter_frame callback: forward the frame untouched in
+  * passthrough mode, otherwise run the stage chain with the device's
+  * CUDA context current and emit the scaled frame.  Takes ownership of
+  * "in" on all paths. */
+ static int nppscale_filter_frame(AVFilterLink *link, AVFrame *in)
+ {
+     AVFilterContext *ctx = link->dst;
+     NPPScaleContext *s = ctx->priv;
+     AVFilterLink *outlink = ctx->outputs[0];
+     AVHWFramesContext *frames_ctx = (AVHWFramesContext*)outlink->hw_frames_ctx->data;
+     AVCUDADeviceContext *device_hwctx = frames_ctx->device_ctx->hwctx;
+
+     AVFrame *out = NULL;
+     CUresult err;
+     CUcontext dummy;
+     int ret = 0;
+
+     if (s->passthrough)
+         return ff_filter_frame(outlink, in);
+
+     out = av_frame_alloc();
+     if (!out) {
+         ret = AVERROR(ENOMEM);
+         goto fail;
+     }
+
+     /* NPP calls must run with the device's CUDA context current */
+     err = cuCtxPushCurrent(device_hwctx->cuda_ctx);
+     if (err != CUDA_SUCCESS) {
+         ret = AVERROR_UNKNOWN;
+         goto fail;
+     }
+
+     ret = nppscale_scale(ctx, out, in);
+
+     /* always pop the context, even if scaling failed */
+     cuCtxPopCurrent(&dummy);
+     if (ret < 0)
+         goto fail;
+
+     /* bug fix: the SAR was computed before nppscale_scale(), but that
+      * call av_frame_move_ref()s into "out" and then copies props from
+      * "in", silently discarding the adjusted value.  Compute it after
+      * scaling so the display aspect ratio is actually preserved. */
+     av_reduce(&out->sample_aspect_ratio.num, &out->sample_aspect_ratio.den,
+               (int64_t)in->sample_aspect_ratio.num * outlink->h * link->w,
+               (int64_t)in->sample_aspect_ratio.den * outlink->w * link->h,
+               INT_MAX);
+
+     av_frame_free(&in);
+     return ff_filter_frame(outlink, out);
+ fail:
+     av_frame_free(&in);
+     av_frame_free(&out);
+     return ret;
+ }
+
+ /* Option-table helpers: field offset into NPPScaleContext and the
+  * common option flags. */
+ #define OFFSET(x) offsetof(NPPScaleContext, x)
+ #define FLAGS AV_OPT_FLAG_VIDEO_PARAM
+ /* User options: output size expressions, output pixel format ("same"
+  * keeps the input format) and the NPP interpolation algorithm, with
+  * cubic as the default.  The named constants map to NPPI_INTER_* values. */
+ static const AVOption options[] = {
+ { "w", "Output video width", OFFSET(w_expr), AV_OPT_TYPE_STRING, { .str = "iw" }, .flags = FLAGS },
+ { "h", "Output video height", OFFSET(h_expr), AV_OPT_TYPE_STRING, { .str = "ih" }, .flags = FLAGS },
+ { "format", "Output pixel format", OFFSET(format_str), AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS },
+
+ { "interp_algo", "Interpolation algorithm used for resizing", OFFSET(interp_algo), AV_OPT_TYPE_INT, { .i64 = NPPI_INTER_CUBIC }, 0, INT_MAX, FLAGS, "interp_algo" },
+ { "nn", "nearest neighbour", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_NN }, 0, 0, FLAGS, "interp_algo" },
+ { "linear", "linear", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_LINEAR }, 0, 0, FLAGS, "interp_algo" },
+ { "cubic", "cubic", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_CUBIC }, 0, 0, FLAGS, "interp_algo" },
+ { "cubic2p_bspline", "2-parameter cubic (B=1, C=0)", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_CUBIC2P_BSPLINE }, 0, 0, FLAGS, "interp_algo" },
+ { "cubic2p_catmullrom", "2-parameter cubic (B=0, C=1/2)", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_CUBIC2P_CATMULLROM }, 0, 0, FLAGS, "interp_algo" },
+ { "cubic2p_b05c03", "2-parameter cubic (B=1/2, C=3/10)", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_CUBIC2P_B05C03 }, 0, 0, FLAGS, "interp_algo" },
+ { "super", "supersampling", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_SUPER }, 0, 0, FLAGS, "interp_algo" },
+ { "lanczos", "Lanczos", 0, AV_OPT_TYPE_CONST, { .i64 = NPPI_INTER_LANCZOS }, 0, 0, FLAGS, "interp_algo" },
+ { NULL },
+ };
+
+ /* AVClass exposing the option table above through the AVOption API. */
+ static const AVClass nppscale_class = {
+ .class_name = "nppscale",
+ .item_name = av_default_item_name,
+ .option = options,
+ .version = LIBAVUTIL_VERSION_INT,
+ };
+
+ /* Single video input; frames are delivered to nppscale_filter_frame. */
+ static const AVFilterPad nppscale_inputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .filter_frame = nppscale_filter_frame,
+ },
+ { NULL }
+ };
+
+ /* Single video output; size/format are negotiated in
+  * nppscale_config_props. */
+ static const AVFilterPad nppscale_outputs[] = {
+ {
+ .name = "default",
+ .type = AVMEDIA_TYPE_VIDEO,
+ .config_props = nppscale_config_props,
+ },
+ { NULL }
+ };
+
+ /* Filter definition for "scale_npp": CUDA/NPP-accelerated scaling and
+  * pixel-format conversion of hardware frames. */
+ AVFilter ff_vf_scale_npp = {
+ .name = "scale_npp",
+ .description = NULL_IF_CONFIG_SMALL("NVIDIA Performance Primitives video "
+ "scaling and format conversion"),
+
+ .init = nppscale_init,
+ .uninit = nppscale_uninit,
+ .query_formats = nppscale_query_formats,
+
+ .priv_size = sizeof(NPPScaleContext),
+ .priv_class = &nppscale_class,
+
+ .inputs = nppscale_inputs,
+ .outputs = nppscale_outputs,
+ };
More information about the ffmpeg-cvslog mailing list