[FFmpeg-devel] [PATCH] avfilter: add OpenCL scale filter

Tue Mar 27 16:18:44 EEST 2018

On 27 March 2018 at 05:48, Gabriel Machado <gabriel_machado at live.com> wrote:

> From: Gabriel Machado <gabriel_machado at live.com>
>
> Some scaling filters implemented as OpenCL kernels. Can be used as:
>
> scale_opencl=<width>:<height>:flags=<filter>
> where <filter> can be `neighbor', `bilinear', `bicubic' or `fast_bicubic'
>
> This is an initial draft, there's still a long way to go in terms of
> completeness, configurability and performance.
>
> ---
>  configure                     |   1 +
>  libavfilter/Makefile          |   1 +
>  libavfilter/allfilters.c      |   1 +
>  libavfilter/opencl/scale.cl   | 165 ++++++++++++++++++++++++
>  libavfilter/opencl_source.h   |   1 +
>  libavfilter/vf_scale_opencl.c | 289 ++++++++++++++++++++++++++++++++++++++++++
>  6 files changed, 458 insertions(+)
>  create mode 100644 libavfilter/opencl/scale.cl
>  create mode 100644 libavfilter/vf_scale_opencl.c
>
> diff --git a/configure b/configure
> index 5ccf3ce..4007ee8 100755
> --- a/configure
> +++ b/configure
> @@ -2821,6 +2821,7 @@ v4l2_m2m_deps_any="linux_videodev2_h"
>
>  hwupload_cuda_filter_deps="ffnvcodec"
>  scale_npp_filter_deps="ffnvcodec libnpp"
> +scale_opencl_filter_deps="opencl"
>  scale_cuda_filter_deps="cuda_sdk"
>  thumbnail_cuda_filter_deps="cuda_sdk"
>
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index a90ca30..6303cbd 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -302,6 +302,7 @@ OBJS-$(CONFIG_SAB_FILTER)                    += vf_sab.o
>  OBJS-$(CONFIG_SCALE_FILTER)                  += vf_scale.o scale.o
>  OBJS-$(CONFIG_SCALE_CUDA_FILTER)             += vf_scale_cuda.o vf_scale_cuda.ptx.o
>  OBJS-$(CONFIG_SCALE_NPP_FILTER)              += vf_scale_npp.o scale.o
> +OBJS-$(CONFIG_SCALE_OPENCL_FILTER)           += vf_scale_opencl.o opencl.o opencl/scale.o
>  OBJS-$(CONFIG_SCALE_QSV_FILTER)              += vf_scale_qsv.o
>  OBJS-$(CONFIG_SCALE_VAAPI_FILTER)            += vf_scale_vaapi.o scale.o vaapi_vpp.o
>  OBJS-$(CONFIG_SCALE2REF_FILTER)              += vf_scale.o scale.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index 1cf1340..3185b17 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -309,6 +309,7 @@ static void register_all(void)
>      REGISTER_FILTER(SCALE,          scale,          vf);
>      REGISTER_FILTER(SCALE_CUDA,     scale_cuda,     vf);
>      REGISTER_FILTER(SCALE_NPP,      scale_npp,      vf);
> +    REGISTER_FILTER(SCALE_OPENCL,   scale_opencl,   vf);
>      REGISTER_FILTER(SCALE_QSV,      scale_qsv,      vf);
>      REGISTER_FILTER(SCALE_VAAPI,    scale_vaapi,    vf);
>      REGISTER_FILTER(SCALE2REF,      scale2ref,      vf);
> diff --git a/libavfilter/opencl/scale.cl b/libavfilter/opencl/scale.cl
> new file mode 100644
> index 0000000..b0e6cb2
> --- /dev/null
> +++ b/libavfilter/opencl/scale.cl
> @@ -0,0 +1,165 @@
> +/*
> + * Copyright (c) 2018 Gabriel Machado
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +__kernel void neighbor(__write_only image2d_t dst,
> +                       __read_only  image2d_t src)
> +{
> +    const sampler_t sampler = (CLK_NORMALIZED_COORDS_TRUE |
> +                               CLK_ADDRESS_CLAMP_TO_EDGE |
> +                               CLK_FILTER_NEAREST);
> +
> +    int2 coord = {get_global_id(0), get_global_id(1)};
> +    int2 size = {get_global_size(0), get_global_size(1)};
> +
> +    float2 pos = (convert_float2(coord) + 0.5) / convert_float2(size);
> +
> +    float4 c = read_imagef(src, sampler, pos);
> +    write_imagef(dst, coord, c);
> +}
> +
> +__kernel void bilinear(__write_only image2d_t dst,
> +                       __read_only  image2d_t src)
> +{
> +    const sampler_t sampler = (CLK_NORMALIZED_COORDS_TRUE |
> +                               CLK_ADDRESS_CLAMP_TO_EDGE |
> +                               CLK_FILTER_LINEAR);
> +
> +    int2 coord = {get_global_id(0), get_global_id(1)};
> +    int2 size = {get_global_size(0), get_global_size(1)};
> +
> +    float2 pos = (convert_float2(coord) + 0.5) / convert_float2(size);
> +
> +    float4 c = read_imagef(src, sampler, pos);
> +    write_imagef(dst, coord, c);
> +}
> +
> +// https://developer.nvidia.com/gpugems/GPUGems/gpugems_ch24.html
> +float MitchellNetravali(float x, float B, float C)
> +{
> +    float t = fabs(x);
> +    float tt = t*t;
> +    float ttt = tt*t;
> +
> +    if (t < 1) {
> +        return ((12 - 9 * B - 6 * C) * ttt +
> +                (-18 + 12 * B + 6 * C) * tt + (6 - 2 * B)) / 6;
> +    } else if ((t >= 1) && (t < 2)) {
> +        return ((-B - 6 * C) * ttt +
> +                (6 * B + 30 * C) * tt + (-12 * B - 48 * C) *
> +                t + (8 * B + 24 * C)) / 6;
> +    } else {
> +        return 0;
> +    }
> +}
>

The license of this code is unclear, so I don't think you can use it.
Moreover, it comes from a book.

> +
> +float4 cubic(float4 c0, float4 c1, float4 c2, float4 c3, float t)
> +{
> +    float B = 0, C = 0.6; // libswscale default
> +    float a = MitchellNetravali(t + 1, B, C);
> +    float b = MitchellNetravali(t, B, C);
> +    float c = MitchellNetravali(1 - t, B, C);
> +    float d = MitchellNetravali(2 - t, B, C);
> +    return a*c0 + b*c1 + c*c2 + d*c3;
> +}
> +
> +__kernel void bicubic(__write_only image2d_t dst,
> +                      __read_only  image2d_t src)
> +{
> +    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
> +                               CLK_ADDRESS_CLAMP_TO_EDGE |
> +                               CLK_FILTER_NEAREST);
> +
> +    int2 dst_coord = {get_global_id(0), get_global_id(1)};
> +
> +    float2 dst_size = {get_global_size(0), get_global_size(1)};
> +    float2 src_size = convert_float2(get_image_dim(src));
> +
> +    float2 uv = convert_float2(dst_coord) / dst_size;
> +
> +    float2 src_pos = uv * convert_float2(src_size) - 0.5;
> +
> +    float2 src_coordf;
> +    float2 t = fract(src_pos, &src_coordf);
> +    int2 src_coord = convert_int2(src_coordf);
> +
> +#define TEX(x,y) read_imagef(src, sampler, src_coord + (int2){x,y})
> +    float4 col = cubic(cubic(TEX(-1,-1), TEX(0,-1), TEX(1,-1), TEX(2,-1), t.x),
> +                       cubic(TEX(-1, 0), TEX(0, 0), TEX(1, 0), TEX(2, 0), t.x),
> +                       cubic(TEX(-1, 1), TEX(0, 1), TEX(1, 1), TEX(2, 1), t.x),
> +                       cubic(TEX(-1, 2), TEX(0, 2), TEX(1, 2), TEX(2, 2), t.x),
> +                       t.y);
> +#undef TEX
> +
> +    write_imagef(dst, dst_coord, col);
> +}
> +
> +// https://www.shadertoy.com/view/4df3Dn
> +// 4x4 bicubic filter using 4 bilinear texture lookups
> +// cubic B-spline basis functions
> +float w0(float a) { return (1.0/6.0)*(a*(a*(-a + 3.0) - 3.0) + 1.0); }
> +float w1(float a) { return (1.0/6.0)*(a*a*(3.0*a - 6.0) + 4.0); }
> +float w2(float a) { return (1.0/6.0)*(a*(a*(-3.0*a + 3.0) + 3.0) + 1.0); }
> +float w3(float a) { return (1.0/6.0)*(a*a*a); }
>

This code has no license, so we can't use it: Shadertoy has no explicit license.

Moreover, the whole filter is incorrectly designed. Take a look at what mpv
does: it has no explicit per-algorithm scaling functions.