[FFmpeg-devel] [PATCH]libavfilter: deshake opencl based on comments on 20130402 2nd

Stefano Sabatini stefasab at gmail.com
Tue Apr 2 14:14:26 CEST 2013


On date Tuesday 2013-04-02 19:30:44 +0800, Wei Gao encoded:
> 

> From 2f095498919ee8984097cde50df11d7770c6a315 Mon Sep 17 00:00:00 2001
> From: highgod0401 <highgod0401 at gmail.com>
> Date: Tue, 2 Apr 2013 19:28:09 +0800
> Subject: [PATCH] deshake opencl based on comments on 20130402 2nd
> 
> ---
>  doc/filters.texi                |   6 +-
>  libavfilter/Makefile            |   2 +
>  libavfilter/allfilters.c        |   2 +
>  libavfilter/deshake.h           | 104 +++++++++++++++++++
>  libavfilter/deshake_kernel.h    | 215 ++++++++++++++++++++++++++++++++++++++++
>  libavfilter/deshake_opencl.c    | 180 +++++++++++++++++++++++++++++++++
>  libavfilter/deshake_opencl.h    |  39 ++++++++
>  libavfilter/opencl_allkernels.c |  39 ++++++++
>  libavfilter/opencl_allkernels.h |  29 ++++++
>  libavfilter/vf_deshake.c        | 117 +++++++++++-----------
>  10 files changed, 675 insertions(+), 58 deletions(-)
>  create mode 100644 libavfilter/deshake.h
>  create mode 100644 libavfilter/deshake_kernel.h
>  create mode 100644 libavfilter/deshake_opencl.c
>  create mode 100644 libavfilter/deshake_opencl.h
>  create mode 100644 libavfilter/opencl_allkernels.c
>  create mode 100644 libavfilter/opencl_allkernels.h
> 
> diff --git a/doc/filters.texi b/doc/filters.texi
> index 2c82ac3..401125b 100644
> --- a/doc/filters.texi
> +++ b/doc/filters.texi
> @@ -2504,7 +2504,7 @@ tripod, moving on a vehicle, etc.
>  The filter accepts parameters as a list of @var{key}=@var{value}
>  pairs, separated by ":". If the key of the first options is omitted,
>  the arguments are interpreted according to the syntax
> - at var{x}:@var{y}:@var{w}:@var{h}:@var{rx}:@var{ry}:@var{edge}:@var{blocksize}:@var{contrast}:@var{search}:@var{filename}.
> + at var{x}:@var{y}:@var{w}:@var{h}:@var{rx}:@var{ry}:@var{edge}:@var{blocksize}:@var{contrast}:@var{search}:@var{filename}:@var{opencl}.
>  
>  A description of the accepted parameters follows.
>  
> @@ -2570,6 +2570,10 @@ Default value is @samp{exhaustive}.
>  If set then a detailed log of the motion search is written to the
>  specified file.
>  
> + at item opencl
> +If set to 1, specify using OpenCL capabilities, only available if
> +FFmpeg was configured with @code{--enable-opencl}. Default value is 0.
> +
>  @end table
>  
>  @section drawbox
> diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> index 690b1cb..e865aef 100644
> --- a/libavfilter/Makefile
> +++ b/libavfilter/Makefile
> @@ -40,6 +40,7 @@ OBJS = allfilters.o                                                     \
>         formats.o                                                        \
>         graphdump.o                                                      \
>         graphparser.o                                                    \
> +       opencl_allkernels.o                                              \
>         transform.o                                                      \
>         video.o                                                          \
>  
> @@ -139,6 +140,7 @@ OBJS-$(CONFIG_NOFORMAT_FILTER)               += vf_format.o
>  OBJS-$(CONFIG_NOISE_FILTER)                  += vf_noise.o
>  OBJS-$(CONFIG_NULL_FILTER)                   += vf_null.o
>  OBJS-$(CONFIG_OCV_FILTER)                    += vf_libopencv.o
> +OBJS-$(CONFIG_OPENCL)                        += deshake_opencl.o
>  OBJS-$(CONFIG_OVERLAY_FILTER)                += vf_overlay.o
>  OBJS-$(CONFIG_PAD_FILTER)                    += vf_pad.o
>  OBJS-$(CONFIG_PERMS_FILTER)                  += f_perms.o
> diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> index 45a67e5..4ca180a 100644
> --- a/libavfilter/allfilters.c
> +++ b/libavfilter/allfilters.c
> @@ -21,6 +21,7 @@
>  
>  #include "avfilter.h"
>  #include "config.h"
> +#include "opencl_allkernels.h"
>  
>  
>  #define REGISTER_FILTER(X, x, y)                                        \
> @@ -199,4 +200,5 @@ void avfilter_register_all(void)
>      REGISTER_FILTER_UNCONDITIONAL(vsink_buffer);
>      REGISTER_FILTER_UNCONDITIONAL(af_afifo);
>      REGISTER_FILTER_UNCONDITIONAL(vf_fifo);
> +    ff_opencl_register_filter_kernel_code_all();
>  }
> diff --git a/libavfilter/deshake.h b/libavfilter/deshake.h
> new file mode 100644
> index 0000000..c24090e
> --- /dev/null
> +++ b/libavfilter/deshake.h
> @@ -0,0 +1,104 @@
> +/*
> + * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVFILTER_DESHAKE_H
> +#define AVFILTER_DESHAKE_H
> +
> +#include "config.h"
> +#include "avfilter.h"
> +#include "libavcodec/dsputil.h"
> +#include "transform.h"
> +#if CONFIG_OPENCL
> +#include "libavutil/opencl.h"
> +#endif
> +
> +
> +enum SearchMethod {
> +    EXHAUSTIVE,        ///< Search all possible positions
> +    SMART_EXHAUSTIVE,  ///< Search most possible positions (faster)
> +    SEARCH_COUNT
> +};
> +
> +typedef struct {
> +    int x;             ///< Horizontal shift
> +    int y;             ///< Vertical shift
> +} IntMotionVector;
> +
> +typedef struct {
> +    double x;             ///< Horizontal shift
> +    double y;             ///< Vertical shift
> +} MotionVector;
> +
> +typedef struct {
> +    MotionVector vector;  ///< Motion vector
> +    double angle;         ///< Angle of rotation
> +    double zoom;          ///< Zoom percentage
> +} Transform;
> +
> +#if CONFIG_OPENCL
> +
> +typedef struct {
> +    size_t matrix_size;
> +    float matrix_y[9];
> +    float matrix_uv[9];
> +    cl_mem cl_matrix_y;
> +    cl_mem cl_matrix_uv;
> +    int in_plane_size[8];
> +    int out_plane_size[8];
> +    int plane_num;
> +    cl_mem cl_inbuf;
> +    size_t cl_inbuf_size;
> +    cl_mem cl_outbuf;
> +    size_t cl_outbuf_size;
> +    AVOpenCLKernelEnv kernel_env;
> +} DeshakeOpenclContext;
> +
> +#endif
> +
> +typedef struct {
> +    const AVClass *class;
> +    AVFrame *ref;              ///< Previous frame
> +    int rx;                    ///< Maximum horizontal shift
> +    int ry;                    ///< Maximum vertical shift
> +    int edge;                  ///< Edge fill method
> +    int blocksize;             ///< Size of blocks to compare
> +    int contrast;              ///< Contrast threshold
> +    int search;                ///< Motion search method
> +    AVCodecContext *avctx;
> +    DSPContext c;              ///< Context providing optimized SAD methods
> +    Transform last;            ///< Transform from last frame
> +    int refcount;              ///< Number of reference frames (defines averaging window)
> +    FILE *fp;
> +    Transform avg;
> +    int cw;                    ///< Crop motion search to this box
> +    int ch;
> +    int cx;
> +    int cy;
> +    char *filename;            ///< Motion search detailed log filename
> +    int opencl;
> +#if CONFIG_OPENCL
> +    DeshakeOpenclContext opencl_ctx;
> +#endif
> +    int (* transform)(AVFilterContext *ctx, int width, int height, int cw, int ch,
> +                      const float *matrix_y, const float *matrix_uv, enum InterpolateMethod interpolate,
> +                      enum FillMethod fill, AVFrame *in, AVFrame *out);
> +} DeshakeContext;
> +
> +#endif /* AVFILTER_DESHAKE_H */
> diff --git a/libavfilter/deshake_kernel.h b/libavfilter/deshake_kernel.h
> new file mode 100644
> index 0000000..0adddb6
> --- /dev/null
> +++ b/libavfilter/deshake_kernel.h
> @@ -0,0 +1,215 @@
> +/*
> + * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
> + *
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVFILTER_DESHAKE_KERNEL_H
> +#define AVFILTER_DESHAKE_KERNEL_H
> +
> +#include "libavutil/opencl.h"
> +
> +const char *ff_kernel_deshake_opencl = AV_OPENCL_KERNEL(
> +
> +inline unsigned char pixel(global const unsigned char *src, float x, float y,int w, int h,int stride, unsigned char def)
> +{
> +    return (x < 0 || y < 0 || x >= w || y >= h) ? def : src[(int)x + (int)y * stride];
> +}

> +unsigned char interpolate_nearest(float x, float y, global const unsigned char *src,
> +                        int width, int height, int stride, unsigned char def)
> +{
> +    return pixel(src, (int)(x + 0.5), (int)(y + 0.5), width, height, stride, def);
> +}
> +

> +unsigned char interpolate_bilinear(float x, float y, global const unsigned char *src,
> +                        int width, int height, int stride, unsigned char def)

Nit, here and below:

unsigned char interpolate_bilinear(float x, float y, global const unsigned char *src,
                                   int width, int height, int stride, unsigned char def)

The second line should be aligned to the first argument in the first
line.

> +{
> +    int x_c, x_f, y_c, y_f;
> +    int v1, v2, v3, v4;
> +
> +    if (x < -1 || x > width || y < -1 || y > height) {
> +        return def;
> +    } else {
> +        x_f = (int)x;
> +        x_c = x_f + 1;
> +
> +        y_f = (int)y;
> +        y_c = y_f + 1;
> +
> +        v1 = pixel(src, x_c, y_c, width, height, stride, def);
> +        v2 = pixel(src, x_c, y_f, width, height, stride, def);
> +        v3 = pixel(src, x_f, y_c, width, height, stride, def);
> +        v4 = pixel(src, x_f, y_f, width, height, stride, def);
> +
> +        return (v1*(x - x_f)*(y - y_f) + v2*((x - x_f)*(y_c - y)) +
> +                v3*(x_c - x)*(y - y_f) + v4*((x_c - x)*(y_c - y)));
> +    }
> +}
> +
> +unsigned char interpolate_biquadratic(float x, float y, global const unsigned char *src,
> +                        int width, int height, int stride, unsigned char def)
> +{
> +    int     x_c, x_f, y_c, y_f;
> +    unsigned char v1,  v2,  v3,  v4;
> +    float   f1,  f2,  f3,  f4;
> +
> +    if (x < - 1 || x > width || y < -1 || y > height)
> +        return def;
> +    else {
> +        x_f = (int)x;
> +        x_c = x_f + 1;
> +        y_f = (int)y;
> +        y_c = y_f + 1;
> +
> +        v1 = pixel(src, x_c, y_c, width, height, stride, def);
> +        v2 = pixel(src, x_c, y_f, width, height, stride, def);
> +        v3 = pixel(src, x_f, y_c, width, height, stride, def);
> +        v4 = pixel(src, x_f, y_f, width, height, stride, def);
> +
> +        f1 = 1 - sqrt((x_c - x) * (y_c - y));
> +        f2 = 1 - sqrt((x_c - x) * (y - y_f));
> +        f3 = 1 - sqrt((x - x_f) * (y_c - y));
> +        f4 = 1 - sqrt((x - x_f) * (y - y_f));
> +        return (v1 * f1 + v2 * f2 + v3 * f3 + v4 * f4) / (f1 + f2 + f3 + f4);
> +    }
> +}
> +
> +inline const float clipf(float a, float amin, float amax)
> +{
> +    if      (a < amin) return amin;
> +    else if (a > amax) return amax;
> +    else               return a;
> +}
> +
> +inline int mirror(int v, int m)
> +{
> +    while ((unsigned)v > (unsigned)m) {
> +        v = -v;
> +        if (v < 0)
> +            v += 2 * m;
> +    }
> +    return v;
> +}
> +

> +kernel void avfilter_transform(global  unsigned char *src,
> +                               global  unsigned char *dst,
> +                               global          float *matrix,
> +                               global          float *matrix2,
> +                                                 int interpolate,
> +                                                 int fillmethod,
> +                                                 int src_stride_lu,
> +                                                 int dst_stride_lu,
> +                                                 int src_stride_ch,
> +                                                 int dst_stride_ch,
> +                                                 int height,
> +                                                 int width,
> +                                                 int ch,
> +                                                 int cw)
> +{
> +     int global_id = get_global_id(0);
> +
> +     global unsigned char *dst_y = dst;
> +     global unsigned char *dst_u = dst_y + height * dst_stride_lu;
> +     global unsigned char *dst_v = dst_u + ch * dst_stride_ch;
> +
> +     global unsigned char *src_y = src;
> +     global unsigned char *src_u = src_y + height * src_stride_lu;
> +     global unsigned char *src_v = src_u + ch * src_stride_ch;
> +
> +     global unsigned char *tempdst;
> +     global unsigned char *tempsrc;
> +
> +     int x;
> +     int y;
> +     float x_s;
> +     float y_s;
> +     int tempsrc_stride;
> +     int tempdst_stride;
> +     int temp_height;
> +     int temp_width;
> +     int curpos;

> +     unsigned char def;

needs to be set to 0, like in transform.c...

> +     if (global_id < width*height) {
> +        y = global_id/width;
> +        x = global_id%width;
> +        x_s = x * matrix[0] + y * matrix[1] + matrix[2];
> +        y_s = x * matrix[3] + y * matrix[4] + matrix[5];
> +        tempdst = dst_y;
> +        tempsrc = src_y;
> +        tempsrc_stride = src_stride_lu;
> +        tempdst_stride = dst_stride_lu;
> +        temp_height = height;
> +        temp_width = width;
> +     } else if ((global_id >= width*height)&&(global_id < width*height + ch*cw)) {
> +        y = (global_id - width*height)/cw;
> +        x = (global_id - width*height)%cw;
> +        x_s = x * matrix2[0] + y * matrix2[1] + matrix2[2];
> +        y_s = x * matrix2[3] + y * matrix2[4] + matrix2[5];
> +        tempdst = dst_u;
> +        tempsrc = src_u;
> +        tempsrc_stride = src_stride_ch;
> +        tempdst_stride = dst_stride_ch;
> +        temp_height = height;
> +        temp_width = width;
> +        temp_height = ch;
> +        temp_width = cw;
> +     } else {
> +        y = (global_id - width*height - ch*cw)/cw;
> +        x = (global_id - width*height - ch*cw)%cw;
> +        x_s = x * matrix2[0] + y * matrix2[1] + matrix2[2];
> +        y_s = x * matrix2[3] + y * matrix2[4] + matrix2[5];
> +        tempdst = dst_v;
> +        tempsrc = src_v;
> +        tempsrc_stride = src_stride_ch;
> +        tempdst_stride = dst_stride_ch;
> +        temp_height = ch;
> +        temp_width = cw;
> +     }
> +     curpos = y * tempdst_stride + x;
> +     switch (fillmethod) {

... or you add a specific case:
case 0; //FILL_BLANK
    def = 0;
    break;

> +        case 1: //FILL_ORIGINAL
> +            def = tempsrc[y*tempsrc_stride+x];
> +            break;
> +        case 2: //FILL_CLAMP
> +            y_s = clipf(y_s, 0, temp_height - 1);
> +            x_s = clipf(x_s, 0, temp_width - 1);
> +            def = tempsrc[(int)y_s * tempsrc_stride + (int)x_s];
> +            break;
> +        case 3: //FILL_MIRROR
> +            y_s = mirror(y_s,temp_height - 1);
> +            x_s = mirror(x_s,temp_width - 1);
> +            def = tempsrc[(int)y_s * tempsrc_stride + (int)x_s];
> +            break;

> +         }

weird indent (it should be aligned to the switch()

> +    switch (interpolate) {
> +        case 0: //INTERPOLATE_NEAREST
> +            tempdst[curpos] = interpolate_nearest(x_s, y_s, tempsrc, temp_width, temp_height, tempsrc_stride, def);
> +            break;
> +        case 1: //INTERPOLATE_BILINEAR
> +            tempdst[curpos] = interpolate_bilinear(x_s, y_s, tempsrc, temp_width, temp_height, tempsrc_stride, def);
> +            break;
> +        case 2: //INTERPOLATE_BIQUADRATIC
> +            tempdst[curpos] = interpolate_biquadratic(x_s, y_s, tempsrc, temp_width, temp_height, tempsrc_stride, def);
> +            break;
> +        default:
> +            return;
> +        }
> +}
> +);
> +
> +#endif /* AVFILTER_DESHAKE_KERNEL_H */
> diff --git a/libavfilter/deshake_opencl.c b/libavfilter/deshake_opencl.c
> new file mode 100644
> index 0000000..6ed8a60
> --- /dev/null
> +++ b/libavfilter/deshake_opencl.c
> @@ -0,0 +1,180 @@
> +/*
> + * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * transform input video
> + */
> +
> +#include "libavutil/common.h"
> +#include "libavutil/dict.h"
> +#include "libavutil/pixdesc.h"
> +#include "deshake_opencl.h"
> +
> +#define MATRIX_SIZE 6
> +#define PLANE_NUM 3
> +
> +#define TRANSFORM_OPENCL_CHECK(method, ...)                                                                  \
> +    status = method(__VA_ARGS__);                                                                            \
> +    if (status != CL_SUCCESS) {                                                                              \
> +        av_log(ctx, AV_LOG_ERROR, "error %s %d\n", # method, status);                                        \
> +        return AVERROR_EXTERNAL;                                                                             \
> +    }
> +
> +#define TRANSFORM_OPENCL_SET_KERNEL_ARG(arg_ptr)                                                             \
> +    status = clSetKernelArg((kernel),(arg_no++),(sizeof(arg_ptr)),(void*)(&(arg_ptr)));                      \
> +    if (status != CL_SUCCESS) {                                                                              \
> +        av_log(ctx, AV_LOG_ERROR, "cannot set kernel argument: %d\n", status );                              \
> +        return AVERROR_EXTERNAL;                                                                             \
> +    }
> +
> +int ff_opencl_transform(AVFilterContext *ctx,
> +                        int width, int height, int cw, int ch,
> +                        const float *matrix_y, const float *matrix_uv,
> +                        enum InterpolateMethod interpolate,
> +                        enum FillMethod fill, AVFrame *in, AVFrame *out)
> +{
> +    int arg_no, ret = 0;
> +    const size_t global_work_size = width * height + 2 * ch * cw;
> +    cl_kernel kernel;
> +    cl_int status;
> +    DeshakeContext *deshake = ctx->priv;
> +    ret = av_opencl_buffer_write(deshake->opencl_ctx.cl_matrix_y, (uint8_t *)matrix_y, deshake->opencl_ctx.matrix_size * sizeof(cl_float));
> +    if (ret < 0)
> +        return ret;
> +    ret = av_opencl_buffer_write(deshake->opencl_ctx.cl_matrix_uv, (uint8_t *)matrix_uv, deshake->opencl_ctx.matrix_size * sizeof(cl_float));
> +    if (ret < 0)
> +        return ret;
> +    kernel = deshake->opencl_ctx.kernel_env.kernel;
> +    arg_no = 0;
> +
> +    if((unsigned int)interpolate > INTERPOLATE_BIQUADRATIC) {

nit++: if_((

> +        av_log(ctx, AV_LOG_ERROR, "Selected interpolate method is invalid\n");
> +        return AVERROR(EINVAL);
> +    }
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(deshake->opencl_ctx.cl_inbuf);
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(deshake->opencl_ctx.cl_outbuf);
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(deshake->opencl_ctx.cl_matrix_y);
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(deshake->opencl_ctx.cl_matrix_uv);
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(interpolate);
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(fill);
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(in->linesize[0]);
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(out->linesize[0]);
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(in->linesize[1]);
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(out->linesize[1]);
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(height);
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(width);
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(ch);
> +    TRANSFORM_OPENCL_SET_KERNEL_ARG(cw);
> +    TRANSFORM_OPENCL_CHECK(clEnqueueNDRangeKernel, deshake->opencl_ctx.kernel_env.command_queue, deshake->opencl_ctx.kernel_env.kernel, 1, NULL,
> +                           &global_work_size, NULL, 0, NULL, NULL);
> +    clFinish(deshake->opencl_ctx.kernel_env.command_queue);
> +    ret = av_opencl_buffer_read_image(out->data, deshake->opencl_ctx.out_plane_size,
> +                                      deshake->opencl_ctx.plane_num, deshake->opencl_ctx.cl_outbuf,
> +                                      deshake->opencl_ctx.cl_outbuf_size);
> +    if (ret < 0)
> +        return ret;
> +    return ret;
> +}
> +
> +int ff_opencl_deshake_init(AVFilterContext *ctx)
> +{
> +    int ret = 0;
> +    DeshakeContext *deshake = ctx->priv;
> +    AVDictionary *options = NULL;
> +    av_dict_set(&options, "build_options", "-I.", 0);
> +    ret = av_opencl_init(options, NULL);
> +    av_dict_free(&options);
> +    if (ret < 0)
> +        return ret;
> +    deshake->opencl_ctx.matrix_size = MATRIX_SIZE;
> +    deshake->opencl_ctx.plane_num   = PLANE_NUM;
> +    ret = av_opencl_buffer_create(&deshake->opencl_ctx.cl_matrix_y,
> +        deshake->opencl_ctx.matrix_size*sizeof(cl_float), CL_MEM_READ_ONLY, NULL);
> +    if (ret < 0)
> +        return ret;
> +    ret = av_opencl_buffer_create(&deshake->opencl_ctx.cl_matrix_uv,
> +        deshake->opencl_ctx.matrix_size*sizeof(cl_float), CL_MEM_READ_ONLY, NULL);
> +    if (ret < 0)
> +        return ret;
> +    if (!deshake->opencl_ctx.kernel_env.kernel) {
> +        ret =  av_opencl_create_kernel(&deshake->opencl_ctx.kernel_env, "avfilter_transform");
> +        if (ret < 0) {
> +            av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel for name 'avfilter_transform'\n");
> +            return ret;
> +        }
> +    }
> +    return ret;
> +}
> +
> +void ff_opencl_deshake_uninit(AVFilterContext *ctx)
> +{
> +    DeshakeContext *deshake = ctx->priv;
> +    av_opencl_buffer_release(&deshake->opencl_ctx.cl_inbuf);
> +    av_opencl_buffer_release(&deshake->opencl_ctx.cl_outbuf);
> +    av_opencl_buffer_release(&deshake->opencl_ctx.cl_matrix_y);
> +    av_opencl_buffer_release(&deshake->opencl_ctx.cl_matrix_uv);
> +    av_opencl_release_kernel(&deshake->opencl_ctx.kernel_env);
> +    av_opencl_uninit();
> +}
> +
> +

> +int ff_opencl_deshake_process_inout_buf(AVFilterLink *link, AVFrame *in, AVFrame *out)
> +{

I think it is better if you pass the filter context like you was doing
in your previous patch, and get the link as: link = ctx->inputs[0];

Alternatively you can store the chroma subsampling in the context
during configuration.

> +    int ret = 0;
> +    DeshakeContext *deshake = link->dst->priv;
> +    int chroma_height = -((-link->h) >> av_pix_fmt_desc_get(link->format)->log2_chroma_h);
> +
> +    if ((!deshake->opencl_ctx.cl_inbuf) || (!deshake->opencl_ctx.cl_outbuf)) {
> +        deshake->opencl_ctx.in_plane_size[0]  = (in->linesize[0] * in->height);
> +        deshake->opencl_ctx.in_plane_size[1]  = (in->linesize[1] * chroma_height);
> +        deshake->opencl_ctx.in_plane_size[2]  = (in->linesize[2] * chroma_height);
> +        deshake->opencl_ctx.out_plane_size[0] = (out->linesize[0] * out->height);
> +        deshake->opencl_ctx.out_plane_size[1] = (out->linesize[1] * chroma_height);
> +        deshake->opencl_ctx.out_plane_size[2] = (out->linesize[2] * chroma_height);
> +        deshake->opencl_ctx.cl_inbuf_size  = deshake->opencl_ctx.in_plane_size[0] +
> +                                             deshake->opencl_ctx.in_plane_size[1] +
> +                                             deshake->opencl_ctx.in_plane_size[2];
> +        deshake->opencl_ctx.cl_outbuf_size = deshake->opencl_ctx.out_plane_size[0] +
> +                                             deshake->opencl_ctx.out_plane_size[1] +
> +                                             deshake->opencl_ctx.out_plane_size[2];
> +        if (!deshake->opencl_ctx.cl_inbuf) {
> +            ret = av_opencl_buffer_create(&deshake->opencl_ctx.cl_inbuf,
> +                                            deshake->opencl_ctx.cl_inbuf_size,
> +                                            CL_MEM_READ_ONLY, NULL);
> +            if (ret < 0)
> +                return ret;
> +        }
> +        if (!deshake->opencl_ctx.cl_outbuf) {
> +            ret = av_opencl_buffer_create(&deshake->opencl_ctx.cl_outbuf,
> +                                            deshake->opencl_ctx.cl_outbuf_size,
> +                                            CL_MEM_READ_WRITE, NULL);
> +            if (ret < 0)
> +                return ret;
> +        }
> +    }
> +    ret = av_opencl_buffer_write_image(deshake->opencl_ctx.cl_inbuf,
> +                                 deshake->opencl_ctx.cl_inbuf_size,
> +                                 0, in->data,deshake->opencl_ctx.in_plane_size,
> +                                 deshake->opencl_ctx.plane_num);
> +    if(ret < 0)
> +        return ret;
> +    return ret;
> +}
> diff --git a/libavfilter/deshake_opencl.h b/libavfilter/deshake_opencl.h
> new file mode 100644
> index 0000000..69f6e4e
> --- /dev/null
> +++ b/libavfilter/deshake_opencl.h
> @@ -0,0 +1,39 @@
> +/*
> + * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVFILTER_DESHAKE_OPENCL_H
> +#define AVFILTER_DESHAKE_OPENCL_H
> +
> +#include "deshake.h"
> +
> +int ff_opencl_deshake_init(AVFilterContext *ctx);
> +
> +void ff_opencl_deshake_uninit(AVFilterContext *ctx);
> +
> +int ff_opencl_deshake_process_inout_buf(AVFilterLink *link, AVFrame *in, AVFrame *out);
> +
> +int ff_opencl_transform(AVFilterContext *ctx,
> +                        int width, int height, int cw, int ch,
> +                        const float *matrix_y, const float *matrix_uv,
> +                        enum InterpolateMethod interpolate,
> +                        enum FillMethod fill, AVFrame *in, AVFrame *out);
> +
> +#endif /* AVFILTER_DESHAKE_OPENCL_H */
> +
> diff --git a/libavfilter/opencl_allkernels.c b/libavfilter/opencl_allkernels.c
> new file mode 100644
> index 0000000..021eec2
> --- /dev/null
> +++ b/libavfilter/opencl_allkernels.c
> @@ -0,0 +1,39 @@
> +/*
> + * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "opencl_allkernels.h"
> +#if CONFIG_OPENCL
> +#include "libavutil/opencl.h"
> +#include "deshake_kernel.h"
> +#endif
> +
> +#define OPENCL_REGISTER_KERNEL_CODE(X, x)                                              \
> +    {                                                                                  \
> +        if (CONFIG_##X##_FILTER) {                                                     \
> +            av_opencl_register_kernel_code(ff_kernel_##x##_opencl);                    \
> +        }                                                                              \
> +    }
> +
> +void ff_opencl_register_filter_kernel_code_all(void)
> +{
> + #if CONFIG_OPENCL
> +   OPENCL_REGISTER_KERNEL_CODE(DESHAKE,     deshake);
> + #endif
> +}
> diff --git a/libavfilter/opencl_allkernels.h b/libavfilter/opencl_allkernels.h
> new file mode 100644
> index 0000000..aca02e0
> --- /dev/null
> +++ b/libavfilter/opencl_allkernels.h
> @@ -0,0 +1,29 @@
> +/*
> + * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVFILTER_OPENCL_ALLKERNEL_H
> +#define AVFILTER_OPENCL_ALLKERNEL_H
> +
> +#include "avfilter.h"
> +#include "config.h"
> +
> +void ff_opencl_register_filter_kernel_code_all(void);
> +
> +#endif /* AVFILTER_OPENCL_ALLKERNEL_H */
> diff --git a/libavfilter/vf_deshake.c b/libavfilter/vf_deshake.c
> index 2740bba..56b06a7 100644
> --- a/libavfilter/vf_deshake.c
> +++ b/libavfilter/vf_deshake.c
> @@ -59,55 +59,12 @@
>  #include "libavutil/pixdesc.h"
>  #include "libavcodec/dsputil.h"
>  
> -#include "transform.h"
> +#include "deshake.h"
> +#include "deshake_opencl.h"
>  
>  #define CHROMA_WIDTH(link)  -((-link->w) >> av_pix_fmt_desc_get(link->format)->log2_chroma_w)
>  #define CHROMA_HEIGHT(link) -((-link->h) >> av_pix_fmt_desc_get(link->format)->log2_chroma_h)
>  
> -enum SearchMethod {
> -    EXHAUSTIVE,        ///< Search all possible positions
> -    SMART_EXHAUSTIVE,  ///< Search most possible positions (faster)
> -    SEARCH_COUNT
> -};
> -
> -typedef struct {
> -    int x;             ///< Horizontal shift
> -    int y;             ///< Vertical shift
> -} IntMotionVector;
> -
> -typedef struct {
> -    double x;             ///< Horizontal shift
> -    double y;             ///< Vertical shift
> -} MotionVector;
> -
> -typedef struct {
> -    MotionVector vector;  ///< Motion vector
> -    double angle;         ///< Angle of rotation
> -    double zoom;          ///< Zoom percentage
> -} Transform;
> -
> -typedef struct {
> -    const AVClass *class;
> -    AVFrame *ref;              ///< Previous frame
> -    int rx;                    ///< Maximum horizontal shift
> -    int ry;                    ///< Maximum vertical shift
> -    int edge;                  ///< Edge fill method
> -    int blocksize;             ///< Size of blocks to compare
> -    int contrast;              ///< Contrast threshold
> -    int search;                ///< Motion search method
> -    AVCodecContext *avctx;
> -    DSPContext c;              ///< Context providing optimized SAD methods
> -    Transform last;            ///< Transform from last frame
> -    int refcount;              ///< Number of reference frames (defines averaging window)
> -    FILE *fp;
> -    Transform avg;
> -    int cw;                    ///< Crop motion search to this box
> -    int ch;
> -    int cx;
> -    int cy;
> -    char *filename;            ///< Motion search detailed log filename
> -} DeshakeContext;
> -
>  #define OFFSET(x) offsetof(DeshakeContext, x)
>  #define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
>  
> @@ -129,6 +86,7 @@ static const AVOption deshake_options[] = {
>          { "exhaustive", "exhaustive search",      0, AV_OPT_TYPE_CONST, {.i64=EXHAUSTIVE},       INT_MIN, INT_MAX, FLAGS, "smode" },
>          { "less",       "less exhaustive search", 0, AV_OPT_TYPE_CONST, {.i64=SMART_EXHAUSTIVE}, INT_MIN, INT_MAX, FLAGS, "smode" },
>      { "filename", "set motion search detailed log file name", OFFSET(filename), AV_OPT_TYPE_STRING, {.str=NULL}, .flags = FLAGS },
> +    { "opencl", "use OpenCL filtering capabilities", OFFSET(opencl), AV_OPT_TYPE_INT, {.i64=0}, 0, 1, .flags = FLAGS },
>      { NULL }
>  };
>  
> @@ -360,8 +318,35 @@ static void find_motion(DeshakeContext *deshake, uint8_t *src1, uint8_t *src2,
>      av_free(angles);
>  }
>  
> +static int deshake_transform_c(AVFilterContext *ctx,
> +                                    int width, int height, int cw, int ch,
> +                                    const float *matrix_y, const float *matrix_uv,
> +                                    enum InterpolateMethod interpolate,
> +                                    enum FillMethod fill, AVFrame *in, AVFrame *out)
> +{
> +    int i = 0, ret = 0;
> +    const float *matrixs[3];
> +    int plane_w[3], plane_h[3];
> +    matrixs[0] = matrix_y;
> +    matrixs[1] =  matrixs[2] = matrix_uv;
> +    plane_w[0] = width;
> +    plane_w[1] = plane_w[2] = cw;
> +    plane_h[0] = height;
> +    plane_h[1] = plane_h[2] = ch;
> +
> +    for (i = 0; i < 3; i++) {
> +        // Transform the luma and chroma planes
> +        ret = avfilter_transform(in->data[i], out->data[i], in->linesize[i], out->linesize[i],
> +                                 plane_w[i], plane_h[i], matrixs[i], interpolate, fill);
> +        if (ret < 0)
> +            return ret;
> +    }
> +    return ret;
> +}
> +
>  static av_cold int init(AVFilterContext *ctx, const char *args)
>  {
> +    int ret;
>      DeshakeContext *deshake = ctx->priv;
>  
>      deshake->refcount = 20; // XXX: add to options?
> @@ -379,7 +364,18 @@ static av_cold int init(AVFilterContext *ctx, const char *args)
>          deshake->cw += deshake->cx - (deshake->cx & ~15);
>          deshake->cx &= ~15;
>      }
> +    deshake->transform = deshake_transform_c;
> +    if (!CONFIG_OPENCL && deshake->opencl) {
> +        av_log(ctx, AV_LOG_ERROR, "OpenCL support was not enabled in this build, cannot be selected\n");
> +        return AVERROR(EINVAL);
> +    }
>  
> +    if (deshake->opencl && CONFIG_OPENCL) {
> +        deshake->transform = ff_opencl_transform;
> +        ret = ff_opencl_deshake_init(ctx);
> +        if (ret < 0)
> +            return ret;
> +    }
>      av_log(ctx, AV_LOG_VERBOSE, "cx: %d, cy: %d, cw: %d, ch: %d, rx: %d, ry: %d, edge: %d blocksize: %d contrast: %d search: %d\n",
>             deshake->cx, deshake->cy, deshake->cw, deshake->ch,
>             deshake->rx, deshake->ry, deshake->edge, deshake->blocksize * 2, deshake->contrast, deshake->search);
> @@ -419,7 +415,9 @@ static int config_props(AVFilterLink *link)
>  static av_cold void uninit(AVFilterContext *ctx)
>  {
>      DeshakeContext *deshake = ctx->priv;
> -
> +    if (deshake->opencl && CONFIG_OPENCL) {
> +        ff_opencl_deshake_uninit(ctx);
> +    }
>      av_frame_free(&deshake->ref);
>      if (deshake->fp)
>          fclose(deshake->fp);
> @@ -434,9 +432,10 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
>      AVFilterLink *outlink = link->dst->outputs[0];
>      AVFrame *out;
>      Transform t = {{0},0}, orig = {{0},0};
> -    float matrix[9];
> +    float matrix_y[9], matrix_uv[9];
>      float alpha = 2.0 / deshake->refcount;
>      char tmp[256];
> +    int ret = 0;
>  
>      out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
>      if (!out) {
> @@ -445,6 +444,12 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
>      }
>      av_frame_copy_props(out, in);
>  
> +    if (deshake->opencl && CONFIG_OPENCL) {
> +        ret = ff_opencl_deshake_process_inout_buf(link,in, out);
> +        if (ret < 0)
> +            return ret;
> +    }
> +
>      if (deshake->cx < 0 || deshake->cy < 0 || deshake->cw < 0 || deshake->ch < 0) {
>          // Find the most likely global motion for the current frame
>          find_motion(deshake, (deshake->ref == NULL) ? in->data[0] : deshake->ref->data[0], in->data[0], link->w, link->h, in->linesize[0], &t);
> @@ -517,21 +522,19 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
>      deshake->last.zoom = t.zoom;
>  
>      // Generate a luma transformation matrix
> -    avfilter_get_matrix(t.vector.x, t.vector.y, t.angle, 1.0 + t.zoom / 100.0, matrix);
> -
> -    // Transform the luma plane
> -    avfilter_transform(in->data[0], out->data[0], in->linesize[0], out->linesize[0], link->w, link->h, matrix, INTERPOLATE_BILINEAR, deshake->edge);
> -
> +    avfilter_get_matrix(t.vector.x, t.vector.y, t.angle, 1.0 + t.zoom / 100.0, matrix_y);
>      // Generate a chroma transformation matrix
> -    avfilter_get_matrix(t.vector.x / (link->w / CHROMA_WIDTH(link)), t.vector.y / (link->h / CHROMA_HEIGHT(link)), t.angle, 1.0 + t.zoom / 100.0, matrix);
> -
> -    // Transform the chroma planes
> -    avfilter_transform(in->data[1], out->data[1], in->linesize[1], out->linesize[1], CHROMA_WIDTH(link), CHROMA_HEIGHT(link), matrix, INTERPOLATE_BILINEAR, deshake->edge);
> -    avfilter_transform(in->data[2], out->data[2], in->linesize[2], out->linesize[2], CHROMA_WIDTH(link), CHROMA_HEIGHT(link), matrix, INTERPOLATE_BILINEAR, deshake->edge);
> +    avfilter_get_matrix(t.vector.x / (link->w / CHROMA_WIDTH(link)), t.vector.y / (link->h / CHROMA_HEIGHT(link)), t.angle, 1.0 + t.zoom / 100.0, matrix_uv);
> +    // Transform the luma and chroma planes
> +    ret = deshake->transform(link->dst, link->w, link->h, CHROMA_WIDTH(link), CHROMA_HEIGHT(link),
> +                             matrix_y, matrix_uv, INTERPOLATE_BILINEAR, deshake->edge, in, out);
>  
>      // Cleanup the old reference frame
>      av_frame_free(&deshake->ref);
>  
> +    if (ret < 0)
> +        return ret;
> +
>      // Store the current frame as the reference frame for calculating the
>      // motion of the next frame
>      deshake->ref = in;

Looks good to me otherwise, thank you.

BTW we should add some tests to FATE for deshake, with and without
OpenCL support.
-- 
FFmpeg = Fostering and Funny Mind-dumbing Purposeless Extreme Goblin


More information about the ffmpeg-devel mailing list