FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
vf_unsharp_opencl.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include "libavutil/common.h"
20 #include "libavutil/imgutils.h"
21 #include "libavutil/mem.h"
22 #include "libavutil/opt.h"
23 #include "libavutil/pixdesc.h"
24 
25 #include "avfilter.h"
26 #include "internal.h"
27 #include "opencl.h"
28 #include "opencl_source.h"
29 #include "video.h"
30 
31 #define MAX_DIAMETER 23
32 
33 typedef struct UnsharpOpenCLContext {
35 
37  cl_kernel kernel;
38  cl_command_queue command_queue;
39 
40  float luma_size_x;
41  float luma_size_y;
42  float luma_amount;
46 
47  int global;
48 
49  int nb_planes;
50  struct {
53 
54  cl_mem matrix;
55  cl_mem coef_x;
56  cl_mem coef_y;
57 
58  cl_int size_x;
59  cl_int size_y;
60  cl_float amount;
61  cl_float threshold;
62  } plane[4];
64 
65 
67 {
68  UnsharpOpenCLContext *ctx = avctx->priv;
69  cl_int cle;
70  int err;
71 
73  if (err < 0)
74  goto fail;
75 
76  ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
77  ctx->ocf.hwctx->device_id,
78  0, &cle);
79  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL "
80  "command queue %d.\n", cle);
81 
82  // Use global kernel if mask size will be too big for the local store..
83  ctx->global = (ctx->luma_size_x > 17.0f ||
84  ctx->luma_size_y > 17.0f ||
85  ctx->chroma_size_x > 17.0f ||
86  ctx->chroma_size_y > 17.0f);
87 
88  ctx->kernel = clCreateKernel(ctx->ocf.program,
89  ctx->global ? "unsharp_global"
90  : "unsharp_local", &cle);
91  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel %d.\n", cle);
92 
93  ctx->initialised = 1;
94  return 0;
95 
96 fail:
97  if (ctx->command_queue)
98  clReleaseCommandQueue(ctx->command_queue);
99  if (ctx->kernel)
100  clReleaseKernel(ctx->kernel);
101  return err;
102 }
103 
105 {
106  UnsharpOpenCLContext *ctx = avctx->priv;
107  const AVPixFmtDescriptor *desc;
108  float *matrix;
109  double val, sum;
110  cl_int cle;
111  cl_mem buffer;
112  size_t matrix_bytes;
113  float diam_x, diam_y, amount;
114  int err, p, x, y, size_x, size_y;
115 
117 
118  ctx->nb_planes = 0;
119  for (p = 0; p < desc->nb_components; p++)
120  ctx->nb_planes = FFMAX(ctx->nb_planes, desc->comp[p].plane + 1);
121 
122  for (p = 0; p < ctx->nb_planes; p++) {
123  if (p == 0 || (desc->flags & AV_PIX_FMT_FLAG_RGB)) {
124  diam_x = ctx->luma_size_x;
125  diam_y = ctx->luma_size_y;
126  amount = ctx->luma_amount;
127  } else {
128  diam_x = ctx->chroma_size_x;
129  diam_y = ctx->chroma_size_y;
130  amount = ctx->chroma_amount;
131  }
132  size_x = (int)ceil(diam_x) | 1;
133  size_y = (int)ceil(diam_y) | 1;
134  matrix_bytes = size_x * size_y * sizeof(float);
135 
136  matrix = av_malloc(matrix_bytes);
137  if (!matrix) {
138  err = AVERROR(ENOMEM);
139  goto fail;
140  }
141 
142  sum = 0.0;
143  for (x = 0; x < size_x; x++) {
144  double dx = (double)(x - size_x / 2) / diam_x;
145  sum += ctx->plane[p].blur_x[x] = exp(-16.0 * (dx * dx));
146  }
147  for (x = 0; x < size_x; x++)
148  ctx->plane[p].blur_x[x] /= sum;
149 
150  sum = 0.0;
151  for (y = 0; y < size_y; y++) {
152  double dy = (double)(y - size_y / 2) / diam_y;
153  sum += ctx->plane[p].blur_y[y] = exp(-16.0 * (dy * dy));
154  }
155  for (y = 0; y < size_y; y++)
156  ctx->plane[p].blur_y[y] /= sum;
157 
158  for (y = 0; y < size_y; y++) {
159  for (x = 0; x < size_x; x++) {
160  val = ctx->plane[p].blur_x[x] * ctx->plane[p].blur_y[y];
161  matrix[y * size_x + x] = val;
162  }
163  }
164 
165  if (ctx->global) {
166  buffer = clCreateBuffer(ctx->ocf.hwctx->context,
167  CL_MEM_READ_ONLY |
168  CL_MEM_COPY_HOST_PTR |
169  CL_MEM_HOST_NO_ACCESS,
170  matrix_bytes, matrix, &cle);
171  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create matrix buffer: "
172  "%d.\n", cle);
173  ctx->plane[p].matrix = buffer;
174  } else {
175  buffer = clCreateBuffer(ctx->ocf.hwctx->context,
176  CL_MEM_READ_ONLY |
177  CL_MEM_COPY_HOST_PTR |
178  CL_MEM_HOST_NO_ACCESS,
179  sizeof(ctx->plane[p].blur_x),
180  ctx->plane[p].blur_x, &cle);
181  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create x-coef buffer: "
182  "%d.\n", cle);
183  ctx->plane[p].coef_x = buffer;
184 
185  buffer = clCreateBuffer(ctx->ocf.hwctx->context,
186  CL_MEM_READ_ONLY |
187  CL_MEM_COPY_HOST_PTR |
188  CL_MEM_HOST_NO_ACCESS,
189  sizeof(ctx->plane[p].blur_y),
190  ctx->plane[p].blur_y, &cle);
191  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create y-coef buffer: "
192  "%d.\n", cle);
193  ctx->plane[p].coef_y = buffer;
194  }
195 
196  av_freep(&matrix);
197 
198  ctx->plane[p].size_x = size_x;
199  ctx->plane[p].size_y = size_y;
200  ctx->plane[p].amount = amount;
201  }
202 
203  err = 0;
204 fail:
205  av_freep(&matrix);
206  return err;
207 }
208 
210 {
211  AVFilterContext *avctx = inlink->dst;
212  AVFilterLink *outlink = avctx->outputs[0];
213  UnsharpOpenCLContext *ctx = avctx->priv;
214  AVFrame *output = NULL;
215  cl_int cle;
216  size_t global_work[2];
217  size_t local_work[2];
218  cl_mem src, dst;
219  int err, p;
220 
221  av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
222  av_get_pix_fmt_name(input->format),
223  input->width, input->height, input->pts);
224 
225  if (!input->hw_frames_ctx)
226  return AVERROR(EINVAL);
227 
228  if (!ctx->initialised) {
229  err = unsharp_opencl_init(avctx);
230  if (err < 0)
231  goto fail;
232 
234  if (err < 0)
235  goto fail;
236  }
237 
238  output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
239  if (!output) {
240  err = AVERROR(ENOMEM);
241  goto fail;
242  }
243 
244  for (p = 0; p < FF_ARRAY_ELEMS(output->data); p++) {
245  src = (cl_mem) input->data[p];
246  dst = (cl_mem)output->data[p];
247 
248  if (!dst)
249  break;
250 
251  CL_SET_KERNEL_ARG(ctx->kernel, 0, cl_mem, &dst);
252  CL_SET_KERNEL_ARG(ctx->kernel, 1, cl_mem, &src);
253  CL_SET_KERNEL_ARG(ctx->kernel, 2, cl_int, &ctx->plane[p].size_x);
254  CL_SET_KERNEL_ARG(ctx->kernel, 3, cl_int, &ctx->plane[p].size_y);
255  CL_SET_KERNEL_ARG(ctx->kernel, 4, cl_float, &ctx->plane[p].amount);
256 
257  if (ctx->global) {
258  CL_SET_KERNEL_ARG(ctx->kernel, 5, cl_mem, &ctx->plane[p].matrix);
259  } else {
260  CL_SET_KERNEL_ARG(ctx->kernel, 5, cl_mem, &ctx->plane[p].coef_x);
261  CL_SET_KERNEL_ARG(ctx->kernel, 6, cl_mem, &ctx->plane[p].coef_y);
262  }
263 
264  err = ff_opencl_filter_work_size_from_image(avctx, global_work, output, p,
265  ctx->global ? 0 : 16);
266  if (err < 0)
267  goto fail;
268 
269  local_work[0] = 16;
270  local_work[1] = 16;
271 
272  av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
273  "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
274  p, global_work[0], global_work[1]);
275 
276  cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
277  global_work, ctx->global ? NULL : local_work,
278  0, NULL, NULL);
279  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue kernel: %d.\n", cle);
280  }
281 
282  cle = clFinish(ctx->command_queue);
283  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle);
284 
285  err = av_frame_copy_props(output, input);
286  if (err < 0)
287  goto fail;
288 
289  av_frame_free(&input);
290 
291  av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
292  av_get_pix_fmt_name(output->format),
293  output->width, output->height, output->pts);
294 
295  return ff_filter_frame(outlink, output);
296 
297 fail:
298  clFinish(ctx->command_queue);
299  av_frame_free(&input);
300  av_frame_free(&output);
301  return err;
302 }
303 
305 {
306  UnsharpOpenCLContext *ctx = avctx->priv;
307  cl_int cle;
308  int i;
309 
310  for (i = 0; i < ctx->nb_planes; i++) {
311  if (ctx->plane[i].matrix)
312  clReleaseMemObject(ctx->plane[i].matrix);
313  if (ctx->plane[i].coef_x)
314  clReleaseMemObject(ctx->plane[i].coef_x);
315  if (ctx->plane[i].coef_y)
316  clReleaseMemObject(ctx->plane[i].coef_y);
317  }
318 
319  if (ctx->kernel) {
320  cle = clReleaseKernel(ctx->kernel);
321  if (cle != CL_SUCCESS)
322  av_log(avctx, AV_LOG_ERROR, "Failed to release "
323  "kernel: %d.\n", cle);
324  }
325 
326  if (ctx->command_queue) {
327  cle = clReleaseCommandQueue(ctx->command_queue);
328  if (cle != CL_SUCCESS)
329  av_log(avctx, AV_LOG_ERROR, "Failed to release "
330  "command queue: %d.\n", cle);
331  }
332 
334 }
335 
336 #define OFFSET(x) offsetof(UnsharpOpenCLContext, x)
337 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
339  { "luma_msize_x", "Set luma mask horizontal diameter (pixels)",
340  OFFSET(luma_size_x), AV_OPT_TYPE_FLOAT,
341  { .dbl = 5.0 }, 1, MAX_DIAMETER, FLAGS },
342  { "lx", "Set luma mask horizontal diameter (pixels)",
343  OFFSET(luma_size_x), AV_OPT_TYPE_FLOAT,
344  { .dbl = 5.0 }, 1, MAX_DIAMETER, FLAGS },
345  { "luma_msize_y", "Set luma mask vertical diameter (pixels)",
346  OFFSET(luma_size_y), AV_OPT_TYPE_FLOAT,
347  { .dbl = 5.0 }, 1, MAX_DIAMETER, FLAGS },
348  { "ly", "Set luma mask vertical diameter (pixels)",
349  OFFSET(luma_size_y), AV_OPT_TYPE_FLOAT,
350  { .dbl = 5.0 }, 1, MAX_DIAMETER, FLAGS },
351  { "luma_amount", "Set luma amount (multiplier)",
352  OFFSET(luma_amount), AV_OPT_TYPE_FLOAT,
353  { .dbl = 1.0 }, -10, 10, FLAGS },
354  { "la", "Set luma amount (multiplier)",
355  OFFSET(luma_amount), AV_OPT_TYPE_FLOAT,
356  { .dbl = 1.0 }, -10, 10, FLAGS },
357 
358  { "chroma_msize_x", "Set chroma mask horizontal diameter (pixels after subsampling)",
359  OFFSET(chroma_size_x), AV_OPT_TYPE_FLOAT,
360  { .dbl = 5.0 }, 1, MAX_DIAMETER, FLAGS },
361  { "cx", "Set chroma mask horizontal diameter (pixels after subsampling)",
362  OFFSET(chroma_size_x), AV_OPT_TYPE_FLOAT,
363  { .dbl = 5.0 }, 1, MAX_DIAMETER, FLAGS },
364  { "chroma_msize_y", "Set chroma mask vertical diameter (pixels after subsampling)",
365  OFFSET(chroma_size_y), AV_OPT_TYPE_FLOAT,
366  { .dbl = 5.0 }, 1, MAX_DIAMETER, FLAGS },
367  { "cy", "Set chroma mask vertical diameter (pixels after subsampling)",
368  OFFSET(chroma_size_y), AV_OPT_TYPE_FLOAT,
369  { .dbl = 5.0 }, 1, MAX_DIAMETER, FLAGS },
370  { "chroma_amount", "Set chroma amount (multiplier)",
371  OFFSET(chroma_amount), AV_OPT_TYPE_FLOAT,
372  { .dbl = 0.0 }, -10, 10, FLAGS },
373  { "ca", "Set chroma amount (multiplier)",
374  OFFSET(chroma_amount), AV_OPT_TYPE_FLOAT,
375  { .dbl = 0.0 }, -10, 10, FLAGS },
376 
377  { NULL }
378 };
379 
380 AVFILTER_DEFINE_CLASS(unsharp_opencl);
381 
383  {
384  .name = "default",
385  .type = AVMEDIA_TYPE_VIDEO,
386  .filter_frame = &unsharp_opencl_filter_frame,
387  .config_props = &ff_opencl_filter_config_input,
388  },
389  { NULL }
390 };
391 
393  {
394  .name = "default",
395  .type = AVMEDIA_TYPE_VIDEO,
396  .config_props = &ff_opencl_filter_config_output,
397  },
398  { NULL }
399 };
400 
402  .name = "unsharp_opencl",
403  .description = NULL_IF_CONFIG_SMALL("Apply unsharp mask to input video"),
404  .priv_size = sizeof(UnsharpOpenCLContext),
405  .priv_class = &unsharp_opencl_class,
409  .inputs = unsharp_opencl_inputs,
410  .outputs = unsharp_opencl_outputs,
411  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
412 };
int plane
Which of the 4 planes contains the component.
Definition: pixdesc.h:35
#define NULL
Definition: coverity.c:32
const char const char void * val
Definition: avisynth_c.h:771
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...
Definition: internal.h:385
AVFilter ff_vf_unsharp_opencl
static av_cold void unsharp_opencl_uninit(AVFilterContext *avctx)
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2406
This structure describes decoded (raw) audio or video data.
Definition: frame.h:218
AVOption.
Definition: opt.h:246
int ff_opencl_filter_work_size_from_image(AVFilterContext *avctx, size_t *work_size, AVFrame *frame, int plane, int block_alignment)
Find the work size needed needed for a given plane of an image.
Definition: opencl.c:278
int ff_opencl_filter_config_input(AVFilterLink *inlink)
Check that the input link contains a suitable hardware frames context and extract the device from it...
Definition: opencl.c:60
int ff_opencl_filter_query_formats(AVFilterContext *avctx)
Return that all inputs and outputs support only AV_PIX_FMT_OPENCL.
Definition: opencl.c:28
misc image utilities
Main libavfilter public API header.
Memory handling functions.
const char * desc
Definition: nvenc.c:65
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:35
static const AVFilterPad unsharp_opencl_outputs[]
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
Definition: video.c:99
static int unsharp_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
#define src
Definition: vp8dsp.c:254
#define MAX_DIAMETER
int ff_opencl_filter_init(AVFilterContext *avctx)
Initialise an OpenCL filter context.
Definition: opencl.c:147
AVOpenCLDeviceContext * hwctx
Definition: opencl.h:40
AVBufferRef * hw_frames_ctx
For hwaccel-format frames, this should be a reference to the AVHWFramesContext describing the frame...
Definition: frame.h:556
const char * name
Pad name.
Definition: internal.h:60
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1080
AVComponentDescriptor comp[4]
Parameters that describe how pixels are packed.
Definition: pixdesc.h:117
#define av_cold
Definition: attributes.h:82
#define av_malloc(s)
static av_cold int uninit(AVCodecContext *avctx)
Definition: crystalhd.c:279
AVOptions.
static const AVOption unsharp_opencl_options[]
cl_device_id device_id
The primary device ID of the device.
cl_command_queue command_queue
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:311
OpenCLFilterContext ocf
int ff_opencl_filter_config_output(AVFilterLink *outlink)
Create a suitable hardware frames context for the output.
Definition: opencl.c:96
#define av_log(a,...)
A filter pad used for either input or output.
Definition: internal.h:54
#define OFFSET(x)
int width
Definition: frame.h:276
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
static int unsharp_opencl_make_filter_params(AVFilterContext *avctx)
#define AVERROR(e)
Definition: error.h:43
#define AV_PIX_FMT_FLAG_RGB
The pixel format contains RGB-like data (as opposed to YUV/grayscale).
Definition: pixdesc.h:148
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:202
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:186
void * priv
private data for use by the filter
Definition: avfilter.h:353
#define AV_LOG_DEBUG
Stuff which is only useful for libav* developers.
Definition: log.h:197
const char * ff_opencl_source_unsharp
#define FFMAX(a, b)
Definition: common.h:94
#define fail()
Definition: checkasm.h:117
int8_t exp
Definition: eval.c:72
enum AVPixelFormat output_format
Definition: opencl.h:44
uint64_t flags
Combination of AV_PIX_FMT_FLAG_...
Definition: pixdesc.h:106
uint8_t nb_components
The number of components each pixel has, (1-4)
Definition: pixdesc.h:83
float blur_y[MAX_DIAMETER]
struct UnsharpOpenCLContext::@234 plane[4]
AVFormatContext * ctx
Definition: movenc.c:48
static const AVFilterPad inputs[]
Definition: af_acontrast.c:193
static const AVFilterPad outputs[]
Definition: af_acontrast.c:203
#define FF_ARRAY_ELEMS(a)
int format
format of the frame, -1 if unknown or unset Values correspond to enum AVPixelFormat for video frames...
Definition: frame.h:291
#define CL_SET_KERNEL_ARG(kernel, arg_num, type, arg)
set argument to specific Kernel.
Definition: opencl.h:55
#define FLAGS
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:81
AVFILTER_DEFINE_CLASS(unsharp_opencl)
float blur_x[MAX_DIAMETER]
Filter definition.
Definition: avfilter.h:144
const char * name
Filter name.
Definition: avfilter.h:148
static int unsharp_opencl_init(AVFilterContext *avctx)
AVFilterLink ** outputs
array of pointers to output links
Definition: avfilter.h:350
#define CL_FAIL_ON_ERROR(errcode,...)
A helper macro to handle OpenCL errors.
Definition: opencl.h:68
#define SIZE_SPECIFIER
Definition: internal.h:262
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:232
int
static int query_formats(AVFilterContext *ctx)
Definition: aeval.c:244
common internal and external API header
static const AVFilterPad unsharp_opencl_inputs[]
void ff_opencl_filter_uninit(AVFilterContext *avctx)
Uninitialise an OpenCL filter context.
Definition: opencl.c:156
cl_context context
The OpenCL context which will contain all operations and frames on this device.
An instance of a filter.
Definition: avfilter.h:338
int height
Definition: frame.h:276
#define av_freep(p)
cl_program program
Definition: opencl.h:42
int ff_opencl_filter_load_program(AVFilterContext *avctx, const char **program_source_array, int nb_strings)
Load a new OpenCL program from strings in memory.
Definition: opencl.c:171
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
Definition: pixdesc.c:2322
internal API functions
for(j=16;j >0;--j)
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:652
GLuint buffer
Definition: opengl_enc.c:102