FFmpeg
vf_unsharp_opencl.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include "libavutil/common.h"
20 #include "libavutil/imgutils.h"
21 #include "libavutil/mem.h"
22 #include "libavutil/opt.h"
23 #include "libavutil/pixdesc.h"
24 
25 #include "avfilter.h"
26 #include "internal.h"
27 #include "opencl.h"
28 #include "opencl_source.h"
29 #include "video.h"
30 
31 #define MAX_DIAMETER 23
32 
33 typedef struct UnsharpOpenCLContext {
35 
37  cl_kernel kernel;
38  cl_command_queue command_queue;
39 
40  float luma_size_x;
41  float luma_size_y;
42  float luma_amount;
46 
47  int global;
48 
49  int nb_planes;
50  struct {
53 
54  cl_mem matrix;
55  cl_mem coef_x;
56  cl_mem coef_y;
57 
58  cl_int size_x;
59  cl_int size_y;
60  cl_float amount;
61  cl_float threshold;
62  } plane[4];
64 
65 
67 {
68  UnsharpOpenCLContext *ctx = avctx->priv;
69  cl_int cle;
70  int err;
71 
73  if (err < 0)
74  goto fail;
75 
76  ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
77  ctx->ocf.hwctx->device_id,
78  0, &cle);
79  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL "
80  "command queue %d.\n", cle);
81 
82  // Use global kernel if mask size will be too big for the local store..
83  ctx->global = (ctx->luma_size_x > 17.0f ||
84  ctx->luma_size_y > 17.0f ||
85  ctx->chroma_size_x > 17.0f ||
86  ctx->chroma_size_y > 17.0f);
87 
88  ctx->kernel = clCreateKernel(ctx->ocf.program,
89  ctx->global ? "unsharp_global"
90  : "unsharp_local", &cle);
91  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel %d.\n", cle);
92 
93  ctx->initialised = 1;
94  return 0;
95 
96 fail:
97  if (ctx->command_queue)
98  clReleaseCommandQueue(ctx->command_queue);
99  if (ctx->kernel)
100  clReleaseKernel(ctx->kernel);
101  return err;
102 }
103 
105 {
106  UnsharpOpenCLContext *ctx = avctx->priv;
107  const AVPixFmtDescriptor *desc;
108  float *matrix;
109  double val, sum;
110  cl_int cle;
111  cl_mem buffer;
112  size_t matrix_bytes;
113  float diam_x, diam_y, amount;
114  int err, p, x, y, size_x, size_y;
115 
117 
118  ctx->nb_planes = 0;
119  for (p = 0; p < desc->nb_components; p++)
120  ctx->nb_planes = FFMAX(ctx->nb_planes, desc->comp[p].plane + 1);
121 
122  for (p = 0; p < ctx->nb_planes; p++) {
123  if (p == 0 || (desc->flags & AV_PIX_FMT_FLAG_RGB)) {
124  diam_x = ctx->luma_size_x;
125  diam_y = ctx->luma_size_y;
126  amount = ctx->luma_amount;
127  } else {
128  diam_x = ctx->chroma_size_x;
129  diam_y = ctx->chroma_size_y;
130  amount = ctx->chroma_amount;
131  }
132  size_x = (int)ceil(diam_x) | 1;
133  size_y = (int)ceil(diam_y) | 1;
134  matrix_bytes = size_x * size_y * sizeof(float);
135 
136  matrix = av_malloc(matrix_bytes);
137  if (!matrix) {
138  err = AVERROR(ENOMEM);
139  goto fail;
140  }
141 
142  sum = 0.0;
143  for (x = 0; x < size_x; x++) {
144  double dx = (double)(x - size_x / 2) / diam_x;
145  sum += ctx->plane[p].blur_x[x] = exp(-16.0 * (dx * dx));
146  }
147  for (x = 0; x < size_x; x++)
148  ctx->plane[p].blur_x[x] /= sum;
149 
150  sum = 0.0;
151  for (y = 0; y < size_y; y++) {
152  double dy = (double)(y - size_y / 2) / diam_y;
153  sum += ctx->plane[p].blur_y[y] = exp(-16.0 * (dy * dy));
154  }
155  for (y = 0; y < size_y; y++)
156  ctx->plane[p].blur_y[y] /= sum;
157 
158  for (y = 0; y < size_y; y++) {
159  for (x = 0; x < size_x; x++) {
160  val = ctx->plane[p].blur_x[x] * ctx->plane[p].blur_y[y];
161  matrix[y * size_x + x] = val;
162  }
163  }
164 
165  if (ctx->global) {
166  buffer = clCreateBuffer(ctx->ocf.hwctx->context,
167  CL_MEM_READ_ONLY |
168  CL_MEM_COPY_HOST_PTR |
169  CL_MEM_HOST_NO_ACCESS,
170  matrix_bytes, matrix, &cle);
171  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create matrix buffer: "
172  "%d.\n", cle);
173  ctx->plane[p].matrix = buffer;
174  } else {
175  buffer = clCreateBuffer(ctx->ocf.hwctx->context,
176  CL_MEM_READ_ONLY |
177  CL_MEM_COPY_HOST_PTR |
178  CL_MEM_HOST_NO_ACCESS,
179  sizeof(ctx->plane[p].blur_x),
180  ctx->plane[p].blur_x, &cle);
181  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create x-coef buffer: "
182  "%d.\n", cle);
183  ctx->plane[p].coef_x = buffer;
184 
185  buffer = clCreateBuffer(ctx->ocf.hwctx->context,
186  CL_MEM_READ_ONLY |
187  CL_MEM_COPY_HOST_PTR |
188  CL_MEM_HOST_NO_ACCESS,
189  sizeof(ctx->plane[p].blur_y),
190  ctx->plane[p].blur_y, &cle);
191  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create y-coef buffer: "
192  "%d.\n", cle);
193  ctx->plane[p].coef_y = buffer;
194  }
195 
196  av_freep(&matrix);
197 
198  ctx->plane[p].size_x = size_x;
199  ctx->plane[p].size_y = size_y;
200  ctx->plane[p].amount = amount;
201  }
202 
203  err = 0;
204 fail:
205  av_freep(&matrix);
206  return err;
207 }
208 
210 {
211  AVFilterContext *avctx = inlink->dst;
212  AVFilterLink *outlink = avctx->outputs[0];
213  UnsharpOpenCLContext *ctx = avctx->priv;
214  AVFrame *output = NULL;
215  cl_int cle;
216  size_t global_work[2];
217  size_t local_work[2];
218  cl_mem src, dst;
219  int err, p;
220 
221  av_log(ctx, AV_LOG_DEBUG, "Filter input: %s, %ux%u (%"PRId64").\n",
222  av_get_pix_fmt_name(input->format),
223  input->width, input->height, input->pts);
224 
225  if (!input->hw_frames_ctx)
226  return AVERROR(EINVAL);
227 
228  if (!ctx->initialised) {
229  err = unsharp_opencl_init(avctx);
230  if (err < 0)
231  goto fail;
232 
234  if (err < 0)
235  goto fail;
236  }
237 
238  output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
239  if (!output) {
240  err = AVERROR(ENOMEM);
241  goto fail;
242  }
243 
244  for (p = 0; p < FF_ARRAY_ELEMS(output->data); p++) {
245  src = (cl_mem) input->data[p];
246  dst = (cl_mem)output->data[p];
247 
248  if (!dst)
249  break;
250 
251  CL_SET_KERNEL_ARG(ctx->kernel, 0, cl_mem, &dst);
252  CL_SET_KERNEL_ARG(ctx->kernel, 1, cl_mem, &src);
253  CL_SET_KERNEL_ARG(ctx->kernel, 2, cl_int, &ctx->plane[p].size_x);
254  CL_SET_KERNEL_ARG(ctx->kernel, 3, cl_int, &ctx->plane[p].size_y);
255  CL_SET_KERNEL_ARG(ctx->kernel, 4, cl_float, &ctx->plane[p].amount);
256 
257  if (ctx->global) {
258  CL_SET_KERNEL_ARG(ctx->kernel, 5, cl_mem, &ctx->plane[p].matrix);
259  } else {
260  CL_SET_KERNEL_ARG(ctx->kernel, 5, cl_mem, &ctx->plane[p].coef_x);
261  CL_SET_KERNEL_ARG(ctx->kernel, 6, cl_mem, &ctx->plane[p].coef_y);
262  }
263 
264  err = ff_opencl_filter_work_size_from_image(avctx, global_work, output, p,
265  ctx->global ? 0 : 16);
266  if (err < 0)
267  goto fail;
268 
269  local_work[0] = 16;
270  local_work[1] = 16;
271 
272  av_log(avctx, AV_LOG_DEBUG, "Run kernel on plane %d "
273  "(%"SIZE_SPECIFIER"x%"SIZE_SPECIFIER").\n",
274  p, global_work[0], global_work[1]);
275 
276  cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
277  global_work, ctx->global ? NULL : local_work,
278  0, NULL, NULL);
279  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue kernel: %d.\n", cle);
280  }
281 
282  cle = clFinish(ctx->command_queue);
283  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle);
284 
285  err = av_frame_copy_props(output, input);
286  if (err < 0)
287  goto fail;
288 
289  av_frame_free(&input);
290 
291  av_log(ctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
292  av_get_pix_fmt_name(output->format),
293  output->width, output->height, output->pts);
294 
295  return ff_filter_frame(outlink, output);
296 
297 fail:
298  clFinish(ctx->command_queue);
299  av_frame_free(&input);
300  av_frame_free(&output);
301  return err;
302 }
303 
305 {
306  UnsharpOpenCLContext *ctx = avctx->priv;
307  cl_int cle;
308  int i;
309 
310  for (i = 0; i < ctx->nb_planes; i++) {
311  if (ctx->plane[i].matrix)
312  clReleaseMemObject(ctx->plane[i].matrix);
313  if (ctx->plane[i].coef_x)
314  clReleaseMemObject(ctx->plane[i].coef_x);
315  if (ctx->plane[i].coef_y)
316  clReleaseMemObject(ctx->plane[i].coef_y);
317  }
318 
319  if (ctx->kernel) {
320  cle = clReleaseKernel(ctx->kernel);
321  if (cle != CL_SUCCESS)
322  av_log(avctx, AV_LOG_ERROR, "Failed to release "
323  "kernel: %d.\n", cle);
324  }
325 
326  if (ctx->command_queue) {
327  cle = clReleaseCommandQueue(ctx->command_queue);
328  if (cle != CL_SUCCESS)
329  av_log(avctx, AV_LOG_ERROR, "Failed to release "
330  "command queue: %d.\n", cle);
331  }
332 
334 }
335 
336 #define OFFSET(x) offsetof(UnsharpOpenCLContext, x)
337 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
339  { "luma_msize_x", "Set luma mask horizontal diameter (pixels)",
341  { .dbl = 5.0 }, 1, MAX_DIAMETER, FLAGS },
342  { "lx", "Set luma mask horizontal diameter (pixels)",
344  { .dbl = 5.0 }, 1, MAX_DIAMETER, FLAGS },
345  { "luma_msize_y", "Set luma mask vertical diameter (pixels)",
347  { .dbl = 5.0 }, 1, MAX_DIAMETER, FLAGS },
348  { "ly", "Set luma mask vertical diameter (pixels)",
350  { .dbl = 5.0 }, 1, MAX_DIAMETER, FLAGS },
351  { "luma_amount", "Set luma amount (multiplier)",
353  { .dbl = 1.0 }, -10, 10, FLAGS },
354  { "la", "Set luma amount (multiplier)",
356  { .dbl = 1.0 }, -10, 10, FLAGS },
357 
358  { "chroma_msize_x", "Set chroma mask horizontal diameter (pixels after subsampling)",
360  { .dbl = 5.0 }, 1, MAX_DIAMETER, FLAGS },
361  { "cx", "Set chroma mask horizontal diameter (pixels after subsampling)",
363  { .dbl = 5.0 }, 1, MAX_DIAMETER, FLAGS },
364  { "chroma_msize_y", "Set chroma mask vertical diameter (pixels after subsampling)",
366  { .dbl = 5.0 }, 1, MAX_DIAMETER, FLAGS },
367  { "cy", "Set chroma mask vertical diameter (pixels after subsampling)",
369  { .dbl = 5.0 }, 1, MAX_DIAMETER, FLAGS },
370  { "chroma_amount", "Set chroma amount (multiplier)",
372  { .dbl = 0.0 }, -10, 10, FLAGS },
373  { "ca", "Set chroma amount (multiplier)",
375  { .dbl = 0.0 }, -10, 10, FLAGS },
376 
377  { NULL }
378 };
379 
380 AVFILTER_DEFINE_CLASS(unsharp_opencl);
381 
383  {
384  .name = "default",
385  .type = AVMEDIA_TYPE_VIDEO,
386  .filter_frame = &unsharp_opencl_filter_frame,
387  .config_props = &ff_opencl_filter_config_input,
388  },
389  { NULL }
390 };
391 
393  {
394  .name = "default",
395  .type = AVMEDIA_TYPE_VIDEO,
396  .config_props = &ff_opencl_filter_config_output,
397  },
398  { NULL }
399 };
400 
402  .name = "unsharp_opencl",
403  .description = NULL_IF_CONFIG_SMALL("Apply unsharp mask to input video"),
404  .priv_size = sizeof(UnsharpOpenCLContext),
405  .priv_class = &unsharp_opencl_class,
409  .inputs = unsharp_opencl_inputs,
410  .outputs = unsharp_opencl_outputs,
411  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
412 };
int plane
Which of the 4 planes contains the component.
Definition: pixdesc.h:35
#define NULL
Definition: coverity.c:32
const char const char void * val
Definition: avisynth_c.h:863
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...
Definition: internal.h:385
AVFilter ff_vf_unsharp_opencl
static av_cold void unsharp_opencl_uninit(AVFilterContext *avctx)
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2522
This structure describes decoded (raw) audio or video data.
Definition: frame.h:295
AVOption.
Definition: opt.h:246
int ff_opencl_filter_work_size_from_image(AVFilterContext *avctx, size_t *work_size, AVFrame *frame, int plane, int block_alignment)
Find the work size needed needed for a given plane of an image.
Definition: opencl.c:278
int ff_opencl_filter_config_input(AVFilterLink *inlink)
Check that the input link contains a suitable hardware frames context and extract the device from it...
Definition: opencl.c:60
int ff_opencl_filter_query_formats(AVFilterContext *avctx)
Return that all inputs and outputs support only AV_PIX_FMT_OPENCL.
Definition: opencl.c:28
misc image utilities
Main libavfilter public API header.
Memory handling functions.
const char * desc
Definition: nvenc.c:68
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:35
struct UnsharpOpenCLContext::@252 plane[4]
static const AVFilterPad unsharp_opencl_outputs[]
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
Definition: video.c:99
static int unsharp_opencl_filter_frame(AVFilterLink *inlink, AVFrame *input)
#define src
Definition: vp8dsp.c:254
#define MAX_DIAMETER
int ff_opencl_filter_init(AVFilterContext *avctx)
Initialise an OpenCL filter context.
Definition: opencl.c:147
AVOpenCLDeviceContext * hwctx
Definition: opencl.h:41
AVBufferRef * hw_frames_ctx
For hwaccel-format frames, this should be a reference to the AVHWFramesContext describing the frame...
Definition: frame.h:634
const char * name
Pad name.
Definition: internal.h:60
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1093
AVComponentDescriptor comp[4]
Parameters that describe how pixels are packed.
Definition: pixdesc.h:117
#define av_cold
Definition: attributes.h:82
#define av_malloc(s)
static av_cold int uninit(AVCodecContext *avctx)
Definition: crystalhd.c:279
AVOptions.
static const AVOption unsharp_opencl_options[]
cl_device_id device_id
The primary device ID of the device.
cl_command_queue command_queue
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:388
OpenCLFilterContext ocf
int ff_opencl_filter_config_output(AVFilterLink *outlink)
Create a suitable hardware frames context for the output.
Definition: opencl.c:96
#define av_log(a,...)
A filter pad used for either input or output.
Definition: internal.h:54
#define OFFSET(x)
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
int width
Definition: frame.h:353
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
static int unsharp_opencl_make_filter_params(AVFilterContext *avctx)
#define AV_PIX_FMT_FLAG_RGB
The pixel format contains RGB-like data (as opposed to YUV/grayscale).
Definition: pixdesc.h:148
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:202
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:186
void * priv
private data for use by the filter
Definition: avfilter.h:353
#define AV_LOG_DEBUG
Stuff which is only useful for libav* developers.
Definition: log.h:197
const char * ff_opencl_source_unsharp
#define FFMAX(a, b)
Definition: common.h:94
#define fail()
Definition: checkasm.h:122
int8_t exp
Definition: eval.c:72
enum AVPixelFormat output_format
Definition: opencl.h:45
uint64_t flags
Combination of AV_PIX_FMT_FLAG_...
Definition: pixdesc.h:106
uint8_t nb_components
The number of components each pixel has, (1-4)
Definition: pixdesc.h:83
float blur_y[MAX_DIAMETER]
AVFormatContext * ctx
Definition: movenc.c:48
static const AVFilterPad outputs[]
Definition: af_acontrast.c:203
#define FF_ARRAY_ELEMS(a)
int format
format of the frame, -1 if unknown or unset Values correspond to enum AVPixelFormat for video frames...
Definition: frame.h:368
#define CL_SET_KERNEL_ARG(kernel, arg_num, type, arg)
set argument to specific Kernel.
Definition: opencl.h:61
#define FLAGS
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:81
AVFILTER_DEFINE_CLASS(unsharp_opencl)
float blur_x[MAX_DIAMETER]
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several inputs
Filter definition.
Definition: avfilter.h:144
const char * name
Filter name.
Definition: avfilter.h:148
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
static int unsharp_opencl_init(AVFilterContext *avctx)
AVFilterLink ** outputs
array of pointers to output links
Definition: avfilter.h:350
#define CL_FAIL_ON_ERROR(errcode,...)
A helper macro to handle OpenCL errors.
Definition: opencl.h:74
#define SIZE_SPECIFIER
Definition: internal.h:262
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:309
The exact code depends on how similar the blocks are and how related they are to the and needs to apply these operations to the correct inlink or outlink if there are several Macros are available to factor that when no extra processing is inlink
int
static int query_formats(AVFilterContext *ctx)
Definition: aeval.c:244
common internal and external API header
static const AVFilterPad unsharp_opencl_inputs[]
void ff_opencl_filter_uninit(AVFilterContext *avctx)
Uninitialise an OpenCL filter context.
Definition: opencl.c:156
cl_context context
The OpenCL context which will contain all operations and frames on this device.
An instance of a filter.
Definition: avfilter.h:338
int height
Definition: frame.h:353
#define av_freep(p)
cl_program program
Definition: opencl.h:43
int ff_opencl_filter_load_program(AVFilterContext *avctx, const char **program_source_array, int nb_strings)
Load a new OpenCL program from strings in memory.
Definition: opencl.c:171
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
Definition: pixdesc.c:2438
internal API functions
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later.That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another.Frame references ownership and permissions
for(j=16;j >0;--j)
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:654
GLuint buffer
Definition: opengl_enc.c:101