FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
unsharp_opencl.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2013 Wei Gao <weigao@multicorewareinc.com>
3  * Copyright (C) 2013 Lenny Wang
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * unsharp input video
25  */
26 
27 #include "unsharp_opencl.h"
28 #include "libavutil/common.h"
30 
31 #define PLANE_NUM 3
32 #define ROUND_TO_16(a) (((((a) - 1)/16)+1)*16)
33 
34 static inline void add_mask_counter(uint32_t *dst, uint32_t *counter1, uint32_t *counter2, int len)
35 {
36  int i;
37  for (i = 0; i < len; i++) {
38  dst[i] = counter1[i] + counter2[i];
39  }
40 }
41 
42 static int compute_mask(int step, uint32_t *mask)
43 {
44  int i, z, ret = 0;
45  int counter_size = sizeof(uint32_t) * (2 * step + 1);
46  uint32_t *temp1_counter, *temp2_counter, **counter;
47  temp1_counter = av_mallocz(counter_size);
48  if (!temp1_counter) {
49  ret = AVERROR(ENOMEM);
50  goto end;
51  }
52  temp2_counter = av_mallocz(counter_size);
53  if (!temp2_counter) {
54  ret = AVERROR(ENOMEM);
55  goto end;
56  }
57  counter = av_mallocz_array(2 * step + 1, sizeof(uint32_t *));
58  if (!counter) {
59  ret = AVERROR(ENOMEM);
60  goto end;
61  }
62  for (i = 0; i < 2 * step + 1; i++) {
63  counter[i] = av_mallocz(counter_size);
64  if (!counter[i]) {
65  ret = AVERROR(ENOMEM);
66  goto end;
67  }
68  }
69  for (i = 0; i < 2 * step + 1; i++) {
70  memset(temp1_counter, 0, counter_size);
71  temp1_counter[i] = 1;
72  for (z = 0; z < step * 2; z += 2) {
73  add_mask_counter(temp2_counter, counter[z], temp1_counter, step * 2);
74  memcpy(counter[z], temp1_counter, counter_size);
75  add_mask_counter(temp1_counter, counter[z + 1], temp2_counter, step * 2);
76  memcpy(counter[z + 1], temp2_counter, counter_size);
77  }
78  }
79  memcpy(mask, temp1_counter, counter_size);
80 end:
81  av_freep(&temp1_counter);
82  av_freep(&temp2_counter);
83  for (i = 0; i < 2 * step + 1; i++) {
84  av_freep(&counter[i]);
85  }
86  av_freep(&counter);
87  return ret;
88 }
89 
90 static int copy_separable_masks(cl_mem cl_mask_x, cl_mem cl_mask_y, int step_x, int step_y)
91 {
92  int ret = 0;
93  uint32_t *mask_x, *mask_y;
94  size_t size_mask_x = sizeof(uint32_t) * (2 * step_x + 1);
95  size_t size_mask_y = sizeof(uint32_t) * (2 * step_y + 1);
96  mask_x = av_mallocz_array(2 * step_x + 1, sizeof(uint32_t));
97  if (!mask_x) {
98  ret = AVERROR(ENOMEM);
99  goto end;
100  }
101  mask_y = av_mallocz_array(2 * step_y + 1, sizeof(uint32_t));
102  if (!mask_y) {
103  ret = AVERROR(ENOMEM);
104  goto end;
105  }
106 
107  ret = compute_mask(step_x, mask_x);
108  if (ret < 0)
109  goto end;
110  ret = compute_mask(step_y, mask_y);
111  if (ret < 0)
112  goto end;
113 
114  ret = av_opencl_buffer_write(cl_mask_x, (uint8_t *)mask_x, size_mask_x);
115  ret = av_opencl_buffer_write(cl_mask_y, (uint8_t *)mask_y, size_mask_y);
116 end:
117  av_freep(&mask_x);
118  av_freep(&mask_y);
119 
120  return ret;
121 }
122 
124 {
125  cl_mem masks[4];
126  cl_mem mask_matrix[2];
127  int i, ret = 0, step_x[2], step_y[2];
128 
129  UnsharpContext *unsharp = ctx->priv;
130  mask_matrix[0] = unsharp->opencl_ctx.cl_luma_mask;
131  mask_matrix[1] = unsharp->opencl_ctx.cl_chroma_mask;
132  masks[0] = unsharp->opencl_ctx.cl_luma_mask_x;
133  masks[1] = unsharp->opencl_ctx.cl_luma_mask_y;
134  masks[2] = unsharp->opencl_ctx.cl_chroma_mask_x;
135  masks[3] = unsharp->opencl_ctx.cl_chroma_mask_y;
136  step_x[0] = unsharp->luma.steps_x;
137  step_x[1] = unsharp->chroma.steps_x;
138  step_y[0] = unsharp->luma.steps_y;
139  step_y[1] = unsharp->chroma.steps_y;
140 
141  /* use default kernel if any matrix dim larger than 8 due to limited local mem size */
142  if (step_x[0]>8 || step_x[1]>8 || step_y[0]>8 || step_y[1]>8)
143  unsharp->opencl_ctx.use_fast_kernels = 0;
144  else
145  unsharp->opencl_ctx.use_fast_kernels = 1;
146 
147  if (!masks[0] || !masks[1] || !masks[2] || !masks[3]) {
148  av_log(ctx, AV_LOG_ERROR, "Luma mask and chroma mask should not be NULL\n");
149  return AVERROR(EINVAL);
150  }
151  if (!mask_matrix[0] || !mask_matrix[1]) {
152  av_log(ctx, AV_LOG_ERROR, "Luma mask and chroma mask should not be NULL\n");
153  return AVERROR(EINVAL);
154  }
155  for (i = 0; i < 2; i++) {
156  ret = copy_separable_masks(masks[2*i], masks[2*i+1], step_x[i], step_y[i]);
157  if (ret < 0)
158  return ret;
159  }
160  return ret;
161 }
162 
164 {
165  int ret;
166  AVFilterLink *link = ctx->inputs[0];
167  UnsharpContext *unsharp = ctx->priv;
168  cl_int status;
169  FFOpenclParam kernel1 = {0};
170  FFOpenclParam kernel2 = {0};
171  int width = link->w;
172  int height = link->h;
173  int cw = FF_CEIL_RSHIFT(link->w, unsharp->hsub);
174  int ch = FF_CEIL_RSHIFT(link->h, unsharp->vsub);
175  size_t globalWorkSize1d = width * height + 2 * ch * cw;
176  size_t globalWorkSize2dLuma[2];
177  size_t globalWorkSize2dChroma[2];
178  size_t localWorkSize2d[2] = {16, 16};
179 
180  if (unsharp->opencl_ctx.use_fast_kernels) {
181  globalWorkSize2dLuma[0] = (size_t)ROUND_TO_16(width);
182  globalWorkSize2dLuma[1] = (size_t)ROUND_TO_16(height);
183  globalWorkSize2dChroma[0] = (size_t)ROUND_TO_16(cw);
184  globalWorkSize2dChroma[1] = (size_t)(2*ROUND_TO_16(ch));
185 
186  kernel1.ctx = ctx;
187  kernel1.kernel = unsharp->opencl_ctx.kernel_luma;
188  ret = avpriv_opencl_set_parameter(&kernel1,
189  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_inbuf),
190  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_outbuf),
191  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_luma_mask_x),
192  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_luma_mask_y),
193  FF_OPENCL_PARAM_INFO(unsharp->luma.amount),
198  FF_OPENCL_PARAM_INFO(width),
199  FF_OPENCL_PARAM_INFO(height),
200  NULL);
201  if (ret < 0)
202  return ret;
203 
204  kernel2.ctx = ctx;
205  kernel2.kernel = unsharp->opencl_ctx.kernel_chroma;
206  ret = avpriv_opencl_set_parameter(&kernel2,
207  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_inbuf),
208  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_outbuf),
209  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_chroma_mask_x),
210  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_chroma_mask_y),
218  FF_OPENCL_PARAM_INFO(link->w),
219  FF_OPENCL_PARAM_INFO(link->h),
222  NULL);
223  if (ret < 0)
224  return ret;
225  status = clEnqueueNDRangeKernel(unsharp->opencl_ctx.command_queue,
226  unsharp->opencl_ctx.kernel_luma, 2, NULL,
227  globalWorkSize2dLuma, localWorkSize2d, 0, NULL, NULL);
228  status |=clEnqueueNDRangeKernel(unsharp->opencl_ctx.command_queue,
229  unsharp->opencl_ctx.kernel_chroma, 2, NULL,
230  globalWorkSize2dChroma, localWorkSize2d, 0, NULL, NULL);
231  if (status != CL_SUCCESS) {
232  av_log(ctx, AV_LOG_ERROR, "OpenCL run kernel error occurred: %s\n", av_opencl_errstr(status));
233  return AVERROR_EXTERNAL;
234  }
235  } else { /* use default kernel */
236  kernel1.ctx = ctx;
237  kernel1.kernel = unsharp->opencl_ctx.kernel_default;
238 
239  ret = avpriv_opencl_set_parameter(&kernel1,
240  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_inbuf),
241  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_outbuf),
242  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_luma_mask),
243  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_chroma_mask),
244  FF_OPENCL_PARAM_INFO(unsharp->luma.amount),
258  FF_OPENCL_PARAM_INFO(link->h),
259  FF_OPENCL_PARAM_INFO(link->w),
262  NULL);
263  if (ret < 0)
264  return ret;
265  status = clEnqueueNDRangeKernel(unsharp->opencl_ctx.command_queue,
266  unsharp->opencl_ctx.kernel_default, 1, NULL,
267  &globalWorkSize1d, NULL, 0, NULL, NULL);
268  if (status != CL_SUCCESS) {
269  av_log(ctx, AV_LOG_ERROR, "OpenCL run kernel error occurred: %s\n", av_opencl_errstr(status));
270  return AVERROR_EXTERNAL;
271  }
272  }
273  //blocking map is suffficient, no need for clFinish
274  //clFinish(unsharp->opencl_ctx.command_queue);
275 
276  return av_opencl_buffer_read_image(out->data, unsharp->opencl_ctx.out_plane_size,
277  unsharp->opencl_ctx.plane_num, unsharp->opencl_ctx.cl_outbuf,
278  unsharp->opencl_ctx.cl_outbuf_size);
279 }
280 
282 {
283  int ret = 0;
284  char build_opts[96];
285  UnsharpContext *unsharp = ctx->priv;
286  ret = av_opencl_init(NULL);
287  if (ret < 0)
288  return ret;
289  ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_luma_mask,
290  sizeof(uint32_t) * (2 * unsharp->luma.steps_x + 1) * (2 * unsharp->luma.steps_y + 1),
291  CL_MEM_READ_ONLY, NULL);
292  if (ret < 0)
293  return ret;
294  ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_chroma_mask,
295  sizeof(uint32_t) * (2 * unsharp->chroma.steps_x + 1) * (2 * unsharp->chroma.steps_y + 1),
296  CL_MEM_READ_ONLY, NULL);
297  // separable filters
298  if (ret < 0)
299  return ret;
300  ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_luma_mask_x,
301  sizeof(uint32_t) * (2 * unsharp->luma.steps_x + 1),
302  CL_MEM_READ_ONLY, NULL);
303  if (ret < 0)
304  return ret;
305  ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_luma_mask_y,
306  sizeof(uint32_t) * (2 * unsharp->luma.steps_y + 1),
307  CL_MEM_READ_ONLY, NULL);
308  if (ret < 0)
309  return ret;
310  ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_chroma_mask_x,
311  sizeof(uint32_t) * (2 * unsharp->chroma.steps_x + 1),
312  CL_MEM_READ_ONLY, NULL);
313  if (ret < 0)
314  return ret;
315  ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_chroma_mask_y,
316  sizeof(uint32_t) * (2 * unsharp->chroma.steps_y + 1),
317  CL_MEM_READ_ONLY, NULL);
318  if (ret < 0)
319  return ret;
320  ret = generate_mask(ctx);
321  if (ret < 0)
322  return ret;
323  unsharp->opencl_ctx.plane_num = PLANE_NUM;
324  unsharp->opencl_ctx.command_queue = av_opencl_get_command_queue();
325  if (!unsharp->opencl_ctx.command_queue) {
326  av_log(ctx, AV_LOG_ERROR, "Unable to get OpenCL command queue in filter 'unsharp'\n");
327  return AVERROR(EINVAL);
328  }
329  snprintf(build_opts, 96, "-D LU_RADIUS_X=%d -D LU_RADIUS_Y=%d -D CH_RADIUS_X=%d -D CH_RADIUS_Y=%d",
330  2*unsharp->luma.steps_x+1, 2*unsharp->luma.steps_y+1, 2*unsharp->chroma.steps_x+1, 2*unsharp->chroma.steps_y+1);
331  unsharp->opencl_ctx.program = av_opencl_compile("unsharp", build_opts);
332  if (!unsharp->opencl_ctx.program) {
333  av_log(ctx, AV_LOG_ERROR, "OpenCL failed to compile program 'unsharp'\n");
334  return AVERROR(EINVAL);
335  }
336  if (unsharp->opencl_ctx.use_fast_kernels) {
337  if (!unsharp->opencl_ctx.kernel_luma) {
338  unsharp->opencl_ctx.kernel_luma = clCreateKernel(unsharp->opencl_ctx.program, "unsharp_luma", &ret);
339  if (ret != CL_SUCCESS) {
340  av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel 'unsharp_luma'\n");
341  return ret;
342  }
343  }
344  if (!unsharp->opencl_ctx.kernel_chroma) {
345  unsharp->opencl_ctx.kernel_chroma = clCreateKernel(unsharp->opencl_ctx.program, "unsharp_chroma", &ret);
346  if (ret < 0) {
347  av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel 'unsharp_chroma'\n");
348  return ret;
349  }
350  }
351  }
352  else {
353  if (!unsharp->opencl_ctx.kernel_default) {
354  unsharp->opencl_ctx.kernel_default = clCreateKernel(unsharp->opencl_ctx.program, "unsharp_default", &ret);
355  if (ret < 0) {
356  av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel 'unsharp_default'\n");
357  return ret;
358  }
359  }
360  }
361  return ret;
362 }
363 
365 {
366  UnsharpContext *unsharp = ctx->priv;
367  av_opencl_buffer_release(&unsharp->opencl_ctx.cl_inbuf);
368  av_opencl_buffer_release(&unsharp->opencl_ctx.cl_outbuf);
369  av_opencl_buffer_release(&unsharp->opencl_ctx.cl_luma_mask);
370  av_opencl_buffer_release(&unsharp->opencl_ctx.cl_chroma_mask);
371  av_opencl_buffer_release(&unsharp->opencl_ctx.cl_luma_mask_x);
372  av_opencl_buffer_release(&unsharp->opencl_ctx.cl_chroma_mask_x);
373  av_opencl_buffer_release(&unsharp->opencl_ctx.cl_luma_mask_y);
374  av_opencl_buffer_release(&unsharp->opencl_ctx.cl_chroma_mask_y);
375  clReleaseKernel(unsharp->opencl_ctx.kernel_default);
376  clReleaseKernel(unsharp->opencl_ctx.kernel_luma);
377  clReleaseKernel(unsharp->opencl_ctx.kernel_chroma);
378  clReleaseProgram(unsharp->opencl_ctx.program);
379  unsharp->opencl_ctx.command_queue = NULL;
381 }
382 
384 {
385  int ret = 0;
386  AVFilterLink *link = ctx->inputs[0];
387  UnsharpContext *unsharp = ctx->priv;
388  int ch = FF_CEIL_RSHIFT(link->h, unsharp->vsub);
389 
390  if ((!unsharp->opencl_ctx.cl_inbuf) || (!unsharp->opencl_ctx.cl_outbuf)) {
391  unsharp->opencl_ctx.in_plane_size[0] = (in->linesize[0] * in->height);
392  unsharp->opencl_ctx.in_plane_size[1] = (in->linesize[1] * ch);
393  unsharp->opencl_ctx.in_plane_size[2] = (in->linesize[2] * ch);
394  unsharp->opencl_ctx.out_plane_size[0] = (out->linesize[0] * out->height);
395  unsharp->opencl_ctx.out_plane_size[1] = (out->linesize[1] * ch);
396  unsharp->opencl_ctx.out_plane_size[2] = (out->linesize[2] * ch);
397  unsharp->opencl_ctx.cl_inbuf_size = unsharp->opencl_ctx.in_plane_size[0] +
398  unsharp->opencl_ctx.in_plane_size[1] +
399  unsharp->opencl_ctx.in_plane_size[2];
400  unsharp->opencl_ctx.cl_outbuf_size = unsharp->opencl_ctx.out_plane_size[0] +
401  unsharp->opencl_ctx.out_plane_size[1] +
402  unsharp->opencl_ctx.out_plane_size[2];
403  if (!unsharp->opencl_ctx.cl_inbuf) {
404  ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_inbuf,
405  unsharp->opencl_ctx.cl_inbuf_size,
406  CL_MEM_READ_ONLY, NULL);
407  if (ret < 0)
408  return ret;
409  }
410  if (!unsharp->opencl_ctx.cl_outbuf) {
411  ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_outbuf,
412  unsharp->opencl_ctx.cl_outbuf_size,
413  CL_MEM_READ_WRITE, NULL);
414  if (ret < 0)
415  return ret;
416  }
417  }
418  return av_opencl_buffer_write_image(unsharp->opencl_ctx.cl_inbuf,
419  unsharp->opencl_ctx.cl_inbuf_size,
420  0, in->data, unsharp->opencl_ctx.in_plane_size,
421  unsharp->opencl_ctx.plane_num);
422 }