FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
unsharp_opencl.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2013 Wei Gao <weigao@multicorewareinc.com>
3  * Copyright (C) 2013 Lenny Wang
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * unsharp input video
25  */
26 
27 #include "unsharp_opencl.h"
28 #include "libavutil/common.h"
30 
31 #define PLANE_NUM 3
32 #define ROUND_TO_16(a) (((((a) - 1)/16)+1)*16)
33 
34 static inline void add_mask_counter(uint32_t *dst, uint32_t *counter1, uint32_t *counter2, int len)
35 {
36  int i;
37  for (i = 0; i < len; i++) {
38  dst[i] = counter1[i] + counter2[i];
39  }
40 }
41 
42 static int compute_mask(int step, uint32_t *mask)
43 {
44  int i, z, ret = 0;
45  int counter_size = sizeof(uint32_t) * (2 * step + 1);
46  uint32_t *temp1_counter, *temp2_counter, **counter;
47  temp1_counter = av_mallocz(counter_size);
48  if (!temp1_counter) {
49  ret = AVERROR(ENOMEM);
50  goto end;
51  }
52  temp2_counter = av_mallocz(counter_size);
53  if (!temp2_counter) {
54  ret = AVERROR(ENOMEM);
55  goto end;
56  }
57  counter = av_mallocz_array(2 * step + 1, sizeof(uint32_t *));
58  if (!counter) {
59  ret = AVERROR(ENOMEM);
60  goto end;
61  }
62  for (i = 0; i < 2 * step + 1; i++) {
63  counter[i] = av_mallocz(counter_size);
64  if (!counter[i]) {
65  ret = AVERROR(ENOMEM);
66  goto end;
67  }
68  }
69  for (i = 0; i < 2 * step + 1; i++) {
70  memset(temp1_counter, 0, counter_size);
71  temp1_counter[i] = 1;
72  for (z = 0; z < step * 2; z += 2) {
73  add_mask_counter(temp2_counter, counter[z], temp1_counter, step * 2);
74  memcpy(counter[z], temp1_counter, counter_size);
75  add_mask_counter(temp1_counter, counter[z + 1], temp2_counter, step * 2);
76  memcpy(counter[z + 1], temp2_counter, counter_size);
77  }
78  }
79  memcpy(mask, temp1_counter, counter_size);
80 end:
81  av_freep(&temp1_counter);
82  av_freep(&temp2_counter);
83  for (i = 0; i < 2 * step + 1; i++) {
84  av_freep(&counter[i]);
85  }
86  av_freep(&counter);
87  return ret;
88 }
89 
90 static int compute_mask_matrix(cl_mem cl_mask_matrix, int step_x, int step_y)
91 {
92  int i, j, ret = 0;
93  uint32_t *mask_matrix, *mask_x, *mask_y;
94  size_t size_matrix = sizeof(uint32_t) * (2 * step_x + 1) * (2 * step_y + 1);
95  mask_x = av_mallocz_array(2 * step_x + 1, sizeof(uint32_t));
96  if (!mask_x) {
97  ret = AVERROR(ENOMEM);
98  goto end;
99  }
100  mask_y = av_mallocz_array(2 * step_y + 1, sizeof(uint32_t));
101  if (!mask_y) {
102  ret = AVERROR(ENOMEM);
103  goto end;
104  }
105  mask_matrix = av_mallocz(size_matrix);
106  if (!mask_matrix) {
107  ret = AVERROR(ENOMEM);
108  goto end;
109  }
110  ret = compute_mask(step_x, mask_x);
111  if (ret < 0)
112  goto end;
113  ret = compute_mask(step_y, mask_y);
114  if (ret < 0)
115  goto end;
116  for (j = 0; j < 2 * step_y + 1; j++) {
117  for (i = 0; i < 2 * step_x + 1; i++) {
118  mask_matrix[i + j * (2 * step_x + 1)] = mask_y[j] * mask_x[i];
119  }
120  }
121  ret = av_opencl_buffer_write(cl_mask_matrix, (uint8_t *)mask_matrix, size_matrix);
122 end:
123  av_freep(&mask_x);
124  av_freep(&mask_y);
125  av_freep(&mask_matrix);
126  return ret;
127 }
128 
130 {
131  UnsharpContext *unsharp = ctx->priv;
132  int i, ret = 0, step_x[2], step_y[2];
133  cl_mem mask_matrix[2];
134  mask_matrix[0] = unsharp->opencl_ctx.cl_luma_mask;
135  mask_matrix[1] = unsharp->opencl_ctx.cl_chroma_mask;
136  step_x[0] = unsharp->luma.steps_x;
137  step_x[1] = unsharp->chroma.steps_x;
138  step_y[0] = unsharp->luma.steps_y;
139  step_y[1] = unsharp->chroma.steps_y;
140 
141  /* use default kernel if any matrix dim larger than 8 due to limited local mem size */
142  if (step_x[0]>8 || step_x[1]>8 || step_y[0]>8 || step_y[1]>8)
143  unsharp->opencl_ctx.use_fast_kernels = 0;
144  else
145  unsharp->opencl_ctx.use_fast_kernels = 1;
146 
147  if (!mask_matrix[0] || !mask_matrix[1]) {
148  av_log(ctx, AV_LOG_ERROR, "Luma mask and chroma mask should not be NULL\n");
149  return AVERROR(EINVAL);
150  }
151  for (i = 0; i < 2; i++) {
152  ret = compute_mask_matrix(mask_matrix[i], step_x[i], step_y[i]);
153  if (ret < 0)
154  return ret;
155  }
156  return ret;
157 }
158 
160 {
161  int ret;
162  AVFilterLink *link = ctx->inputs[0];
163  UnsharpContext *unsharp = ctx->priv;
164  cl_int status;
165  FFOpenclParam kernel1 = {0};
166  FFOpenclParam kernel2 = {0};
167  int width = link->w;
168  int height = link->h;
169  int cw = FF_CEIL_RSHIFT(link->w, unsharp->hsub);
170  int ch = FF_CEIL_RSHIFT(link->h, unsharp->vsub);
171  size_t globalWorkSize1d = width * height + 2 * ch * cw;
172  size_t globalWorkSize2dLuma[2];
173  size_t globalWorkSize2dChroma[2];
174  size_t localWorkSize2d[2] = {16, 16};
175 
176  if (unsharp->opencl_ctx.use_fast_kernels) {
177  globalWorkSize2dLuma[0] = (size_t)ROUND_TO_16(width);
178  globalWorkSize2dLuma[1] = (size_t)ROUND_TO_16(height);
179  globalWorkSize2dChroma[0] = (size_t)ROUND_TO_16(cw);
180  globalWorkSize2dChroma[1] = (size_t)(2*ROUND_TO_16(ch));
181 
182  kernel1.ctx = ctx;
183  kernel1.kernel = unsharp->opencl_ctx.kernel_luma;
184  ret = avpriv_opencl_set_parameter(&kernel1,
185  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_inbuf),
186  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_outbuf),
187  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_luma_mask),
188  FF_OPENCL_PARAM_INFO(unsharp->luma.amount),
193  FF_OPENCL_PARAM_INFO(width),
194  FF_OPENCL_PARAM_INFO(height),
195  NULL);
196  if (ret < 0)
197  return ret;
198 
199  kernel2.ctx = ctx;
200  kernel2.kernel = unsharp->opencl_ctx.kernel_chroma;
201  ret = avpriv_opencl_set_parameter(&kernel2,
202  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_inbuf),
203  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_outbuf),
204  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_chroma_mask),
212  FF_OPENCL_PARAM_INFO(link->w),
213  FF_OPENCL_PARAM_INFO(link->h),
216  NULL);
217  if (ret < 0)
218  return ret;
219  status = clEnqueueNDRangeKernel(unsharp->opencl_ctx.command_queue,
220  unsharp->opencl_ctx.kernel_luma, 2, NULL,
221  globalWorkSize2dLuma, localWorkSize2d, 0, NULL, NULL);
222  status |=clEnqueueNDRangeKernel(unsharp->opencl_ctx.command_queue,
223  unsharp->opencl_ctx.kernel_chroma, 2, NULL,
224  globalWorkSize2dChroma, localWorkSize2d, 0, NULL, NULL);
225  if (status != CL_SUCCESS) {
226  av_log(ctx, AV_LOG_ERROR, "OpenCL run kernel error occurred: %s\n", av_opencl_errstr(status));
227  return AVERROR_EXTERNAL;
228  }
229  } else { /* use default kernel */
230  kernel1.ctx = ctx;
231  kernel1.kernel = unsharp->opencl_ctx.kernel_default;
232 
233  ret = avpriv_opencl_set_parameter(&kernel1,
234  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_inbuf),
235  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_outbuf),
236  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_luma_mask),
237  FF_OPENCL_PARAM_INFO(unsharp->opencl_ctx.cl_chroma_mask),
238  FF_OPENCL_PARAM_INFO(unsharp->luma.amount),
252  FF_OPENCL_PARAM_INFO(link->h),
253  FF_OPENCL_PARAM_INFO(link->w),
256  NULL);
257  if (ret < 0)
258  return ret;
259  status = clEnqueueNDRangeKernel(unsharp->opencl_ctx.command_queue,
260  unsharp->opencl_ctx.kernel_default, 1, NULL,
261  &globalWorkSize1d, NULL, 0, NULL, NULL);
262  if (status != CL_SUCCESS) {
263  av_log(ctx, AV_LOG_ERROR, "OpenCL run kernel error occurred: %s\n", av_opencl_errstr(status));
264  return AVERROR_EXTERNAL;
265  }
266  }
267  clFinish(unsharp->opencl_ctx.command_queue);
268  return av_opencl_buffer_read_image(out->data, unsharp->opencl_ctx.out_plane_size,
269  unsharp->opencl_ctx.plane_num, unsharp->opencl_ctx.cl_outbuf,
270  unsharp->opencl_ctx.cl_outbuf_size);
271 }
272 
274 {
275  int ret = 0;
276  char build_opts[96];
277  UnsharpContext *unsharp = ctx->priv;
278  ret = av_opencl_init(NULL);
279  if (ret < 0)
280  return ret;
281  ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_luma_mask,
282  sizeof(uint32_t) * (2 * unsharp->luma.steps_x + 1) * (2 * unsharp->luma.steps_y + 1),
283  CL_MEM_READ_ONLY, NULL);
284  if (ret < 0)
285  return ret;
286  ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_chroma_mask,
287  sizeof(uint32_t) * (2 * unsharp->chroma.steps_x + 1) * (2 * unsharp->chroma.steps_y + 1),
288  CL_MEM_READ_ONLY, NULL);
289  if (ret < 0)
290  return ret;
291  ret = generate_mask(ctx);
292  if (ret < 0)
293  return ret;
294  unsharp->opencl_ctx.plane_num = PLANE_NUM;
295  unsharp->opencl_ctx.command_queue = av_opencl_get_command_queue();
296  if (!unsharp->opencl_ctx.command_queue) {
297  av_log(ctx, AV_LOG_ERROR, "Unable to get OpenCL command queue in filter 'unsharp'\n");
298  return AVERROR(EINVAL);
299  }
300  snprintf(build_opts, 96, "-D LU_RADIUS_X=%d -D LU_RADIUS_Y=%d -D CH_RADIUS_X=%d -D CH_RADIUS_Y=%d",
301  2*unsharp->luma.steps_x+1, 2*unsharp->luma.steps_y+1, 2*unsharp->chroma.steps_x+1, 2*unsharp->chroma.steps_y+1);
302  unsharp->opencl_ctx.program = av_opencl_compile("unsharp", build_opts);
303  if (!unsharp->opencl_ctx.program) {
304  av_log(ctx, AV_LOG_ERROR, "OpenCL failed to compile program 'unsharp'\n");
305  return AVERROR(EINVAL);
306  }
307  if (unsharp->opencl_ctx.use_fast_kernels) {
308  if (!unsharp->opencl_ctx.kernel_luma) {
309  unsharp->opencl_ctx.kernel_luma = clCreateKernel(unsharp->opencl_ctx.program, "unsharp_luma", &ret);
310  if (ret != CL_SUCCESS) {
311  av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel 'unsharp_luma'\n");
312  return ret;
313  }
314  }
315  if (!unsharp->opencl_ctx.kernel_chroma) {
316  unsharp->opencl_ctx.kernel_chroma = clCreateKernel(unsharp->opencl_ctx.program, "unsharp_chroma", &ret);
317  if (ret < 0) {
318  av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel 'unsharp_chroma'\n");
319  return ret;
320  }
321  }
322  }
323  else {
324  if (!unsharp->opencl_ctx.kernel_default) {
325  unsharp->opencl_ctx.kernel_default = clCreateKernel(unsharp->opencl_ctx.program, "unsharp_default", &ret);
326  if (ret < 0) {
327  av_log(ctx, AV_LOG_ERROR, "OpenCL failed to create kernel 'unsharp_default'\n");
328  return ret;
329  }
330  }
331  }
332  return ret;
333 }
334 
336 {
337  UnsharpContext *unsharp = ctx->priv;
338  av_opencl_buffer_release(&unsharp->opencl_ctx.cl_inbuf);
339  av_opencl_buffer_release(&unsharp->opencl_ctx.cl_outbuf);
340  av_opencl_buffer_release(&unsharp->opencl_ctx.cl_luma_mask);
341  av_opencl_buffer_release(&unsharp->opencl_ctx.cl_chroma_mask);
342  clReleaseKernel(unsharp->opencl_ctx.kernel_default);
343  clReleaseKernel(unsharp->opencl_ctx.kernel_luma);
344  clReleaseKernel(unsharp->opencl_ctx.kernel_chroma);
345  clReleaseProgram(unsharp->opencl_ctx.program);
346  unsharp->opencl_ctx.command_queue = NULL;
348 }
349 
351 {
352  int ret = 0;
353  AVFilterLink *link = ctx->inputs[0];
354  UnsharpContext *unsharp = ctx->priv;
355  int ch = FF_CEIL_RSHIFT(link->h, unsharp->vsub);
356 
357  if ((!unsharp->opencl_ctx.cl_inbuf) || (!unsharp->opencl_ctx.cl_outbuf)) {
358  unsharp->opencl_ctx.in_plane_size[0] = (in->linesize[0] * in->height);
359  unsharp->opencl_ctx.in_plane_size[1] = (in->linesize[1] * ch);
360  unsharp->opencl_ctx.in_plane_size[2] = (in->linesize[2] * ch);
361  unsharp->opencl_ctx.out_plane_size[0] = (out->linesize[0] * out->height);
362  unsharp->opencl_ctx.out_plane_size[1] = (out->linesize[1] * ch);
363  unsharp->opencl_ctx.out_plane_size[2] = (out->linesize[2] * ch);
364  unsharp->opencl_ctx.cl_inbuf_size = unsharp->opencl_ctx.in_plane_size[0] +
365  unsharp->opencl_ctx.in_plane_size[1] +
366  unsharp->opencl_ctx.in_plane_size[2];
367  unsharp->opencl_ctx.cl_outbuf_size = unsharp->opencl_ctx.out_plane_size[0] +
368  unsharp->opencl_ctx.out_plane_size[1] +
369  unsharp->opencl_ctx.out_plane_size[2];
370  if (!unsharp->opencl_ctx.cl_inbuf) {
371  ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_inbuf,
372  unsharp->opencl_ctx.cl_inbuf_size,
373  CL_MEM_READ_ONLY, NULL);
374  if (ret < 0)
375  return ret;
376  }
377  if (!unsharp->opencl_ctx.cl_outbuf) {
378  ret = av_opencl_buffer_create(&unsharp->opencl_ctx.cl_outbuf,
379  unsharp->opencl_ctx.cl_outbuf_size,
380  CL_MEM_READ_WRITE, NULL);
381  if (ret < 0)
382  return ret;
383  }
384  }
385  return av_opencl_buffer_write_image(unsharp->opencl_ctx.cl_inbuf,
386  unsharp->opencl_ctx.cl_inbuf_size,
387  0, in->data, unsharp->opencl_ctx.in_plane_size,
388  unsharp->opencl_ctx.plane_num);
389 }