FFmpeg
vf_overlay_opencl.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include "libavutil/log.h"
20 #include "libavutil/mem.h"
21 #include "libavutil/opt.h"
22 #include "libavutil/pixdesc.h"
23 
24 #include "avfilter.h"
25 #include "framesync.h"
26 #include "internal.h"
27 #include "opencl.h"
28 #include "opencl_source.h"
29 #include "video.h"
30 
31 typedef struct OverlayOpenCLContext {
33 
35  cl_kernel kernel;
36  cl_command_queue command_queue;
37 
39 
40  int nb_planes;
44 
48 
50  enum AVPixelFormat main_format,
51  enum AVPixelFormat overlay_format)
52 {
53  OverlayOpenCLContext *ctx = avctx->priv;
54  cl_int cle;
55  const char *source = ff_opencl_source_overlay;
56  const char *kernel;
57  const AVPixFmtDescriptor *main_desc, *overlay_desc;
58  int err, i, main_planes, overlay_planes;
59 
60  main_desc = av_pix_fmt_desc_get(main_format);
61  overlay_desc = av_pix_fmt_desc_get(overlay_format);
62 
63  main_planes = overlay_planes = 0;
64  for (i = 0; i < main_desc->nb_components; i++)
65  main_planes = FFMAX(main_planes,
66  main_desc->comp[i].plane + 1);
67  for (i = 0; i < overlay_desc->nb_components; i++)
68  overlay_planes = FFMAX(overlay_planes,
69  overlay_desc->comp[i].plane + 1);
70 
71  ctx->nb_planes = main_planes;
72  ctx->x_subsample = 1 << main_desc->log2_chroma_w;
73  ctx->y_subsample = 1 << main_desc->log2_chroma_h;
74 
75  if (ctx->x_position % ctx->x_subsample ||
76  ctx->y_position % ctx->y_subsample) {
77  av_log(avctx, AV_LOG_WARNING, "Warning: overlay position (%d, %d) "
78  "does not match subsampling (%d, %d).\n",
79  ctx->x_position, ctx->y_position,
80  ctx->x_subsample, ctx->y_subsample);
81  }
82 
83  if (main_planes == overlay_planes) {
84  if (main_desc->nb_components == overlay_desc->nb_components)
85  kernel = "overlay_no_alpha";
86  else
87  kernel = "overlay_internal_alpha";
88  ctx->alpha_separate = 0;
89  } else {
90  kernel = "overlay_external_alpha";
91  ctx->alpha_separate = 1;
92  }
93 
94  av_log(avctx, AV_LOG_DEBUG, "Using kernel %s.\n", kernel);
95 
96  err = ff_opencl_filter_load_program(avctx, &source, 1);
97  if (err < 0)
98  goto fail;
99 
100  ctx->command_queue = clCreateCommandQueue(ctx->ocf.hwctx->context,
101  ctx->ocf.hwctx->device_id,
102  0, &cle);
103  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL "
104  "command queue %d.\n", cle);
105 
106  ctx->kernel = clCreateKernel(ctx->ocf.program, kernel, &cle);
107  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create kernel %d.\n", cle);
108 
109  ctx->initialised = 1;
110  return 0;
111 
112 fail:
113  if (ctx->command_queue)
114  clReleaseCommandQueue(ctx->command_queue);
115  if (ctx->kernel)
116  clReleaseKernel(ctx->kernel);
117  return err;
118 }
119 
121 {
122  AVFilterContext *avctx = fs->parent;
123  AVFilterLink *outlink = avctx->outputs[0];
124  OverlayOpenCLContext *ctx = avctx->priv;
125  AVFrame *input_main, *input_overlay;
126  AVFrame *output;
127  cl_mem mem;
128  cl_int cle, x, y;
129  size_t global_work[2];
130  int kernel_arg = 0;
131  int err, plane;
132 
133  err = ff_framesync_get_frame(fs, 0, &input_main, 0);
134  if (err < 0)
135  return err;
136  err = ff_framesync_get_frame(fs, 1, &input_overlay, 0);
137  if (err < 0)
138  return err;
139 
140  if (!ctx->initialised) {
141  AVHWFramesContext *main_fc =
142  (AVHWFramesContext*)input_main->hw_frames_ctx->data;
143  AVHWFramesContext *overlay_fc =
144  (AVHWFramesContext*)input_overlay->hw_frames_ctx->data;
145 
146  err = overlay_opencl_load(avctx, main_fc->sw_format,
147  overlay_fc->sw_format);
148  if (err < 0)
149  return err;
150  }
151 
152  output = ff_get_video_buffer(outlink, outlink->w, outlink->h);
153  if (!output) {
154  err = AVERROR(ENOMEM);
155  goto fail;
156  }
157 
158  for (plane = 0; plane < ctx->nb_planes; plane++) {
159  kernel_arg = 0;
160 
161  mem = (cl_mem)output->data[plane];
162  CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem);
163  kernel_arg++;
164 
165  mem = (cl_mem)input_main->data[plane];
166  CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem);
167  kernel_arg++;
168 
169  mem = (cl_mem)input_overlay->data[plane];
170  CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem);
171  kernel_arg++;
172 
173  if (ctx->alpha_separate) {
174  mem = (cl_mem)input_overlay->data[ctx->nb_planes];
175  CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_mem, &mem);
176  kernel_arg++;
177  }
178 
179  x = ctx->x_position / (plane == 0 ? 1 : ctx->x_subsample);
180  y = ctx->y_position / (plane == 0 ? 1 : ctx->y_subsample);
181 
182  CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_int, &x);
183  kernel_arg++;
184  CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_int, &y);
185  kernel_arg++;
186 
187  if (ctx->alpha_separate) {
188  cl_int alpha_adj_x = plane == 0 ? 1 : ctx->x_subsample;
189  cl_int alpha_adj_y = plane == 0 ? 1 : ctx->y_subsample;
190 
191  CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_int, &alpha_adj_x);
192  kernel_arg++;
193  CL_SET_KERNEL_ARG(ctx->kernel, kernel_arg, cl_int, &alpha_adj_y);
194  kernel_arg++;
195  }
196 
197  err = ff_opencl_filter_work_size_from_image(avctx, global_work,
198  output, plane, 0);
199  if (err < 0)
200  goto fail;
201 
202  cle = clEnqueueNDRangeKernel(ctx->command_queue, ctx->kernel, 2, NULL,
203  global_work, NULL, 0, NULL, NULL);
204  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to enqueue overlay kernel "
205  "for plane %d: %d.\n", plane, cle);
206  }
207 
208  cle = clFinish(ctx->command_queue);
209  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to finish command queue: %d.\n", cle);
210 
211  err = av_frame_copy_props(output, input_main);
212 
213  av_log(avctx, AV_LOG_DEBUG, "Filter output: %s, %ux%u (%"PRId64").\n",
214  av_get_pix_fmt_name(output->format),
215  output->width, output->height, output->pts);
216 
217  return ff_filter_frame(outlink, output);
218 
219 fail:
220  av_frame_free(&output);
221  return err;
222 }
223 
225 {
226  AVFilterContext *avctx = outlink->src;
227  OverlayOpenCLContext *ctx = avctx->priv;
228  int err;
229 
230  err = ff_opencl_filter_config_output(outlink);
231  if (err < 0)
232  return err;
233 
234  err = ff_framesync_init_dualinput(&ctx->fs, avctx);
235  if (err < 0)
236  return err;
237 
238  return ff_framesync_configure(&ctx->fs);
239 }
240 
242 {
243  OverlayOpenCLContext *ctx = avctx->priv;
244 
246 
247  return ff_opencl_filter_init(avctx);
248 }
249 
251 {
252  OverlayOpenCLContext *ctx = avctx->priv;
253 
254  return ff_framesync_activate(&ctx->fs);
255 }
256 
258 {
259  OverlayOpenCLContext *ctx = avctx->priv;
260  cl_int cle;
261 
262  if (ctx->kernel) {
263  cle = clReleaseKernel(ctx->kernel);
264  if (cle != CL_SUCCESS)
265  av_log(avctx, AV_LOG_ERROR, "Failed to release "
266  "kernel: %d.\n", cle);
267  }
268 
269  if (ctx->command_queue) {
270  cle = clReleaseCommandQueue(ctx->command_queue);
271  if (cle != CL_SUCCESS)
272  av_log(avctx, AV_LOG_ERROR, "Failed to release "
273  "command queue: %d.\n", cle);
274  }
275 
277 
278  ff_framesync_uninit(&ctx->fs);
279 }
280 
281 #define OFFSET(x) offsetof(OverlayOpenCLContext, x)
282 #define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
284  { "x", "Overlay x position",
285  OFFSET(x_position), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS },
286  { "y", "Overlay y position",
287  OFFSET(y_position), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, .flags = FLAGS },
288  { NULL },
289 };
290 
291 AVFILTER_DEFINE_CLASS(overlay_opencl);
292 
294  {
295  .name = "main",
296  .type = AVMEDIA_TYPE_VIDEO,
297  .config_props = &ff_opencl_filter_config_input,
298  },
299  {
300  .name = "overlay",
301  .type = AVMEDIA_TYPE_VIDEO,
302  .config_props = &ff_opencl_filter_config_input,
303  },
304  { NULL }
305 };
306 
308  {
309  .name = "default",
310  .type = AVMEDIA_TYPE_VIDEO,
311  .config_props = &overlay_opencl_config_output,
312  },
313  { NULL }
314 };
315 
317  .name = "overlay_opencl",
318  .description = NULL_IF_CONFIG_SMALL("Overlay one video on top of another"),
319  .priv_size = sizeof(OverlayOpenCLContext),
320  .priv_class = &overlay_opencl_class,
325  .inputs = overlay_opencl_inputs,
326  .outputs = overlay_opencl_outputs,
327  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
328 };
int plane
Definition: avisynth_c.h:384
int plane
Which of the 4 planes contains the component.
Definition: pixdesc.h:35
#define NULL
Definition: coverity.c:32
#define FF_FILTER_FLAG_HWFRAME_AWARE
The filter is aware of hardware frames, and any hardware frame context should not be automatically pr...
Definition: internal.h:385
static const AVFilterPad overlay_opencl_outputs[]
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2522
This structure describes decoded (raw) audio or video data.
Definition: frame.h:295
AVOption.
Definition: opt.h:246
int ff_opencl_filter_work_size_from_image(AVFilterContext *avctx, size_t *work_size, AVFrame *frame, int plane, int block_alignment)
Find the work size needed needed for a given plane of an image.
Definition: opencl.c:278
int ff_opencl_filter_config_input(AVFilterLink *inlink)
Check that the input link contains a suitable hardware frames context and extract the device from it...
Definition: opencl.c:60
int ff_opencl_filter_query_formats(AVFilterContext *avctx)
Return that all inputs and outputs support only AV_PIX_FMT_OPENCL.
Definition: opencl.c:28
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:182
Main libavfilter public API header.
Memory handling functions.
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:35
int ff_framesync_configure(FFFrameSync *fs)
Configure a frame sync structure.
Definition: framesync.c:117
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
Definition: video.c:99
int ff_opencl_filter_init(AVFilterContext *avctx)
Initialise an OpenCL filter context.
Definition: opencl.c:147
uint8_t log2_chroma_w
Amount to shift the luma width right to find the chroma width.
Definition: pixdesc.h:92
AVFILTER_DEFINE_CLASS(overlay_opencl)
AVOpenCLDeviceContext * hwctx
Definition: opencl.h:41
AVBufferRef * hw_frames_ctx
For hwaccel-format frames, this should be a reference to the AVHWFramesContext describing the frame...
Definition: frame.h:634
static int overlay_opencl_load(AVFilterContext *avctx, enum AVPixelFormat main_format, enum AVPixelFormat overlay_format)
const char * name
Pad name.
Definition: internal.h:60
AVFilterContext * parent
Parent filter context.
Definition: framesync.h:152
static int overlay_opencl_activate(AVFilterContext *avctx)
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
Definition: avfilter.c:1080
AVFilter ff_vf_overlay_opencl
AVComponentDescriptor comp[4]
Parameters that describe how pixels are packed.
Definition: pixdesc.h:117
int mem
Definition: avisynth_c.h:916
#define av_cold
Definition: attributes.h:82
static av_cold int uninit(AVCodecContext *avctx)
Definition: crystalhd.c:279
cl_command_queue command_queue
AVOptions.
int ff_framesync_init_dualinput(FFFrameSync *fs, AVFilterContext *parent)
Initialize a frame sync structure for dualinput.
Definition: framesync.c:361
static int overlay_opencl_blend(FFFrameSync *fs)
filter_frame For filters that do not use the activate() callback
cl_device_id device_id
The primary device ID of the device.
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:388
static av_cold int overlay_opencl_init(AVFilterContext *avctx)
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several the filter must be ready for frames arriving randomly on any input any filter with several inputs will most likely require some kind of queuing mechanism It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced request_frame For filters that do not use the this method is called when a frame is wanted on an output For a source
int ff_opencl_filter_config_output(AVFilterLink *outlink)
Create a suitable hardware frames context for the output.
Definition: opencl.c:96
const char * ff_opencl_source_overlay
#define av_log(a,...)
A filter pad used for either input or output.
Definition: internal.h:54
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
int width
Definition: frame.h:353
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
uint8_t log2_chroma_h
Amount to shift the luma height right to find the chroma height.
Definition: pixdesc.h:101
void ff_framesync_uninit(FFFrameSync *fs)
Free all memory currently allocated.
Definition: framesync.c:293
Frame sync structure.
Definition: framesync.h:146
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g.
Definition: frame.c:202
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
Definition: internal.h:186
void * priv
private data for use by the filter
Definition: avfilter.h:353
#define AV_LOG_DEBUG
Stuff which is only useful for libav* developers.
Definition: log.h:197
static const AVFilterPad overlay_opencl_inputs[]
int ff_framesync_activate(FFFrameSync *fs)
Examine the frames in the filter&#39;s input and try to produce output.
Definition: framesync.c:344
int(* on_event)(struct FFFrameSync *fs)
Callback called when a frame event is ready.
Definition: framesync.h:172
#define FFMAX(a, b)
Definition: common.h:94
#define fail()
Definition: checkasm.h:121
uint8_t nb_components
The number of components each pixel has, (1-4)
Definition: pixdesc.h:83
AVFormatContext * ctx
Definition: movenc.c:48
#define OFFSET(x)
static const AVFilterPad outputs[]
Definition: af_acontrast.c:203
int format
format of the frame, -1 if unknown or unset Values correspond to enum AVPixelFormat for video frames...
Definition: frame.h:368
#define CL_SET_KERNEL_ARG(kernel, arg_num, type, arg)
set argument to specific Kernel.
Definition: opencl.h:61
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:81
uint8_t * data
The data buffer.
Definition: buffer.h:89
these buffered frames must be flushed immediately if a new input produces new the filter must not call request_frame to get more It must just process the frame or queue it The task of requesting more frames is left to the filter s request_frame method or the application If a filter has several inputs
static int overlay_opencl_config_output(AVFilterLink *outlink)
Filter definition.
Definition: avfilter.h:144
static av_cold void overlay_opencl_uninit(AVFilterContext *avctx)
This struct describes a set or pool of "hardware" frames (i.e.
Definition: hwcontext.h:123
const char * name
Filter name.
Definition: avfilter.h:148
AVFilterLink ** outputs
array of pointers to output links
Definition: avfilter.h:350
#define CL_FAIL_ON_ERROR(errcode,...)
A helper macro to handle OpenCL errors.
Definition: opencl.h:74
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:309
static const AVOption overlay_opencl_options[]
static int query_formats(AVFilterContext *ctx)
Definition: aeval.c:244
void ff_opencl_filter_uninit(AVFilterContext *avctx)
Uninitialise an OpenCL filter context.
Definition: opencl.c:156
#define FLAGS
OpenCLFilterContext ocf
cl_context context
The OpenCL context which will contain all operations and frames on this device.
An instance of a filter.
Definition: avfilter.h:338
int height
Definition: frame.h:353
cl_program program
Definition: opencl.h:43
int ff_opencl_filter_load_program(AVFilterContext *avctx, const char **program_source_array, int nb_strings)
Load a new OpenCL program from strings in memory.
Definition: opencl.c:171
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
Definition: pixdesc.c:2438
internal API functions
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later.That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another.Frame references ownership and permissions
int ff_framesync_get_frame(FFFrameSync *fs, unsigned in, AVFrame **rframe, unsigned get)
Get the current frame in an input.
Definition: framesync.c:256
enum AVPixelFormat sw_format
The pixel format identifying the actual data layout of the hardware frames.
Definition: hwcontext.h:221
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
Definition: frame.c:654