vf_overlay_cuda.c
/*
 * Copyright (c) 2020 Yaroslav Pogrebnyak <yyyaroslav@gmail.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Overlay one video on top of another using cuda hardware acceleration
 */

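/*
 * Example invocation (illustrative only; file names and the x/y values are
 * placeholders, and the hwaccel/encoder flags are the usual CUDA pipeline
 * options rather than anything defined in this file):
 *
 *   ffmpeg -hwaccel cuda -hwaccel_output_format cuda -i main.mp4 \
 *          -hwaccel cuda -hwaccel_output_format cuda -i overlay.mp4 \
 *          -filter_complex "[0:v][1:v]overlay_cuda=x=100:y=100" \
 *          -c:v h264_nvenc out.mp4
 *
 * Both inputs must already be CUDA hardware frames; the filter never
 * downloads them to system memory.
 */
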
#include "libavutil/log.h"
#include "libavutil/mem.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "libavutil/hwcontext.h"
#include "libavutil/hwcontext_cuda_internal.h"
#include "libavutil/cuda_check.h"

#include "avfilter.h"
#include "framesync.h"
#include "internal.h"

#define CHECK_CU(x) FF_CUDA_CHECK_DL(ctx, ctx->hwctx->internal->cuda_dl, x)
#define DIV_UP(a, b) ( ((a) + (b) - 1) / (b) )

#define BLOCK_X 32
#define BLOCK_Y 16

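/*
 * The launch in overlay_cuda_call_kernel() covers the main plane with
 * BLOCK_X x BLOCK_Y thread blocks; DIV_UP rounds the grid dimensions up so
 * planes whose sizes are not multiples of the block size are still fully
 * covered.
 */
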
static const enum AVPixelFormat supported_main_formats[] = {
    AV_PIX_FMT_NV12,
    AV_PIX_FMT_YUV420P,
    AV_PIX_FMT_NONE,
};

static const enum AVPixelFormat supported_overlay_formats[] = {
    AV_PIX_FMT_NV12,
    AV_PIX_FMT_YUV420P,
    AV_PIX_FMT_YUVA420P,
    AV_PIX_FMT_NONE,
};

/**
 * OverlayCUDAContext
 */
typedef struct OverlayCUDAContext {
    const AVClass      *class;

    enum AVPixelFormat in_format_overlay;
    enum AVPixelFormat in_format_main;

    AVCUDADeviceContext *hwctx;

    CUcontext cu_ctx;
    CUmodule cu_module;
    CUfunction cu_func;
    CUstream cu_stream;

    FFFrameSync fs;

    int x_position;
    int y_position;
} OverlayCUDAContext;

/**
 * Helper to find out if provided format is supported by filter
 */
static int format_is_supported(const enum AVPixelFormat formats[], enum AVPixelFormat fmt)
{
    for (int i = 0; formats[i] != AV_PIX_FMT_NONE; i++)
        if (formats[i] == fmt)
            return 1;
    return 0;
}

/**
 * Helper to check if we can process main and overlay pixel formats
 */
static int formats_match(const enum AVPixelFormat format_main, const enum AVPixelFormat format_overlay) {
    switch(format_main) {
    case AV_PIX_FMT_NV12:
        return format_overlay == AV_PIX_FMT_NV12;
    case AV_PIX_FMT_YUV420P:
        return format_overlay == AV_PIX_FMT_YUV420P ||
               format_overlay == AV_PIX_FMT_YUVA420P;
    default:
        return 0;
    }
}
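
/*
 * A YUVA420P overlay is accepted on a YUV420P main picture because its
 * fourth plane carries per-pixel alpha; overlay_cuda_blend() passes that
 * plane to the kernel as alpha_data.
 */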

/**
 * Call overlay kernel for a plane
 */
static int overlay_cuda_call_kernel(
    OverlayCUDAContext *ctx,
    int x_position, int y_position,
    uint8_t* main_data, int main_linesize,
    int main_width, int main_height,
    uint8_t* overlay_data, int overlay_linesize,
    int overlay_width, int overlay_height,
    uint8_t* alpha_data, int alpha_linesize,
    int alpha_adj_x, int alpha_adj_y) {

    CudaFunctions *cu = ctx->hwctx->internal->cuda_dl;

    void* kernel_args[] = {
        &x_position, &y_position,
        &main_data, &main_linesize,
        &overlay_data, &overlay_linesize,
        &overlay_width, &overlay_height,
        &alpha_data, &alpha_linesize,
        &alpha_adj_x, &alpha_adj_y,
    };

    return CHECK_CU(cu->cuLaunchKernel(
        ctx->cu_func,
        DIV_UP(main_width, BLOCK_X), DIV_UP(main_height, BLOCK_Y), 1,
        BLOCK_X, BLOCK_Y, 1,
        0, ctx->cu_stream, kernel_args, NULL));
}
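
/*
 * The order of kernel_args must match the parameter list of the Overlay_Cuda
 * function inside the compiled PTX (vf_overlay_cuda_ptx); the arguments are
 * passed as untyped pointers, so an ordering mismatch would not be diagnosed
 * by the driver.
 */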

/**
 * Perform blend overlay picture over main picture
 */
static int overlay_cuda_blend(FFFrameSync *fs)
{
    int ret;

    AVFilterContext *avctx = fs->parent;
    OverlayCUDAContext *ctx = avctx->priv;
    AVFilterLink *outlink = avctx->outputs[0];

    CudaFunctions *cu = ctx->hwctx->internal->cuda_dl;
    CUcontext dummy, cuda_ctx = ctx->hwctx->cuda_ctx;

    AVFrame *input_main, *input_overlay;

    ctx->cu_ctx = cuda_ctx;

    // read main and overlay frames from inputs
    ret = ff_framesync_dualinput_get(fs, &input_main, &input_overlay);
    if (ret < 0)
        return ret;

    if (!input_main || !input_overlay)
        return AVERROR_BUG;

    ret = av_frame_make_writable(input_main);
    if (ret < 0) {
        av_frame_free(&input_main);
        return ret;
    }

    // push cuda context

    ret = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
    if (ret < 0) {
        av_frame_free(&input_main);
        return ret;
    }

    // overlay first plane

    overlay_cuda_call_kernel(ctx,
        ctx->x_position, ctx->y_position,
        input_main->data[0], input_main->linesize[0],
        input_main->width, input_main->height,
        input_overlay->data[0], input_overlay->linesize[0],
        input_overlay->width, input_overlay->height,
        input_overlay->data[3], input_overlay->linesize[3], 1, 1);

    // overlay rest planes depending on pixel format

    switch(ctx->in_format_overlay) {
    case AV_PIX_FMT_NV12:
        // NV12: one interleaved UV plane at half vertical resolution
        overlay_cuda_call_kernel(ctx,
            ctx->x_position, ctx->y_position / 2,
            input_main->data[1], input_main->linesize[1],
            input_main->width, input_main->height / 2,
            input_overlay->data[1], input_overlay->linesize[1],
            input_overlay->width, input_overlay->height / 2,
            0, 0, 0, 0);
        break;
    case AV_PIX_FMT_YUV420P:
    case AV_PIX_FMT_YUVA420P:
        // YUV420P/YUVA420P: separate U and V planes at half resolution in both dimensions
        overlay_cuda_call_kernel(ctx,
            ctx->x_position / 2, ctx->y_position / 2,
            input_main->data[1], input_main->linesize[1],
            input_main->width / 2, input_main->height / 2,
            input_overlay->data[1], input_overlay->linesize[1],
            input_overlay->width / 2, input_overlay->height / 2,
            input_overlay->data[3], input_overlay->linesize[3], 2, 2);

        overlay_cuda_call_kernel(ctx,
            ctx->x_position / 2, ctx->y_position / 2,
            input_main->data[2], input_main->linesize[2],
            input_main->width / 2, input_main->height / 2,
            input_overlay->data[2], input_overlay->linesize[2],
            input_overlay->width / 2, input_overlay->height / 2,
            input_overlay->data[3], input_overlay->linesize[3], 2, 2);
        break;
    default:
        av_log(ctx, AV_LOG_ERROR, "Passed unsupported overlay pixel format\n");
        av_frame_free(&input_main);
        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
        return AVERROR_BUG;
    }

    CHECK_CU(cu->cuCtxPopCurrent(&dummy));

    return ff_filter_frame(outlink, input_main);
}

/**
 * Initialize overlay_cuda
 */
static av_cold int overlay_cuda_init(AVFilterContext *avctx)
{
    OverlayCUDAContext* ctx = avctx->priv;
    ctx->fs.on_event = &overlay_cuda_blend;

    return 0;
}

/**
 * Uninitialize overlay_cuda
 */
static av_cold void overlay_cuda_uninit(AVFilterContext *avctx)
{
    OverlayCUDAContext* ctx = avctx->priv;

    ff_framesync_uninit(&ctx->fs);

    if (ctx->hwctx && ctx->cu_module) {
        CUcontext dummy;
        CudaFunctions *cu = ctx->hwctx->internal->cuda_dl;
        CHECK_CU(cu->cuCtxPushCurrent(ctx->cu_ctx));
        CHECK_CU(cu->cuModuleUnload(ctx->cu_module));
        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
    }
}

/**
 * Activate overlay_cuda
 */
static int overlay_cuda_activate(AVFilterContext *avctx)
{
    OverlayCUDAContext *ctx = avctx->priv;

    return ff_framesync_activate(&ctx->fs);
}

/**
 * Query formats
 */
static int overlay_cuda_query_formats(AVFilterContext *avctx)
{
    static const enum AVPixelFormat pixel_formats[] = {
        AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE,
    };

    AVFilterFormats *pix_fmts = ff_make_format_list(pixel_formats);

    return ff_set_common_formats(avctx, pix_fmts);
}
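
/*
 * Note: format negotiation only selects AV_PIX_FMT_CUDA on every link; the
 * underlying software layout of the hardware frames (sw_format) is validated
 * later in overlay_cuda_config_output().
 */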

/**
 * Configure output
 */
static int overlay_cuda_config_output(AVFilterLink *outlink)
{

    extern char vf_overlay_cuda_ptx[];

    int err;
    AVFilterContext* avctx = outlink->src;
    OverlayCUDAContext* ctx = avctx->priv;

    AVFilterLink *inlink = avctx->inputs[0];
    AVHWFramesContext *frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;

    AVFilterLink *inlink_overlay = avctx->inputs[1];
    AVHWFramesContext *frames_ctx_overlay = (AVHWFramesContext*)inlink_overlay->hw_frames_ctx->data;

    CUcontext dummy, cuda_ctx;
    CudaFunctions *cu;

    // check main input formats

    if (!frames_ctx) {
        av_log(ctx, AV_LOG_ERROR, "No hw context provided on main input\n");
        return AVERROR(EINVAL);
    }

    ctx->in_format_main = frames_ctx->sw_format;
    if (!format_is_supported(supported_main_formats, ctx->in_format_main)) {
        av_log(ctx, AV_LOG_ERROR, "Unsupported main input format: %s\n",
               av_get_pix_fmt_name(ctx->in_format_main));
        return AVERROR(ENOSYS);
    }

    // check overlay input formats

    if (!frames_ctx_overlay) {
        av_log(ctx, AV_LOG_ERROR, "No hw context provided on overlay input\n");
        return AVERROR(EINVAL);
    }

    ctx->in_format_overlay = frames_ctx_overlay->sw_format;
    if (!format_is_supported(supported_overlay_formats, ctx->in_format_overlay)) {
        av_log(ctx, AV_LOG_ERROR, "Unsupported overlay input format: %s\n",
               av_get_pix_fmt_name(ctx->in_format_overlay));
        return AVERROR(ENOSYS);
    }

    // check we can overlay pictures with those pixel formats

    if (!formats_match(ctx->in_format_main, ctx->in_format_overlay)) {
        av_log(ctx, AV_LOG_ERROR, "Can't overlay %s on %s \n",
               av_get_pix_fmt_name(ctx->in_format_overlay),
               av_get_pix_fmt_name(ctx->in_format_main));
        return AVERROR(EINVAL);
    }

    // initialize

    ctx->hwctx = frames_ctx->device_ctx->hwctx;
    cuda_ctx = ctx->hwctx->cuda_ctx;
    ctx->fs.time_base = inlink->time_base;

    ctx->cu_stream = ctx->hwctx->stream;

    outlink->hw_frames_ctx = av_buffer_ref(inlink->hw_frames_ctx);

    // load functions

    cu = ctx->hwctx->internal->cuda_dl;

    err = CHECK_CU(cu->cuCtxPushCurrent(cuda_ctx));
    if (err < 0) {
        return err;
    }

    err = CHECK_CU(cu->cuModuleLoadData(&ctx->cu_module, vf_overlay_cuda_ptx));
    if (err < 0) {
        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
        return err;
    }

    err = CHECK_CU(cu->cuModuleGetFunction(&ctx->cu_func, ctx->cu_module, "Overlay_Cuda"));
    if (err < 0) {
        CHECK_CU(cu->cuCtxPopCurrent(&dummy));
        return err;
    }

    CHECK_CU(cu->cuCtxPopCurrent(&dummy));

    // init dual input

    err = ff_framesync_init_dualinput(&ctx->fs, avctx);
    if (err < 0) {
        return err;
    }

    return ff_framesync_configure(&ctx->fs);
}
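
/*
 * The output link reuses the main input's hw_frames_ctx, so frames leave the
 * filter as CUDA hardware frames in the main stream's software format; only
 * the main frame (made writable in overlay_cuda_blend()) is modified and
 * passed downstream.
 */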


#define OFFSET(x) offsetof(OverlayCUDAContext, x)
#define FLAGS (AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)

static const AVOption overlay_cuda_options[] = {
    { "x", "Overlay x position",
      OFFSET(x_position), AV_OPT_TYPE_INT, { .i64 = 0 }, INT_MIN, INT_MAX, .flags = FLAGS },
    { "y", "Overlay y position",
      OFFSET(y_position), AV_OPT_TYPE_INT, { .i64 = 0 }, INT_MIN, INT_MAX, .flags = FLAGS },
    { "eof_action", "Action to take when encountering EOF from secondary input",
      OFFSET(fs.opt_eof_action), AV_OPT_TYPE_INT, { .i64 = EOF_ACTION_REPEAT },
      EOF_ACTION_REPEAT, EOF_ACTION_PASS, .flags = FLAGS, "eof_action" },
    { "repeat", "Repeat the previous frame.",   0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_REPEAT }, .flags = FLAGS, "eof_action" },
    { "endall", "End both streams.",            0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_ENDALL }, .flags = FLAGS, "eof_action" },
    { "pass",   "Pass through the main input.", 0, AV_OPT_TYPE_CONST, { .i64 = EOF_ACTION_PASS },   .flags = FLAGS, "eof_action" },
    { "shortest", "force termination when the shortest input terminates", OFFSET(fs.opt_shortest), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
    { "repeatlast", "repeat overlay of the last overlay frame", OFFSET(fs.opt_repeatlast), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS },
    { NULL },
};

FRAMESYNC_DEFINE_CLASS(overlay_cuda, OverlayCUDAContext, fs);

static const AVFilterPad overlay_cuda_inputs[] = {
    {
        .name = "main",
        .type = AVMEDIA_TYPE_VIDEO,
    },
    {
        .name = "overlay",
        .type = AVMEDIA_TYPE_VIDEO,
    },
    { NULL }
};

static const AVFilterPad overlay_cuda_outputs[] = {
    {
        .name = "default",
        .type = AVMEDIA_TYPE_VIDEO,
        .config_props = &overlay_cuda_config_output,
    },
    { NULL }
};

AVFilter ff_vf_overlay_cuda = {
    .name            = "overlay_cuda",
    .description     = NULL_IF_CONFIG_SMALL("Overlay one video on top of another using CUDA"),
    .priv_size       = sizeof(OverlayCUDAContext),
    .priv_class      = &overlay_cuda_class,
    .init            = &overlay_cuda_init,
    .uninit          = &overlay_cuda_uninit,
    .activate        = &overlay_cuda_activate,
    .query_formats   = &overlay_cuda_query_formats,
    .inputs          = overlay_cuda_inputs,
    .outputs         = overlay_cuda_outputs,
    .preinit         = overlay_cuda_framesync_preinit,
    .flags_internal  = FF_FILTER_FLAG_HWFRAME_AWARE,
};