FFmpeg
hwcontext_cuda.c
/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "buffer.h"
#include "common.h"
#include "hwcontext.h"
#include "hwcontext_internal.h"
#include "hwcontext_cuda_internal.h"
#include "cuda_check.h"
#include "mem.h"
#include "pixdesc.h"
#include "pixfmt.h"
#include "imgutils.h"

#define CUDA_FRAME_ALIGNMENT 256

typedef struct CUDAFramesContext {
    int shift_width, shift_height;
} CUDAFramesContext;

static const enum AVPixelFormat supported_formats[] = {
    AV_PIX_FMT_NV12,
    AV_PIX_FMT_YUV420P,
    AV_PIX_FMT_YUV444P,
    AV_PIX_FMT_P010,
    AV_PIX_FMT_P016,
    AV_PIX_FMT_YUV444P16,
    AV_PIX_FMT_0RGB32,
    AV_PIX_FMT_0BGR32,
};

#define CHECK_CU(x) FF_CUDA_CHECK_DL(device_ctx, cu, x)

static int cuda_frames_get_constraints(AVHWDeviceContext *ctx,
                                       const void *hwconfig,
                                       AVHWFramesConstraints *constraints)
{
    int i;

    constraints->valid_sw_formats = av_malloc_array(FF_ARRAY_ELEMS(supported_formats) + 1,
                                                    sizeof(*constraints->valid_sw_formats));
    if (!constraints->valid_sw_formats)
        return AVERROR(ENOMEM);

    for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
        constraints->valid_sw_formats[i] = supported_formats[i];
    constraints->valid_sw_formats[FF_ARRAY_ELEMS(supported_formats)] = AV_PIX_FMT_NONE;

    constraints->valid_hw_formats = av_malloc_array(2, sizeof(*constraints->valid_hw_formats));
    if (!constraints->valid_hw_formats)
        return AVERROR(ENOMEM);

    constraints->valid_hw_formats[0] = AV_PIX_FMT_CUDA;
    constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;

    return 0;
}

static void cuda_buffer_free(void *opaque, uint8_t *data)
{
    AVHWFramesContext        *ctx = opaque;
    AVHWDeviceContext *device_ctx = ctx->device_ctx;
    AVCUDADeviceContext    *hwctx = device_ctx->hwctx;
    CudaFunctions             *cu = hwctx->internal->cuda_dl;

    CUcontext dummy;

    CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));

    CHECK_CU(cu->cuMemFree((CUdeviceptr)data));

    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
}

static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
{
    AVHWFramesContext        *ctx = opaque;
    AVHWDeviceContext *device_ctx = ctx->device_ctx;
    AVCUDADeviceContext    *hwctx = device_ctx->hwctx;
    CudaFunctions             *cu = hwctx->internal->cuda_dl;

    AVBufferRef *ret = NULL;
    CUcontext dummy = NULL;
    CUdeviceptr data;
    int err;

    err = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
    if (err < 0)
        return NULL;

    err = CHECK_CU(cu->cuMemAlloc(&data, size));
    if (err < 0)
        goto fail;

    ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
    if (!ret) {
        CHECK_CU(cu->cuMemFree(data));
        goto fail;
    }

fail:
    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
    return ret;
}

static int cuda_frames_init(AVHWFramesContext *ctx)
{
    CUDAFramesContext *priv = ctx->internal->priv;
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
        if (ctx->sw_format == supported_formats[i])
            break;
    }
    if (i == FF_ARRAY_ELEMS(supported_formats)) {
        av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n",
               av_get_pix_fmt_name(ctx->sw_format));
        return AVERROR(ENOSYS);
    }

    av_pix_fmt_get_chroma_sub_sample(ctx->sw_format, &priv->shift_width, &priv->shift_height);

    if (!ctx->pool) {
        int size = av_image_get_buffer_size(ctx->sw_format, ctx->width, ctx->height, CUDA_FRAME_ALIGNMENT);
        if (size < 0)
            return size;

        ctx->internal->pool_internal = av_buffer_pool_init2(size, ctx, cuda_pool_alloc, NULL);
        if (!ctx->internal->pool_internal)
            return AVERROR(ENOMEM);
    }

    return 0;
}

static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
{
    int res;

    frame->buf[0] = av_buffer_pool_get(ctx->pool);
    if (!frame->buf[0])
        return AVERROR(ENOMEM);

    res = av_image_fill_arrays(frame->data, frame->linesize, frame->buf[0]->data,
                               ctx->sw_format, ctx->width, ctx->height, CUDA_FRAME_ALIGNMENT);
    if (res < 0)
        return res;

    // YUV420P is a special case.
    // Nvenc expects the U/V planes in swapped order from how ffmpeg expects them, also chroma is half-aligned
    if (ctx->sw_format == AV_PIX_FMT_YUV420P) {
        frame->linesize[1] = frame->linesize[2] = frame->linesize[0] / 2;
        frame->data[2]     = frame->data[1];
        frame->data[1]     = frame->data[2] + frame->linesize[2] * ctx->height / 2;
    }

    frame->format = AV_PIX_FMT_CUDA;
    frame->width  = ctx->width;
    frame->height = ctx->height;

    return 0;
}
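
/*
 * Worked example for the YUV420P plane swap in cuda_get_buffer() above
 * (illustrative note, not part of the original file; the 1920x1080 geometry
 * is an assumption). With CUDA_FRAME_ALIGNMENT = 256, av_image_fill_arrays()
 * produces linesize[0] = 2048, and the override then sets
 * linesize[1] = linesize[2] = 2048 / 2 = 1024. data[2] (Cr/V) is pointed at
 * the bytes directly after the Y plane (2048 * 1080 bytes in) and data[1]
 * (Cb/U) at the following 1024 * 540 bytes, giving the Y-V-U (YV12) ordering
 * NVENC expects. The chroma pitch is derived as half the luma pitch rather
 * than being independently 256-aligned, which is what the comment means by
 * "half-aligned".
 */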

static int cuda_transfer_get_formats(AVHWFramesContext *ctx,
                                     enum AVHWFrameTransferDirection dir,
                                     enum AVPixelFormat **formats)
{
    enum AVPixelFormat *fmts;

    fmts = av_malloc_array(2, sizeof(*fmts));
    if (!fmts)
        return AVERROR(ENOMEM);

    fmts[0] = ctx->sw_format;
    fmts[1] = AV_PIX_FMT_NONE;

    *formats = fmts;

    return 0;
}

static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst,
                                   const AVFrame *src)
{
    CUDAFramesContext       *priv = ctx->internal->priv;
    AVHWDeviceContext *device_ctx = ctx->device_ctx;
    AVCUDADeviceContext    *hwctx = device_ctx->hwctx;
    CudaFunctions             *cu = hwctx->internal->cuda_dl;

    CUcontext dummy;
    int i, ret;

    ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
    if (ret < 0)
        return ret;

    for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
        CUDA_MEMCPY2D cpy = {
            .srcMemoryType = CU_MEMORYTYPE_DEVICE,
            .dstMemoryType = CU_MEMORYTYPE_HOST,
            .srcDevice     = (CUdeviceptr)src->data[i],
            .dstHost       = dst->data[i],
            .srcPitch      = src->linesize[i],
            .dstPitch      = dst->linesize[i],
            .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]),
            .Height        = src->height >> (i ? priv->shift_height : 0),
        };

        ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, hwctx->stream));
        if (ret < 0)
            goto exit;
    }

    ret = CHECK_CU(cu->cuStreamSynchronize(hwctx->stream));
    if (ret < 0)
        goto exit;

exit:
    CHECK_CU(cu->cuCtxPopCurrent(&dummy));

    return 0;
}

static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst,
                                 const AVFrame *src)
{
    CUDAFramesContext       *priv = ctx->internal->priv;
    AVHWDeviceContext *device_ctx = ctx->device_ctx;
    AVCUDADeviceContext    *hwctx = device_ctx->hwctx;
    CudaFunctions             *cu = hwctx->internal->cuda_dl;

    CUcontext dummy;
    int i, ret;

    ret = CHECK_CU(cu->cuCtxPushCurrent(hwctx->cuda_ctx));
    if (ret < 0)
        return ret;

    for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
        CUDA_MEMCPY2D cpy = {
            .srcMemoryType = CU_MEMORYTYPE_HOST,
            .dstMemoryType = CU_MEMORYTYPE_DEVICE,
            .srcHost       = src->data[i],
            .dstDevice     = (CUdeviceptr)dst->data[i],
            .srcPitch      = src->linesize[i],
            .dstPitch      = dst->linesize[i],
            .WidthInBytes  = FFMIN(src->linesize[i], dst->linesize[i]),
            .Height        = src->height >> (i ? priv->shift_height : 0),
        };

        ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, hwctx->stream));
        if (ret < 0)
            goto exit;
    }

exit:
    CHECK_CU(cu->cuCtxPopCurrent(&dummy));

    return 0;
}

static void cuda_device_uninit(AVHWDeviceContext *device_ctx)
{
    AVCUDADeviceContext *hwctx = device_ctx->hwctx;

    if (hwctx->internal) {
        CudaFunctions *cu = hwctx->internal->cuda_dl;
        if (hwctx->internal->is_allocated && hwctx->cuda_ctx) {
            CHECK_CU(cu->cuCtxDestroy(hwctx->cuda_ctx));
            hwctx->cuda_ctx = NULL;
        }
        cuda_free_functions(&hwctx->internal->cuda_dl);
    }

    av_freep(&hwctx->internal);
}

static int cuda_device_init(AVHWDeviceContext *ctx)
{
    AVCUDADeviceContext *hwctx = ctx->hwctx;
    int ret;

    if (!hwctx->internal) {
        hwctx->internal = av_mallocz(sizeof(*hwctx->internal));
        if (!hwctx->internal)
            return AVERROR(ENOMEM);
    }

    if (!hwctx->internal->cuda_dl) {
        ret = cuda_load_functions(&hwctx->internal->cuda_dl, ctx);
        if (ret < 0) {
            av_log(ctx, AV_LOG_ERROR, "Could not dynamically load CUDA\n");
            goto error;
        }
    }

    return 0;

error:
    cuda_device_uninit(ctx);
    return ret;
}

static int cuda_device_create(AVHWDeviceContext *device_ctx,
                              const char *device,
                              AVDictionary *opts, int flags)
{
    AVCUDADeviceContext *hwctx = device_ctx->hwctx;
    CudaFunctions *cu;
    CUdevice cu_device;
    CUcontext dummy;
    int ret, device_idx = 0;

    if (device)
        device_idx = strtol(device, NULL, 0);

    if (cuda_device_init(device_ctx) < 0)
        goto error;

    cu = hwctx->internal->cuda_dl;

    ret = CHECK_CU(cu->cuInit(0));
    if (ret < 0)
        goto error;

    ret = CHECK_CU(cu->cuDeviceGet(&cu_device, device_idx));
    if (ret < 0)
        goto error;

    ret = CHECK_CU(cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device));
    if (ret < 0)
        goto error;

    // Setting stream to NULL will make functions automatically use the default CUstream
    hwctx->stream = NULL;

    CHECK_CU(cu->cuCtxPopCurrent(&dummy));

    hwctx->internal->is_allocated = 1;

    return 0;

error:
    cuda_device_uninit(device_ctx);
    return AVERROR_UNKNOWN;
}
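
/*
 * Illustrative sketch, not part of hwcontext_cuda.c: an application that
 * already owns a CUDA context can skip cuda_device_create() and fill in
 * AVCUDADeviceContext itself, so the transfer functions above run on its own
 * stream. The helper name wrap_existing_cuda_context() and its arguments are
 * assumptions made for this example.
 */
static AVBufferRef *wrap_existing_cuda_context(CUcontext app_ctx, CUstream app_stream)
{
    AVBufferRef *device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA);
    AVHWDeviceContext *device_ctx;
    AVCUDADeviceContext *cuda_hwctx;

    if (!device_ref)
        return NULL;

    device_ctx = (AVHWDeviceContext*)device_ref->data;
    cuda_hwctx = device_ctx->hwctx;

    cuda_hwctx->cuda_ctx = app_ctx;     // context created and owned by the app
    cuda_hwctx->stream   = app_stream;  // stream used for the async 2D copies

    // av_hwdevice_ctx_init() calls cuda_device_init() to load the CUDA
    // functions; is_allocated stays 0, so cuda_device_uninit() will not
    // destroy the application's context.
    if (av_hwdevice_ctx_init(device_ref) < 0)
        av_buffer_unref(&device_ref);

    return device_ref;
}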

const HWContextType ff_hwcontext_type_cuda = {
    .type                   = AV_HWDEVICE_TYPE_CUDA,
    .name                   = "CUDA",

    .device_hwctx_size      = sizeof(AVCUDADeviceContext),
    .frames_priv_size       = sizeof(CUDAFramesContext),

    .device_create          = cuda_device_create,
    .device_init            = cuda_device_init,
    .device_uninit          = cuda_device_uninit,
    .frames_get_constraints = cuda_frames_get_constraints,
    .frames_init            = cuda_frames_init,
    .frames_get_buffer      = cuda_get_buffer,
    .transfer_get_formats   = cuda_transfer_get_formats,
    .transfer_data_to       = cuda_transfer_data_to,
    .transfer_data_from     = cuda_transfer_data_from,

    .pix_fmts               = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE },
};
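
/*
 * Illustrative usage sketch, not part of hwcontext_cuda.c: how an application
 * would typically drive the callbacks above through the public libavutil API.
 * The device string "0", the 1280x720 NV12 geometry and the upload_to_cuda()
 * helper name are assumptions made for this example; sw_frame is assumed to
 * be a host NV12 frame of matching size.
 */
#include <libavutil/hwcontext.h>
#include <libavutil/frame.h>

static int upload_to_cuda(AVFrame *sw_frame)
{
    AVBufferRef *device_ref = NULL, *frames_ref = NULL;
    AVHWFramesContext *frames;
    AVFrame *hw_frame = NULL;
    int err;

    // cuda_device_create() and cuda_device_init() run behind this call.
    err = av_hwdevice_ctx_create(&device_ref, AV_HWDEVICE_TYPE_CUDA, "0", NULL, 0);
    if (err < 0)
        return err;

    // cuda_frames_init() validates sw_format and sets up the internal pool.
    frames_ref = av_hwframe_ctx_alloc(device_ref);
    if (!frames_ref) {
        err = AVERROR(ENOMEM);
        goto end;
    }
    frames            = (AVHWFramesContext*)frames_ref->data;
    frames->format    = AV_PIX_FMT_CUDA;
    frames->sw_format = AV_PIX_FMT_NV12;
    frames->width     = 1280;
    frames->height    = 720;
    err = av_hwframe_ctx_init(frames_ref);
    if (err < 0)
        goto end;

    // cuda_get_buffer() hands out a device frame from the pool...
    hw_frame = av_frame_alloc();
    if (!hw_frame) {
        err = AVERROR(ENOMEM);
        goto end;
    }
    err = av_hwframe_get_buffer(frames_ref, hw_frame, 0);
    if (err < 0)
        goto end;

    // ...and cuda_transfer_data_to() copies the host planes onto it.
    err = av_hwframe_transfer_data(hw_frame, sw_frame, 0);

end:
    av_frame_free(&hw_frame);
    av_buffer_unref(&frames_ref);
    av_buffer_unref(&device_ref);
    return err;
}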