FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
hwcontext_cuda.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include "buffer.h"
20 #include "common.h"
21 #include "hwcontext.h"
22 #include "hwcontext_internal.h"
24 #include "mem.h"
25 #include "pixdesc.h"
26 #include "pixfmt.h"
27 #include "imgutils.h"
28 
29 #define CUDA_FRAME_ALIGNMENT 256
30 
31 typedef struct CUDAFramesContext {
34 
35 static const enum AVPixelFormat supported_formats[] = {
44 };
45 
47  const void *hwconfig,
48  AVHWFramesConstraints *constraints)
49 {
50  int i;
51 
53  sizeof(*constraints->valid_sw_formats));
54  if (!constraints->valid_sw_formats)
55  return AVERROR(ENOMEM);
56 
57  for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
58  constraints->valid_sw_formats[i] = supported_formats[i];
59  constraints->valid_sw_formats[FF_ARRAY_ELEMS(supported_formats)] = AV_PIX_FMT_NONE;
60 
61  constraints->valid_hw_formats = av_malloc_array(2, sizeof(*constraints->valid_hw_formats));
62  if (!constraints->valid_hw_formats)
63  return AVERROR(ENOMEM);
64 
65  constraints->valid_hw_formats[0] = AV_PIX_FMT_CUDA;
66  constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
67 
68  return 0;
69 }
70 
71 static void cuda_buffer_free(void *opaque, uint8_t *data)
72 {
73  AVHWFramesContext *ctx = opaque;
74  AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
75  CudaFunctions *cu = hwctx->internal->cuda_dl;
76 
77  CUcontext dummy;
78 
79  cu->cuCtxPushCurrent(hwctx->cuda_ctx);
80 
81  cu->cuMemFree((CUdeviceptr)data);
82 
83  cu->cuCtxPopCurrent(&dummy);
84 }
85 
86 static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
87 {
88  AVHWFramesContext *ctx = opaque;
89  AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
90  CudaFunctions *cu = hwctx->internal->cuda_dl;
91 
92  AVBufferRef *ret = NULL;
93  CUcontext dummy = NULL;
94  CUdeviceptr data;
95  CUresult err;
96 
97  err = cu->cuCtxPushCurrent(hwctx->cuda_ctx);
98  if (err != CUDA_SUCCESS) {
99  av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
100  return NULL;
101  }
102 
103  err = cu->cuMemAlloc(&data, size);
104  if (err != CUDA_SUCCESS)
105  goto fail;
106 
107  ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
108  if (!ret) {
109  cu->cuMemFree(data);
110  goto fail;
111  }
112 
113 fail:
114  cu->cuCtxPopCurrent(&dummy);
115  return ret;
116 }
117 
119 {
120  CUDAFramesContext *priv = ctx->internal->priv;
121  int i;
122 
123  for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
124  if (ctx->sw_format == supported_formats[i])
125  break;
126  }
127  if (i == FF_ARRAY_ELEMS(supported_formats)) {
128  av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n",
130  return AVERROR(ENOSYS);
131  }
132 
134 
135  if (!ctx->pool) {
137  if (size < 0)
138  return size;
139 
141  if (!ctx->internal->pool_internal)
142  return AVERROR(ENOMEM);
143  }
144 
145  return 0;
146 }
147 
149 {
150  int res;
151 
152  frame->buf[0] = av_buffer_pool_get(ctx->pool);
153  if (!frame->buf[0])
154  return AVERROR(ENOMEM);
155 
156  res = av_image_fill_arrays(frame->data, frame->linesize, frame->buf[0]->data,
157  ctx->sw_format, ctx->width, ctx->height, CUDA_FRAME_ALIGNMENT);
158  if (res < 0)
159  return res;
160 
161  // YUV420P is a special case.
162  // Nvenc expects the U/V planes in swapped order from how ffmpeg expects them, also chroma is half-aligned
163  if (ctx->sw_format == AV_PIX_FMT_YUV420P) {
164  frame->linesize[1] = frame->linesize[2] = frame->linesize[0] / 2;
165  frame->data[2] = frame->data[1];
166  frame->data[1] = frame->data[2] + frame->linesize[2] * ctx->height / 2;
167  }
168 
169  frame->format = AV_PIX_FMT_CUDA;
170  frame->width = ctx->width;
171  frame->height = ctx->height;
172 
173  return 0;
174 }
175 
178  enum AVPixelFormat **formats)
179 {
180  enum AVPixelFormat *fmts;
181 
182  fmts = av_malloc_array(2, sizeof(*fmts));
183  if (!fmts)
184  return AVERROR(ENOMEM);
185 
186  fmts[0] = ctx->sw_format;
187  fmts[1] = AV_PIX_FMT_NONE;
188 
189  *formats = fmts;
190 
191  return 0;
192 }
193 
195  const AVFrame *src)
196 {
197  CUDAFramesContext *priv = ctx->internal->priv;
198  AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
199  CudaFunctions *cu = device_hwctx->internal->cuda_dl;
200 
201  CUcontext dummy;
202  CUresult err;
203  int i;
204 
205  err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
206  if (err != CUDA_SUCCESS)
207  return AVERROR_UNKNOWN;
208 
209  for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
210  CUDA_MEMCPY2D cpy = {
211  .srcMemoryType = CU_MEMORYTYPE_DEVICE,
212  .dstMemoryType = CU_MEMORYTYPE_HOST,
213  .srcDevice = (CUdeviceptr)src->data[i],
214  .dstHost = dst->data[i],
215  .srcPitch = src->linesize[i],
216  .dstPitch = dst->linesize[i],
217  .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
218  .Height = src->height >> (i ? priv->shift_height : 0),
219  };
220 
221  err = cu->cuMemcpy2DAsync(&cpy, device_hwctx->stream);
222  if (err != CUDA_SUCCESS) {
223  av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
224  return AVERROR_UNKNOWN;
225  }
226  }
227 
228  err = cu->cuStreamSynchronize(device_hwctx->stream);
229  if (err != CUDA_SUCCESS) {
230  av_log(ctx, AV_LOG_ERROR, "Error synchronizing CUDA stream\n");
231  return AVERROR_UNKNOWN;
232  }
233 
234  cu->cuCtxPopCurrent(&dummy);
235 
236  return 0;
237 }
238 
240  const AVFrame *src)
241 {
242  CUDAFramesContext *priv = ctx->internal->priv;
243  AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
244  CudaFunctions *cu = device_hwctx->internal->cuda_dl;
245 
246  CUcontext dummy;
247  CUresult err;
248  int i;
249 
250  err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
251  if (err != CUDA_SUCCESS)
252  return AVERROR_UNKNOWN;
253 
254  for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
255  CUDA_MEMCPY2D cpy = {
256  .srcMemoryType = CU_MEMORYTYPE_HOST,
257  .dstMemoryType = CU_MEMORYTYPE_DEVICE,
258  .srcHost = src->data[i],
259  .dstDevice = (CUdeviceptr)dst->data[i],
260  .srcPitch = src->linesize[i],
261  .dstPitch = dst->linesize[i],
262  .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
263  .Height = src->height >> (i ? priv->shift_height : 0),
264  };
265 
266  err = cu->cuMemcpy2DAsync(&cpy, device_hwctx->stream);
267  if (err != CUDA_SUCCESS) {
268  av_log(ctx, AV_LOG_ERROR, "Error transferring the data to the CUDA frame\n");
269  return AVERROR_UNKNOWN;
270  }
271  }
272 
273  err = cu->cuStreamSynchronize(device_hwctx->stream);
274  if (err != CUDA_SUCCESS) {
275  av_log(ctx, AV_LOG_ERROR, "Error synchronizing CUDA stream\n");
276  return AVERROR_UNKNOWN;
277  }
278 
279  cu->cuCtxPopCurrent(&dummy);
280 
281  return 0;
282 }
283 
285 {
286  AVCUDADeviceContext *hwctx = ctx->hwctx;
287 
288  if (hwctx->internal) {
289  if (hwctx->internal->is_allocated && hwctx->cuda_ctx) {
290  hwctx->internal->cuda_dl->cuCtxDestroy(hwctx->cuda_ctx);
291  hwctx->cuda_ctx = NULL;
292  }
293  cuda_free_functions(&hwctx->internal->cuda_dl);
294  }
295 
296  av_freep(&hwctx->internal);
297 }
298 
300 {
301  AVCUDADeviceContext *hwctx = ctx->hwctx;
302  int ret;
303 
304  if (!hwctx->internal) {
305  hwctx->internal = av_mallocz(sizeof(*hwctx->internal));
306  if (!hwctx->internal)
307  return AVERROR(ENOMEM);
308  }
309 
310  if (!hwctx->internal->cuda_dl) {
311  ret = cuda_load_functions(&hwctx->internal->cuda_dl, ctx);
312  if (ret < 0) {
313  av_log(ctx, AV_LOG_ERROR, "Could not dynamically load CUDA\n");
314  goto error;
315  }
316  }
317 
318  return 0;
319 
320 error:
321  cuda_device_uninit(ctx);
322  return ret;
323 }
324 
325 static int cuda_device_create(AVHWDeviceContext *ctx, const char *device,
326  AVDictionary *opts, int flags)
327 {
328  AVCUDADeviceContext *hwctx = ctx->hwctx;
329  CudaFunctions *cu;
330  CUdevice cu_device;
331  CUcontext dummy;
332  CUresult err;
333  int device_idx = 0;
334 
335  if (device)
336  device_idx = strtol(device, NULL, 0);
337 
338  if (cuda_device_init(ctx) < 0)
339  goto error;
340 
341  cu = hwctx->internal->cuda_dl;
342 
343  err = cu->cuInit(0);
344  if (err != CUDA_SUCCESS) {
345  av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n");
346  goto error;
347  }
348 
349  err = cu->cuDeviceGet(&cu_device, device_idx);
350  if (err != CUDA_SUCCESS) {
351  av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", device_idx);
352  goto error;
353  }
354 
355  err = cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device);
356  if (err != CUDA_SUCCESS) {
357  av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n");
358  goto error;
359  }
360 
361  // Setting stream to NULL will make functions automatically use the default CUstream
362  hwctx->stream = NULL;
363 
364  cu->cuCtxPopCurrent(&dummy);
365 
366  hwctx->internal->is_allocated = 1;
367 
368  return 0;
369 
370 error:
371  cuda_device_uninit(ctx);
372  return AVERROR_UNKNOWN;
373 }
374 
377  .name = "CUDA",
378 
379  .device_hwctx_size = sizeof(AVCUDADeviceContext),
380  .frames_priv_size = sizeof(CUDAFramesContext),
381 
382  .device_create = cuda_device_create,
383  .device_init = cuda_device_init,
384  .device_uninit = cuda_device_uninit,
385  .frames_get_constraints = cuda_frames_get_constraints,
386  .frames_init = cuda_frames_init,
387  .frames_get_buffer = cuda_get_buffer,
388  .transfer_get_formats = cuda_transfer_get_formats,
389  .transfer_data_to = cuda_transfer_data_to,
390  .transfer_data_from = cuda_transfer_data_from,
391 
392  .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE },
393 };
This struct aggregates all the (hardware/vendor-specific) "high-level" state, i.e.
Definition: hwcontext.h:60
#define NULL
Definition: coverity.c:32
This structure describes decoded (raw) audio or video data.
Definition: frame.h:226
ptrdiff_t const GLvoid * data
Definition: opengl_enc.c:101
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:71
misc image utilities
Memory handling functions.
AVBufferRef * buf[AV_NUM_DATA_POINTERS]
AVBuffer references backing the data for this frame.
Definition: frame.h:418
AVCUDADeviceContextInternal * internal
int av_image_fill_arrays(uint8_t *dst_data[4], int dst_linesize[4], const uint8_t *src, enum AVPixelFormat pix_fmt, int width, int height, int align)
Setup the data pointers and linesizes based on the specified image parameters and the provided array...
Definition: imgutils.c:411
int width
The allocated dimensions of the frames in this pool.
Definition: hwcontext.h:228
static int cuda_frames_get_constraints(AVHWDeviceContext *ctx, const void *hwconfig, AVHWFramesConstraints *constraints)
static int cuda_frames_init(AVHWFramesContext *ctx)
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:236
#define src
Definition: vp8dsp.c:254
#define AV_PIX_FMT_P016
Definition: pixfmt.h:427
#define AV_PIX_FMT_P010
Definition: pixfmt.h:426
AVBufferPool * pool_internal
enum AVHWDeviceType type
uint8_t
static enum AVPixelFormat supported_formats[]
static AVFrame * frame
void * hwctx
The format-specific data, allocated and freed by libavutil along with this context.
Definition: hwcontext.h:91
static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, const AVFrame *src)
#define AV_PIX_FMT_YUV444P16
Definition: pixfmt.h:392
ptrdiff_t size
Definition: opengl_enc.c:101
#define av_log(a,...)
static void cuda_buffer_free(void *opaque, uint8_t *data)
int av_image_get_buffer_size(enum AVPixelFormat pix_fmt, int width, int height, int align)
Return the size in bytes of the amount of data required to store an image with the given parameters...
Definition: imgutils.c:431
int width
Definition: frame.h:284
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
#define AVERROR(e)
Definition: error.h:43
int av_pix_fmt_get_chroma_sub_sample(enum AVPixelFormat pix_fmt, int *h_shift, int *v_shift)
Utility function to access log2_chroma_w log2_chroma_h from the pixel format AVPixFmtDescriptor.
Definition: pixdesc.c:2474
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:89
#define AV_PIX_FMT_0BGR32
Definition: pixfmt.h:357
AVBufferRef * av_buffer_create(uint8_t *data, int size, void(*free)(void *opaque, uint8_t *data), void *opaque, int flags)
Create an AVBuffer from an existing array.
Definition: buffer.c:28
static int cuda_device_init(AVHWDeviceContext *ctx)
#define fail()
Definition: checkasm.h:117
AVDictionary * opts
Definition: movenc.c:50
static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
#define FFMIN(a, b)
Definition: common.h:96
AVHWDeviceContext * device_ctx
The parent AVHWDeviceContext.
Definition: hwcontext.h:148
static int cuda_transfer_get_formats(AVHWFramesContext *ctx, enum AVHWFrameTransferDirection dir, enum AVPixelFormat **formats)
static AVBufferRef * cuda_pool_alloc(void *opaque, int size)
AVFormatContext * ctx
Definition: movenc.c:48
FFmpeg internal API for CUDA.
int dummy
Definition: motion.c:64
HW acceleration through CUDA.
Definition: pixfmt.h:235
AVBufferPool * av_buffer_pool_init2(int size, void *opaque, AVBufferRef *(*alloc)(void *opaque, int size), void(*pool_free)(void *opaque))
Allocate and initialize a buffer pool with a more complex allocator.
Definition: buffer.c:218
static void error(const char *err)
#define FF_ARRAY_ELEMS(a)
#define CUDA_FRAME_ALIGNMENT
int format
format of the frame, -1 if unknown or unset Values correspond to enum AVPixelFormat for video frames...
Definition: frame.h:299
This struct describes the constraints on hardware frames attached to a given device with a hardware-s...
Definition: hwcontext.h:432
static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst, const AVFrame *src)
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Definition: frame.h:257
const HWContextType ff_hwcontext_type_cuda
uint8_t * data
The data buffer.
Definition: buffer.h:89
This struct is allocated as AVHWDeviceContext.hwctx.
This struct describes a set or pool of "hardware" frames (i.e.
Definition: hwcontext.h:123
refcounted data buffer API
enum AVPixelFormat * valid_hw_formats
A list of possible values for format in the hw_frames_ctx, terminated by AV_PIX_FMT_NONE.
Definition: hwcontext.h:437
AVHWFramesInternal * internal
Private data used internally by libavutil.
Definition: hwcontext.h:133
#define flags(name, subs,...)
Definition: cbs_av1.c:596
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:240
A reference to a data buffer.
Definition: buffer.h:81
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:66
common internal and external API header
#define AVERROR_UNKNOWN
Unknown error, typically from an external library.
Definition: error.h:71
AVHWFrameTransferDirection
Definition: hwcontext.h:394
pixel format definitions
AVBufferPool * pool
A pool from which the frames are allocated by av_hwframe_get_buffer().
Definition: hwcontext.h:189
static int cuda_device_create(AVHWDeviceContext *ctx, const char *device, AVDictionary *opts, int flags)
enum AVPixelFormat * valid_sw_formats
A list of possible values for sw_format in the hw_frames_ctx, terminated by AV_PIX_FMT_NONE.
Definition: hwcontext.h:444
int height
Definition: frame.h:284
#define av_freep(p)
AVBufferRef * av_buffer_pool_get(AVBufferPool *pool)
Allocate a new AVBuffer, reusing an old buffer from the pool when available.
Definition: buffer.c:334
#define av_malloc_array(a, b)
formats
Definition: signature.h:48
static void cuda_device_uninit(AVHWDeviceContext *ctx)
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
Definition: pixdesc.c:2362
enum AVPixelFormat sw_format
The pixel format identifying the actual data layout of the hardware frames.
Definition: hwcontext.h:221
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
#define AV_PIX_FMT_0RGB32
Definition: pixfmt.h:356