FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
hwcontext_cuda.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include "buffer.h"
20 #include "common.h"
21 #include "hwcontext.h"
22 #include "hwcontext_internal.h"
24 #include "mem.h"
25 #include "pixdesc.h"
26 #include "pixfmt.h"
27 
28 #define CUDA_FRAME_ALIGNMENT 256
29 
/**
 * Private per-frames-context state for the CUDA hwcontext backend.
 *
 * Holds the log2 chroma subsampling factors of the software pixel format;
 * shift_height is used by the transfer functions to derive the number of
 * rows in the chroma planes.
 */
typedef struct CUDAFramesContext {
    int shift_width, shift_height;
} CUDAFramesContext;
33 
34 static const enum AVPixelFormat supported_formats[] = {
40 };
41 
43  const void *hwconfig,
44  AVHWFramesConstraints *constraints)
45 {
46  int i;
47 
49  sizeof(*constraints->valid_sw_formats));
50  if (!constraints->valid_sw_formats)
51  return AVERROR(ENOMEM);
52 
53  for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++)
54  constraints->valid_sw_formats[i] = supported_formats[i];
55  constraints->valid_sw_formats[FF_ARRAY_ELEMS(supported_formats)] = AV_PIX_FMT_NONE;
56 
57  constraints->valid_hw_formats = av_malloc_array(2, sizeof(*constraints->valid_hw_formats));
58  if (!constraints->valid_hw_formats)
59  return AVERROR(ENOMEM);
60 
61  constraints->valid_hw_formats[0] = AV_PIX_FMT_CUDA;
62  constraints->valid_hw_formats[1] = AV_PIX_FMT_NONE;
63 
64  return 0;
65 }
66 
67 static void cuda_buffer_free(void *opaque, uint8_t *data)
68 {
69  AVHWFramesContext *ctx = opaque;
70  AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
71  CudaFunctions *cu = hwctx->internal->cuda_dl;
72 
74 
75  cu->cuCtxPushCurrent(hwctx->cuda_ctx);
76 
77  cu->cuMemFree((CUdeviceptr)data);
78 
79  cu->cuCtxPopCurrent(&dummy);
80 }
81 
82 static AVBufferRef *cuda_pool_alloc(void *opaque, int size)
83 {
84  AVHWFramesContext *ctx = opaque;
85  AVCUDADeviceContext *hwctx = ctx->device_ctx->hwctx;
86  CudaFunctions *cu = hwctx->internal->cuda_dl;
87 
88  AVBufferRef *ret = NULL;
91  CUresult err;
92 
93  err = cu->cuCtxPushCurrent(hwctx->cuda_ctx);
94  if (err != CUDA_SUCCESS) {
95  av_log(ctx, AV_LOG_ERROR, "Error setting current CUDA context\n");
96  return NULL;
97  }
98 
99  err = cu->cuMemAlloc(&data, size);
100  if (err != CUDA_SUCCESS)
101  goto fail;
102 
103  ret = av_buffer_create((uint8_t*)data, size, cuda_buffer_free, ctx, 0);
104  if (!ret) {
105  cu->cuMemFree(data);
106  goto fail;
107  }
108 
109 fail:
110  cu->cuCtxPopCurrent(&dummy);
111  return ret;
112 }
113 
115 {
116  CUDAFramesContext *priv = ctx->internal->priv;
117  int aligned_width = FFALIGN(ctx->width, CUDA_FRAME_ALIGNMENT);
118  int i;
119 
120  for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
121  if (ctx->sw_format == supported_formats[i])
122  break;
123  }
124  if (i == FF_ARRAY_ELEMS(supported_formats)) {
125  av_log(ctx, AV_LOG_ERROR, "Pixel format '%s' is not supported\n",
127  return AVERROR(ENOSYS);
128  }
129 
131 
132  if (!ctx->pool) {
133  int size;
134 
135  switch (ctx->sw_format) {
136  case AV_PIX_FMT_NV12:
137  case AV_PIX_FMT_YUV420P:
138  size = aligned_width * ctx->height * 3 / 2;
139  break;
140  case AV_PIX_FMT_YUV444P:
141  case AV_PIX_FMT_P010:
142  case AV_PIX_FMT_P016:
143  size = aligned_width * ctx->height * 3;
144  break;
145  default:
146  av_log(ctx, AV_LOG_ERROR, "BUG: Pixel format missing from size calculation.");
147  return AVERROR_BUG;
148  }
149 
151  if (!ctx->internal->pool_internal)
152  return AVERROR(ENOMEM);
153  }
154 
155  return 0;
156 }
157 
159 {
160  int aligned_width;
161  int width_in_bytes = ctx->width;
162 
163  if (ctx->sw_format == AV_PIX_FMT_P010 ||
164  ctx->sw_format == AV_PIX_FMT_P016) {
165  width_in_bytes *= 2;
166  }
167  aligned_width = FFALIGN(width_in_bytes, CUDA_FRAME_ALIGNMENT);
168 
169  frame->buf[0] = av_buffer_pool_get(ctx->pool);
170  if (!frame->buf[0])
171  return AVERROR(ENOMEM);
172 
173  switch (ctx->sw_format) {
174  case AV_PIX_FMT_NV12:
175  case AV_PIX_FMT_P010:
176  case AV_PIX_FMT_P016:
177  frame->data[0] = frame->buf[0]->data;
178  frame->data[1] = frame->data[0] + aligned_width * ctx->height;
179  frame->linesize[0] = aligned_width;
180  frame->linesize[1] = aligned_width;
181  break;
182  case AV_PIX_FMT_YUV420P:
183  frame->data[0] = frame->buf[0]->data;
184  frame->data[2] = frame->data[0] + aligned_width * ctx->height;
185  frame->data[1] = frame->data[2] + aligned_width * ctx->height / 4;
186  frame->linesize[0] = aligned_width;
187  frame->linesize[1] = aligned_width / 2;
188  frame->linesize[2] = aligned_width / 2;
189  break;
190  case AV_PIX_FMT_YUV444P:
191  frame->data[0] = frame->buf[0]->data;
192  frame->data[1] = frame->data[0] + aligned_width * ctx->height;
193  frame->data[2] = frame->data[1] + aligned_width * ctx->height;
194  frame->linesize[0] = aligned_width;
195  frame->linesize[1] = aligned_width;
196  frame->linesize[2] = aligned_width;
197  break;
198  default:
199  av_frame_unref(frame);
200  return AVERROR_BUG;
201  }
202 
203  frame->format = AV_PIX_FMT_CUDA;
204  frame->width = ctx->width;
205  frame->height = ctx->height;
206 
207  return 0;
208 }
209 
212  enum AVPixelFormat **formats)
213 {
214  enum AVPixelFormat *fmts;
215 
216  fmts = av_malloc_array(2, sizeof(*fmts));
217  if (!fmts)
218  return AVERROR(ENOMEM);
219 
220  fmts[0] = ctx->sw_format;
221  fmts[1] = AV_PIX_FMT_NONE;
222 
223  *formats = fmts;
224 
225  return 0;
226 }
227 
229  const AVFrame *src)
230 {
231  CUDAFramesContext *priv = ctx->internal->priv;
232  AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
233  CudaFunctions *cu = device_hwctx->internal->cuda_dl;
234 
236  CUresult err;
237  int i;
238 
239  err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
240  if (err != CUDA_SUCCESS)
241  return AVERROR_UNKNOWN;
242 
243  for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
244  CUDA_MEMCPY2D cpy = {
246  .dstMemoryType = CU_MEMORYTYPE_HOST,
247  .srcDevice = (CUdeviceptr)src->data[i],
248  .dstHost = dst->data[i],
249  .srcPitch = src->linesize[i],
250  .dstPitch = dst->linesize[i],
251  .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
252  .Height = src->height >> (i ? priv->shift_height : 0),
253  };
254 
255  err = cu->cuMemcpy2D(&cpy);
256  if (err != CUDA_SUCCESS) {
257  av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
258  return AVERROR_UNKNOWN;
259  }
260  }
261 
262  cu->cuCtxPopCurrent(&dummy);
263 
264  return 0;
265 }
266 
268  const AVFrame *src)
269 {
270  CUDAFramesContext *priv = ctx->internal->priv;
271  AVCUDADeviceContext *device_hwctx = ctx->device_ctx->hwctx;
272  CudaFunctions *cu = device_hwctx->internal->cuda_dl;
273 
275  CUresult err;
276  int i;
277 
278  err = cu->cuCtxPushCurrent(device_hwctx->cuda_ctx);
279  if (err != CUDA_SUCCESS)
280  return AVERROR_UNKNOWN;
281 
282  for (i = 0; i < FF_ARRAY_ELEMS(src->data) && src->data[i]; i++) {
283  CUDA_MEMCPY2D cpy = {
285  .dstMemoryType = CU_MEMORYTYPE_DEVICE,
286  .srcHost = src->data[i],
287  .dstDevice = (CUdeviceptr)dst->data[i],
288  .srcPitch = src->linesize[i],
289  .dstPitch = dst->linesize[i],
290  .WidthInBytes = FFMIN(src->linesize[i], dst->linesize[i]),
291  .Height = src->height >> (i ? priv->shift_height : 0),
292  };
293 
294  err = cu->cuMemcpy2D(&cpy);
295  if (err != CUDA_SUCCESS) {
296  av_log(ctx, AV_LOG_ERROR, "Error transferring the data from the CUDA frame\n");
297  return AVERROR_UNKNOWN;
298  }
299  }
300 
301  cu->cuCtxPopCurrent(&dummy);
302 
303  return 0;
304 }
305 
307 {
308  AVCUDADeviceContext *hwctx = ctx->hwctx;
309 
310  if (hwctx->internal) {
311  if (hwctx->internal->is_allocated && hwctx->cuda_ctx) {
312  hwctx->internal->cuda_dl->cuCtxDestroy(hwctx->cuda_ctx);
313  hwctx->cuda_ctx = NULL;
314  }
316  }
317 
318  av_freep(&hwctx->internal);
319 }
320 
322 {
323  AVCUDADeviceContext *hwctx = ctx->hwctx;
324  int ret;
325 
326  if (!hwctx->internal) {
327  hwctx->internal = av_mallocz(sizeof(*hwctx->internal));
328  if (!hwctx->internal)
329  return AVERROR(ENOMEM);
330  }
331 
332  if (!hwctx->internal->cuda_dl) {
333  ret = cuda_load_functions(&hwctx->internal->cuda_dl);
334  if (ret < 0) {
335  av_log(ctx, AV_LOG_ERROR, "Could not dynamically load CUDA\n");
336  goto error;
337  }
338  }
339 
340  return 0;
341 
342 error:
343  cuda_device_uninit(ctx);
344  return ret;
345 }
346 
347 static int cuda_device_create(AVHWDeviceContext *ctx, const char *device,
348  AVDictionary *opts, int flags)
349 {
350  AVCUDADeviceContext *hwctx = ctx->hwctx;
351  CudaFunctions *cu;
352  CUdevice cu_device;
354  CUresult err;
355  int device_idx = 0;
356 
357  if (device)
358  device_idx = strtol(device, NULL, 0);
359 
360  if (cuda_device_init(ctx) < 0)
361  goto error;
362 
363  cu = hwctx->internal->cuda_dl;
364 
365  err = cu->cuInit(0);
366  if (err != CUDA_SUCCESS) {
367  av_log(ctx, AV_LOG_ERROR, "Could not initialize the CUDA driver API\n");
368  goto error;
369  }
370 
371  err = cu->cuDeviceGet(&cu_device, device_idx);
372  if (err != CUDA_SUCCESS) {
373  av_log(ctx, AV_LOG_ERROR, "Could not get the device number %d\n", device_idx);
374  goto error;
375  }
376 
377  err = cu->cuCtxCreate(&hwctx->cuda_ctx, CU_CTX_SCHED_BLOCKING_SYNC, cu_device);
378  if (err != CUDA_SUCCESS) {
379  av_log(ctx, AV_LOG_ERROR, "Error creating a CUDA context\n");
380  goto error;
381  }
382 
383  cu->cuCtxPopCurrent(&dummy);
384 
385  hwctx->internal->is_allocated = 1;
386 
387  return 0;
388 
389 error:
390  cuda_device_uninit(ctx);
391  return AVERROR_UNKNOWN;
392 }
393 
396  .name = "CUDA",
397 
398  .device_hwctx_size = sizeof(AVCUDADeviceContext),
399  .frames_priv_size = sizeof(CUDAFramesContext),
400 
401  .device_create = cuda_device_create,
402  .device_init = cuda_device_init,
403  .device_uninit = cuda_device_uninit,
404  .frames_get_constraints = cuda_frames_get_constraints,
405  .frames_init = cuda_frames_init,
406  .frames_get_buffer = cuda_get_buffer,
407  .transfer_get_formats = cuda_transfer_get_formats,
408  .transfer_data_to = cuda_transfer_data_to,
409  .transfer_data_from = cuda_transfer_data_from,
410 
411  .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_CUDA, AV_PIX_FMT_NONE },
412 };
This struct aggregates all the (hardware/vendor-specific) "high-level" state, i.e. state that is not tied to a concrete processing configuration.
Definition: hwcontext.h:54
#define NULL
Definition: coverity.c:32
This structure describes decoded (raw) audio or video data.
Definition: frame.h:187
ptrdiff_t const GLvoid * data
Definition: opengl_enc.c:101
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:67
Memory handling functions.
AVBufferRef * buf[AV_NUM_DATA_POINTERS]
AVBuffer references backing the data for this frame.
Definition: frame.h:370
AVCUDADeviceContextInternal * internal
int width
The allocated dimensions of the frames in this pool.
Definition: hwcontext.h:222
tcuMemFree_v2 * cuMemFree
static int cuda_frames_get_constraints(AVHWDeviceContext *ctx, const void *hwconfig, AVHWFramesConstraints *constraints)
static int cuda_frames_init(AVHWFramesContext *ctx)
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if available on the CPU) and zero all the bytes of the block.
Definition: mem.c:222
#define src
Definition: vp8dsp.c:254
#define AV_PIX_FMT_P016
Definition: pixfmt.h:395
#define AV_PIX_FMT_P010
Definition: pixfmt.h:394
AVBufferPool * pool_internal
enum AVHWDeviceType type
uint8_t
static enum AVPixelFormat supported_formats[]
static AVFrame * frame
void * hwctx
The format-specific data, allocated and freed by libavutil along with this context.
Definition: hwcontext.h:85
static int flags
Definition: log.c:57
static int cuda_transfer_data_from(AVHWFramesContext *ctx, AVFrame *dst, const AVFrame *src)
ptrdiff_t size
Definition: opengl_enc.c:101
tcuCtxPushCurrent_v2 * cuCtxPushCurrent
#define FFALIGN(x, a)
Definition: macros.h:48
#define av_log(a,...)
static void cuda_buffer_free(void *opaque, uint8_t *data)
int width
width and height of the video frame
Definition: frame.h:239
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:176
#define AVERROR(e)
Definition: error.h:43
int av_pix_fmt_get_chroma_sub_sample(enum AVPixelFormat pix_fmt, int *h_shift, int *v_shift)
Utility function to access log2_chroma_w log2_chroma_h from the pixel format AVPixFmtDescriptor.
Definition: pixdesc.c:2361
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (first byte U and the following byte V)
Definition: pixfmt.h:90
tcuInit * cuInit
AVBufferRef * av_buffer_create(uint8_t *data, int size, void(*free)(void *opaque, uint8_t *data), void *opaque, int flags)
Create an AVBuffer from an existing array.
Definition: buffer.c:28
static int cuda_device_init(AVHWDeviceContext *ctx)
#define fail()
Definition: checkasm.h:89
AVDictionary * opts
Definition: movenc.c:50
static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
tcuCtxCreate_v2 * cuCtxCreate
#define FFMIN(a, b)
Definition: common.h:96
AVHWDeviceContext * device_ctx
The parent AVHWDeviceContext.
Definition: hwcontext.h:142
static int cuda_transfer_get_formats(AVHWFramesContext *ctx, enum AVHWFrameTransferDirection dir, enum AVPixelFormat **formats)
static AVBufferRef * cuda_pool_alloc(void *opaque, int size)
AVFormatContext * ctx
Definition: movenc.c:48
FFmpeg internal API for CUDA.
int dummy
Definition: motion.c:64
HW acceleration through CUDA.
Definition: pixfmt.h:249
AVBufferPool * av_buffer_pool_init2(int size, void *opaque, AVBufferRef *(*alloc)(void *opaque, int size), void(*pool_free)(void *opaque))
Allocate and initialize a buffer pool with a more complex allocator.
Definition: buffer.c:218
static void error(const char *err)
#define FF_ARRAY_ELEMS(a)
#define CUDA_FRAME_ALIGNMENT
CUmemorytype srcMemoryType
Definition: dynlink_cuda.h:64
int format
format of the frame, -1 if unknown or unset. Values correspond to enum AVPixelFormat for video frames, enum AVSampleFormat for audio.
Definition: frame.h:251
This struct describes the constraints on hardware frames attached to a given device with a hardware-specific configuration.
Definition: hwcontext.h:373
static int cuda_transfer_data_to(AVHWFramesContext *ctx, AVFrame *dst, const AVFrame *src)
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Definition: frame.h:218
tcuMemAlloc_v2 * cuMemAlloc
const HWContextType ff_hwcontext_type_cuda
uint8_t * data
The data buffer.
Definition: buffer.h:89
tcuMemcpy2D_v2 * cuMemcpy2D
#define AVERROR_BUG
Internal bug, also see AVERROR_BUG2.
Definition: error.h:50
This struct is allocated as AVHWDeviceContext.hwctx.
This struct describes a set or pool of "hardware" frames (i.e. those with data not located in normal system memory).
Definition: hwcontext.h:117
refcounted data buffer API
enum AVPixelFormat * valid_hw_formats
A list of possible values for format in the hw_frames_ctx, terminated by AV_PIX_FMT_NONE.
Definition: hwcontext.h:378
AVHWFramesInternal * internal
Private data used internally by libavutil.
Definition: hwcontext.h:127
void av_frame_unref(AVFrame *frame)
Unreference all the buffers referenced by frame and reset the frame fields.
Definition: frame.c:498
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:201
A reference to a data buffer.
Definition: buffer.h:81
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:62
common internal and external API header
tcuCtxPopCurrent_v2 * cuCtxPopCurrent
#define AVERROR_UNKNOWN
Unknown error, typically from an external library.
Definition: error.h:71
AVHWFrameTransferDirection
Definition: hwcontext.h:335
pixel format definitions
AVBufferPool * pool
A pool from which the frames are allocated by av_hwframe_get_buffer().
Definition: hwcontext.h:183
static int cuda_device_create(AVHWDeviceContext *ctx, const char *device, AVDictionary *opts, int flags)
enum AVPixelFormat * valid_sw_formats
A list of possible values for sw_format in the hw_frames_ctx, terminated by AV_PIX_FMT_NONE.
Definition: hwcontext.h:385
tcuCtxDestroy_v2 * cuCtxDestroy
int height
Definition: frame.h:239
#define av_freep(p)
AVBufferRef * av_buffer_pool_get(AVBufferPool *pool)
Allocate a new AVBuffer, reusing an old buffer from the pool when available.
Definition: buffer.c:334
#define av_malloc_array(a, b)
formats
Definition: signature.h:48
static void cuda_device_uninit(AVHWDeviceContext *ctx)
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
Definition: pixdesc.c:2249
tcuDeviceGet * cuDeviceGet
enum AVPixelFormat sw_format
The pixel format identifying the actual data layout of the hardware frames.
Definition: hwcontext.h:215
AVPixelFormat
Pixel format.
Definition: pixfmt.h:60