FFmpeg
nvdec.c
Go to the documentation of this file.
1 /*
2  * HW decode acceleration through NVDEC
3  *
4  * Copyright (c) 2016 Anton Khirnov
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
#include "config.h"
#include "config_components.h"

#include "libavutil/common.h"
#include "libavutil/cuda_check.h"
#include "libavutil/error.h"
#include "libavutil/hwcontext.h"
#include "libavutil/hwcontext_cuda_internal.h"
#include "libavutil/pixdesc.h"
#include "libavutil/pixfmt.h"

#include "avcodec.h"
#include "decode.h"
#include "internal.h"
#include "nvdec.h"
38 
39 #if !NVDECAPI_CHECK_VERSION(9, 0)
40 #define cudaVideoSurfaceFormat_YUV444 2
41 #define cudaVideoSurfaceFormat_YUV444_16Bit 3
42 #endif
43 
44 typedef struct NVDECDecoder {
45  CUvideodecoder decoder;
46 
49  CUcontext cuda_ctx;
50  CUstream stream;
51 
52  CudaFunctions *cudl;
53  CuvidFunctions *cvdl;
54 } NVDECDecoder;
55 
/**
 * Opaque state for the decoder-surface index pool: hands out indices
 * 0..dpb_size-1, at most one per allocated buffer.
 */
typedef struct NVDECFramePool {
    unsigned int dpb_size;     ///< total number of decode surfaces
    unsigned int nb_allocated; ///< indices handed out so far
} NVDECFramePool;

61 #define CHECK_CU(x) FF_CUDA_CHECK_DL(logctx, decoder->cudl, x)
62 
63 static int map_avcodec_id(enum AVCodecID id)
64 {
65  switch (id) {
66 #if CONFIG_AV1_NVDEC_HWACCEL
67  case AV_CODEC_ID_AV1: return cudaVideoCodec_AV1;
68 #endif
69  case AV_CODEC_ID_H264: return cudaVideoCodec_H264;
70  case AV_CODEC_ID_HEVC: return cudaVideoCodec_HEVC;
71  case AV_CODEC_ID_MJPEG: return cudaVideoCodec_JPEG;
72  case AV_CODEC_ID_MPEG1VIDEO: return cudaVideoCodec_MPEG1;
73  case AV_CODEC_ID_MPEG2VIDEO: return cudaVideoCodec_MPEG2;
74  case AV_CODEC_ID_MPEG4: return cudaVideoCodec_MPEG4;
75  case AV_CODEC_ID_VC1: return cudaVideoCodec_VC1;
76  case AV_CODEC_ID_VP8: return cudaVideoCodec_VP8;
77  case AV_CODEC_ID_VP9: return cudaVideoCodec_VP9;
78  case AV_CODEC_ID_WMV3: return cudaVideoCodec_VC1;
79  }
80  return -1;
81 }
82 
84 {
85  int shift_h = 0, shift_v = 0;
86 
88  return cudaVideoChromaFormat_Monochrome;
89 
90  av_pix_fmt_get_chroma_sub_sample(pix_fmt, &shift_h, &shift_v);
91 
92  if (shift_h == 1 && shift_v == 1)
93  return cudaVideoChromaFormat_420;
94  else if (shift_h == 1 && shift_v == 0)
95  return cudaVideoChromaFormat_422;
96  else if (shift_h == 0 && shift_v == 0)
97  return cudaVideoChromaFormat_444;
98 
99  return -1;
100 }
101 
103  CUVIDDECODECREATEINFO *params, void *logctx)
104 {
105  int ret;
106  CUVIDDECODECAPS caps = { 0 };
107 
108  caps.eCodecType = params->CodecType;
109  caps.eChromaFormat = params->ChromaFormat;
110  caps.nBitDepthMinus8 = params->bitDepthMinus8;
111 
112  if (!decoder->cvdl->cuvidGetDecoderCaps) {
113  av_log(logctx, AV_LOG_WARNING, "Used Nvidia driver is too old to perform a capability check.\n");
114  av_log(logctx, AV_LOG_WARNING, "The minimum required version is "
115 #if defined(_WIN32) || defined(__CYGWIN__)
116  "378.66"
117 #else
118  "378.13"
119 #endif
120  ". Continuing blind.\n");
121  return 0;
122  }
123 
124  ret = CHECK_CU(decoder->cvdl->cuvidGetDecoderCaps(&caps));
125  if (ret < 0)
126  return ret;
127 
128  av_log(logctx, AV_LOG_VERBOSE, "NVDEC capabilities:\n");
129  av_log(logctx, AV_LOG_VERBOSE, "format supported: %s, max_mb_count: %d\n",
130  caps.bIsSupported ? "yes" : "no", caps.nMaxMBCount);
131  av_log(logctx, AV_LOG_VERBOSE, "min_width: %d, max_width: %d\n",
132  caps.nMinWidth, caps.nMaxWidth);
133  av_log(logctx, AV_LOG_VERBOSE, "min_height: %d, max_height: %d\n",
134  caps.nMinHeight, caps.nMaxHeight);
135 
136  if (!caps.bIsSupported) {
137  av_log(logctx, AV_LOG_ERROR, "Hardware is lacking required capabilities\n");
138  return AVERROR(EINVAL);
139  }
140 
141  if (params->ulWidth > caps.nMaxWidth || params->ulWidth < caps.nMinWidth) {
142  av_log(logctx, AV_LOG_ERROR, "Video width %d not within range from %d to %d\n",
143  (int)params->ulWidth, caps.nMinWidth, caps.nMaxWidth);
144  return AVERROR(EINVAL);
145  }
146 
147  if (params->ulHeight > caps.nMaxHeight || params->ulHeight < caps.nMinHeight) {
148  av_log(logctx, AV_LOG_ERROR, "Video height %d not within range from %d to %d\n",
149  (int)params->ulHeight, caps.nMinHeight, caps.nMaxHeight);
150  return AVERROR(EINVAL);
151  }
152 
153  if ((params->ulWidth * params->ulHeight) / 256 > caps.nMaxMBCount) {
154  av_log(logctx, AV_LOG_ERROR, "Video macroblock count %d exceeds maximum of %d\n",
155  (int)(params->ulWidth * params->ulHeight) / 256, caps.nMaxMBCount);
156  return AVERROR(EINVAL);
157  }
158 
159  return 0;
160 }
161 
162 static void nvdec_decoder_free(void *opaque, uint8_t *data)
163 {
165 
166  if (decoder->decoder) {
167  void *logctx = decoder->hw_device_ref->data;
168  CUcontext dummy;
169  CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
170  CHECK_CU(decoder->cvdl->cuvidDestroyDecoder(decoder->decoder));
171  CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
172  }
173 
174  av_buffer_unref(&decoder->real_hw_frames_ref);
175  av_buffer_unref(&decoder->hw_device_ref);
176 
177  cuvid_free_functions(&decoder->cvdl);
178 
179  av_freep(&decoder);
180 }
181 
182 static int nvdec_decoder_create(AVBufferRef **out, AVBufferRef *hw_device_ref,
183  CUVIDDECODECREATEINFO *params, void *logctx)
184 {
186  AVCUDADeviceContext *device_hwctx = hw_device_ctx->hwctx;
187 
188  AVBufferRef *decoder_ref;
190 
191  CUcontext dummy;
192  int ret;
193 
194  decoder = av_mallocz(sizeof(*decoder));
195  if (!decoder)
196  return AVERROR(ENOMEM);
197 
198  decoder_ref = av_buffer_create((uint8_t*)decoder, sizeof(*decoder),
200  if (!decoder_ref) {
201  av_freep(&decoder);
202  return AVERROR(ENOMEM);
203  }
204 
205  decoder->hw_device_ref = av_buffer_ref(hw_device_ref);
206  if (!decoder->hw_device_ref) {
207  ret = AVERROR(ENOMEM);
208  goto fail;
209  }
210  decoder->cuda_ctx = device_hwctx->cuda_ctx;
211  decoder->cudl = device_hwctx->internal->cuda_dl;
212  decoder->stream = device_hwctx->stream;
213 
214  ret = cuvid_load_functions(&decoder->cvdl, logctx);
215  if (ret < 0) {
216  av_log(logctx, AV_LOG_ERROR, "Failed loading nvcuvid.\n");
217  goto fail;
218  }
219 
220  ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
221  if (ret < 0)
222  goto fail;
223 
224  ret = nvdec_test_capabilities(decoder, params, logctx);
225  if (ret < 0) {
226  CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
227  goto fail;
228  }
229 
230  ret = CHECK_CU(decoder->cvdl->cuvidCreateDecoder(&decoder->decoder, params));
231 
232  CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
233 
234  if (ret < 0) {
235  goto fail;
236  }
237 
238  *out = decoder_ref;
239 
240  return 0;
241 fail:
242  av_buffer_unref(&decoder_ref);
243  return ret;
244 }
245 
246 static AVBufferRef *nvdec_decoder_frame_alloc(void *opaque, size_t size)
247 {
248  NVDECFramePool *pool = opaque;
249  AVBufferRef *ret;
250 
251  if (pool->nb_allocated >= pool->dpb_size)
252  return NULL;
253 
254  ret = av_buffer_alloc(sizeof(unsigned int));
255  if (!ret)
256  return NULL;
257 
258  *(unsigned int*)ret->data = pool->nb_allocated++;
259 
260  return ret;
261 }
262 
264 {
266 
267  av_freep(&ctx->bitstream);
268  av_freep(&ctx->bitstream_internal);
269  ctx->bitstream_len = 0;
270  ctx->bitstream_allocated = 0;
271 
272  av_freep(&ctx->slice_offsets);
273  ctx->nb_slices = 0;
274  ctx->slice_offsets_allocated = 0;
275 
276  av_buffer_unref(&ctx->decoder_ref);
277  av_buffer_pool_uninit(&ctx->decoder_pool);
278 
279  return 0;
280 }
281 
283 {
284  av_buffer_pool_uninit(&ctx->pool);
285 }
286 
288 {
289  return av_buffer_create(NULL, 0, NULL, NULL, 0);
290 }
291 
292 static int nvdec_init_hwframes(AVCodecContext *avctx, AVBufferRef **out_frames_ref, int dummy)
293 {
294  AVHWFramesContext *frames_ctx;
295  int ret;
296 
298  avctx->hw_device_ctx,
299  avctx->hwaccel->pix_fmt,
300  out_frames_ref);
301  if (ret < 0)
302  return ret;
303 
304  frames_ctx = (AVHWFramesContext*)(*out_frames_ref)->data;
305 
306  if (dummy) {
307  // Copied from ff_decode_get_hw_frames_ctx for compatibility
308  frames_ctx->initial_pool_size += 3;
309 
310  frames_ctx->free = nvdec_free_dummy;
311  frames_ctx->pool = av_buffer_pool_init(0, nvdec_alloc_dummy);
312 
313  if (!frames_ctx->pool) {
314  av_buffer_unref(out_frames_ref);
315  return AVERROR(ENOMEM);
316  }
317  } else {
318  // This is normally not used to actually allocate frames from
319  frames_ctx->initial_pool_size = 0;
320  }
321 
322  ret = av_hwframe_ctx_init(*out_frames_ref);
323  if (ret < 0) {
324  av_buffer_unref(out_frames_ref);
325  return ret;
326  }
327 
328  return 0;
329 }
330 
332 {
334 
336  AVBufferRef *real_hw_frames_ref;
337  NVDECFramePool *pool;
338  AVHWFramesContext *frames_ctx;
339  const AVPixFmtDescriptor *sw_desc;
340 
341  CUVIDDECODECREATEINFO params = { 0 };
342 
343  cudaVideoSurfaceFormat output_format;
344  int cuvid_codec_type, cuvid_chroma_format, chroma_444;
345  int ret = 0;
346 
347  sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
348  if (!sw_desc)
349  return AVERROR_BUG;
350 
351  cuvid_codec_type = map_avcodec_id(avctx->codec_id);
352  if (cuvid_codec_type < 0) {
353  av_log(avctx, AV_LOG_ERROR, "Unsupported codec ID\n");
354  return AVERROR_BUG;
355  }
356 
357  cuvid_chroma_format = map_chroma_format(avctx->sw_pix_fmt);
358  if (cuvid_chroma_format < 0) {
359  av_log(avctx, AV_LOG_ERROR, "Unsupported chroma format\n");
360  return AVERROR(ENOSYS);
361  }
362  chroma_444 = ctx->supports_444 && cuvid_chroma_format == cudaVideoChromaFormat_444;
363 
364  if (!avctx->hw_frames_ctx) {
365  ret = nvdec_init_hwframes(avctx, &avctx->hw_frames_ctx, 1);
366  if (ret < 0)
367  return ret;
368 
369  ret = nvdec_init_hwframes(avctx, &real_hw_frames_ref, 0);
370  if (ret < 0)
371  return ret;
372  } else {
373  real_hw_frames_ref = av_buffer_ref(avctx->hw_frames_ctx);
374  if (!real_hw_frames_ref)
375  return AVERROR(ENOMEM);
376  }
377 
378  switch (sw_desc->comp[0].depth) {
379  case 8:
380  output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444 :
381  cudaVideoSurfaceFormat_NV12;
382  break;
383  case 10:
384  case 12:
385  output_format = chroma_444 ? cudaVideoSurfaceFormat_YUV444_16Bit :
386  cudaVideoSurfaceFormat_P016;
387  break;
388  default:
389  av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth\n");
390  av_buffer_unref(&real_hw_frames_ref);
391  return AVERROR(ENOSYS);
392  }
393 
394  frames_ctx = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
395 
396  params.ulWidth = avctx->coded_width;
397  params.ulHeight = avctx->coded_height;
398  params.ulTargetWidth = avctx->coded_width;
399  params.ulTargetHeight = avctx->coded_height;
400  params.bitDepthMinus8 = sw_desc->comp[0].depth - 8;
401  params.OutputFormat = output_format;
402  params.CodecType = cuvid_codec_type;
403  params.ChromaFormat = cuvid_chroma_format;
404  params.ulNumDecodeSurfaces = frames_ctx->initial_pool_size;
405  params.ulNumOutputSurfaces = frames_ctx->initial_pool_size;
406 
407  ret = nvdec_decoder_create(&ctx->decoder_ref, frames_ctx->device_ref, &params, avctx);
408  if (ret < 0) {
409  if (params.ulNumDecodeSurfaces > 32) {
410  av_log(avctx, AV_LOG_WARNING, "Using more than 32 (%d) decode surfaces might cause nvdec to fail.\n",
411  (int)params.ulNumDecodeSurfaces);
412  av_log(avctx, AV_LOG_WARNING, "Try lowering the amount of threads. Using %d right now.\n",
413  avctx->thread_count);
414  }
415  av_buffer_unref(&real_hw_frames_ref);
416  return ret;
417  }
418 
419  decoder = (NVDECDecoder*)ctx->decoder_ref->data;
420  decoder->real_hw_frames_ref = real_hw_frames_ref;
421  real_hw_frames_ref = NULL;
422 
423  pool = av_mallocz(sizeof(*pool));
424  if (!pool) {
425  ret = AVERROR(ENOMEM);
426  goto fail;
427  }
428  pool->dpb_size = frames_ctx->initial_pool_size;
429 
430  ctx->decoder_pool = av_buffer_pool_init2(sizeof(int), pool,
432  if (!ctx->decoder_pool) {
433  ret = AVERROR(ENOMEM);
434  goto fail;
435  }
436 
437  return 0;
438 fail:
439  ff_nvdec_decode_uninit(avctx);
440  return ret;
441 }
442 
443 static void nvdec_fdd_priv_free(void *priv)
444 {
445  NVDECFrame *cf = priv;
446 
447  if (!cf)
448  return;
449 
450  av_buffer_unref(&cf->idx_ref);
453 
454  av_freep(&priv);
455 }
456 
457 static void nvdec_unmap_mapped_frame(void *opaque, uint8_t *data)
458 {
459  NVDECFrame *unmap_data = (NVDECFrame*)data;
461  void *logctx = decoder->hw_device_ref->data;
462  CUdeviceptr devptr = (CUdeviceptr)opaque;
463  int ret;
464  CUcontext dummy;
465 
466  ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
467  if (ret < 0)
468  goto finish;
469 
470  CHECK_CU(decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr));
471 
472  CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
473 
474 finish:
475  av_buffer_unref(&unmap_data->idx_ref);
476  av_buffer_unref(&unmap_data->decoder_ref);
477  av_buffer_unref(&unmap_data->ref_idx_ref);
478  av_free(unmap_data);
479 }
480 
481 static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
482 {
483  FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data;
484  NVDECFrame *cf = (NVDECFrame*)fdd->hwaccel_priv;
485  NVDECDecoder *decoder = (NVDECDecoder*)cf->decoder_ref->data;
486 
487  AVHWFramesContext *hwctx = (AVHWFramesContext *)frame->hw_frames_ctx->data;
488 
489  CUVIDPROCPARAMS vpp = { 0 };
490  NVDECFrame *unmap_data = NULL;
491 
492  CUcontext dummy;
493  CUdeviceptr devptr;
494 
495  unsigned int pitch, i;
496  unsigned int offset = 0;
497  int shift_h = 0, shift_v = 0;
498  int ret = 0;
499 
500  vpp.progressive_frame = 1;
501  vpp.output_stream = decoder->stream;
502 
503  ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
504  if (ret < 0)
505  return ret;
506 
507  ret = CHECK_CU(decoder->cvdl->cuvidMapVideoFrame(decoder->decoder,
508  cf->idx, &devptr,
509  &pitch, &vpp));
510  if (ret < 0)
511  goto finish;
512 
513  unmap_data = av_mallocz(sizeof(*unmap_data));
514  if (!unmap_data) {
515  ret = AVERROR(ENOMEM);
516  goto copy_fail;
517  }
518 
519  frame->buf[1] = av_buffer_create((uint8_t *)unmap_data, sizeof(*unmap_data),
520  nvdec_unmap_mapped_frame, (void*)devptr,
522  if (!frame->buf[1]) {
523  ret = AVERROR(ENOMEM);
524  goto copy_fail;
525  }
526 
527  av_buffer_unref(&frame->hw_frames_ctx);
528  frame->hw_frames_ctx = av_buffer_ref(decoder->real_hw_frames_ref);
529  if (!frame->hw_frames_ctx) {
530  ret = AVERROR(ENOMEM);
531  goto copy_fail;
532  }
533 
534  unmap_data->idx = cf->idx;
535  unmap_data->idx_ref = av_buffer_ref(cf->idx_ref);
536  unmap_data->decoder_ref = av_buffer_ref(cf->decoder_ref);
537 
538  av_pix_fmt_get_chroma_sub_sample(hwctx->sw_format, &shift_h, &shift_v);
539  for (i = 0; frame->linesize[i]; i++) {
540  frame->data[i] = (uint8_t*)(devptr + offset);
541  frame->linesize[i] = pitch;
542  offset += pitch * (frame->height >> (i ? shift_v : 0));
543  }
544 
545  goto finish;
546 
547 copy_fail:
548  if (!frame->buf[1]) {
549  CHECK_CU(decoder->cvdl->cuvidUnmapVideoFrame(decoder->decoder, devptr));
550  av_freep(&unmap_data);
551  } else {
552  av_buffer_unref(&frame->buf[1]);
553  }
554 
555 finish:
556  CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
557  return ret;
558 }
559 
561 {
563  FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data;
564  NVDECFrame *cf = NULL;
565  int ret;
566 
567  ctx->bitstream_len = 0;
568  ctx->nb_slices = 0;
569 
570  if (fdd->hwaccel_priv)
571  return 0;
572 
573  cf = av_mallocz(sizeof(*cf));
574  if (!cf)
575  return AVERROR(ENOMEM);
576 
577  cf->decoder_ref = av_buffer_ref(ctx->decoder_ref);
578  if (!cf->decoder_ref) {
579  ret = AVERROR(ENOMEM);
580  goto fail;
581  }
582 
583  cf->idx_ref = av_buffer_pool_get(ctx->decoder_pool);
584  if (!cf->idx_ref) {
585  av_log(avctx, AV_LOG_ERROR, "No decoder surfaces left\n");
586  ret = AVERROR(ENOMEM);
587  goto fail;
588  }
589  cf->ref_idx = cf->idx = *(unsigned int*)cf->idx_ref->data;
590 
591  fdd->hwaccel_priv = cf;
594 
595  return 0;
596 fail:
598  return ret;
599 
600 }
601 
603 {
605  FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data;
606  NVDECFrame *cf;
607  int ret;
608 
609  ret = ff_nvdec_start_frame(avctx, frame);
610  if (ret < 0)
611  return ret;
612 
613  cf = fdd->hwaccel_priv;
614 
615  if (has_sep_ref) {
616  if (!cf->ref_idx_ref) {
617  cf->ref_idx_ref = av_buffer_pool_get(ctx->decoder_pool);
618  if (!cf->ref_idx_ref) {
619  av_log(avctx, AV_LOG_ERROR, "No decoder surfaces left\n");
620  ret = AVERROR(ENOMEM);
621  goto fail;
622  }
623  }
624  cf->ref_idx = *(unsigned int*)cf->ref_idx_ref->data;
625  } else {
626  av_buffer_unref(&cf->ref_idx_ref);
627  cf->ref_idx = cf->idx;
628  }
629 
630  return 0;
631 fail:
633  return ret;
634 }
635 
637 {
639  NVDECDecoder *decoder = (NVDECDecoder*)ctx->decoder_ref->data;
640  void *logctx = avctx;
641  CUVIDPICPARAMS *pp = &ctx->pic_params;
642 
643  CUcontext dummy;
644 
645  int ret = 0;
646 
647  pp->nBitstreamDataLen = ctx->bitstream_len;
648  pp->pBitstreamData = ctx->bitstream;
649  pp->nNumSlices = ctx->nb_slices;
650  pp->pSliceDataOffsets = ctx->slice_offsets;
651 
652  ret = CHECK_CU(decoder->cudl->cuCtxPushCurrent(decoder->cuda_ctx));
653  if (ret < 0)
654  return ret;
655 
656  ret = CHECK_CU(decoder->cvdl->cuvidDecodePicture(decoder->decoder, &ctx->pic_params));
657  if (ret < 0)
658  goto finish;
659 
660 finish:
661  CHECK_CU(decoder->cudl->cuCtxPopCurrent(&dummy));
662 
663  return ret;
664 }
665 
667 {
669  int ret = ff_nvdec_end_frame(avctx);
670  ctx->bitstream = NULL;
671  return ret;
672 }
673 
675  uint32_t size)
676 {
678  void *tmp;
679 
680  tmp = av_fast_realloc(ctx->slice_offsets, &ctx->slice_offsets_allocated,
681  (ctx->nb_slices + 1) * sizeof(*ctx->slice_offsets));
682  if (!tmp)
683  return AVERROR(ENOMEM);
684  ctx->slice_offsets = tmp;
685 
686  if (!ctx->bitstream)
687  ctx->bitstream = (uint8_t*)buffer;
688 
689  ctx->slice_offsets[ctx->nb_slices] = buffer - ctx->bitstream;
690  ctx->bitstream_len += size;
691  ctx->nb_slices++;
692 
693  return 0;
694 }
695 
697  AVBufferRef *hw_frames_ctx,
698  int dpb_size,
699  int supports_444)
700 {
701  AVHWFramesContext *frames_ctx = (AVHWFramesContext*)hw_frames_ctx->data;
702  const AVPixFmtDescriptor *sw_desc;
703  int cuvid_codec_type, cuvid_chroma_format, chroma_444;
704 
705  sw_desc = av_pix_fmt_desc_get(avctx->sw_pix_fmt);
706  if (!sw_desc)
707  return AVERROR_BUG;
708 
709  cuvid_codec_type = map_avcodec_id(avctx->codec_id);
710  if (cuvid_codec_type < 0) {
711  av_log(avctx, AV_LOG_ERROR, "Unsupported codec ID\n");
712  return AVERROR_BUG;
713  }
714 
715  cuvid_chroma_format = map_chroma_format(avctx->sw_pix_fmt);
716  if (cuvid_chroma_format < 0) {
717  av_log(avctx, AV_LOG_VERBOSE, "Unsupported chroma format\n");
718  return AVERROR(EINVAL);
719  }
720  chroma_444 = supports_444 && cuvid_chroma_format == cudaVideoChromaFormat_444;
721 
722  frames_ctx->format = AV_PIX_FMT_CUDA;
723  frames_ctx->width = (avctx->coded_width + 1) & ~1;
724  frames_ctx->height = (avctx->coded_height + 1) & ~1;
725  /*
726  * We add two extra frames to the pool to account for deinterlacing filters
727  * holding onto their frames.
728  */
729  frames_ctx->initial_pool_size = dpb_size + 2;
730 
731  switch (sw_desc->comp[0].depth) {
732  case 8:
733  frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
734  break;
735  case 10:
736  frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P010;
737  break;
738  case 12:
739  frames_ctx->sw_format = chroma_444 ? AV_PIX_FMT_YUV444P16 : AV_PIX_FMT_P016;
740  break;
741  default:
742  return AVERROR(EINVAL);
743  }
744 
745  return 0;
746 }
747 
749 {
750  FrameDecodeData *fdd;
751  NVDECFrame *cf;
752 
753  if (!frame || !frame->private_ref)
754  return -1;
755 
756  fdd = (FrameDecodeData*)frame->private_ref->data;
757  cf = (NVDECFrame*)fdd->hwaccel_priv;
758  if (!cf)
759  return -1;
760 
761  return cf->ref_idx;
762 }
AVCodecContext::hwaccel
const struct AVHWAccel * hwaccel
Hardware accelerator in use.
Definition: avcodec.h:1379
av_buffer_pool_init
AVBufferPool * av_buffer_pool_init(size_t size, AVBufferRef *(*alloc)(size_t size))
Allocate and initialize a buffer pool.
Definition: buffer.c:280
nvdec_decoder_create
static int nvdec_decoder_create(AVBufferRef **out, AVBufferRef *hw_device_ref, CUVIDDECODECREATEINFO *params, void *logctx)
Definition: nvdec.c:182
AV_LOG_WARNING
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:186
AV_PIX_FMT_CUDA
@ AV_PIX_FMT_CUDA
HW acceleration through CUDA.
Definition: pixfmt.h:225
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:64
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
map_avcodec_id
static int map_avcodec_id(enum AVCodecID id)
Definition: nvdec.c:63
hwcontext_cuda_internal.h
out
FILE * out
Definition: movenc.c:54
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2662
AVBufferRef::data
uint8_t * data
The data buffer.
Definition: buffer.h:90
map_chroma_format
static int map_chroma_format(enum AVPixelFormat pix_fmt)
Definition: nvdec.c:83
NVDECFramePool
Definition: nvdec.c:56
AVHWFramesContext::format
enum AVPixelFormat format
The pixel format identifying the underlying HW surface type.
Definition: hwcontext.h:209
NVDECDecoder::stream
CUstream stream
Definition: nvdec.c:50
ff_nvdec_get_ref_idx
int ff_nvdec_get_ref_idx(AVFrame *frame)
Definition: nvdec.c:748
AV_CODEC_ID_MPEG4
@ AV_CODEC_ID_MPEG4
Definition: codec_id.h:62
FrameDecodeData
This struct stores per-frame lavc-internal data and is attached to it via private_ref.
Definition: decode.h:34
av_hwframe_ctx_init
int av_hwframe_ctx_init(AVBufferRef *ref)
Finalize the context before use.
Definition: hwcontext.c:334
AVFrame
This structure describes decoded (raw) audio or video data.
Definition: frame.h:325
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
pixdesc.h
internal.h
AVHWFramesContext::free
void(* free)(struct AVHWFramesContext *ctx)
This field may be set by the caller before calling av_hwframe_ctx_init().
Definition: hwcontext.h:170
AVComponentDescriptor::depth
int depth
Number of bits in the component.
Definition: pixdesc.h:57
data
const char data[16]
Definition: mxf.c:143
AV_LOG_VERBOSE
#define AV_LOG_VERBOSE
Detailed information.
Definition: log.h:196
FrameDecodeData::hwaccel_priv_free
void(* hwaccel_priv_free)(void *priv)
Definition: decode.h:53
NVDECDecoder::decoder
CUvideodecoder decoder
Definition: nvdec.c:45
av_buffer_ref
AVBufferRef * av_buffer_ref(const AVBufferRef *buf)
Create a new reference to an AVBuffer.
Definition: buffer.c:103
AVHWFramesContext::width
int width
The allocated dimensions of the frames in this pool.
Definition: hwcontext.h:229
NVDECDecoder::cvdl
CuvidFunctions * cvdl
Definition: nvdec.c:53
av_pix_fmt_count_planes
int av_pix_fmt_count_planes(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2702
decoder
static const chunk_decoder decoder[8]
Definition: dfa.c:331
finish
static void finish(void)
Definition: movenc.c:342
ff_nvdec_start_frame
int ff_nvdec_start_frame(AVCodecContext *avctx, AVFrame *frame)
Definition: nvdec.c:560
NVDECFrame::ref_idx
unsigned int ref_idx
Definition: nvdec.h:46
fail
#define fail()
Definition: checkasm.h:131
AVCodecContext::thread_count
int thread_count
thread count is used to decide how many independent tasks should be passed to execute()
Definition: avcodec.h:1463
dummy
int dummy
Definition: motion.c:65
av_buffer_pool_init2
AVBufferPool * av_buffer_pool_init2(size_t size, void *opaque, AVBufferRef *(*alloc)(void *opaque, size_t size), void(*pool_free)(void *opaque))
Allocate and initialize a buffer pool with a more complex allocator.
Definition: buffer.c:259
av_pix_fmt_get_chroma_sub_sample
int av_pix_fmt_get_chroma_sub_sample(enum AVPixelFormat pix_fmt, int *h_shift, int *v_shift)
Utility function to access log2_chroma_w log2_chroma_h from the pixel format AVPixFmtDescriptor.
Definition: pixdesc.c:2690
AVCodecContext::coded_height
int coded_height
Definition: avcodec.h:577
AVHWDeviceContext
This struct aggregates all the (hardware/vendor-specific) "high-level" state, i.e.
Definition: hwcontext.h:61
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:180
cudaVideoSurfaceFormat_YUV444
#define cudaVideoSurfaceFormat_YUV444
Definition: nvdec.c:40
AVHWFramesContext::height
int height
Definition: hwcontext.h:229
av_buffer_pool_get
AVBufferRef * av_buffer_pool_get(AVBufferPool *pool)
Allocate a new AVBuffer, reusing an old buffer from the pool when available.
Definition: buffer.c:387
AVHWFramesContext::pool
AVBufferPool * pool
A pool from which the frames are allocated by av_hwframe_get_buffer().
Definition: hwcontext.h:190
ff_nvdec_start_frame_sep_ref
int ff_nvdec_start_frame_sep_ref(AVCodecContext *avctx, AVFrame *frame, int has_sep_ref)
Definition: nvdec.c:602
av_fast_realloc
void * av_fast_realloc(void *ptr, unsigned int *size, size_t min_size)
Reallocate the given buffer if it is not large enough, otherwise do nothing.
Definition: mem.c:505
AV_PIX_FMT_YUV444P16
#define AV_PIX_FMT_YUV444P16
Definition: pixfmt.h:419
AV_BUFFER_FLAG_READONLY
#define AV_BUFFER_FLAG_READONLY
Always treat the buffer as read-only, even when it has only one reference.
Definition: buffer.h:114
AV_CODEC_ID_VP9
@ AV_CODEC_ID_VP9
Definition: codec_id.h:218
nvdec_decoder_free
static void nvdec_decoder_free(void *opaque, uint8_t *data)
Definition: nvdec.c:162
NVDECFrame
Definition: nvdec.h:44
ctx
AVFormatContext * ctx
Definition: movenc.c:48
decode.h
pix_fmt
static enum AVPixelFormat pix_fmt
Definition: demuxing_decoding.c:41
AV_CODEC_ID_H264
@ AV_CODEC_ID_H264
Definition: codec_id.h:77
AVCodecContext::codec_id
enum AVCodecID codec_id
Definition: avcodec.h:399
NVDECDecoder::cudl
CudaFunctions * cudl
Definition: nvdec.c:52
dpb_size
int dpb_size
Definition: h264_levels.c:107
if
if(ret)
Definition: filter_design.txt:179
AV_CODEC_ID_WMV3
@ AV_CODEC_ID_WMV3
Definition: codec_id.h:121
ff_nvdec_simple_end_frame
int ff_nvdec_simple_end_frame(AVCodecContext *avctx)
Definition: nvdec.c:666
NULL
#define NULL
Definition: coverity.c:32
AVHWFramesContext::sw_format
enum AVPixelFormat sw_format
The pixel format identifying the actual data layout of the hardware frames.
Definition: hwcontext.h:222
av_buffer_unref
void av_buffer_unref(AVBufferRef **buf)
Free a given reference and automatically free the buffer if there are no more references to it.
Definition: buffer.c:139
AV_CODEC_ID_AV1
@ AV_CODEC_ID_AV1
Definition: codec_id.h:279
ff_nvdec_decode_init
int ff_nvdec_decode_init(AVCodecContext *avctx)
Definition: nvdec.c:331
AVHWFramesContext::device_ref
AVBufferRef * device_ref
A reference to the parent AVHWDeviceContext.
Definition: hwcontext.h:141
NVDECDecoder::hw_device_ref
AVBufferRef * hw_device_ref
Definition: nvdec.c:47
AVCodecContext::internal
struct AVCodecInternal * internal
Private context used for internal data.
Definition: avcodec.h:424
av_buffer_pool_uninit
void av_buffer_pool_uninit(AVBufferPool **ppool)
Mark the pool as being available for freeing.
Definition: buffer.c:322
AV_CODEC_ID_MPEG1VIDEO
@ AV_CODEC_ID_MPEG1VIDEO
Definition: codec_id.h:51
av_buffer_create
AVBufferRef * av_buffer_create(uint8_t *data, size_t size, void(*free)(void *opaque, uint8_t *data), void *opaque, int flags)
Create an AVBuffer from an existing array.
Definition: buffer.c:55
error.h
AVCodecID
AVCodecID
Identify the syntax and semantics of the bitstream.
Definition: codec_id.h:47
cudaVideoSurfaceFormat_YUV444_16Bit
#define cudaVideoSurfaceFormat_YUV444_16Bit
Definition: nvdec.c:41
nvdec_alloc_dummy
static AVBufferRef * nvdec_alloc_dummy(size_t size)
Definition: nvdec.c:287
CHECK_CU
#define CHECK_CU(x)
Definition: nvdec.c:61
NVDECDecoder::real_hw_frames_ref
AVBufferRef * real_hw_frames_ref
Definition: nvdec.c:48
FrameDecodeData::post_process
int(* post_process)(void *logctx, AVFrame *frame)
The callback to perform some delayed processing on the frame right before it is returned to the calle...
Definition: decode.h:45
AVCodecInternal::hwaccel_priv_data
void * hwaccel_priv_data
hwaccel-specific private data
Definition: internal.h:120
size
int size
Definition: twinvq_data.h:10344
nvdec_decoder_frame_alloc
static AVBufferRef * nvdec_decoder_frame_alloc(void *opaque, size_t size)
Definition: nvdec.c:246
nvdec_test_capabilities
static int nvdec_test_capabilities(NVDECDecoder *decoder, CUVIDDECODECREATEINFO *params, void *logctx)
Definition: nvdec.c:102
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
nvdec.h
av_buffer_alloc
AVBufferRef * av_buffer_alloc(size_t size)
Allocate an AVBuffer of the given size using av_malloc().
Definition: buffer.c:77
nvdec_free_dummy
static void nvdec_free_dummy(struct AVHWFramesContext *ctx)
Definition: nvdec.c:282
ff_nvdec_decode_uninit
int ff_nvdec_decode_uninit(AVCodecContext *avctx)
Definition: nvdec.c:263
AV_CODEC_ID_MJPEG
@ AV_CODEC_ID_MJPEG
Definition: codec_id.h:57
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:271
hw_device_ctx
static AVBufferRef * hw_device_ctx
Definition: hw_decode.c:45
NVDECDecoder::cuda_ctx
CUcontext cuda_ctx
Definition: nvdec.c:49
nvdec_fdd_priv_free
static void nvdec_fdd_priv_free(void *priv)
Definition: nvdec.c:443
ff_nvdec_end_frame
int ff_nvdec_end_frame(AVCodecContext *avctx)
Definition: nvdec.c:636
common.h
AV_CODEC_ID_HEVC
@ AV_CODEC_ID_HEVC
Definition: codec_id.h:224
av_mallocz
void * av_mallocz(size_t size)
Allocate a memory block with alignment suitable for all memory accesses (including vectors if availab...
Definition: mem.c:264
AVCodecContext::hw_device_ctx
AVBufferRef * hw_device_ctx
A reference to the AVHWDeviceContext describing the device which will be used by a hardware encoder/d...
Definition: avcodec.h:1930
AV_CODEC_ID_VC1
@ AV_CODEC_ID_VC1
Definition: codec_id.h:120
NVDECDecoder
Definition: nvdec.c:44
AV_PIX_FMT_P016
#define AV_PIX_FMT_P016
Definition: pixfmt.h:456
AVCodecContext::hw_frames_ctx
AVBufferRef * hw_frames_ctx
A reference to the AVHWFramesContext describing the input (for encoding) or output (decoding) frames.
Definition: avcodec.h:1880
avcodec.h
nvdec_retrieve_data
static int nvdec_retrieve_data(void *logctx, AVFrame *frame)
Definition: nvdec.c:481
AVHWFramesContext
This struct describes a set or pool of "hardware" frames (i.e.
Definition: hwcontext.h:124
AVCUDADeviceContext
This struct is allocated as AVHWDeviceContext.hwctx.
Definition: hwcontext_cuda.h:42
avcodec_get_hw_frames_parameters
int avcodec_get_hw_frames_parameters(AVCodecContext *avctx, AVBufferRef *device_ref, enum AVPixelFormat hw_pix_fmt, AVBufferRef **out_frames_ref)
Create and return a AVHWFramesContext with values adequate for hardware decoding.
Definition: decode.c:1002
ret
ret
Definition: filter_design.txt:187
pixfmt.h
AV_PIX_FMT_NV12
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:89
frame
these buffered frames must be flushed immediately if a new input produces new output; the filter must not call request_frame to get more. It must just process the frame or queue it. The task of requesting more frames is left to the filter's request_frame method or the application. If a filter has several inputs, the filter must be ready for frames arriving randomly on any input; any filter with several inputs will most likely require some kind of queuing mechanism. It is perfectly acceptable to have a limited queue and to drop frames when the inputs are too unbalanced. request_frame: for filters that do not use the activate callback, this method is called when a frame is wanted on an output. For a source, it should directly call filter_frame on the corresponding output. For a filter, if there are queued frames already, one of these frames should be pushed; if the filter should request a frame on one of its inputs, do so repeatedly until at least one frame has been pushed. Return 0, or at least make progress towards producing a frame
Definition: filter_design.txt:264
cuda_check.h
NVDECFramePool::dpb_size
unsigned int dpb_size
Definition: nvdec.c:57
nvdec_init_hwframes
static int nvdec_init_hwframes(AVCodecContext *avctx, AVBufferRef **out_frames_ref, int dummy)
Definition: nvdec.c:292
NVDECFramePool::nb_allocated
unsigned int nb_allocated
Definition: nvdec.c:58
AVCodecContext
main external API structure.
Definition: avcodec.h:389
ff_nvdec_simple_decode_slice
int ff_nvdec_simple_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
Definition: nvdec.c:674
nvdec_unmap_mapped_frame
static void nvdec_unmap_mapped_frame(void *opaque, uint8_t *data)
Definition: nvdec.c:457
buffer
the frame and frame reference mechanism is intended to avoid, as much as possible, expensive copies of that data while still allowing the filters to produce correct results. The data is stored in buffers represented by AVFrame structures. Several references can point to the same frame buffer
Definition: filter_design.txt:49
ff_nvdec_frame_params
int ff_nvdec_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx, int dpb_size, int supports_444)
Definition: nvdec.c:696
AVPixFmtDescriptor::comp
AVComponentDescriptor comp[4]
Parameters that describe how pixels are packed.
Definition: pixdesc.h:105
AV_PIX_FMT_YUV444P
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:71
NVDECFrame::idx_ref
AVBufferRef * idx_ref
Definition: nvdec.h:47
AV_PIX_FMT_P010
#define AV_PIX_FMT_P010
Definition: pixfmt.h:455
AVCodecContext::coded_width
int coded_width
Bitstream width / height, may be different from width/height e.g.
Definition: avcodec.h:577
AVHWFramesContext::initial_pool_size
int initial_pool_size
Initial size of the frame pool.
Definition: hwcontext.h:199
AVBufferRef
A reference to a data buffer.
Definition: buffer.h:82
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:69
av_free
#define av_free(p)
Definition: tableprint_vlc.h:33
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
FrameDecodeData::hwaccel_priv
void * hwaccel_priv
Per-frame private data for hwaccels.
Definition: decode.h:52
AV_CODEC_ID_VP8
@ AV_CODEC_ID_VP8
Definition: codec_id.h:190
hwcontext.h
AVERROR_BUG
#define AVERROR_BUG
Internal bug, also see AVERROR_BUG2.
Definition: error.h:52
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
AVCodecContext::sw_pix_fmt
enum AVPixelFormat sw_pix_fmt
Nominal unaccelerated pixel format, see AV_PIX_FMT_xxx.
Definition: avcodec.h:1739
NVDECContext
Definition: nvdec.h:52
NVDECFrame::ref_idx_ref
AVBufferRef * ref_idx_ref
Definition: nvdec.h:48
NVDECFrame::decoder_ref
AVBufferRef * decoder_ref
Definition: nvdec.h:49
AV_CODEC_ID_MPEG2VIDEO
@ AV_CODEC_ID_MPEG2VIDEO
preferred ID for MPEG-1/2 video decoding
Definition: codec_id.h:52
AVHWAccel::pix_fmt
enum AVPixelFormat pix_fmt
Supported pixel format.
Definition: avcodec.h:2094