22 #include "config_components.h"
45 #if !NVDECAPI_CHECK_VERSION(9, 0)
46 #define cudaVideoSurfaceFormat_YUV444 2
47 #define cudaVideoSurfaceFormat_YUV444_16Bit 3
50 #if NVDECAPI_CHECK_VERSION(11, 0)
51 #define CUVID_HAS_AV1_SUPPORT
/* Shorthand that forwards x to FF_CUDA_CHECK_DL together with avctx and
 * ctx->cudl; both names must be in scope wherever the macro is expanded. */
117 #define CHECK_CU(x) FF_CUDA_CHECK_DL(avctx, ctx->cudl, x)
/* Upper bound used for the display delay (4). */
120 #define CUVID_MAX_DISPLAY_DELAY (4)
/* Default surface count: one more than the maximum display delay. */
123 #define CUVID_DEFAULT_NUM_SURFACES (CUVID_MAX_DISPLAY_DELAY + 1)
130 CUVIDDECODECAPS *caps =
NULL;
131 CUVIDDECODECREATEINFO cuinfo;
134 int old_nb_surfaces, fifo_size_inc, fifo_size_mul = 1;
136 int old_width = avctx->
width;
137 int old_height = avctx->
height;
145 memset(&cuinfo, 0,
sizeof(cuinfo));
147 ctx->internal_error = 0;
149 avctx->coded_width = cuinfo.ulWidth =
format->coded_width;
150 avctx->coded_height = cuinfo.ulHeight =
format->coded_height;
153 cuinfo.display_area.left =
format->display_area.left +
ctx->crop.left;
154 cuinfo.display_area.top =
format->display_area.top +
ctx->crop.top;
155 cuinfo.display_area.right =
format->display_area.right -
ctx->crop.right;
156 cuinfo.display_area.bottom =
format->display_area.bottom -
ctx->crop.bottom;
159 if (
ctx->resize_expr) {
160 avctx->width =
ctx->resize.width;
161 avctx->height =
ctx->resize.height;
163 avctx->width = cuinfo.display_area.right - cuinfo.display_area.left;
164 avctx->height = cuinfo.display_area.bottom - cuinfo.display_area.top;
171 cuinfo.ulTargetWidth = (avctx->width + 1) & ~1;
172 cuinfo.ulTargetHeight = (avctx->height + 1) & ~1;
173 if (
format->chroma_format == cudaVideoChromaFormat_420 ||
174 format->chroma_format == cudaVideoChromaFormat_422) {
175 avctx->width = cuinfo.ulTargetWidth;
176 avctx->height = cuinfo.ulTargetHeight;
180 cuinfo.target_rect.left = 0;
181 cuinfo.target_rect.top = 0;
182 cuinfo.target_rect.right = cuinfo.ulTargetWidth;
183 cuinfo.target_rect.bottom = cuinfo.ulTargetHeight;
185 chroma_444 =
format->chroma_format == cudaVideoChromaFormat_444;
187 switch (
format->bit_depth_luma_minus8) {
191 #ifdef NVDEC_HAVE_422_SUPPORT
192 }
else if (
format->chroma_format == cudaVideoChromaFormat_422) {
202 #if FF_API_NVDEC_OLD_PIX_FMTS
207 #ifdef NVDEC_HAVE_422_SUPPORT
208 }
else if (
format->chroma_format == cudaVideoChromaFormat_422) {
209 #if FF_API_NVDEC_OLD_PIX_FMTS
222 #if FF_API_NVDEC_OLD_PIX_FMTS
227 #ifdef NVDEC_HAVE_422_SUPPORT
228 }
else if (
format->chroma_format == cudaVideoChromaFormat_422) {
229 #if FF_API_NVDEC_OLD_PIX_FMTS
236 #if FF_API_NVDEC_OLD_PIX_FMTS
248 if (!caps || !caps->bIsSupported) {
250 format->bit_depth_luma_minus8 + 8);
256 if (surface_fmt < 0) {
267 avctx->pix_fmt = surface_fmt;
270 if (avctx->hw_frames_ctx) {
284 (
AVRational){ avctx->width, avctx->height }));
286 ctx->deint_mode_current =
format->progressive_sequence
287 ? cudaVideoDeinterlaceMode_Weave
290 ctx->progressive_sequence =
format->progressive_sequence;
292 if (!
format->progressive_sequence &&
ctx->deint_mode_current == cudaVideoDeinterlaceMode_Weave)
297 if (
format->video_signal_description.video_full_range_flag)
302 avctx->color_primaries =
format->video_signal_description.color_primaries;
303 avctx->color_trc =
format->video_signal_description.transfer_characteristics;
304 avctx->colorspace =
format->video_signal_description.matrix_coefficients;
307 avctx->bit_rate =
format->bitrate;
309 if (
format->frame_rate.numerator &&
format->frame_rate.denominator) {
310 avctx->framerate.num =
format->frame_rate.numerator;
311 avctx->framerate.den =
format->frame_rate.denominator;
315 && avctx->coded_width ==
format->coded_width
316 && avctx->coded_height ==
format->coded_height
317 && avctx->width == old_width
318 && avctx->height == old_height
319 &&
ctx->chroma_format ==
format->chroma_format
323 if (
ctx->cudecoder) {
326 if (
ctx->internal_error < 0)
331 if (hwframe_ctx->pool && (
332 hwframe_ctx->width < avctx->width ||
333 hwframe_ctx->height < avctx->height ||
335 hwframe_ctx->sw_format != avctx->sw_pix_fmt)) {
336 av_log(avctx,
AV_LOG_ERROR,
"AVHWFramesContext is already initialized with incompatible parameters\n");
338 av_log(avctx,
AV_LOG_DEBUG,
"height: %d <-> %d\n", hwframe_ctx->height, avctx->height);
346 ctx->chroma_format =
format->chroma_format;
348 cuinfo.CodecType =
ctx->codec_type =
format->codec;
349 cuinfo.ChromaFormat =
format->chroma_format;
351 switch (avctx->sw_pix_fmt) {
353 cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
357 cuinfo.OutputFormat = cudaVideoSurfaceFormat_P016;
359 #ifdef NVDEC_HAVE_422_SUPPORT
361 cuinfo.OutputFormat = cudaVideoSurfaceFormat_NV16;
364 cuinfo.OutputFormat = cudaVideoSurfaceFormat_P216;
380 if (
ctx->deint_mode_current != cudaVideoDeinterlaceMode_Weave && !
ctx->drop_second_field) {
385 old_nb_surfaces =
ctx->nb_surfaces;
387 if (avctx->extra_hw_frames > 0)
388 ctx->nb_surfaces += avctx->extra_hw_frames;
391 if (fifo_size_inc > 0 &&
av_fifo_grow2(
ctx->frame_queue, fifo_size_inc) < 0) {
392 av_log(avctx,
AV_LOG_ERROR,
"Failed to grow frame queue on video sequence callback\n");
398 av_log(avctx,
AV_LOG_ERROR,
"Failed to grow key frame array on video sequence callback\n");
403 cuinfo.ulNumDecodeSurfaces =
ctx->nb_surfaces;
404 cuinfo.ulNumOutputSurfaces = 1;
405 cuinfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
406 cuinfo.bitDepthMinus8 =
format->bit_depth_luma_minus8;
407 cuinfo.DeinterlaceMode =
ctx->deint_mode_current;
409 ctx->internal_error =
CHECK_CU(
ctx->cvdl->cuvidCreateDecoder(&
ctx->cudecoder, &cuinfo));
410 if (
ctx->internal_error < 0)
413 if (!hwframe_ctx->pool) {
415 hwframe_ctx->sw_format = avctx->sw_pix_fmt;
416 hwframe_ctx->width = avctx->width;
417 hwframe_ctx->height = avctx->height;
425 if(
ctx->cuparseinfo.ulMaxNumDecodeSurfaces != cuinfo.ulNumDecodeSurfaces) {
426 ctx->cuparseinfo.ulMaxNumDecodeSurfaces = cuinfo.ulNumDecodeSurfaces;
427 return cuinfo.ulNumDecodeSurfaces;
440 if(picparams->intra_pic_flag)
441 ctx->key_frame[picparams->CurrPicIdx] = picparams->intra_pic_flag;
443 ctx->internal_error =
CHECK_CU(
ctx->cvdl->cuvidDecodePicture(
ctx->cudecoder, picparams));
444 if (
ctx->internal_error < 0)
458 ctx->internal_error = 0;
461 parsed_frame.
dispinfo.progressive_frame =
ctx->progressive_sequence;
463 if (
ctx->deint_mode_current == cudaVideoDeinterlaceMode_Weave) {
473 if (!
ctx->drop_second_field) {
489 if (
ctx->deint_mode != cudaVideoDeinterlaceMode_Weave && !
ctx->drop_second_field)
501 CUcontext
dummy, cuda_ctx = device_hwctx->cuda_ctx;
502 CUVIDSOURCEDATAPACKET cupkt;
503 int ret = 0, eret = 0, is_flush =
ctx->decoder_flushing;
507 if (is_flush && avpkt && avpkt->
size)
518 memset(&cupkt, 0,
sizeof(cupkt));
520 if (avpkt && avpkt->
size) {
521 cupkt.payload_size = avpkt->
size;
522 cupkt.payload = avpkt->
data;
525 cupkt.flags = CUVID_PKT_TIMESTAMP;
529 cupkt.timestamp = avpkt->
pts;
532 cupkt.flags = CUVID_PKT_ENDOFSTREAM;
533 ctx->decoder_flushing = 1;
547 if (
ctx->internal_error) {
549 ret =
ctx->internal_error;
571 CUcontext
dummy, cuda_ctx = device_hwctx->cuda_ctx;
573 CUdeviceptr mapped_frame = 0;
574 int ret = 0, eret = 0;
578 if (
ctx->decoder_flushing) {
/* Processing parameters handed to cuvidMapVideoFrame() for this frame. */
604 CUVIDPROCPARAMS params;
/* Passed by address to cuvidMapVideoFrame(); presumably receives the row
 * pitch of the mapped frame - confirm against the NVDEC API reference. */
605 unsigned int pitch = 0;
/* Zero the whole struct so that only the fields assigned below are set. */
609 memset(&params, 0,
sizeof(params));
/* Copy the field/progressive information of the parsed frame. */
610 params.progressive_frame = parsed_frame.dispinfo.progressive_frame;
611 params.second_field = parsed_frame.second_field;
612 params.top_field_first = parsed_frame.dispinfo.top_field_first;
/* Map the decoded picture; the result goes through CHECK_CU into ret, and
 * mapped_frame / pitch are passed by address as outputs of the call. */
614 ret =
CHECK_CU(
ctx->cvdl->cuvidMapVideoFrame(
ctx->cudecoder, parsed_frame.dispinfo.picture_index, &mapped_frame, &pitch, &params));
635 CUDA_MEMCPY2D cpy = {
636 .srcMemoryType = CU_MEMORYTYPE_DEVICE,
637 .dstMemoryType = CU_MEMORYTYPE_DEVICE,
638 .srcDevice = mapped_frame,
639 .dstDevice = (CUdeviceptr)
frame->data[
i],
641 .dstPitch =
frame->linesize[
i],
647 ret =
CHECK_CU(
ctx->cudl->cuMemcpy2DAsync(&cpy, device_hwctx->stream));
656 #ifdef NVDEC_HAVE_422_SUPPORT
689 tmp_frame->
data[
i] = (uint8_t*)mapped_frame +
offset;
713 if (
ctx->key_frame[parsed_frame.dispinfo.picture_index])
717 ctx->key_frame[parsed_frame.dispinfo.picture_index] = 0;
724 frame->pts = parsed_frame.dispinfo.timestamp;
726 if (parsed_frame.second_field) {
727 if (
ctx->prev_pts == INT64_MIN) {
731 int pts_diff = (
frame->pts -
ctx->prev_pts) / 2;
733 frame->pts += pts_diff;
742 if (!parsed_frame.is_deinterlacing && !parsed_frame.dispinfo.progressive_frame)
747 }
else if (
ctx->decoder_flushing) {
758 eret =
CHECK_CU(
ctx->cvdl->cuvidUnmapVideoFrame(
ctx->cudecoder, mapped_frame));
773 CUcontext
dummy, cuda_ctx = device_hwctx ? device_hwctx->cuda_ctx :
NULL;
778 ctx->cudl->cuCtxPushCurrent(cuda_ctx);
781 ctx->cvdl->cuvidDestroyVideoParser(
ctx->cuparser);
784 ctx->cvdl->cuvidDestroyDecoder(
ctx->cudecoder);
797 cuvid_free_functions(&
ctx->cvdl);
803 const CUVIDPARSERPARAMS *cuparseinfo,
806 int bit_depth,
int is_yuv422,
int is_yuv444)
809 CUVIDDECODECAPS *caps;
810 int res8 = 0, res10 = 0, res12 = 0;
812 if (!
ctx->cvdl->cuvidGetDecoderCaps) {
813 av_log(avctx,
AV_LOG_WARNING,
"Used Nvidia driver is too old to perform a capability check.\n");
815 #
if defined(_WIN32) || defined(__CYGWIN__)
820 ". Continuing blind.\n");
821 ctx->caps8.bIsSupported =
ctx->caps10.bIsSupported = 1;
823 ctx->caps12.bIsSupported = 0;
827 ctx->caps8.eCodecType =
ctx->caps10.eCodecType =
ctx->caps12.eCodecType
828 = cuparseinfo->CodecType;
830 ctx->caps8.eChromaFormat =
ctx->caps10.eChromaFormat =
ctx->caps12.eChromaFormat
831 = is_yuv444 ? cudaVideoChromaFormat_444 :
832 #ifdef NVDEC_HAVE_422_SUPPORT
833 (is_yuv422 ? cudaVideoChromaFormat_422 : cudaVideoChromaFormat_420);
835 cudaVideoChromaFormat_420;
838 ctx->caps8.nBitDepthMinus8 = 0;
839 ctx->caps10.nBitDepthMinus8 = 2;
840 ctx->caps12.nBitDepthMinus8 = 4;
847 av_log(avctx,
AV_LOG_VERBOSE,
"8 bit: supported: %d, min_width: %d, max_width: %d, min_height: %d, max_height: %d\n",
848 ctx->caps8.bIsSupported,
ctx->caps8.nMinWidth,
ctx->caps8.nMaxWidth,
ctx->caps8.nMinHeight,
ctx->caps8.nMaxHeight);
849 av_log(avctx,
AV_LOG_VERBOSE,
"10 bit: supported: %d, min_width: %d, max_width: %d, min_height: %d, max_height: %d\n",
850 ctx->caps10.bIsSupported,
ctx->caps10.nMinWidth,
ctx->caps10.nMaxWidth,
ctx->caps10.nMinHeight,
ctx->caps10.nMaxHeight);
851 av_log(avctx,
AV_LOG_VERBOSE,
"12 bit: supported: %d, min_width: %d, max_width: %d, min_height: %d, max_height: %d\n",
852 ctx->caps12.bIsSupported,
ctx->caps12.nMinWidth,
ctx->caps12.nMaxWidth,
ctx->caps12.nMinHeight,
ctx->caps12.nMaxHeight);
871 if (!
ctx->caps8.bIsSupported) {
876 if (!caps->bIsSupported) {
881 if (probed_width > caps->nMaxWidth || probed_width < caps->nMinWidth) {
883 probed_width, caps->nMinWidth, caps->nMaxWidth);
887 if (probed_height > caps->nMaxHeight || probed_height < caps->nMinHeight) {
889 probed_height, caps->nMinHeight, caps->nMaxHeight);
893 if ((probed_width * probed_height) / 256 > caps->nMaxMBCount) {
895 (
int)(probed_width * probed_height) / 256, caps->nMaxMBCount);
908 CUVIDSOURCEDATAPACKET seq_pkt;
909 CUcontext cuda_ctx =
NULL;
921 int probed_bit_depth = 8, is_yuv444 = 0, is_yuv422 = 0;
925 probed_bit_depth = probe_desc->
comp[0].
depth;
930 #ifdef NVDEC_HAVE_422_SUPPORT
936 switch (probed_bit_depth) {
938 #if FF_API_NVDEC_OLD_PIX_FMTS
945 #if FF_API_NVDEC_OLD_PIX_FMTS
968 if (
ctx->resize_expr && sscanf(
ctx->resize_expr,
"%dx%d",
969 &
ctx->resize.width, &
ctx->resize.height) != 2) {
975 if (
ctx->crop_expr && sscanf(
ctx->crop_expr,
"%dx%dx%dx%d",
976 &
ctx->crop.top, &
ctx->crop.bottom,
977 &
ctx->crop.left, &
ctx->crop.right) != 4) {
983 ret = cuvid_load_functions(&
ctx->cvdl, avctx);
990 if(
ctx->nb_surfaces < 0)
994 if (!
ctx->frame_queue) {
1001 if (!
ctx->hwframe) {
1009 if (!
ctx->hwdevice) {
1016 if (!
ctx->hwdevice) {
1027 if (!
ctx->hwframe) {
1037 device_hwctx = device_ctx->
hwctx;
1042 memset(&
ctx->cuparseinfo, 0,
sizeof(
ctx->cuparseinfo));
1043 memset(&seq_pkt, 0,
sizeof(seq_pkt));
1046 #if CONFIG_H264_CUVID_DECODER
1048 ctx->cuparseinfo.CodecType = cudaVideoCodec_H264;
1051 #if CONFIG_HEVC_CUVID_DECODER
1053 ctx->cuparseinfo.CodecType = cudaVideoCodec_HEVC;
1056 #if CONFIG_MJPEG_CUVID_DECODER
1058 ctx->cuparseinfo.CodecType = cudaVideoCodec_JPEG;
1061 #if CONFIG_MPEG1_CUVID_DECODER
1063 ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG1;
1066 #if CONFIG_MPEG2_CUVID_DECODER
1068 ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG2;
1071 #if CONFIG_MPEG4_CUVID_DECODER
1073 ctx->cuparseinfo.CodecType = cudaVideoCodec_MPEG4;
1076 #if CONFIG_VP8_CUVID_DECODER
1078 ctx->cuparseinfo.CodecType = cudaVideoCodec_VP8;
1081 #if CONFIG_VP9_CUVID_DECODER
1083 ctx->cuparseinfo.CodecType = cudaVideoCodec_VP9;
1086 #if CONFIG_VC1_CUVID_DECODER
1088 ctx->cuparseinfo.CodecType = cudaVideoCodec_VC1;
1091 #if CONFIG_AV1_CUVID_DECODER && defined(CUVID_HAS_AV1_SUPPORT)
1093 ctx->cuparseinfo.CodecType = cudaVideoCodec_AV1;
1114 extradata_size >= 4 &&
1115 extradata[0] & 0x80) {
1117 extradata_size -= 4;
1121 +
FFMAX(extradata_size - (
int)
sizeof(
ctx->cuparse_ext->raw_seqhdr_data), 0));
1122 if (!
ctx->cuparse_ext) {
1127 if (extradata_size > 0)
1128 memcpy(
ctx->cuparse_ext->raw_seqhdr_data, extradata, extradata_size);
1129 ctx->cuparse_ext->format.seqhdr_data_length = extradata_size;
1131 ctx->cuparseinfo.pExtVideoInfo =
ctx->cuparse_ext;
1134 if (!
ctx->key_frame) {
1139 ctx->cuparseinfo.ulMaxNumDecodeSurfaces = 1;
1141 ctx->cuparseinfo.pUserData = avctx;
1153 probed_bit_depth, is_yuv422, is_yuv444);
1161 seq_pkt.payload =
ctx->cuparse_ext->raw_seqhdr_data;
1162 seq_pkt.payload_size =
ctx->cuparse_ext->format.seqhdr_data_length;
1164 if (seq_pkt.payload && seq_pkt.payload_size) {
1174 ctx->prev_pts = INT64_MIN;
1191 CUcontext
dummy, cuda_ctx = device_hwctx->cuda_ctx;
1192 CUVIDSOURCEDATAPACKET seq_pkt = { 0 };
1201 if (
ctx->cudecoder) {
1202 ctx->cvdl->cuvidDestroyDecoder(
ctx->cudecoder);
1206 if (
ctx->cuparser) {
1207 ctx->cvdl->cuvidDestroyVideoParser(
ctx->cuparser);
1215 seq_pkt.payload =
ctx->cuparse_ext->raw_seqhdr_data;
1216 seq_pkt.payload_size =
ctx->cuparse_ext->format.seqhdr_data_length;
1218 if (seq_pkt.payload && seq_pkt.payload_size) {
1228 ctx->prev_pts = INT64_MIN;
1229 ctx->decoder_flushing = 0;
/* Byte offset of member x inside CuvidContext, for the option entries. */
1236 #define OFFSET(x) offsetof(CuvidContext, x)
/* Flag combination used by the option entries: video parameter and decoding
 * parameter. The expansion is not parenthesized, so it is only safe where
 * no operator binding tighter than '|' is applied to it. */
1237 #define VD AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_DECODING_PARAM
1239 {
"deint",
"Set deinterlacing mode",
OFFSET(deint_mode),
AV_OPT_TYPE_INT, { .i64 = cudaVideoDeinterlaceMode_Weave }, cudaVideoDeinterlaceMode_Weave, cudaVideoDeinterlaceMode_Adaptive,
VD, .unit =
"deint" },
1240 {
"weave",
"Weave deinterlacing (do nothing)", 0,
AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Weave }, 0, 0,
VD, .unit =
"deint" },
1241 {
"bob",
"Bob deinterlacing", 0,
AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Bob }, 0, 0,
VD, .unit =
"deint" },
1242 {
"adaptive",
"Adaptive deinterlacing", 0,
AV_OPT_TYPE_CONST, { .i64 = cudaVideoDeinterlaceMode_Adaptive }, 0, 0,
VD, .unit =
"deint" },
1245 {
"drop_second_field",
"Drop second field when deinterlacing",
OFFSET(drop_second_field),
AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1,
VD },
1265 #define DEFINE_CUVID_CODEC(x, X, bsf_name) \
1266 static const AVClass x##_cuvid_class = { \
1267 .class_name = #x "_cuvid", \
1268 .item_name = av_default_item_name, \
1269 .option = options, \
1270 .version = LIBAVUTIL_VERSION_INT, \
1272 const FFCodec ff_##x##_cuvid_decoder = { \
1273 .p.name = #x "_cuvid", \
1274 CODEC_LONG_NAME("Nvidia CUVID " #X " decoder"), \
1275 .p.type = AVMEDIA_TYPE_VIDEO, \
1276 .p.id = AV_CODEC_ID_##X, \
1277 .priv_data_size = sizeof(CuvidContext), \
1278 .p.priv_class = &x##_cuvid_class, \
1279 .init = cuvid_decode_init, \
1280 .close = cuvid_decode_end, \
1281 FF_CODEC_RECEIVE_FRAME_CB(cuvid_output_frame), \
1282 .flush = cuvid_flush, \
1284 .p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING | AV_CODEC_CAP_HARDWARE, \
1285 .caps_internal = FF_CODEC_CAP_NOT_INIT_THREADSAFE | \
1286 FF_CODEC_CAP_SETS_FRAME_PROPS, \
1287 .hw_configs = cuvid_hw_configs, \
1288 .p.wrapper_name = "cuvid", \
1291 #if CONFIG_AV1_CUVID_DECODER && defined(CUVID_HAS_AV1_SUPPORT)
1295 #if CONFIG_HEVC_CUVID_DECODER
1299 #if CONFIG_H264_CUVID_DECODER
1303 #if CONFIG_MJPEG_CUVID_DECODER
1307 #if CONFIG_MPEG1_CUVID_DECODER
1311 #if CONFIG_MPEG2_CUVID_DECODER
1315 #if CONFIG_MPEG4_CUVID_DECODER
1319 #if CONFIG_VP8_CUVID_DECODER
1323 #if CONFIG_VP9_CUVID_DECODER
1327 #if CONFIG_VC1_CUVID_DECODER