[FFmpeg-devel] [PATCH] avcodec/cuviddec: correctly handle buffer size and status when deinterlacing

Wed Feb 26 04:42:13 EET 2025

On 2/25/25 13:43, Timo Rothenpieler wrote:
> ---
>   libavcodec/cuviddec.c | 24 +++++++++++++-----------
>   1 file changed, 13 insertions(+), 11 deletions(-)
>
> diff --git a/libavcodec/cuviddec.c b/libavcodec/cuviddec.c
> index 67076a1752..312742fb8c 100644
> --- a/libavcodec/cuviddec.c
> +++ b/libavcodec/cuviddec.c
> @@ -131,7 +131,7 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
>       CUVIDDECODECREATEINFO cuinfo;
>       int surface_fmt;
>       int chroma_444;
> -    int fifo_size_inc;
> +    int old_nb_surfaces, fifo_size_inc, fifo_size_mul = 1;
>   
>       int old_width = avctx->width;
>       int old_height = avctx->height;
> @@ -349,20 +349,24 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
>           return 0;
>       }
>   
> -    fifo_size_inc = ctx->nb_surfaces;
> -    ctx->nb_surfaces = FFMAX(ctx->nb_surfaces, format->min_num_decode_surfaces + 3);
> +    if (ctx->deint_mode_current != cudaVideoDeinterlaceMode_Weave && !ctx->drop_second_field) {
> +        avctx->framerate = av_mul_q(avctx->framerate, (AVRational){2, 1});
> +        fifo_size_mul = 2;
> +    }
>   
> +    old_nb_surfaces = ctx->nb_surfaces;
> +    ctx->nb_surfaces = FFMAX(ctx->nb_surfaces, format->min_num_decode_surfaces + 3);
>       if (avctx->extra_hw_frames > 0)
>           ctx->nb_surfaces += avctx->extra_hw_frames;
>   
> -    fifo_size_inc = ctx->nb_surfaces - fifo_size_inc;
> +    fifo_size_inc = ctx->nb_surfaces * fifo_size_mul - av_fifo_can_read(ctx->frame_queue) - av_fifo_can_write(ctx->frame_queue);
>       if (fifo_size_inc > 0 && av_fifo_grow2(ctx->frame_queue, fifo_size_inc) < 0) {
>           av_log(avctx, AV_LOG_ERROR, "Failed to grow frame queue on video sequence callback\n");
>           ctx->internal_error = AVERROR(ENOMEM);
>           return 0;
>       }
>   
> -    if (fifo_size_inc > 0 && av_reallocp_array(&ctx->key_frame, ctx->nb_surfaces, sizeof(int)) < 0) {
> +    if (ctx->nb_surfaces > old_nb_surfaces && av_reallocp_array(&ctx->key_frame, ctx->nb_surfaces, sizeof(int)) < 0) {
>           av_log(avctx, AV_LOG_ERROR, "Failed to grow key frame array on video sequence callback\n");
>           ctx->internal_error = AVERROR(ENOMEM);
>           return 0;
> @@ -374,9 +378,6 @@ static int CUDAAPI cuvid_handle_video_sequence(void *opaque, CUVIDEOFORMAT* form
>       cuinfo.bitDepthMinus8 = format->bit_depth_luma_minus8;
>       cuinfo.DeinterlaceMode = ctx->deint_mode_current;
>   
> -    if (ctx->deint_mode_current != cudaVideoDeinterlaceMode_Weave && !ctx->drop_second_field)
> -        avctx->framerate = av_mul_q(avctx->framerate, (AVRational){2, 1});
> -
>       ctx->internal_error = CHECK_CU(ctx->cvdl->cuvidCreateDecoder(&ctx->cudecoder, &cuinfo));
>       if (ctx->internal_error < 0)
>           return 0;
> @@ -448,11 +449,12 @@ static int cuvid_is_buffer_full(AVCodecContext *avctx)
>   {
>       CuvidContext *ctx = avctx->priv_data;
>   
> -    int delay = ctx->cuparseinfo.ulMaxDisplayDelay;
> +    int mult = 1;
>       if (ctx->deint_mode != cudaVideoDeinterlaceMode_Weave && !ctx->drop_second_field)
> -        delay *= 2;
> +        mult = 2;
>   
> -    return av_fifo_can_read(ctx->frame_queue) + delay >= ctx->nb_surfaces;
> +    // "- mult + 1" ensures that the buffer is still signalled full if one half-frame has already been returned when deinterlacing.
> +    return av_fifo_can_read(ctx->frame_queue) + (ctx->cuparseinfo.ulMaxDisplayDelay * mult) >= ctx->nb_surfaces * mult - mult + 1;

I think this is clearer:
return ((av_fifo_can_read(ctx->frame_queue) + mult - 1) / mult) + 
ctx->cuparseinfo.ulMaxDisplayDelay >= ctx->nb_surfaces

The integer ceiling division gives the number of surfaces referenced in 
frame_queue.

However, when going from mult = 2 to 1, this check considers the buffer 
fuller than it really is, which probably isn't a problem. Unfortunately, 
when going from mult = 1 to 2, if there is more than one frame in 
frame_queue, it will count fewer referenced surfaces than there actually 
are, which may be a problem.

To avoid that, on transitions you would have to either drain frame_queue 
until it is empty or peek at its contents to count the number of surfaces 
actually referenced.

>   }
>   
>   static int cuvid_decode_packet(AVCodecContext *avctx, const AVPacket *avpkt)

I'll apply this to MythTV for testing.