[FFmpeg-devel] [PATCH v12 15/15] avcodec/hw_base_encode: add avctx pointer for FFHWBaseEncodeContext

Mon Jun 3 12:23:37 EEST 2024

>From: Lynne <dev at lynne.ee>
>Sent: Wednesday, May 29, 2024 8:02 AM
>To: Wu, Tong1 <tong1.wu at intel.com>; FFmpeg development discussions and
>patches <ffmpeg-devel at ffmpeg.org>
>Subject: Re: [FFmpeg-devel] [PATCH v12 15/15] avcodec/hw_base_encode: add
>avctx pointer for FFHWBaseEncodeContext
>
>On 29/05/2024 00:53, Wu, Tong1 wrote:
>>> From: ffmpeg-devel <ffmpeg-devel-bounces at ffmpeg.org> On Behalf Of
>Lynne
>>> via ffmpeg-devel
>>> Sent: Wednesday, May 29, 2024 1:08 AM
>>> To: ffmpeg-devel at ffmpeg.org
>>> Cc: Lynne <dev at lynne.ee>
>>> Subject: Re: [FFmpeg-devel] [PATCH v12 15/15] avcodec/hw_base_encode:
>add
>>> avctx pointer for FFHWBaseEncodeContext
>>>
>>> On 28/05/2024 17:48, tong1.wu-at-intel.com at ffmpeg.org wrote:
>>>> From: Tong Wu <tong1.wu at intel.com>
>>>>
>>>> An avctx pointer is added to FFHWBaseEncodeContext. This is to make
>>>> FFHWBaseEncodeContext a standalone component for ff_hw_base_*
>>> functions.
>>>> This patch also removes some unnecessary AVCodecContext arguments.
>>>>
>>>> Signed-off-by: Tong Wu <tong1.wu at intel.com>
>>>> ---
>>>>    libavcodec/d3d12va_encode.c |  6 +++---
>>>>    libavcodec/hw_base_encode.c | 31 +++++++++++++------------------
>>>>    libavcodec/hw_base_encode.h |  8 +++++---
>>>>    libavcodec/vaapi_encode.c   |  6 +++---
>>>>    4 files changed, 24 insertions(+), 27 deletions(-)
>>>>
>>>> diff --git a/libavcodec/d3d12va_encode.c b/libavcodec/d3d12va_encode.c
>>>> index 0fbf8eb07c..6d3a53c6ca 100644
>>>> --- a/libavcodec/d3d12va_encode.c
>>>> +++ b/libavcodec/d3d12va_encode.c
>>>> @@ -1351,7 +1351,7 @@ static int
>>> d3d12va_encode_create_recon_frames(AVCodecContext *avctx)
>>>>        enum AVPixelFormat recon_format;
>>>>        int err;
>>>>
>>>> -    err = ff_hw_base_get_recon_format(avctx, NULL, &recon_format);
>>>> +    err = ff_hw_base_get_recon_format(base_ctx, NULL, &recon_format);
>>>>        if (err < 0)
>>>>            return err;
>>>>
>>>> @@ -1398,7 +1398,7 @@ int ff_d3d12va_encode_init(AVCodecContext
>>> *avctx)
>>>>        int err;
>>>>        HRESULT hr;
>>>>
>>>> -    err = ff_hw_base_encode_init(avctx);
>>>> +    err = ff_hw_base_encode_init(avctx, base_ctx);
>>>>        if (err < 0)
>>>>            goto fail;
>>>>
>>>> @@ -1552,7 +1552,7 @@ int ff_d3d12va_encode_close(AVCodecContext
>>> *avctx)
>>>>        D3D12_OBJECT_RELEASE(ctx->video_device3);
>>>>        D3D12_OBJECT_RELEASE(ctx->device3);
>>>>
>>>> -    ff_hw_base_encode_close(avctx);
>>>> +    ff_hw_base_encode_close(base_ctx);
>>>>
>>>>        return 0;
>>>>    }
>>>> diff --git a/libavcodec/hw_base_encode.c b/libavcodec/hw_base_encode.c
>>>> index 92f69bb78c..88efdf672c 100644
>>>> --- a/libavcodec/hw_base_encode.c
>>>> +++ b/libavcodec/hw_base_encode.c
>>>> @@ -94,14 +94,13 @@ static void
>>> hw_base_encode_remove_refs(FFHWBaseEncodePicture *pic, int level)
>>>>        pic->ref_removed[level] = 1;
>>>>    }
>>>>
>>>> -static void hw_base_encode_set_b_pictures(AVCodecContext *avctx,
>>>> +static void hw_base_encode_set_b_pictures(FFHWBaseEncodeContext
>*ctx,
>>>>                                              FFHWBaseEncodePicture *start,
>>>>                                              FFHWBaseEncodePicture *end,
>>>>                                              FFHWBaseEncodePicture *prev,
>>>>                                              int current_depth,
>>>>                                              FFHWBaseEncodePicture **last)
>>>>    {
>>>> -    FFHWBaseEncodeContext *ctx = avctx->priv_data;
>>>>        FFHWBaseEncodePicture *pic, *next, *ref;
>>>>        int i, len;
>>>>
>>>> @@ -148,20 +147,19 @@ static void
>>> hw_base_encode_set_b_pictures(AVCodecContext *avctx,
>>>>                hw_base_encode_add_ref(pic, ref, 0, 1, 0);
>>>>
>>>>            if (i > 1)
>>>> -            hw_base_encode_set_b_pictures(avctx, start, pic, pic,
>>>> +            hw_base_encode_set_b_pictures(ctx, start, pic, pic,
>>>>                                              current_depth + 1, &next);
>>>>            else
>>>>                next = pic;
>>>>
>>>> -        hw_base_encode_set_b_pictures(avctx, pic, end, next,
>>>> +        hw_base_encode_set_b_pictures(ctx, pic, end, next,
>>>>                                          current_depth + 1, last);
>>>>        }
>>>>    }
>>>>
>>>> -static void hw_base_encode_add_next_prev(AVCodecContext *avctx,
>>>> +static void hw_base_encode_add_next_prev(FFHWBaseEncodeContext
>>> *ctx,
>>>>                                             FFHWBaseEncodePicture *pic)
>>>>    {
>>>> -    FFHWBaseEncodeContext *ctx = avctx->priv_data;
>>>>        int i;
>>>>
>>>>        if (!pic)
>>>> @@ -333,12 +331,12 @@ static int
>>> hw_base_encode_pick_next(AVCodecContext *avctx,
>>>>        }
>>>>
>>>>        if (b_counter > 0) {
>>>> -        hw_base_encode_set_b_pictures(avctx, start, pic, pic, 1,
>>>> +        hw_base_encode_set_b_pictures(ctx, start, pic, pic, 1,
>>>>                                          &prev);
>>>>        } else {
>>>>            prev = pic;
>>>>        }
>>>> -    hw_base_encode_add_next_prev(avctx, prev);
>>>> +    hw_base_encode_add_next_prev(ctx, prev);
>>>>
>>>>        return 0;
>>>>    }
>>>> @@ -687,9 +685,9 @@ int
>ff_hw_base_init_gop_structure(AVCodecContext
>>> *avctx, uint32_t ref_l0, uint32
>>>>        return 0;
>>>>    }
>>>>
>>>> -int ff_hw_base_get_recon_format(AVCodecContext *avctx, const void
>>> *hwconfig, enum AVPixelFormat *fmt)
>>>> +int ff_hw_base_get_recon_format(FFHWBaseEncodeContext *ctx, const
>>> void *hwconfig,
>>>> +                                enum AVPixelFormat *fmt)
>>>>    {
>>>> -    FFHWBaseEncodeContext *ctx = avctx->priv_data;
>>>>        AVHWFramesConstraints *constraints = NULL;
>>>>        enum AVPixelFormat recon_format;
>>>>        int err, i;
>>>> @@ -722,14 +720,14 @@ int
>>> ff_hw_base_get_recon_format(AVCodecContext *avctx, const void
>*hwconfig,
>>> enu
>>>>            // No idea what to use; copy input format.
>>>>            recon_format = ctx->input_frames->sw_format;
>>>>        }
>>>> -    av_log(avctx, AV_LOG_DEBUG, "Using %s as format of "
>>>> +    av_log(ctx->avctx, AV_LOG_DEBUG, "Using %s as format of "
>>>>               "reconstructed frames.\n", av_get_pix_fmt_name(recon_format));
>>>>
>>>>        if (ctx->surface_width  < constraints->min_width  ||
>>>>            ctx->surface_height < constraints->min_height ||
>>>>            ctx->surface_width  > constraints->max_width ||
>>>>            ctx->surface_height > constraints->max_height) {
>>>> -        av_log(avctx, AV_LOG_ERROR, "Hardware does not support encoding
>at
>>> "
>>>> +        av_log(ctx->avctx, AV_LOG_ERROR, "Hardware does not support
>>> encoding at "
>>>>                   "size %dx%d (constraints: width %d-%d height %d-%d).\n",
>>>>                   ctx->surface_width, ctx->surface_height,
>>>>                   constraints->min_width,  constraints->max_width,
>>>> @@ -756,10 +754,9 @@ int
>>> ff_hw_base_encode_free(FFHWBaseEncodePicture *pic)
>>>>        return 0;
>>>>    }
>>>>
>>>> -int ff_hw_base_encode_init(AVCodecContext *avctx)
>>>> +int ff_hw_base_encode_init(AVCodecContext *avctx,
>>> FFHWBaseEncodeContext *ctx)
>>>>    {
>>>> -    FFHWBaseEncodeContext *ctx = avctx->priv_data;
>>>> -
>>>> +    ctx->avctx = avctx;
>>>>        ctx->frame = av_frame_alloc();
>>>>        if (!ctx->frame)
>>>>            return AVERROR(ENOMEM);
>>>> @@ -789,10 +786,8 @@ int ff_hw_base_encode_init(AVCodecContext
>>> *avctx)
>>>>        return 0;
>>>>    }
>>>>
>>>> -int ff_hw_base_encode_close(AVCodecContext *avctx)
>>>> +int ff_hw_base_encode_close(FFHWBaseEncodeContext *ctx)
>>>>    {
>>>> -    FFHWBaseEncodeContext *ctx = avctx->priv_data;
>>>> -
>>>>        av_fifo_freep2(&ctx->encode_fifo);
>>>>
>>>>        av_frame_free(&ctx->frame);
>>>> diff --git a/libavcodec/hw_base_encode.h
>b/libavcodec/hw_base_encode.h
>>>> index 15ef3d7ac6..13c1fc0f69 100644
>>>> --- a/libavcodec/hw_base_encode.h
>>>> +++ b/libavcodec/hw_base_encode.h
>>>> @@ -116,6 +116,7 @@ typedef struct FFHWEncodePictureOperation {
>>>>
>>>>    typedef struct FFHWBaseEncodeContext {
>>>>        const AVClass *class;
>>>> +    AVCodecContext *avctx;
>>>>
>>>>        // Hardware-specific hooks.
>>>>        const struct FFHWEncodePictureOperation *op;
>>>> @@ -222,13 +223,14 @@ int
>>> ff_hw_base_encode_receive_packet(AVCodecContext *avctx, AVPacket
>*pkt);
>>>>    int ff_hw_base_init_gop_structure(AVCodecContext *avctx, uint32_t
>ref_l0,
>>> uint32_t ref_l1,
>>>>                                      int flags, int prediction_pre_only);
>>>>
>>>> -int ff_hw_base_get_recon_format(AVCodecContext *avctx, const void
>>> *hwconfig, enum AVPixelFormat *fmt);
>>>> +int ff_hw_base_get_recon_format(FFHWBaseEncodeContext *ctx, const
>>> void *hwconfig,
>>>> +                                enum AVPixelFormat *fmt);
>>>>
>>>>    int ff_hw_base_encode_free(FFHWBaseEncodePicture *pic);
>>>>
>>>> -int ff_hw_base_encode_init(AVCodecContext *avctx);
>>>> +int ff_hw_base_encode_init(AVCodecContext *avctx,
>>> FFHWBaseEncodeContext *ctx);
>>>>
>>>> -int ff_hw_base_encode_close(AVCodecContext *avctx);
>>>> +int ff_hw_base_encode_close(FFHWBaseEncodeContext *ctx);
>>>>
>>>>    #define HW_BASE_ENCODE_COMMON_OPTIONS \
>>>>        { "idr_interval", \
>>>> diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
>>>> index b35a23e852..0693e77548 100644
>>>> --- a/libavcodec/vaapi_encode.c
>>>> +++ b/libavcodec/vaapi_encode.c
>>>> @@ -2059,7 +2059,7 @@ static av_cold int
>>> vaapi_encode_create_recon_frames(AVCodecContext *avctx)
>>>>        }
>>>>        hwconfig->config_id = ctx->va_config;
>>>>
>>>> -    err = ff_hw_base_get_recon_format(avctx, (const void*)hwconfig,
>>> &recon_format);
>>>> +    err = ff_hw_base_get_recon_format(base_ctx, (const void*)hwconfig,
>>> &recon_format);
>>>>        if (err < 0)
>>>>            goto fail;
>>>>
>>>> @@ -2106,7 +2106,7 @@ av_cold int
>ff_vaapi_encode_init(AVCodecContext
>>> *avctx)
>>>>        VAStatus vas;
>>>>        int err;
>>>>
>>>> -    err = ff_hw_base_encode_init(avctx);
>>>> +    err = ff_hw_base_encode_init(avctx, base_ctx);
>>>>        if (err < 0)
>>>>            goto fail;
>>>>
>>>> @@ -2313,7 +2313,7 @@ av_cold int
>>> ff_vaapi_encode_close(AVCodecContext *avctx)
>>>>        av_freep(&ctx->codec_sequence_params);
>>>>        av_freep(&ctx->codec_picture_params);
>>>>
>>>> -    ff_hw_base_encode_close(avctx);
>>>> +    ff_hw_base_encode_close(base_ctx);
>>>>
>>>>        return 0;
>>>>    }
>>>
>>> Err, you missed ff_hw_base_encode_set_output_property,
>>> ff_hw_base_encode_receive_packet and ff_hw_base_init_gop_structure?
>>>
>>> Rest looks better.
>>
>> ff_hw_base_encode_receive_packet is directly bind to int
>(*receive_packet)(struct AVCodecContext *avctx, struct AVPacket *avpkt); I
>thought maybe this should not be changed? For the other two functions,
>components in avctx are used. If they need to be changed, I could either take
>both FFHWBaseEncodeContext and AVCodecContext as arguments or use ctx-
>>avctx->* to get the components which one do you think better. Thank you.
>
>Take both FFHWBaseEncodeContext and AVCodecContext as arguments, and
>use
>it to get fields such as gop_size, max_b_frames and so on.
>
>*Do not use* avctx->priv_data anywhere in hw_base_encode. Any usage of
>it has to be switched to a user-given FFHWBaseEncodeContext via a
>function argument.
>
>ff_hw_base_encode_receive_packet needs to be switched to
>(FFHWBaseEncodeContext, AVCodecContext...) as well, as it uses
>avctx->priv_data.
>
>Hardware encoders should have small wrappers for receive_packet which
>would call ff_hw_base_encode_receive_packet. It's not much, just 3 lines.
>
>Hardware encoders could still use FFHWBaseEncodeContext as their own
>main encode context, but they would no longer be required to use it as
>their main context. It'll make sense with some code:
>
>For example, d3d12va_encode's wrapper function would be:
>static int receive_packet(avctx...)
>{
>     return ff_hw_base_encode_receive_packet(avctx->priv_data, avctx...)
>}
>
>Vulkan's wrapper would, on the other hand look like:
>static int receive_packet(avctx...)
>{
>     VulkanEncodeContext *vkenc = avctx->priv_data;
>     return ff_hw_base_encode_receive_packet(vkenc->hw_base, avctx...)
>}
>
>You should also rename FFHWBaseEncodeContext.avctx to
>FFHWBaseEncodeContext.log_ctx with a type of void *, and use it for
>av_log where needed. It would be set to avctx in
>ff_hw_base_encode_init(), but encoders could override it if they needed
>to by modifying the context.
>If frame threading is ever implemented, this change would make it
>easier, as the context for each call would be exlicitly known.
>

Updated in v13. I've added wrappers in vaapi_encode.c and d3d12_encode.c to make it available for all the codecs(vaapi_encode_* and d3d12va_encode_hevc). Thanks.