[FFmpeg-devel] [PATCH v9 01/13] avcodec/vaapi_encode: introduce a base layer for vaapi encode

Thu May 23 19:11:20 EEST 2024

On 20/05/2024 16:52, tong1.wu-at-intel.com at ffmpeg.org wrote:
> From: Tong Wu <tong1.wu at intel.com>
> 
> Since VAAPI and future D3D12VA implementation may share some common parameters,
> a base layer encode context is introduced as vaapi context's base.
> 
> Signed-off-by: Tong Wu <tong1.wu at intel.com>
> ---
>   libavcodec/hw_base_encode.h | 56 +++++++++++++++++++++++++++++++++++++
>   libavcodec/vaapi_encode.h   | 39 +++++---------------------
>   2 files changed, 63 insertions(+), 32 deletions(-)
>   create mode 100644 libavcodec/hw_base_encode.h
> 
> diff --git a/libavcodec/hw_base_encode.h b/libavcodec/hw_base_encode.h
> new file mode 100644
> index 0000000000..1996179456
> --- /dev/null
> +++ b/libavcodec/hw_base_encode.h
> @@ -0,0 +1,56 @@
> +/*
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#ifndef AVCODEC_HW_BASE_ENCODE_H
> +#define AVCODEC_HW_BASE_ENCODE_H
> +
> +#define MAX_DPB_SIZE 16
> +#define MAX_PICTURE_REFERENCES 2
> +#define MAX_REORDER_DELAY 16
> +#define MAX_ASYNC_DEPTH 64
> +#define MAX_REFERENCE_LIST_NUM 2
> +
> +enum {
> +    PICTURE_TYPE_IDR = 0,
> +    PICTURE_TYPE_I   = 1,
> +    PICTURE_TYPE_P   = 2,
> +    PICTURE_TYPE_B   = 3,
> +};
> +
> +enum {
> +    // Codec supports controlling the subdivision of pictures into slices.
> +    FLAG_SLICE_CONTROL         = 1 << 0,
> +    // Codec only supports constant quality (no rate control).
> +    FLAG_CONSTANT_QUALITY_ONLY = 1 << 1,
> +    // Codec is intra-only.
> +    FLAG_INTRA_ONLY            = 1 << 2,
> +    // Codec supports B-pictures.
> +    FLAG_B_PICTURES            = 1 << 3,
> +    // Codec supports referencing B-pictures.
> +    FLAG_B_PICTURE_REFERENCES  = 1 << 4,
> +    // Codec supports non-IDR key pictures (that is, key pictures do
> +    // not necessarily empty the DPB).
> +    FLAG_NON_IDR_KEY_PICTURES  = 1 << 5,
> +};
> +
> +typedef struct HWBaseEncodeContext {
> +    const AVClass *class;
> +} HWBaseEncodeContext;
> +
> +#endif /* AVCODEC_HW_BASE_ENCODE_H */
> +
> diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
> index 0eed9691ca..f5c9be8973 100644
> --- a/libavcodec/vaapi_encode.h
> +++ b/libavcodec/vaapi_encode.h
> @@ -33,34 +33,27 @@
>   
>   #include "avcodec.h"
>   #include "hwconfig.h"
> +#include "hw_base_encode.h"
>   
>   struct VAAPIEncodeType;
>   struct VAAPIEncodePicture;
>   
> +// Codec output packet without timestamp delay, which means the
> +// output packet has same PTS and DTS.
> +#define FLAG_TIMESTAMP_NO_DELAY 1 << 6
> +
>   enum {
>       MAX_CONFIG_ATTRIBUTES  = 4,
>       MAX_GLOBAL_PARAMS      = 4,
> -    MAX_DPB_SIZE           = 16,
> -    MAX_PICTURE_REFERENCES = 2,
> -    MAX_REORDER_DELAY      = 16,
>       MAX_PARAM_BUFFER_SIZE  = 1024,
>       // A.4.1: table A.6 allows at most 22 tile rows for any level.
>       MAX_TILE_ROWS          = 22,
>       // A.4.1: table A.6 allows at most 20 tile columns for any level.
>       MAX_TILE_COLS          = 20,
> -    MAX_ASYNC_DEPTH        = 64,
> -    MAX_REFERENCE_LIST_NUM = 2,
>   };
>   
>   extern const AVCodecHWConfigInternal *const ff_vaapi_encode_hw_configs[];
>   
> -enum {
> -    PICTURE_TYPE_IDR = 0,
> -    PICTURE_TYPE_I   = 1,
> -    PICTURE_TYPE_P   = 2,
> -    PICTURE_TYPE_B   = 3,
> -};
> -
>   typedef struct VAAPIEncodeSlice {
>       int             index;
>       int             row_start;
> @@ -193,7 +186,8 @@ typedef struct VAAPIEncodeRCMode {
>   } VAAPIEncodeRCMode;
>   
>   typedef struct VAAPIEncodeContext {
> -    const AVClass *class;
> +    // Base context.
> +    HWBaseEncodeContext base;
>   
>       // Codec-specific hooks.
>       const struct VAAPIEncodeType *codec;
> @@ -397,25 +391,6 @@ typedef struct VAAPIEncodeContext {
>       AVPacket        *tail_pkt;
>   } VAAPIEncodeContext;
>   
> -enum {
> -    // Codec supports controlling the subdivision of pictures into slices.
> -    FLAG_SLICE_CONTROL         = 1 << 0,
> -    // Codec only supports constant quality (no rate control).
> -    FLAG_CONSTANT_QUALITY_ONLY = 1 << 1,
> -    // Codec is intra-only.
> -    FLAG_INTRA_ONLY            = 1 << 2,
> -    // Codec supports B-pictures.
> -    FLAG_B_PICTURES            = 1 << 3,
> -    // Codec supports referencing B-pictures.
> -    FLAG_B_PICTURE_REFERENCES  = 1 << 4,
> -    // Codec supports non-IDR key pictures (that is, key pictures do
> -    // not necessarily empty the DPB).
> -    FLAG_NON_IDR_KEY_PICTURES  = 1 << 5,
> -    // Codec output packet without timestamp delay, which means the
> -    // output packet has same PTS and DTS.
> -    FLAG_TIMESTAMP_NO_DELAY    = 1 << 6,
> -};
> -
>   typedef struct VAAPIEncodeType {
>       // List of supported profiles and corresponding VAAPI profiles.
>       // (Must end with AV_PROFILE_UNKNOWN.)

Would you mind changing ff_hw_ functions to take in HWBaseEncodePicture 
first, and AVCodecContext only if needed?
You have a lot of functions like hw_base_encode_add_ref that don't even 
use AVCodecContext, not even to get a context.

Also, HWBaseEncodePicture should be prefixed with FF, so 
FFHWBaseEncodePicture.
PICTURE_TYPE_* and FLAG_SLICE_* should also have an FF_HW_ prefix.

Instead of defining PICTURE_TYPE_I/P/B, would it be possible to use 
AV_PICTURE_TYPE_I/P/B with an additional `bool key;` or similar, which 
would remove hardcoding of MPEGese in the API.
That would allow differentiating intra-only frames from IDR (intra-only 
keyframes).

 > static inline const char *ff_hw_base_encode_get_pictype_name(const 
int type) {
Newline missing.

I'm working on integrating this into Vulkan right now, it seems suitable 
and saves me a lot of time, thanks for working on it.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: OpenPGP_0xA2FEA5F03F034464.asc
Type: application/pgp-keys
Size: 624 bytes
Desc: OpenPGP public key
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20240523/067bcc94/attachment.key>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: OpenPGP_signature.asc
Type: application/pgp-signature
Size: 236 bytes
Desc: OpenPGP digital signature
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20240523/067bcc94/attachment.sig>