[FFmpeg-devel] [PATCH v2 1/2] avcodec/cbs_h2645: fix parsing and storing Picture Header references in the context
James Almer
jamrial at gmail.com
Sun Jul 2 16:01:54 EEST 2023
On 7/1/2023 10:06 PM, Nuo Mi wrote:
> On Sun, Jul 2, 2023 at 7:32 AM James Almer <jamrial at gmail.com> wrote:
>
>> On 7/1/2023 8:51 AM, Nuo Mi wrote:
>>> On Fri, Jun 30, 2023 at 6:45 AM James Almer<jamrial at gmail.com> wrote:
>>>
>>>> Signed-off-by: James Almer<jamrial at gmail.com>
>>>> ---
>>>> libavcodec/cbs_h2645.c | 35 ++++++++++++++++-----------
>>>> libavcodec/cbs_h266.h | 17 +++++++------
>>>> libavcodec/cbs_h266_syntax_template.c | 17 ++++++-------
>>>> libavcodec/h266_metadata_bsf.c | 13 +++++-----
>>>> libavcodec/vvc_parser.c | 10 ++++----
>>>> 5 files changed, 50 insertions(+), 42 deletions(-)
>>>>
>>>> diff --git a/libavcodec/cbs_h2645.c b/libavcodec/cbs_h2645.c
>>>> index cdd7901518..68ccf6a7eb 100644
>>>> --- a/libavcodec/cbs_h2645.c
>>>> +++ b/libavcodec/cbs_h2645.c
>>>> @@ -525,12 +525,6 @@ static int
>>>> cbs_h2645_split_fragment(CodedBitstreamContext *ctx,
>>>> if (frag->data_size == 0)
>>>> return 0;
>>>>
>>>> - if (codec_id == AV_CODEC_ID_VVC) {
>>>> - //we deactive picture header here to avoid reuse previous au's
>> ph.
>>>> - CodedBitstreamH266Context *h266 = ctx->priv_data;
>>>> - h266->priv.ph = NULL;
>>>> - }
>>>> -
>>>> if (header && frag->data[0] && codec_id == AV_CODEC_ID_H264) {
>>>> // AVCC header.
>>>> size_t size, start, end;
>>>> @@ -793,19 +787,20 @@ cbs_h266_replace_ps(6, SPS, sps,
>>>> sps_seq_parameter_set_id)
>>>> cbs_h266_replace_ps(6, PPS, pps, pps_pic_parameter_set_id)
>>>>
>>>> static int cbs_h266_replace_ph(CodedBitstreamContext *ctx,
>>>> - CodedBitstreamUnit *unit)
>>>> + CodedBitstreamUnit *unit,
>>>> + H266RawPictureHeader *ph)
>>>> {
>>>> CodedBitstreamH266Context *h266 = ctx->priv_data;
>>>> int err;
>>>>
>>>> - h266->priv.ph = NULL;
>>>> err = ff_cbs_make_unit_refcounted(ctx, unit);
>>>> if (err < 0)
>>>> return err;
>>>> - err = av_buffer_replace(&h266->priv.ph_ref, unit->content_ref);
>>>> + av_assert0(unit->content_ref);
>>>> + err = av_buffer_replace(&h266->ph_ref, unit->content_ref);
>>>> if (err < 0)
>>>> return err;
>>>> - h266->priv.ph = (H266RawPH*)h266->priv.ph_ref->data;
>>>> + h266->ph = ph;
>>>> return 0;
>>>> }
>>>>
>>>> @@ -1111,7 +1106,7 @@ static int
>>>> cbs_h266_read_nal_unit(CodedBitstreamContext *ctx,
>>>> err = cbs_h266_read_ph(ctx, &gbc, ph);
>>>> if (err < 0)
>>>> return err;
>>>> - err = cbs_h266_replace_ph(ctx, unit);
>>>> + err = cbs_h266_replace_ph(ctx, unit,
>> &ph->ph_picture_header);
>>>> if (err < 0)
>>>> return err;
>>>> }
>>>> @@ -1139,6 +1134,12 @@ static int
>>>> cbs_h266_read_nal_unit(CodedBitstreamContext *ctx,
>>>> pos = get_bits_count(&gbc);
>>>> len = unit->data_size;
>>>>
>>>> + if (slice->header.sh_picture_header_in_slice_header_flag) {
>>>> + err = cbs_h266_replace_ph(ctx, unit,
>>>> &slice->header.sh_picture_header);
>>>> + if (err < 0)
>>>> + return err;
>>>> + }
>>>> +
>>>> slice->data_size = len - pos / 8;
>>>> slice->data_ref = av_buffer_ref(unit->data_ref);
>>>> if (!slice->data_ref)
>>>> @@ -1640,7 +1641,7 @@ static int
>>>> cbs_h266_write_nal_unit(CodedBitstreamContext *ctx,
>>>> if (err < 0)
>>>> return err;
>>>>
>>>> - err = cbs_h266_replace_ph(ctx, unit);
>>>> + err = cbs_h266_replace_ph(ctx, unit,
>> &ph->ph_picture_header);
>>>> if (err < 0)
>>>> return err;
>>>> }
>>>> @@ -1661,6 +1662,12 @@ static int
>>>> cbs_h266_write_nal_unit(CodedBitstreamContext *ctx,
>>>> if (err < 0)
>>>> return err;
>>>>
>>>> + if (slice->header.sh_picture_header_in_slice_header_flag) {
>>>> + err = cbs_h266_replace_ph(ctx, unit,
>>>> &slice->header.sh_picture_header);
>>>> + if (err < 0)
>>>> + return err;
>>>> + }
>>>> +
>>>> if (slice->data) {
>>>> err = cbs_h2645_write_slice_data(ctx, pbc,
>> slice->data,
>>>> slice->data_size,
>>>> @@ -1884,8 +1891,8 @@ static void cbs_h266_flush(CodedBitstreamContext
>>>> *ctx)
>>>> av_buffer_unref(&h266->pps_ref[i]);
>>>> h266->pps[i] = NULL;
>>>> }
>>>> - av_buffer_unref(&h266->priv.ph_ref);
>>>> - h266->priv.ph = NULL;
>>>> + av_buffer_unref(&h266->ph_ref);
>>>> + h266->ph = NULL;
>>>> }
>>>>
>>>> static void cbs_h266_close(CodedBitstreamContext *ctx)
>>>> diff --git a/libavcodec/cbs_h266.h b/libavcodec/cbs_h266.h
>>>> index 03dfd4a954..54590748c3 100644
>>>> --- a/libavcodec/cbs_h266.h
>>>> +++ b/libavcodec/cbs_h266.h
>>>> @@ -581,8 +581,7 @@ typedef struct H266RawPredWeightTable {
>>>> int16_t delta_chroma_offset_l1[15][2];
>>>> } H266RawPredWeightTable;
>>>>
>>>> -typedef struct H266RawPH {
>>>> - H266RawNALUnitHeader nal_unit_header;
>>>> +typedef struct H266RawPictureHeader {
>>>> uint8_t ph_gdr_or_irap_pic_flag;
>>>> uint8_t ph_non_ref_pic_flag;
>>>> uint8_t ph_gdr_pic_flag;
>>>> @@ -670,12 +669,17 @@ typedef struct H266RawPH {
>>>>
>>>> uint8_t ph_extension_length;
>>>> uint8_t ph_extension_data_byte[256];
>>>> +} H266RawPictureHeader;
>>>> +
>>>> +typedef struct H266RawPH {
>>>> + H266RawNALUnitHeader nal_unit_header;
>>>> + H266RawPictureHeader ph_picture_header;
>>>> } H266RawPH;
>>>>
>>>> typedef struct H266RawSliceHeader {
>>>> H266RawNALUnitHeader nal_unit_header;
>>>> uint8_t sh_picture_header_in_slice_header_flag;
>>>> - H266RawPH sh_picture_header;
>>>> + H266RawPictureHeader sh_picture_header;
>>>>
>>>> uint16_t sh_subpic_id;
>>>> uint16_t sh_slice_address;
>>>> @@ -770,14 +774,11 @@ typedef struct CodedBitstreamH266Context {
>>>> AVBufferRef *vps_ref[VVC_MAX_VPS_COUNT];
>>>> AVBufferRef *sps_ref[VVC_MAX_SPS_COUNT];
>>>> AVBufferRef *pps_ref[VVC_MAX_PPS_COUNT];
>>>> + AVBufferRef *ph_ref;
>>>> H266RawVPS *vps[VVC_MAX_SPS_COUNT];
>>>> H266RawSPS *sps[VVC_MAX_SPS_COUNT];
>>>> H266RawPPS *pps[VVC_MAX_PPS_COUNT];
>>>> -
>>>> - struct {
>>>> - AVBufferRef *ph_ref;
>>>> - H266RawPH *ph;
>>>> - } priv;
>>>> + H266RawPictureHeader *ph;
>>>> } CodedBitstreamH266Context;
>>>>
>>>> #endif /* AVCODEC_CBS_H266_H */
>>>> diff --git a/libavcodec/cbs_h266_syntax_template.c
>>>> b/libavcodec/cbs_h266_syntax_template.c
>>>> index 06f9f29e08..6d826eba49 100644
>>>> --- a/libavcodec/cbs_h266_syntax_template.c
>>>> +++ b/libavcodec/cbs_h266_syntax_template.c
>>>> @@ -2231,8 +2231,8 @@ static int FUNC(pred_weight_table)
>>>> (CodedBitstreamContext *ctx, RWContext *rw,
>>>> return 0;
>>>> }
>>>>
>>>> -static int FUNC(picture_header) (CodedBitstreamContext *ctx, RWContext
>>>> *rw,
>>>> - H266RawPH *current){
>>>> +static int FUNC(picture_header_structure)(CodedBitstreamContext *ctx,
>>>> RWContext *rw,
>>>> + H266RawPictureHeader
>> *current) {
>>>> CodedBitstreamH266Context *h266 = ctx->priv_data;
>>>> const H266RawVPS *vps;
>>>> const H266RawSPS *sps;
>>>> @@ -2651,7 +2651,7 @@ static int FUNC(ph) (CodedBitstreamContext *ctx,
>>>> RWContext *rw,
>>>> HEADER("Picture Header");
>>>>
>>>> CHECK(FUNC(nal_unit_header) (ctx, rw, ¤t->nal_unit_header,
>>>> VVC_PH_NUT));
>>>> - CHECK(FUNC(picture_header) (ctx, rw, current));
>>>> + CHECK(FUNC(picture_header_structure) (ctx, rw,
>>>> ¤t->ph_picture_header));
>>>> CHECK(FUNC(rbsp_trailing_bits) (ctx, rw));
>>>> return 0;
>>>> }
>>>> @@ -2662,7 +2662,7 @@ static int FUNC(slice_header)
>> (CodedBitstreamContext
>>>> *ctx, RWContext *rw,
>>>> CodedBitstreamH266Context *h266 = ctx->priv_data;
>>>> const H266RawSPS *sps;
>>>> const H266RawPPS *pps;
>>>> - const H266RawPH *ph;
>>>> + const H266RawPictureHeader *ph;
>>>> const H266RefPicLists *ref_pic_lists;
>>>> int err, i;
>>>> uint8_t nal_unit_type, qp_bd_offset;
>>>> @@ -2675,12 +2675,11 @@ static int FUNC(slice_header)
>>>> (CodedBitstreamContext *ctx, RWContext *rw,
>>>>
>>>> flag(sh_picture_header_in_slice_header_flag);
>>>> if (current->sh_picture_header_in_slice_header_flag) {
>>>> - CHECK(FUNC(picture_header) (ctx, rw,
>>>> ¤t->sh_picture_header));
>>>> + //7.4.8 if sh_picture_header_in_slice_header_flag is true, we
>> do
>>>> not have a PH NAL unit
>>>> + CHECK(FUNC(picture_header_structure) (ctx, rw,
>>>> ¤t->sh_picture_header));
>>>> ph = ¤t->sh_picture_header;
>>>> - //7.4.8 if sh_picture_header_in_slice_header_flag is true, we
>> do
>>>> not have PH NAL unit
>>>> - h266->priv.ph = NULL;
>>>> } else {
>>>> - ph = h266->priv.ph;
>>>> + ph = h266->ph;
>>>>
>>> Based on the following items in the spec, all slices will have the same
>>> picture header. Maybe we can remove sh_picture_header and just keep
>>> h266->ph.
>>>
>>> 1. The PH syntax structure contains information that is common for all
>>> slices of the current picture.
>>> 2. It is a requirement of bitstream conformance that the value of
>>> sh_picture_header_in_slice_header_flag shall be the same in all coded
>>> slices in a CLVS.
>>> 3. When sh_picture_header_in_slice_header_flag is equal to 1 for a coded
>>> slice, it is a requirement of bitstream conformance that no NAL unit with
>>> nal_unit_type equal to PH_NUT shall be present in the CLVS.
>>
>> CodedBitstreamH266Context holds the state of an hypothetical decoder
>> after the last unit fed to CBS was parsed. If you were to feed it two
>> PUs in a row, h266->ph will be a pointer to the H266RawPictureHeader
>> relevant to the second PU (Either picture_header from the last PH NALU,
>> or picture_header from the last Slice NALU).
>>
>> We can't store values read from the bitstream there, since if they are
>> overwritten, then they will be unavailable to callers. It's only meant
>> to store pointers and references to values and structs stored in units
>> within the fragment, or derived values.
>>
> All slices in one picture will have the same picture header content.
> if sh_picture_header_in_slice_header_flag == 0, we only have one PH NAL.
> so we will not overwrite.
> if sh_picture_header_in_slice_header_flag == 1, we only need to parse the
> PH in the first slice, then use memcpy to do a sanity check for later
> slices.
We need to parse the entire slice header in all cases, picture header
included if present, to advance the getbitcontext and reach the
following fields.
>
> BTW, do we need constify the raw vps, sps, pps and ph
> in CodedBitstreamH266Context
Yeah, probably a good idea.
More information about the ffmpeg-devel
mailing list