[FFmpeg-devel] [PATCH v2 1/2] avcodec/cbs_h2645: fix parsing and storing Picture Header references in the context
James Almer
jamrial at gmail.com
Sun Jul 2 02:32:35 EEST 2023
On 7/1/2023 8:51 AM, Nuo Mi wrote:
> On Fri, Jun 30, 2023 at 6:45 AM James Almer<jamrial at gmail.com> wrote:
>
>> Signed-off-by: James Almer<jamrial at gmail.com>
>> ---
>> libavcodec/cbs_h2645.c | 35 ++++++++++++++++-----------
>> libavcodec/cbs_h266.h | 17 +++++++------
>> libavcodec/cbs_h266_syntax_template.c | 17 ++++++-------
>> libavcodec/h266_metadata_bsf.c | 13 +++++-----
>> libavcodec/vvc_parser.c | 10 ++++----
>> 5 files changed, 50 insertions(+), 42 deletions(-)
>>
>> diff --git a/libavcodec/cbs_h2645.c b/libavcodec/cbs_h2645.c
>> index cdd7901518..68ccf6a7eb 100644
>> --- a/libavcodec/cbs_h2645.c
>> +++ b/libavcodec/cbs_h2645.c
>> @@ -525,12 +525,6 @@ static int
>> cbs_h2645_split_fragment(CodedBitstreamContext *ctx,
>> if (frag->data_size == 0)
>> return 0;
>>
>> - if (codec_id == AV_CODEC_ID_VVC) {
>> - //we deactive picture header here to avoid reuse previous au's ph.
>> - CodedBitstreamH266Context *h266 = ctx->priv_data;
>> - h266->priv.ph = NULL;
>> - }
>> -
>> if (header && frag->data[0] && codec_id == AV_CODEC_ID_H264) {
>> // AVCC header.
>> size_t size, start, end;
>> @@ -793,19 +787,20 @@ cbs_h266_replace_ps(6, SPS, sps,
>> sps_seq_parameter_set_id)
>> cbs_h266_replace_ps(6, PPS, pps, pps_pic_parameter_set_id)
>>
>> static int cbs_h266_replace_ph(CodedBitstreamContext *ctx,
>> - CodedBitstreamUnit *unit)
>> + CodedBitstreamUnit *unit,
>> + H266RawPictureHeader *ph)
>> {
>> CodedBitstreamH266Context *h266 = ctx->priv_data;
>> int err;
>>
>> - h266->priv.ph = NULL;
>> err = ff_cbs_make_unit_refcounted(ctx, unit);
>> if (err < 0)
>> return err;
>> - err = av_buffer_replace(&h266->priv.ph_ref, unit->content_ref);
>> + av_assert0(unit->content_ref);
>> + err = av_buffer_replace(&h266->ph_ref, unit->content_ref);
>> if (err < 0)
>> return err;
>> - h266->priv.ph = (H266RawPH*)h266->priv.ph_ref->data;
>> + h266->ph = ph;
>> return 0;
>> }
>>
>> @@ -1111,7 +1106,7 @@ static int
>> cbs_h266_read_nal_unit(CodedBitstreamContext *ctx,
>> err = cbs_h266_read_ph(ctx, &gbc, ph);
>> if (err < 0)
>> return err;
>> - err = cbs_h266_replace_ph(ctx, unit);
>> + err = cbs_h266_replace_ph(ctx, unit, &ph->ph_picture_header);
>> if (err < 0)
>> return err;
>> }
>> @@ -1139,6 +1134,12 @@ static int
>> cbs_h266_read_nal_unit(CodedBitstreamContext *ctx,
>> pos = get_bits_count(&gbc);
>> len = unit->data_size;
>>
>> + if (slice->header.sh_picture_header_in_slice_header_flag) {
>> + err = cbs_h266_replace_ph(ctx, unit,
>> &slice->header.sh_picture_header);
>> + if (err < 0)
>> + return err;
>> + }
>> +
>> slice->data_size = len - pos / 8;
>> slice->data_ref = av_buffer_ref(unit->data_ref);
>> if (!slice->data_ref)
>> @@ -1640,7 +1641,7 @@ static int
>> cbs_h266_write_nal_unit(CodedBitstreamContext *ctx,
>> if (err < 0)
>> return err;
>>
>> - err = cbs_h266_replace_ph(ctx, unit);
>> + err = cbs_h266_replace_ph(ctx, unit, &ph->ph_picture_header);
>> if (err < 0)
>> return err;
>> }
>> @@ -1661,6 +1662,12 @@ static int
>> cbs_h266_write_nal_unit(CodedBitstreamContext *ctx,
>> if (err < 0)
>> return err;
>>
>> + if (slice->header.sh_picture_header_in_slice_header_flag) {
>> + err = cbs_h266_replace_ph(ctx, unit,
>> &slice->header.sh_picture_header);
>> + if (err < 0)
>> + return err;
>> + }
>> +
>> if (slice->data) {
>> err = cbs_h2645_write_slice_data(ctx, pbc, slice->data,
>> slice->data_size,
>> @@ -1884,8 +1891,8 @@ static void cbs_h266_flush(CodedBitstreamContext
>> *ctx)
>> av_buffer_unref(&h266->pps_ref[i]);
>> h266->pps[i] = NULL;
>> }
>> - av_buffer_unref(&h266->priv.ph_ref);
>> - h266->priv.ph = NULL;
>> + av_buffer_unref(&h266->ph_ref);
>> + h266->ph = NULL;
>> }
>>
>> static void cbs_h266_close(CodedBitstreamContext *ctx)
>> diff --git a/libavcodec/cbs_h266.h b/libavcodec/cbs_h266.h
>> index 03dfd4a954..54590748c3 100644
>> --- a/libavcodec/cbs_h266.h
>> +++ b/libavcodec/cbs_h266.h
>> @@ -581,8 +581,7 @@ typedef struct H266RawPredWeightTable {
>> int16_t delta_chroma_offset_l1[15][2];
>> } H266RawPredWeightTable;
>>
>> -typedef struct H266RawPH {
>> - H266RawNALUnitHeader nal_unit_header;
>> +typedef struct H266RawPictureHeader {
>> uint8_t ph_gdr_or_irap_pic_flag;
>> uint8_t ph_non_ref_pic_flag;
>> uint8_t ph_gdr_pic_flag;
>> @@ -670,12 +669,17 @@ typedef struct H266RawPH {
>>
>> uint8_t ph_extension_length;
>> uint8_t ph_extension_data_byte[256];
>> +} H266RawPictureHeader;
>> +
>> +typedef struct H266RawPH {
>> + H266RawNALUnitHeader nal_unit_header;
>> + H266RawPictureHeader ph_picture_header;
>> } H266RawPH;
>>
>> typedef struct H266RawSliceHeader {
>> H266RawNALUnitHeader nal_unit_header;
>> uint8_t sh_picture_header_in_slice_header_flag;
>> - H266RawPH sh_picture_header;
>> + H266RawPictureHeader sh_picture_header;
>>
>> uint16_t sh_subpic_id;
>> uint16_t sh_slice_address;
>> @@ -770,14 +774,11 @@ typedef struct CodedBitstreamH266Context {
>> AVBufferRef *vps_ref[VVC_MAX_VPS_COUNT];
>> AVBufferRef *sps_ref[VVC_MAX_SPS_COUNT];
>> AVBufferRef *pps_ref[VVC_MAX_PPS_COUNT];
>> + AVBufferRef *ph_ref;
>> H266RawVPS *vps[VVC_MAX_SPS_COUNT];
>> H266RawSPS *sps[VVC_MAX_SPS_COUNT];
>> H266RawPPS *pps[VVC_MAX_PPS_COUNT];
>> -
>> - struct {
>> - AVBufferRef *ph_ref;
>> - H266RawPH *ph;
>> - } priv;
>> + H266RawPictureHeader *ph;
>> } CodedBitstreamH266Context;
>>
>> #endif /* AVCODEC_CBS_H266_H */
>> diff --git a/libavcodec/cbs_h266_syntax_template.c
>> b/libavcodec/cbs_h266_syntax_template.c
>> index 06f9f29e08..6d826eba49 100644
>> --- a/libavcodec/cbs_h266_syntax_template.c
>> +++ b/libavcodec/cbs_h266_syntax_template.c
>> @@ -2231,8 +2231,8 @@ static int FUNC(pred_weight_table)
>> (CodedBitstreamContext *ctx, RWContext *rw,
>> return 0;
>> }
>>
>> -static int FUNC(picture_header) (CodedBitstreamContext *ctx, RWContext
>> *rw,
>> - H266RawPH *current){
>> +static int FUNC(picture_header_structure)(CodedBitstreamContext *ctx,
>> RWContext *rw,
>> + H266RawPictureHeader *current) {
>> CodedBitstreamH266Context *h266 = ctx->priv_data;
>> const H266RawVPS *vps;
>> const H266RawSPS *sps;
>> @@ -2651,7 +2651,7 @@ static int FUNC(ph) (CodedBitstreamContext *ctx,
>> RWContext *rw,
>> HEADER("Picture Header");
>>
>> CHECK(FUNC(nal_unit_header) (ctx, rw, &current->nal_unit_header,
>> VVC_PH_NUT));
>> - CHECK(FUNC(picture_header) (ctx, rw, current));
>> + CHECK(FUNC(picture_header_structure) (ctx, rw,
>> &current->ph_picture_header));
>> CHECK(FUNC(rbsp_trailing_bits) (ctx, rw));
>> return 0;
>> }
>> @@ -2662,7 +2662,7 @@ static int FUNC(slice_header) (CodedBitstreamContext
>> *ctx, RWContext *rw,
>> CodedBitstreamH266Context *h266 = ctx->priv_data;
>> const H266RawSPS *sps;
>> const H266RawPPS *pps;
>> - const H266RawPH *ph;
>> + const H266RawPictureHeader *ph;
>> const H266RefPicLists *ref_pic_lists;
>> int err, i;
>> uint8_t nal_unit_type, qp_bd_offset;
>> @@ -2675,12 +2675,11 @@ static int FUNC(slice_header)
>> (CodedBitstreamContext *ctx, RWContext *rw,
>>
>> flag(sh_picture_header_in_slice_header_flag);
>> if (current->sh_picture_header_in_slice_header_flag) {
>> - CHECK(FUNC(picture_header) (ctx, rw,
>> &current->sh_picture_header));
>> + //7.4.8 if sh_picture_header_in_slice_header_flag is true, we do
>> not have a PH NAL unit
>> + CHECK(FUNC(picture_header_structure) (ctx, rw,
>> &current->sh_picture_header));
>> ph = &current->sh_picture_header;
>> - //7.4.8 if sh_picture_header_in_slice_header_flag is true, we do
>> not have PH NAL unit
>> - h266->priv.ph = NULL;
>> } else {
>> - ph = h266->priv.ph;
>> + ph = h266->ph;
>>
> Based on the following items in the spec, all slices will have the same
> picture header. Maybe we can remove sh_picture_header and just keep
> h266->ph.
>
> 1. The PH syntax structure contains information that is common for all
> slices of the current picture.
> 2. It is a requirement of bitstream conformance that the value of
> sh_picture_header_in_slice_header_flag shall be the same in all coded
> slices in a CLVS.
> 3. When sh_picture_header_in_slice_header_flag is equal to 1 for a coded
> slice, it is a requirement of bitstream conformance that no NAL unit with
> nal_unit_type equal to PH_NUT shall be present in the CLVS.
CodedBitstreamH266Context holds the state of a hypothetical decoder
after the last unit fed to CBS was parsed. If you were to feed it two
PUs in a row, h266->ph will be a pointer to the H266RawPictureHeader
relevant to the second PU (either picture_header from the last PH NALU,
or picture_header from the last Slice NALU).
We can't store values read from the bitstream there, since if they are
overwritten, then they will be unavailable to callers. It's only meant
to store pointers and references to values and structs stored in units
within the fragment, or derived values.
More information about the ffmpeg-devel
mailing list