[FFmpeg-devel] [PATCH] avcodec: add a bsf to reorder DTS into PTS

Anton Khirnov anton at khirnov.net
Fri Sep 23 16:06:11 EEST 2022


Quoting James Almer (2022-09-05 03:09:55)
> +static int h264_init(AVBSFContext *ctx)
> +{
> +    DTS2PTSContext *s = ctx->priv_data;
> +    DTS2PTSH264Context *h264 = &s->u.h264;
> +
> +    s->cbc->decompose_unit_types    = h264_decompose_unit_types;
> +    s->cbc->nb_decompose_unit_types = FF_ARRAY_ELEMS(h264_decompose_unit_types);
> +
> +    s->nb_frame = -(ctx->par_in->video_delay << 1);
> +    h264->last_poc = h264->highest_poc = INT_MIN;

just call h264_flush()?

> +    return 0;
> +}
> +
> +static int get_mmco_reset(const H264RawSliceHeader *header)
> +{
> +    if (header->nal_unit_header.nal_ref_idc == 0 ||
> +        !header->adaptive_ref_pic_marking_mode_flag)
> +        return 0;
> +
> +    for (int i = 0; i < H264_MAX_MMCO_COUNT; i++) {
> +        if (header->mmco[i].memory_management_control_operation == 0)
> +            return 0;
> +        else if (header->mmco[i].memory_management_control_operation == 5)
> +            return 1;
> +    }
> +
> +    return 0;
> +}
> +
> +static int h264_queue_frame(AVBSFContext *ctx, AVPacket *pkt, int poc, int *queued)
> +{
> +    DTS2PTSContext *s = ctx->priv_data;
> +    DTS2PTSH264Context *h264 = &s->u.h264;
> +    DTS2PTSFrame frame;
> +    int poc_diff, ret;
> +
> +    poc_diff = (h264->picture_structure == 3) + 1;
> +    if (h264->sps.frame_mbs_only_flag && h264->poc_diff)
> +        poc_diff = FFMIN(poc_diff, h264->poc_diff);
> +    if (poc < 0) {
> +        av_tree_enumerate(s->root, &poc_diff, NULL, dec_poc);
> +        s->nb_frame -= poc_diff;
> +    }
> +    // Check if there was a POC reset (Like an IDR slice)

I don't think this is enough. You should bump the sequence counter on
- IDR
- MMCO type 5
- SPS change
- H264_NAL_END_SEQUENCE/STREAM
Then every sequence should get a separate tree and you sort POCs in each
tree.

> +    if (s->nb_frame > h264->highest_poc) {
> +        s->nb_frame = 0;
> +        s->gop = (s->gop + 1) % s->fifo_size;
> +        h264->highest_poc = h264->last_poc;
> +    }
> +
> +    ret = alloc_and_insert_node(ctx, pkt->dts, pkt->duration, s->nb_frame, poc_diff, s->gop);
> +    if (ret < 0)
> +        return ret;
> +    av_log(ctx, AV_LOG_DEBUG, "Queueing frame with POC %d, GOP %d, dts %"PRId64"\n",
> +           poc, s->gop, pkt->dts);
> +    s->nb_frame += poc_diff;
> +
> +    // Add frame to output FIFO only once
> +    if (*queued)
> +        return 0;
> +
> +    frame = (DTS2PTSFrame) { pkt, poc, poc_diff, s->gop };
> +    ret = av_fifo_write(s->fifo, &frame, 1);
> +    av_assert2(ret >= 0);
> +    *queued = 1;
> +
> +    return 0;
> +}
> +
> +static int h264_filter(AVBSFContext *ctx)
> +{
> +    DTS2PTSContext *s = ctx->priv_data;
> +    DTS2PTSH264Context *h264 = &s->u.h264;
> +    CodedBitstreamFragment *au = &s->au;
> +    AVPacket *in;
> +    int output_picture_number = INT_MIN;
> +    int field_poc[2];
> +    int queued = 0, ret;
> +
> +    ret = ff_bsf_get_packet(ctx, &in);
> +    if (ret < 0)
> +        return ret;
> +
> +    ret = ff_cbs_read_packet(s->cbc, au, in);
> +    if (ret < 0) {
> +        av_log(ctx, AV_LOG_WARNING, "Failed to parse access unit.\n");
> +        goto fail;
> +    }
> +
> +    for (int i = 0; i < au->nb_units; i++) {
> +        CodedBitstreamUnit *unit = &au->units[i];
> +
> +        switch (unit->type) {
> +        case H264_NAL_IDR_SLICE:
> +            h264->poc.prev_frame_num        = 0;
> +            h264->poc.prev_frame_num_offset = 0;
> +            h264->poc.prev_poc_msb          =
> +            h264->poc.prev_poc_lsb          = 0;
> +        // fall-through
> +        case H264_NAL_SLICE: {
> +            const H264RawSlice *slice = unit->content;
> +            const H264RawSliceHeader *header = &slice->header;
> +            const CodedBitstreamH264Context *cbs_h264 = s->cbc->priv_data;
> +            const H264RawSPS *sps = cbs_h264->active_sps;
> +            int got_reset;
> +
> +            if (!sps) {
> +                av_log(ctx, AV_LOG_ERROR, "No active SPS for a slice\n");
> +                goto fail;
> +            }
> +            // Initialize the SPS struct with the fields ff_h264_init_poc() cares about
> +            h264->sps.frame_mbs_only_flag            = sps->frame_mbs_only_flag;
> +            h264->sps.log2_max_frame_num             = sps->log2_max_frame_num_minus4 + 4;
> +            h264->sps.poc_type                       = sps->pic_order_cnt_type;
> +            h264->sps.log2_max_poc_lsb               = sps->log2_max_pic_order_cnt_lsb_minus4 + 4;
> +            h264->sps.offset_for_non_ref_pic         = sps->offset_for_non_ref_pic;
> +            h264->sps.offset_for_top_to_bottom_field = sps->offset_for_top_to_bottom_field;
> +            h264->sps.poc_cycle_length               = sps->num_ref_frames_in_pic_order_cnt_cycle;
> +            for (int i = 0; i < h264->sps.poc_cycle_length; i++)

moderately evil shadowing

> +                h264->sps.offset_for_ref_frame[i] = sps->offset_for_ref_frame[i];
> +
> +            h264->picture_structure = sps->frame_mbs_only_flag ? 3 :
> +                                      (header->field_pic_flag ?
> +                                       header->field_pic_flag + header->bottom_field_flag : 3);
> +
> +            h264->poc.frame_num = header->frame_num;
> +            h264->poc.poc_lsb = header->pic_order_cnt_lsb;
> +            h264->poc.delta_poc_bottom = header->delta_pic_order_cnt_bottom;
> +            h264->poc.delta_poc[0] = header->delta_pic_order_cnt[0];
> +            h264->poc.delta_poc[1] = header->delta_pic_order_cnt[1];
> +
> +            field_poc[0] = field_poc[1] = INT_MAX;
> +            ret = ff_h264_init_poc(field_poc, &output_picture_number, &h264->sps,
> +                                   &h264->poc, h264->picture_structure,
> +                                   header->nal_unit_header.nal_ref_idc);
> +            if (ret < 0) {
> +                av_log(ctx, AV_LOG_ERROR, "ff_h264_init_poc() failure\n");
> +                goto fail;
> +            }
> +
> +            got_reset = get_mmco_reset(header);
> +            h264->poc.prev_frame_num        = got_reset ? 0 : h264->poc.frame_num;
> +            h264->poc.prev_frame_num_offset = got_reset ? 0 : h264->poc.frame_num_offset;
> +            if (header->nal_unit_header.nal_ref_idc != 0) {
> +                h264->poc.prev_poc_msb      = got_reset ? 0 : h264->poc.poc_msb;
> +                if (got_reset)
> +                    h264->poc.prev_poc_lsb = h264->picture_structure == 2 ? 0 : field_poc[0];
> +                else
> +                    h264->poc.prev_poc_lsb = h264->poc.poc_lsb;
> +            }
> +
> +            if (output_picture_number != h264->last_poc) {

Why this condition?

Also the entire block below looks extremely magical
and could use some detailed explanation.

> +                if (h264->last_poc != INT_MIN) {
> +                    int diff = FFABS(h264->last_poc - output_picture_number);
> +
> +                    if ((output_picture_number < 0) && !h264->last_poc)
> +                        h264->poc_diff = 0;
> +                    else if (FFABS(output_picture_number) < h264->poc_diff) {
> +                        diff = FFABS(output_picture_number);
> +                        h264->poc_diff = 0;
> +                    }
> +                    if (!h264->poc_diff || (h264->poc_diff > diff)) {
> +                        h264->poc_diff = diff;
> +                        if (h264->poc_diff == 1 && h264->sps.frame_mbs_only_flag) {
> +                            av_tree_enumerate(s->root, &h264->poc_diff, NULL, dec_poc);
> +                            s->nb_frame -= 2;
> +                        }
> +                    }
> +                }
> +                h264->last_poc = output_picture_number;
> +                h264->highest_poc = FFMAX(h264->highest_poc, output_picture_number);
> +
> +                ret = h264_queue_frame(ctx, in, output_picture_number, &queued);
> +                if (ret < 0)
> +                    goto fail;
> +            }
> +            break;
> +        }
> +        default:
> +            break;
> +        }
> +    }
> +
> +    if (output_picture_number == INT_MIN) {
> +        av_log(ctx, AV_LOG_ERROR, "No slices in access unit\n");
> +        ret = AVERROR_INVALIDDATA;
> +        goto fail;
> +    }
> +
> +    ret = AVERROR(EAGAIN);
> +fail:
> +    ff_cbs_fragment_reset(au);
> +    if (!queued)
> +        av_packet_free(&in);
> +
> +    return ret;
> +}
> [...]
> +static int dts2pts_filter(AVBSFContext *ctx, AVPacket *out)
> +{
> +    DTS2PTSContext *s = ctx->priv_data;
> +    DTS2PTSNode *poc_node = NULL, *next[2] = { NULL, NULL };
> +    DTS2PTSFrame frame;
> +    int ret;
> +
> +    // Fill up the FIFO and POC tree
> +    if (!s->eof && av_fifo_can_write(s->fifo)) {

More than one packet can be available, so this should probably be a
loop.

-- 
Anton Khirnov


More information about the ffmpeg-devel mailing list