[FFmpeg-devel] [PATCH v3 37/41] vaapi_encode: Convert to send/receive API

Thu Aug 23 02:45:10 EEST 2018

This attaches the logic of picking the mode of for the next picture to
the output, which simplifies some choices by removing the concept of
the picture for which input is not yet available.  At the same time,
we allow more complex reference structures and track more reference
metadata (particularly the contents of the DPB) for use in the
codec-specific code.

It also adds flags to explicitly track the available features of the
different codecs.  The new structure also allows open-GOP support, so
that is now available for codecs which can do it.
---
 libavcodec/vaapi_encode.c       | 635 +++++++++++++++++---------------
 libavcodec/vaapi_encode.h       |  80 +++-
 libavcodec/vaapi_encode_h264.c  |   6 +-
 libavcodec/vaapi_encode_h265.c  |   6 +-
 libavcodec/vaapi_encode_mjpeg.c |   8 +-
 libavcodec/vaapi_encode_mpeg2.c |   5 +-
 libavcodec/vaapi_encode_vp8.c   |   3 +-
 libavcodec/vaapi_encode_vp9.c   |   5 +-
 8 files changed, 425 insertions(+), 323 deletions(-)

diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
index a6981d69fa..d03bd1925e 100644
--- a/libavcodec/vaapi_encode.c
+++ b/libavcodec/vaapi_encode.c
@@ -158,16 +158,10 @@ static int vaapi_encode_issue(AVCodecContext *avctx,
         av_log(avctx, AV_LOG_DEBUG, ".\n");
     }
 
-    av_assert0(pic->input_available && !pic->encode_issued);
+    av_assert0(!pic->encode_issued);
     for (i = 0; i < pic->nb_refs; i++) {
         av_assert0(pic->refs[i]);
-        // If we are serialised then the references must have already
-        // completed.  If not, they must have been issued but need not
-        // have completed yet.
-        if (ctx->issue_mode == ISSUE_MODE_SERIALISE_EVERYTHING)
-            av_assert0(pic->refs[i]->encode_complete);
-        else
-            av_assert0(pic->refs[i]->encode_issued);
+        av_assert0(pic->refs[i]->encode_issued);
     }
 
     av_log(avctx, AV_LOG_DEBUG, "Input surface is %#x.\n", pic->input_surface);
@@ -422,10 +416,7 @@ static int vaapi_encode_issue(AVCodecContext *avctx,
 
     pic->encode_issued = 1;
 
-    if (ctx->issue_mode == ISSUE_MODE_SERIALISE_EVERYTHING)
-        return vaapi_encode_wait(avctx, pic);
-    else
-        return 0;
+    return 0;
 
 fail_with_picture:
     vaEndPicture(ctx->hwctx->display, ctx->va_context);
@@ -582,315 +573,330 @@ static int vaapi_encode_free(AVCodecContext *avctx,
     return 0;
 }
 
-static int vaapi_encode_step(AVCodecContext *avctx,
-                             VAAPIEncodePicture *target)
+static void vaapi_encode_add_ref(AVCodecContext *avctx,
+                                 VAAPIEncodePicture *pic,
+                                 VAAPIEncodePicture *target,
+                                 int is_ref, int in_dpb, int prev)
 {
-    VAAPIEncodeContext *ctx = avctx->priv_data;
-    VAAPIEncodePicture *pic;
-    int i, err;
+    int refs = 0;
 
-    if (ctx->issue_mode == ISSUE_MODE_SERIALISE_EVERYTHING ||
-        ctx->issue_mode == ISSUE_MODE_MINIMISE_LATENCY) {
-        // These two modes are equivalent, except that we wait for
-        // immediate completion on each operation if serialised.
-
-        if (!target) {
-            // No target, nothing to do yet.
-            return 0;
-        }
-
-        if (target->encode_complete) {
-            // Already done.
-            return 0;
-        }
-
-        pic = target;
-        for (i = 0; i < pic->nb_refs; i++) {
-            if (!pic->refs[i]->encode_complete) {
-                err = vaapi_encode_step(avctx, pic->refs[i]);
-                if (err < 0)
-                    return err;
-            }
-        }
-
-        err = vaapi_encode_issue(avctx, pic);
-        if (err < 0)
-            return err;
-
-    } else if (ctx->issue_mode == ISSUE_MODE_MAXIMISE_THROUGHPUT) {
-        int activity;
-
-        // Run through the list of all available pictures repeatedly
-        // and issue the first one found which has all dependencies
-        // available (including previously-issued but not necessarily
-        // completed pictures).
-        do {
-            activity = 0;
-            for (pic = ctx->pic_start; pic; pic = pic->next) {
-                if (!pic->input_available || pic->encode_issued)
-                    continue;
-                for (i = 0; i < pic->nb_refs; i++) {
-                    if (!pic->refs[i]->encode_issued)
-                        break;
-                }
-                if (i < pic->nb_refs)
-                    continue;
-                err = vaapi_encode_issue(avctx, pic);
-                if (err < 0)
-                    return err;
-                activity = 1;
-                // Start again from the beginning of the list,
-                // because issuing this picture may have satisfied
-                // forward dependencies of earlier ones.
-                break;
-            }
-        } while(activity);
+    if (is_ref) {
+        av_assert0(pic != target);
+        av_assert0(pic->nb_refs < MAX_PICTURE_REFERENCES);
+        pic->refs[pic->nb_refs++] = target;
+        ++refs;
+    }
 
-        // If we had a defined target for this step then it will
-        // always have been issued by now.
-        if (target) {
-            av_assert0(target->encode_issued && "broken dependencies?");
-        }
+    if (in_dpb) {
+        av_assert0(pic->nb_dpb_pics < MAX_DPB_SIZE);
+        pic->dpb[pic->nb_dpb_pics++] = target;
+        ++refs;
+    }
 
-    } else {
-        av_assert0(0);
+    if (prev) {
+        av_assert0(!pic->prev);
+        pic->prev = target;
+        ++refs;
     }
 
-    return 0;
+    target->ref_count[0] += refs;
+    target->ref_count[1] += refs;
 }
 
-static int vaapi_encode_get_next(AVCodecContext *avctx,
-                                 VAAPIEncodePicture **pic_out)
+static void vaapi_encode_remove_refs(AVCodecContext *avctx,
+                                     VAAPIEncodePicture *pic,
+                                     int level)
 {
-    VAAPIEncodeContext *ctx = avctx->priv_data;
-    VAAPIEncodePicture *start, *end, *pic;
     int i;
 
-    for (pic = ctx->pic_start; pic; pic = pic->next) {
-        if (pic->next)
-            av_assert0(pic->display_order + 1 == pic->next->display_order);
-        if (pic->display_order == ctx->input_order) {
-            *pic_out = pic;
-            return 0;
-        }
+    if (pic->ref_removed[level])
+        return;
+
+    for (i = 0; i < pic->nb_refs; i++) {
+        av_assert0(pic->refs[i]);
+        --pic->refs[i]->ref_count[level];
+        av_assert0(pic->refs[i]->ref_count[level] >= 0);
     }
 
-    pic = vaapi_encode_alloc(avctx);
-    if (!pic)
-        return AVERROR(ENOMEM);
+    for (i = 0; i < pic->nb_dpb_pics; i++) {
+        av_assert0(pic->dpb[i]);
+        --pic->dpb[i]->ref_count[level];
+        av_assert0(pic->dpb[i]->ref_count[level] >= 0);
+    }
 
-    if (ctx->input_order == 0 || ctx->force_idr ||
-        ctx->gop_counter >= ctx->gop_size) {
-        pic->type = PICTURE_TYPE_IDR;
-        ctx->force_idr = 0;
-        ctx->gop_counter = 1;
-        ctx->p_counter = 0;
-    } else if (ctx->p_counter >= ctx->p_per_i) {
-        pic->type = PICTURE_TYPE_I;
-        ++ctx->gop_counter;
-        ctx->p_counter = 0;
-    } else {
-        pic->type = PICTURE_TYPE_P;
-        pic->refs[0] = ctx->pic_end;
-        pic->nb_refs = 1;
-        ++ctx->gop_counter;
-        ++ctx->p_counter;
+    av_assert0(pic->prev || pic->type == PICTURE_TYPE_IDR);
+    if (pic->prev) {
+        --pic->prev->ref_count[level];
+        av_assert0(pic->prev->ref_count[level] >= 0);
     }
-    start = end = pic;
 
-    if (pic->type != PICTURE_TYPE_IDR) {
-        // If that was not an IDR frame, add B-frames display-before and
-        // encode-after it, but not exceeding the GOP size.
+    pic->ref_removed[level] = 1;
+}
 
-        for (i = 0; i < ctx->b_per_p &&
-             ctx->gop_counter < ctx->gop_size; i++) {
-            pic = vaapi_encode_alloc(avctx);
-            if (!pic)
-                goto fail;
+static void vaapi_encode_set_b_pictures(AVCodecContext *avctx,
+                                        VAAPIEncodePicture *start,
+                                        VAAPIEncodePicture *end,
+                                        VAAPIEncodePicture *prev,
+                                        int current_depth,
+                                        VAAPIEncodePicture **last)
+{
+    VAAPIEncodeContext *ctx = avctx->priv_data;
+    VAAPIEncodePicture *pic, *next, *ref;
+    int i, len;
 
-            pic->type = PICTURE_TYPE_B;
-            pic->refs[0] = ctx->pic_end;
-            pic->refs[1] = end;
-            pic->nb_refs = 2;
+    av_assert0(start && end && start != end && start->next != end);
 
-            pic->next = start;
-            pic->display_order = ctx->input_order + ctx->b_per_p - i - 1;
-            pic->encode_order  = pic->display_order + 1;
-            start = pic;
+    // If we are at the maximum depth then encode all pictures as
+    // non-referenced B-pictures.  Also do this if there is exactly one
+    // picture left, since there will be nothing to reference it.
+    if (current_depth == ctx->max_b_depth || start->next->next == end) {
+        for (pic = start->next; pic; pic = pic->next) {
+            if (pic == end)
+                break;
+            pic->type    = PICTURE_TYPE_B;
+            pic->b_depth = current_depth;
+
+            vaapi_encode_add_ref(avctx, pic, start, 1, 1, 0);
+            vaapi_encode_add_ref(avctx, pic, end,   1, 1, 0);
+            vaapi_encode_add_ref(avctx, pic, prev,  0, 0, 1);
 
-            ++ctx->gop_counter;
+            for (ref = end->refs[1]; ref; ref = ref->refs[1])
+                vaapi_encode_add_ref(avctx, pic, ref, 0, 1, 0);
         }
-    }
+        *last = prev;
 
-    if (ctx->input_order == 0) {
-        pic->display_order = 0;
-        pic->encode_order  = 0;
+    } else {
+        // Split the current list at the midpoint with a referenced
+        // B-picture, then descend into each side separately.
+        len = 0;
+        for (pic = start->next; pic != end; pic = pic->next)
+            ++len;
+        for (pic = start->next, i = 1; 2 * i < len; pic = pic->next, i++);
 
-        ctx->pic_start = ctx->pic_end = pic;
+        pic->type    = PICTURE_TYPE_B;
+        pic->b_depth = current_depth;
 
-    } else {
-        for (i = 0, pic = start; pic; i++, pic = pic->next) {
-            pic->display_order = ctx->input_order + i;
-            if (end->type == PICTURE_TYPE_IDR)
-                pic->encode_order = ctx->input_order + i;
-            else if (pic == end)
-                pic->encode_order = ctx->input_order;
-            else
-                pic->encode_order = ctx->input_order + i + 1;
-        }
+        pic->is_reference = 1;
 
-        av_assert0(ctx->pic_end);
-        ctx->pic_end->next = start;
-        ctx->pic_end = end;
-    }
-    *pic_out = start;
+        vaapi_encode_add_ref(avctx, pic, pic,   0, 1, 0);
+        vaapi_encode_add_ref(avctx, pic, start, 1, 1, 0);
+        vaapi_encode_add_ref(avctx, pic, end,   1, 1, 0);
+        vaapi_encode_add_ref(avctx, pic, prev,  0, 0, 1);
 
-    av_log(avctx, AV_LOG_DEBUG, "Pictures:");
-    for (pic = ctx->pic_start; pic; pic = pic->next) {
-        av_log(avctx, AV_LOG_DEBUG, " %s (%"PRId64"/%"PRId64")",
-               picture_type_name[pic->type],
-               pic->display_order, pic->encode_order);
-    }
-    av_log(avctx, AV_LOG_DEBUG, "\n");
+        for (ref = end->refs[1]; ref; ref = ref->refs[1])
+            vaapi_encode_add_ref(avctx, pic, ref, 0, 1, 0);
 
-    return 0;
+        if (i > 1)
+            vaapi_encode_set_b_pictures(avctx, start, pic, pic,
+                                        current_depth + 1, &next);
+        else
+            next = pic;
 
-fail:
-    while (start) {
-        pic = start->next;
-        vaapi_encode_free(avctx, start);
-        start = pic;
+        vaapi_encode_set_b_pictures(avctx, pic, end, next,
+                                    current_depth + 1, last);
     }
-    return AVERROR(ENOMEM);
 }
 
-static int vaapi_encode_truncate_gop(AVCodecContext *avctx)
+static int vaapi_encode_pick_next(AVCodecContext *avctx,
+                                  VAAPIEncodePicture **pic_out)
 {
     VAAPIEncodeContext *ctx = avctx->priv_data;
-    VAAPIEncodePicture *pic, *last_pic, *next;
+    VAAPIEncodePicture *pic = NULL, *next, *start;
+    int i, b_counter, closed_gop_end;
 
-    av_assert0(!ctx->pic_start || ctx->pic_start->input_available);
-
-    // Find the last picture we actually have input for.
+    // If there are any B-frames already queued, the next one to encode
+    // is the earliest not-yet-issued frame for which all references are
+    // available.
     for (pic = ctx->pic_start; pic; pic = pic->next) {
-        if (!pic->input_available)
+        if (pic->encode_issued)
+            continue;
+        if (pic->type != PICTURE_TYPE_B)
+            continue;
+        for (i = 0; i < pic->nb_refs; i++) {
+            if (!pic->refs[i]->encode_issued)
+                break;
+        }
+        if (i == pic->nb_refs)
             break;
-        last_pic = pic;
     }
 
     if (pic) {
-        if (last_pic->type == PICTURE_TYPE_B) {
-            // Some fixing up is required.  Change the type of this
-            // picture to P, then modify preceding B references which
-            // point beyond it to point at it instead.
-
-            last_pic->type = PICTURE_TYPE_P;
-            last_pic->encode_order = last_pic->refs[1]->encode_order;
-
-            for (pic = ctx->pic_start; pic != last_pic; pic = pic->next) {
-                if (pic->type == PICTURE_TYPE_B &&
-                    pic->refs[1] == last_pic->refs[1])
-                    pic->refs[1] = last_pic;
-            }
+        av_log(avctx, AV_LOG_DEBUG, "Pick B-picture at depth %d to "
+               "encode next.\n", pic->b_depth);
+        *pic_out = pic;
+        return 0;
+    }
 
-            last_pic->nb_refs = 1;
-            last_pic->refs[1] = NULL;
-        } else {
-            // We can use the current structure (no references point
-            // beyond the end), but there are unused pics to discard.
+    // Find the B-per-Pth available picture to become the next picture
+    // on the top layer.
+    start = NULL;
+    b_counter = 0;
+    closed_gop_end = ctx->closed_gop ||
+                     ctx->idr_counter == ctx->gop_per_idr;
+    for (pic = ctx->pic_start; pic; pic = next) {
+        next = pic->next;
+        if (pic->encode_issued) {
+            start = pic;
+            continue;
         }
+        // If the next available picture is force-IDR, encode it to start
+        // a new GOP immediately.
+        if (pic->force_idr)
+            break;
+        if (b_counter == ctx->b_per_p)
+            break;
+        // If this picture ends a closed GOP or starts a new GOP then it
+        // needs to be in the top layer.
+        if (ctx->gop_counter + b_counter + closed_gop_end >= ctx->gop_size)
+            break;
+        // If the picture after this one is force-IDR, we need to encode
+        // this one in the top layer.
+        if (next && next->force_idr)
+            break;
+        ++b_counter;
+    }
 
-        // Discard all following pics, they will never be used.
-        for (pic = last_pic->next; pic; pic = next) {
-            next = pic->next;
-            vaapi_encode_free(avctx, pic);
-        }
+    // At the end of the stream the last picture must be in the top layer.
+    if (!pic && ctx->end_of_stream) {
+        --b_counter;
+        pic = ctx->pic_end;
+        if (pic->encode_issued)
+            return AVERROR_EOF;
+    }
 
-        last_pic->next = NULL;
-        ctx->pic_end = last_pic;
+    if (!pic) {
+        av_log(avctx, AV_LOG_DEBUG, "Pick nothing to encode next - "
+               "need more input for reference pictures.\n");
+        return AVERROR(EAGAIN);
+    }
+    if (ctx->input_order <= ctx->decode_delay && !ctx->end_of_stream) {
+        av_log(avctx, AV_LOG_DEBUG, "Pick nothing to encode next - "
+               "need more input for timestamps.\n");
+        return AVERROR(EAGAIN);
+    }
+
+    if (pic->force_idr) {
+        av_log(avctx, AV_LOG_DEBUG, "Pick forced IDR-picture to "
+               "encode next.\n");
+        pic->type = PICTURE_TYPE_IDR;
+        ctx->idr_counter = 1;
+        ctx->gop_counter = 1;
+
+    } else if (ctx->gop_counter + b_counter >= ctx->gop_size) {
+        if (ctx->idr_counter == ctx->gop_per_idr) {
+            av_log(avctx, AV_LOG_DEBUG, "Pick new-GOP IDR-picture to "
+                   "encode next.\n");
+            pic->type = PICTURE_TYPE_IDR;
+            ctx->idr_counter = 1;
+        } else {
+            av_log(avctx, AV_LOG_DEBUG, "Pick new-GOP I-picture to "
+                   "encode next.\n");
+            pic->type = PICTURE_TYPE_I;
+            ++ctx->idr_counter;
+        }
+        ctx->gop_counter = 1;
 
     } else {
-        // Input is available for all pictures, so we don't need to
-        // mangle anything.
+        if (ctx->gop_counter + b_counter + closed_gop_end == ctx->gop_size) {
+            av_log(avctx, AV_LOG_DEBUG, "Pick group-end P-picture to "
+                   "encode next.\n");
+        } else {
+            av_log(avctx, AV_LOG_DEBUG, "Pick normal P-picture to "
+                   "encode next.\n");
+        }
+        pic->type = PICTURE_TYPE_P;
+        av_assert0(start);
+        ctx->gop_counter += 1 + b_counter;
     }
+    pic->is_reference = 1;
+    *pic_out = pic;
 
-    av_log(avctx, AV_LOG_DEBUG, "Pictures ending truncated GOP:");
-    for (pic = ctx->pic_start; pic; pic = pic->next) {
-        av_log(avctx, AV_LOG_DEBUG, " %s (%"PRId64"/%"PRId64")",
-               picture_type_name[pic->type],
-               pic->display_order, pic->encode_order);
+    vaapi_encode_add_ref(avctx, pic, pic, 0, 1, 0);
+    if (pic->type != PICTURE_TYPE_IDR) {
+        vaapi_encode_add_ref(avctx, pic, start,
+                             pic->type == PICTURE_TYPE_P,
+                             b_counter > 0, 0);
+        vaapi_encode_add_ref(avctx, pic, ctx->next_prev, 0, 0, 1);
     }
-    av_log(avctx, AV_LOG_DEBUG, "\n");
+    if (ctx->next_prev)
+        --ctx->next_prev->ref_count[0];
 
+    if (b_counter > 0) {
+        vaapi_encode_set_b_pictures(avctx, start, pic, pic, 1,
+                                    &ctx->next_prev);
+    } else {
+        ctx->next_prev = pic;
+    }
+    ++ctx->next_prev->ref_count[0];
     return 0;
 }
 
 static int vaapi_encode_clear_old(AVCodecContext *avctx)
 {
     VAAPIEncodeContext *ctx = avctx->priv_data;
-    VAAPIEncodePicture *pic, *old;
-    int i;
+    VAAPIEncodePicture *pic, *prev, *next;
 
-    while (ctx->pic_start != ctx->pic_end) {
-        old = ctx->pic_start;
-        if (old->encode_order > ctx->output_order)
-            break;
+    av_assert0(ctx->pic_start);
 
-        for (pic = old->next; pic; pic = pic->next) {
-            if (pic->encode_complete)
-                continue;
-            for (i = 0; i < pic->nb_refs; i++) {
-                if (pic->refs[i] == old) {
-                    // We still need this picture because it's referred to
-                    // directly by a later one, so it and all following
-                    // pictures have to stay.
-                    return 0;
-                }
-            }
-        }
+    // Remove direct references once each picture is complete.
+    for (pic = ctx->pic_start; pic; pic = pic->next) {
+        if (pic->encode_complete && pic->next)
+            vaapi_encode_remove_refs(avctx, pic, 0);
+    }
 
-        pic = ctx->pic_start;
-        ctx->pic_start = pic->next;
-        vaapi_encode_free(avctx, pic);
+    // Remove indirect references once a picture has no direct references.
+    for (pic = ctx->pic_start; pic; pic = pic->next) {
+        if (pic->encode_complete && pic->ref_count[0] == 0)
+            vaapi_encode_remove_refs(avctx, pic, 1);
+    }
+
+    // Clear out all complete pictures with no remaining references.
+    prev = NULL;
+    for (pic = ctx->pic_start; pic; pic = next) {
+        next = pic->next;
+        if (pic->encode_complete && pic->ref_count[1] == 0) {
+            av_assert0(pic->ref_removed[0] && pic->ref_removed[1]);
+            if (prev)
+                prev->next = next;
+            else
+                ctx->pic_start = next;
+            vaapi_encode_free(avctx, pic);
+        } else {
+            prev = pic;
+        }
     }
 
     return 0;
 }
 
-int ff_vaapi_encode2(AVCodecContext *avctx, AVPacket *pkt,
-                     const AVFrame *input_image, int *got_packet)
+int ff_vaapi_encode_send_frame(AVCodecContext *avctx, const AVFrame *frame)
 {
     VAAPIEncodeContext *ctx = avctx->priv_data;
     VAAPIEncodePicture *pic;
     int err;
 
-    if (input_image) {
-        av_log(avctx, AV_LOG_DEBUG, "Encode frame: %ux%u (%"PRId64").\n",
-               input_image->width, input_image->height, input_image->pts);
+    if (frame) {
+        av_log(avctx, AV_LOG_DEBUG, "Input frame: %ux%u (%"PRId64").\n",
+               frame->width, frame->height, frame->pts);
 
-        if (input_image->pict_type == AV_PICTURE_TYPE_I) {
-            err = vaapi_encode_truncate_gop(avctx);
-            if (err < 0)
-                goto fail;
-            ctx->force_idr = 1;
-        }
-
-        err = vaapi_encode_get_next(avctx, &pic);
-        if (err) {
-            av_log(avctx, AV_LOG_ERROR, "Input setup failed: %d.\n", err);
-            return err;
-        }
+        pic = vaapi_encode_alloc(avctx);
+        if (!pic)
+            return AVERROR(ENOMEM);
 
         pic->input_image = av_frame_alloc();
         if (!pic->input_image) {
             err = AVERROR(ENOMEM);
             goto fail;
         }
-        err = av_frame_ref(pic->input_image, input_image);
+        err = av_frame_ref(pic->input_image, frame);
         if (err < 0)
             goto fail;
-        pic->input_surface = (VASurfaceID)(uintptr_t)input_image->data[3];
-        pic->pts = input_image->pts;
+
+        if (ctx->input_order == 0)
+            pic->force_idr = 1;
+
+        pic->input_surface = (VASurfaceID)(uintptr_t)frame->data[3];
+        pic->pts = frame->pts;
 
         if (ctx->input_order == 0)
             ctx->first_pts = pic->pts;
@@ -899,72 +905,89 @@ int ff_vaapi_encode2(AVCodecContext *avctx, AVPacket *pkt,
         if (ctx->output_delay > 0)
             ctx->ts_ring[ctx->input_order % (3 * ctx->output_delay)] = pic->pts;
 
-        pic->input_available = 1;
+        pic->display_order = ctx->input_order;
+        ++ctx->input_order;
 
-    } else {
-        if (!ctx->end_of_stream) {
-            err = vaapi_encode_truncate_gop(avctx);
-            if (err < 0)
-                goto fail;
-            ctx->end_of_stream = 1;
+        if (ctx->pic_start) {
+            ctx->pic_end->next = pic;
+            ctx->pic_end       = pic;
+        } else {
+            ctx->pic_start     = pic;
+            ctx->pic_end       = pic;
         }
+
+    } else {
+        ctx->end_of_stream = 1;
+
+        // Fix timestamps if we hit end-of-stream before the initial decode
+        // delay has elapsed.
+        if (ctx->input_order < ctx->decode_delay)
+            ctx->dts_pts_diff = ctx->pic_end->pts - ctx->first_pts;
     }
 
-    ++ctx->input_order;
-    ++ctx->output_order;
-    av_assert0(ctx->output_order + ctx->output_delay + 1 == ctx->input_order);
+    return 0;
 
-    for (pic = ctx->pic_start; pic; pic = pic->next)
-        if (pic->encode_order == ctx->output_order)
-            break;
+fail:
+    return err;
+}
 
-    // pic can be null here if we don't have a specific target in this
-    // iteration.  We might still issue encodes if things can be overlapped,
-    // even though we don't intend to output anything.
+int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt)
+{
+    VAAPIEncodeContext *ctx = avctx->priv_data;
+    VAAPIEncodePicture *pic;
+    int err;
 
-    err = vaapi_encode_step(avctx, pic);
-    if (err < 0) {
-        av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
-        goto fail;
+    if (!ctx->pic_start) {
+        if (ctx->end_of_stream)
+            return AVERROR_EOF;
+        else
+            return AVERROR(EAGAIN);
     }
 
-    if (!pic) {
-        *got_packet = 0;
-    } else {
-        err = vaapi_encode_output(avctx, pic, pkt);
-        if (err < 0) {
-            av_log(avctx, AV_LOG_ERROR, "Output failed: %d.\n", err);
-            goto fail;
-        }
+    pic = NULL;
+    err = vaapi_encode_pick_next(avctx, &pic);
+    if (err < 0)
+        return err;
+    av_assert0(pic);
 
-        if (ctx->output_delay == 0) {
-            pkt->dts = pkt->pts;
-        } else if (ctx->output_order < ctx->decode_delay) {
-            if (ctx->ts_ring[ctx->output_order] < INT64_MIN + ctx->dts_pts_diff)
-                pkt->dts = INT64_MIN;
-            else
-                pkt->dts = ctx->ts_ring[ctx->output_order] - ctx->dts_pts_diff;
-        } else {
-            pkt->dts = ctx->ts_ring[(ctx->output_order - ctx->decode_delay) %
-                                    (3 * ctx->output_delay)];
-        }
+    pic->encode_order = ctx->encode_order++;
 
-        *got_packet = 1;
+    err = vaapi_encode_issue(avctx, pic);
+    if (err < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Encode failed: %d.\n", err);
+        return err;
     }
 
-    err = vaapi_encode_clear_old(avctx);
+    err = vaapi_encode_output(avctx, pic, pkt);
     if (err < 0) {
-        av_log(avctx, AV_LOG_ERROR, "List clearing failed: %d.\n", err);
-        goto fail;
+        av_log(avctx, AV_LOG_ERROR, "Output failed: %d.\n", err);
+        return err;
     }
 
+    if (ctx->output_delay == 0) {
+        pkt->dts = pkt->pts;
+    } else if (pic->encode_order < ctx->decode_delay) {
+        if (ctx->ts_ring[pic->encode_order] < INT64_MIN + ctx->dts_pts_diff)
+            pkt->dts = INT64_MIN;
+        else
+            pkt->dts = ctx->ts_ring[pic->encode_order] - ctx->dts_pts_diff;
+    } else {
+        pkt->dts = ctx->ts_ring[(pic->encode_order - ctx->decode_delay) %
+                                (3 * ctx->output_delay)];
+    }
+    av_log(avctx, AV_LOG_DEBUG, "Output packet: pts %"PRId64" dts %"PRId64".\n",
+           pkt->pts, pkt->dts);
+
+    ctx->output_order = pic->encode_order;
+    vaapi_encode_clear_old(avctx);
+
     return 0;
+}
 
-fail:
-    // Unclear what to clean up on failure.  There are probably some things we
-    // could do usefully clean up here, but for now just leave them for uninit()
-    // to do instead.
-    return err;
+int ff_vaapi_encode2(AVCodecContext *avctx, AVPacket *pkt,
+                     const AVFrame *input_image, int *got_packet)
+{
+    return AVERROR(ENOSYS);
 }
 
 static av_cold void vaapi_encode_add_global_param(AVCodecContext *avctx,
@@ -1429,14 +1452,16 @@ static av_cold int vaapi_encode_init_gop_structure(AVCodecContext *avctx)
         ref_l1 = attr.value >> 16 & 0xffff;
     }
 
-    if (avctx->gop_size <= 1) {
+    if (ctx->codec->flags & FLAG_INTRA_ONLY ||
+        avctx->gop_size <= 1) {
         av_log(avctx, AV_LOG_VERBOSE, "Using intra frames only.\n");
         ctx->gop_size = 1;
     } else if (ref_l0 < 1) {
         av_log(avctx, AV_LOG_ERROR, "Driver does not support any "
                "reference frames.\n");
         return AVERROR(EINVAL);
-    } else if (ref_l1 < 1 || avctx->max_b_frames < 1) {
+    } else if (!(ctx->codec->flags & FLAG_B_PICTURES) ||
+               ref_l1 < 1 || avctx->max_b_frames < 1) {
         av_log(avctx, AV_LOG_VERBOSE, "Using intra and P-frames "
                "(supported references: %d / %d).\n", ref_l0, ref_l1);
         ctx->gop_size = avctx->gop_size;
@@ -1448,6 +1473,20 @@ static av_cold int vaapi_encode_init_gop_structure(AVCodecContext *avctx)
         ctx->gop_size = avctx->gop_size;
         ctx->p_per_i  = INT_MAX;
         ctx->b_per_p  = avctx->max_b_frames;
+        if (ctx->codec->flags & FLAG_B_PICTURE_REFERENCES) {
+            ctx->max_b_depth = FFMIN(ctx->desired_b_depth,
+                                     av_log2(ctx->b_per_p) + 1);
+        } else {
+            ctx->max_b_depth = 1;
+        }
+    }
+
+    if (ctx->codec->flags & FLAG_NON_IDR_KEY_PICTURES) {
+        ctx->closed_gop  = !!(avctx->flags & AV_CODEC_FLAG_CLOSED_GOP);
+        ctx->gop_per_idr = ctx->idr_interval + 1;
+    } else {
+        ctx->closed_gop  = 1;
+        ctx->gop_per_idr = 1;
     }
 
     return 0;
@@ -1796,10 +1835,8 @@ av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
             goto fail;
     }
 
-    ctx->input_order  = 0;
     ctx->output_delay = ctx->b_per_p;
-    ctx->decode_delay = 1;
-    ctx->output_order = - ctx->output_delay - 1;
+    ctx->decode_delay = ctx->max_b_depth;
 
     if (ctx->codec->sequence_params_size > 0) {
         ctx->codec_sequence_params =
@@ -1827,10 +1864,6 @@ av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
         }
     }
 
-    // This should be configurable somehow.  (Needs testing on a machine
-    // where it actually overlaps properly, though.)
-    ctx->issue_mode = ISSUE_MODE_MAXIMISE_THROUGHPUT;
-
     if (ctx->va_packed_headers & VA_ENC_PACKED_HEADER_SEQUENCE &&
         ctx->codec->write_sequence_header &&
         avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
diff --git a/libavcodec/vaapi_encode.h b/libavcodec/vaapi_encode.h
index 33bdd16403..08253a96b2 100644
--- a/libavcodec/vaapi_encode.h
+++ b/libavcodec/vaapi_encode.h
@@ -38,6 +38,7 @@ struct VAAPIEncodePicture;
 enum {
     MAX_CONFIG_ATTRIBUTES  = 4,
     MAX_GLOBAL_PARAMS      = 4,
+    MAX_DPB_SIZE           = 16,
     MAX_PICTURE_REFERENCES = 2,
     MAX_REORDER_DELAY      = 16,
     MAX_PARAM_BUFFER_SIZE  = 1024,
@@ -62,9 +63,10 @@ typedef struct VAAPIEncodePicture {
     int64_t         display_order;
     int64_t         encode_order;
     int64_t         pts;
+    int             force_idr;
 
     int             type;
-    int             input_available;
+    int             b_depth;
     int             encode_issued;
     int             encode_complete;
 
@@ -83,8 +85,26 @@ typedef struct VAAPIEncodePicture {
     void           *priv_data;
     void           *codec_picture_params;
 
-    int          nb_refs;
+    // Whether this picture is a reference picture.
+    int             is_reference;
+
+    // The contents of the DPB after this picture has been decoded.
+    // This will contain the picture itself if it is a reference picture,
+    // but not if it isn't.
+    int                     nb_dpb_pics;
+    struct VAAPIEncodePicture *dpb[MAX_DPB_SIZE];
+    // The reference pictures used in decoding this picture.  If they are
+    // used by later pictures they will also appear in the DPB.
+    int                     nb_refs;
     struct VAAPIEncodePicture *refs[MAX_PICTURE_REFERENCES];
+    // The previous reference picture in encode order.  Must be in at least
+    // one of the reference list and DPB list.
+    struct VAAPIEncodePicture *prev;
+    // Reference count for other pictures referring to this one through
+    // the above pointers, directly from incomplete pictures and indirectly
+    // through completed pictures.
+    int             ref_count[2];
+    int             ref_removed[2];
 
     int          nb_slices;
     VAAPIEncodeSlice *slices;
@@ -116,6 +136,12 @@ typedef struct VAAPIEncodeContext {
     // Use low power encoding mode.
     int             low_power;
 
+    // Number of I frames between IDR frames.
+    int             idr_interval;
+
+    // Desired B frame reference depth.
+    int             desired_b_depth;
+
     // Desired packed headers.
     unsigned int    desired_packed_headers;
 
@@ -199,26 +225,21 @@ typedef struct VAAPIEncodeContext {
 
     // Current encoding window, in display (input) order.
     VAAPIEncodePicture *pic_start, *pic_end;
+    // The next picture to use as the previous reference picture in
+    // encoding order.
+    VAAPIEncodePicture *next_prev;
 
     // Next input order index (display order).
     int64_t         input_order;
     // Number of frames that output is behind input.
     int64_t         output_delay;
+    // Next encode order index.
+    int64_t         encode_order;
     // Number of frames decode output will need to be delayed.
     int64_t         decode_delay;
-    // Next output order index (encode order).
+    // Next output order index (in encode order).
     int64_t         output_order;
 
-    enum {
-        // All encode operations are done independently (synchronise
-        // immediately after every operation).
-        ISSUE_MODE_SERIALISE_EVERYTHING = 0,
-        // Overlap as many operations as possible.
-        ISSUE_MODE_MAXIMISE_THROUGHPUT,
-        // Overlap operations only when satisfying parallel dependencies.
-        ISSUE_MODE_MINIMISE_LATENCY,
-    } issue_mode;
-
     // Timestamp handling.
     int64_t         first_pts;
     int64_t         dts_pts_diff;
@@ -226,19 +247,37 @@ typedef struct VAAPIEncodeContext {
 
     // Frame type decision.
     int gop_size;
+    int closed_gop;
+    int gop_per_idr;
     int p_per_i;
+    int max_b_depth;
     int b_per_p;
     int force_idr;
+    int idr_counter;
     int gop_counter;
-    int p_counter;
     int end_of_stream;
 } VAAPIEncodeContext;
 
+enum {
+    // Codec is intra-only.
+    FLAG_INTRA_ONLY           = 1 << 0,
+    // Codec supports B-pictures.
+    FLAG_B_PICTURES           = 1 << 1,
+    // Codec supports referencing B-pictures.
+    FLAG_B_PICTURE_REFERENCES = 1 << 2,
+    // Codec supports non-IDR key pictures (that is, key pictures do
+    // not necessarily empty the DPB).
+    FLAG_NON_IDR_KEY_PICTURES = 1 << 3,
+};
+
 typedef struct VAAPIEncodeType {
     // List of supported profiles and corresponding VAAPI profiles.
     // (Must end with FF_PROFILE_UNKNOWN.)
     const VAAPIEncodeProfile *profiles;
 
+    // Codec feature flags.
+    int flags;
+
     // Perform any extra codec-specific configuration after the
     // codec context is initialised (set up the private data and
     // add any necessary global parameters).
@@ -303,6 +342,9 @@ typedef struct VAAPIEncodeType {
 int ff_vaapi_encode2(AVCodecContext *avctx, AVPacket *pkt,
                      const AVFrame *input_image, int *got_packet);
 
+int ff_vaapi_encode_send_frame(AVCodecContext *avctx, const AVFrame *frame);
+int ff_vaapi_encode_receive_packet(AVCodecContext *avctx, AVPacket *pkt);
+
 int ff_vaapi_encode_init(AVCodecContext *avctx);
 int ff_vaapi_encode_close(AVCodecContext *avctx);
 
@@ -312,7 +354,15 @@ int ff_vaapi_encode_close(AVCodecContext *avctx);
       "Use low-power encoding mode (only available on some platforms; " \
       "may not support all encoding features)", \
       OFFSET(common.low_power), AV_OPT_TYPE_BOOL, \
-      { .i64 = 0 }, 0, 1, FLAGS }
+      { .i64 = 0 }, 0, 1, FLAGS }, \
+    { "idr_interval", \
+      "Distance (in I-frames) between IDR frames", \
+      OFFSET(common.idr_interval), AV_OPT_TYPE_INT, \
+      { .i64 = 0 }, 0, INT_MAX, FLAGS }, \
+    { "b_depth", \
+      "Maximum B-frame reference depth", \
+      OFFSET(common.desired_b_depth), AV_OPT_TYPE_INT, \
+      { .i64 = 1 }, 1, INT_MAX, FLAGS }
 
 
 #endif /* AVCODEC_VAAPI_ENCODE_H */
diff --git a/libavcodec/vaapi_encode_h264.c b/libavcodec/vaapi_encode_h264.c
index 8feae0d42f..bf6e7dfb98 100644
--- a/libavcodec/vaapi_encode_h264.c
+++ b/libavcodec/vaapi_encode_h264.c
@@ -903,6 +903,9 @@ static const VAAPIEncodeProfile vaapi_encode_h264_profiles[] = {
 static const VAAPIEncodeType vaapi_encode_type_h264 = {
     .profiles              = vaapi_encode_h264_profiles,
 
+    .flags                 = FLAG_B_PICTURES |
+                             FLAG_NON_IDR_KEY_PICTURES,
+
     .configure             = &vaapi_encode_h264_configure,
 
     .sequence_params_size  = sizeof(VAEncSequenceParameterBufferH264),
@@ -1092,7 +1095,8 @@ AVCodec ff_h264_vaapi_encoder = {
     .id             = AV_CODEC_ID_H264,
     .priv_data_size = sizeof(VAAPIEncodeH264Context),
     .init           = &vaapi_encode_h264_init,
-    .encode2        = &ff_vaapi_encode2,
+    .send_frame     = &ff_vaapi_encode_send_frame,
+    .receive_packet = &ff_vaapi_encode_receive_packet,
     .close          = &vaapi_encode_h264_close,
     .priv_class     = &vaapi_encode_h264_class,
     .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE,
diff --git a/libavcodec/vaapi_encode_h265.c b/libavcodec/vaapi_encode_h265.c
index 1ada973dd3..f25ba7edf0 100644
--- a/libavcodec/vaapi_encode_h265.c
+++ b/libavcodec/vaapi_encode_h265.c
@@ -1091,6 +1091,9 @@ static const VAAPIEncodeProfile vaapi_encode_h265_profiles[] = {
 static const VAAPIEncodeType vaapi_encode_type_h265 = {
     .profiles              = vaapi_encode_h265_profiles,
 
+    .flags                 = FLAG_B_PICTURES |
+                             FLAG_NON_IDR_KEY_PICTURES,
+
     .configure             = &vaapi_encode_h265_configure,
 
     .sequence_params_size  = sizeof(VAEncSequenceParameterBufferHEVC),
@@ -1241,7 +1244,8 @@ AVCodec ff_hevc_vaapi_encoder = {
     .id             = AV_CODEC_ID_HEVC,
     .priv_data_size = sizeof(VAAPIEncodeH265Context),
     .init           = &vaapi_encode_h265_init,
-    .encode2        = &ff_vaapi_encode2,
+    .send_frame     = &ff_vaapi_encode_send_frame,
+    .receive_packet = &ff_vaapi_encode_receive_packet,
     .close          = &vaapi_encode_h265_close,
     .priv_class     = &vaapi_encode_h265_class,
     .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE,
diff --git a/libavcodec/vaapi_encode_mjpeg.c b/libavcodec/vaapi_encode_mjpeg.c
index db9b36321a..9e3065bb7f 100644
--- a/libavcodec/vaapi_encode_mjpeg.c
+++ b/libavcodec/vaapi_encode_mjpeg.c
@@ -230,6 +230,8 @@ static int vaapi_encode_mjpeg_init_picture_params(AVCodecContext *avctx,
     const uint8_t *components;
     int t, i, quant_scale, len;
 
+    av_assert0(pic->type == PICTURE_TYPE_IDR);
+
     desc = av_pix_fmt_desc_get(priv->common.input_frames->sw_format);
     av_assert0(desc);
     if (desc->flags & AV_PIX_FMT_FLAG_RGB)
@@ -476,6 +478,8 @@ static const VAAPIEncodeProfile vaapi_encode_mjpeg_profiles[] = {
 static const VAAPIEncodeType vaapi_encode_type_mjpeg = {
     .profiles              = vaapi_encode_mjpeg_profiles,
 
+    .flags                 = FLAG_INTRA_ONLY,
+
     .configure             = &vaapi_encode_mjpeg_configure,
 
     .picture_params_size   = sizeof(VAEncPictureParameterBufferJPEG),
@@ -533,7 +537,6 @@ static const AVOption vaapi_encode_mjpeg_options[] = {
 static const AVCodecDefault vaapi_encode_mjpeg_defaults[] = {
     { "global_quality", "80" },
     { "b",              "0"  },
-    { "g",              "1"  },
     { NULL },
 };
 
@@ -551,7 +554,8 @@ AVCodec ff_mjpeg_vaapi_encoder = {
     .id             = AV_CODEC_ID_MJPEG,
     .priv_data_size = sizeof(VAAPIEncodeMJPEGContext),
     .init           = &vaapi_encode_mjpeg_init,
-    .encode2        = &ff_vaapi_encode2,
+    .send_frame     = &ff_vaapi_encode_send_frame,
+    .receive_packet = &ff_vaapi_encode_receive_packet,
     .close          = &vaapi_encode_mjpeg_close,
     .priv_class     = &vaapi_encode_mjpeg_class,
     .capabilities   = AV_CODEC_CAP_HARDWARE,
diff --git a/libavcodec/vaapi_encode_mpeg2.c b/libavcodec/vaapi_encode_mpeg2.c
index 1377eeb9bb..0dd0913d88 100644
--- a/libavcodec/vaapi_encode_mpeg2.c
+++ b/libavcodec/vaapi_encode_mpeg2.c
@@ -565,6 +565,8 @@ static const VAAPIEncodeProfile vaapi_encode_mpeg2_profiles[] = {
 static const VAAPIEncodeType vaapi_encode_type_mpeg2 = {
     .profiles              = vaapi_encode_mpeg2_profiles,
 
+    .flags                 = FLAG_B_PICTURES,
+
     .configure             = &vaapi_encode_mpeg2_configure,
 
     .sequence_params_size  = sizeof(VAEncSequenceParameterBufferMPEG2),
@@ -691,7 +693,8 @@ AVCodec ff_mpeg2_vaapi_encoder = {
     .id             = AV_CODEC_ID_MPEG2VIDEO,
     .priv_data_size = sizeof(VAAPIEncodeMPEG2Context),
     .init           = &vaapi_encode_mpeg2_init,
-    .encode2        = &ff_vaapi_encode2,
+    .send_frame     = &ff_vaapi_encode_send_frame,
+    .receive_packet = &ff_vaapi_encode_receive_packet,
     .close          = &vaapi_encode_mpeg2_close,
     .priv_class     = &vaapi_encode_mpeg2_class,
     .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE,
diff --git a/libavcodec/vaapi_encode_vp8.c b/libavcodec/vaapi_encode_vp8.c
index 697b465787..166636cd84 100644
--- a/libavcodec/vaapi_encode_vp8.c
+++ b/libavcodec/vaapi_encode_vp8.c
@@ -246,7 +246,8 @@ AVCodec ff_vp8_vaapi_encoder = {
     .id             = AV_CODEC_ID_VP8,
     .priv_data_size = sizeof(VAAPIEncodeVP8Context),
     .init           = &vaapi_encode_vp8_init,
-    .encode2        = &ff_vaapi_encode2,
+    .send_frame     = &ff_vaapi_encode_send_frame,
+    .receive_packet = &ff_vaapi_encode_receive_packet,
     .close          = &ff_vaapi_encode_close,
     .priv_class     = &vaapi_encode_vp8_class,
     .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE,
diff --git a/libavcodec/vaapi_encode_vp9.c b/libavcodec/vaapi_encode_vp9.c
index 39bc868f3a..94f29c0483 100644
--- a/libavcodec/vaapi_encode_vp9.c
+++ b/libavcodec/vaapi_encode_vp9.c
@@ -213,6 +213,8 @@ static const VAAPIEncodeProfile vaapi_encode_vp9_profiles[] = {
 static const VAAPIEncodeType vaapi_encode_type_vp9 = {
     .profiles              = vaapi_encode_vp9_profiles,
 
+    .flags                 = FLAG_B_PICTURES,
+
     .configure             = &vaapi_encode_vp9_configure,
 
     .sequence_params_size  = sizeof(VAEncSequenceParameterBufferVP9),
@@ -275,7 +277,8 @@ AVCodec ff_vp9_vaapi_encoder = {
     .id             = AV_CODEC_ID_VP9,
     .priv_data_size = sizeof(VAAPIEncodeVP9Context),
     .init           = &vaapi_encode_vp9_init,
-    .encode2        = &ff_vaapi_encode2,
+    .send_frame     = &ff_vaapi_encode_send_frame,
+    .receive_packet = &ff_vaapi_encode_receive_packet,
     .close          = &ff_vaapi_encode_close,
     .priv_class     = &vaapi_encode_vp9_class,
     .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_HARDWARE,
-- 
2.18.0