[FFmpeg-devel] [PATCH v1] lavfi/qsvvpp: support async depth
Wang, Fei W
fei.w.wang at intel.com
Sun Feb 7 08:06:39 EET 2021
> -----Original Message-----
> From: Wang, Fei W <fei.w.wang at intel.com>
> Sent: Wednesday, February 3, 2021 9:09 AM
> To: ffmpeg-devel at ffmpeg.org
> Subject: Re: [PATCH v1] lavfi/qsvvpp: support async depth
>
> On Wed, 2021-01-27 at 09:42 +0800, Fei Wang wrote:
> > Async depth allows the qsv filters to cache a few frames instead of
> > being forced to submit, sync and output each filter task frame by
> > frame. This improves performance in some multi-task cases, for example
> > 1:N transcode (decode + vpp + encode) with all QSV plugins.
> >
> > Signed-off-by: Fei Wang <fei.w.wang at intel.com>
> > ---
> > libavfilter/qsvvpp.c | 147 ++++++++++++++++++-----------
> > --
> > libavfilter/qsvvpp.h | 42 ++++++++-
> > libavfilter/vf_deinterlace_qsv.c | 8 --
> > libavfilter/vf_vpp_qsv.c | 75 +++++++++++++---
> > 4 files changed, 187 insertions(+), 85 deletions(-)
> >
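The gist of the change, for readers skimming the diff below: instead of calling MFXVideoCORE_SyncOperation right after every MFXVideoVPP_RunFrameVPPAsync, the filter now pushes each (output frame, sync point) pair into a small AVFifoBuffer and only syncs and emits the oldest entry once more than async_depth items are pending (plus a drain loop at EOF). A minimal standalone sketch of that queue/drain pattern follows; it uses the libavutil/fifo.h calls the patch itself uses, but DemoFrame, DemoSync, sync_and_emit and push_and_maybe_drain are placeholder names invented for illustration, not part of the patch.

    /* Sketch only: DemoFrame and DemoSync stand in for QSVFrame and
     * mfxSyncPoint; sync_and_emit stands for SyncOperation + filter_frame. */
    #include "libavutil/fifo.h"

    typedef struct DemoFrame { void *payload; } DemoFrame;
    typedef void *DemoSync;

    static unsigned int item_size(void)
    {
        /* each queued item is a frame pointer followed by its sync token */
        return sizeof(DemoFrame *) + sizeof(DemoSync);
    }

    /* Queue one freshly submitted output frame with its sync token; once more
     * than async_depth items are pending, sync and emit the oldest one. */
    static int push_and_maybe_drain(AVFifoBuffer *fifo, int async_depth,
                                    DemoFrame *out, DemoSync sync,
                                    int (*sync_and_emit)(DemoFrame *, DemoSync))
    {
        av_fifo_generic_write(fifo, &out,  sizeof(out),  NULL);
        av_fifo_generic_write(fifo, &sync, sizeof(sync), NULL);

        if (av_fifo_size(fifo) / item_size() > (unsigned)async_depth) {
            DemoFrame *oldest;
            DemoSync   oldest_sync;

            av_fifo_generic_read(fifo, &oldest,      sizeof(oldest),      NULL);
            av_fifo_generic_read(fifo, &oldest_sync, sizeof(oldest_sync), NULL);
            return sync_and_emit(oldest, oldest_sync);
        }
        return 0; /* still filling the pipeline, nothing emitted yet */
    }

The FIFO itself would be created once with av_fifo_alloc((async_depth + 1) * item_size()), mirroring what ff_qsvvpp_create does in the patch, so the queue never needs to grow while at most async_depth + 1 items are in flight.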
> > diff --git a/libavfilter/qsvvpp.c b/libavfilter/qsvvpp.c
> > index f216b3f248..2e824e67e7 100644
> > --- a/libavfilter/qsvvpp.c
> > +++ b/libavfilter/qsvvpp.c
> > @@ -27,6 +27,7 @@
> > #include "libavutil/hwcontext_qsv.h"
> > #include "libavutil/time.h"
> > #include "libavutil/pixdesc.h"
> > +#include "libavutil/fifo.h"
> >
> > #include "internal.h"
> > #include "qsvvpp.h"
> > @@ -37,37 +38,6 @@
> > #define IS_OPAQUE_MEMORY(mode) (mode & MFX_MEMTYPE_OPAQUE_FRAME)
> > #define IS_SYSTEM_MEMORY(mode) (mode & MFX_MEMTYPE_SYSTEM_MEMORY)
> >
> > -typedef struct QSVFrame {
> > - AVFrame *frame;
> > - mfxFrameSurface1 *surface;
> > - mfxFrameSurface1 surface_internal; /* for system memory */
> > - struct QSVFrame *next;
> > -} QSVFrame;
> > -
> > -/* abstract struct for all QSV filters */
> > -struct QSVVPPContext {
> > - mfxSession session;
> > - int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame);/* callback */
> > - enum AVPixelFormat out_sw_format; /* Real output format */
> > - mfxVideoParam vpp_param;
> > - mfxFrameInfo *frame_infos; /* frame info for each input */
> > -
> > - /* members related to the input/output surface */
> > - int in_mem_mode;
> > - int out_mem_mode;
> > - QSVFrame *in_frame_list;
> > - QSVFrame *out_frame_list;
> > - int nb_surface_ptrs_in;
> > - int nb_surface_ptrs_out;
> > - mfxFrameSurface1 **surface_ptrs_in;
> > - mfxFrameSurface1 **surface_ptrs_out;
> > -
> > - /* MFXVPP extern parameters */
> > - mfxExtOpaqueSurfaceAlloc opaque_alloc;
> > - mfxExtBuffer **ext_buffers;
> > - int nb_ext_buffers;
> > -};
> > -
> > static const mfxHandleType handle_types[] = {
> > MFX_HANDLE_VA_DISPLAY,
> > MFX_HANDLE_D3D9_DEVICE_MANAGER,
> > @@ -336,9 +306,9 @@ static int fill_frameinfo_by_link(mfxFrameInfo *frameinfo, AVFilterLink *link)
> > static void clear_unused_frames(QSVFrame *list)
> > {
> > while (list) {
> > - if (list->surface && !list->surface->Data.Locked) {
> > - list->surface = NULL;
> > + if (list->used && !list->queued && !list->surface.Data.Locked) {
> > av_frame_free(&list->frame);
> > + list->used = 0;
> > }
> > list = list->next;
> > }
> > @@ -361,8 +331,10 @@ static QSVFrame *get_free_frame(QSVFrame **list)
> > QSVFrame *out = *list;
> >
> > for (; out; out = out->next) {
> > - if (!out->surface)
> > + if (!out->used) {
> > + out->used = 1;
> > break;
> > + }
> > }
> >
> > if (!out) {
> > @@ -371,6 +343,7 @@ static QSVFrame *get_free_frame(QSVFrame **list)
> > av_log(NULL, AV_LOG_ERROR, "Can't alloc new output
> > frame.\n");
> > return NULL;
> > }
> > + out->used = 1;
> > out->next = *list;
> > *list = out;
> > }
> > @@ -402,7 +375,7 @@ static QSVFrame *submit_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *p
> > return NULL;
> > }
> > qsv_frame->frame = av_frame_clone(picref);
> > - qsv_frame->surface = (mfxFrameSurface1 *)qsv_frame->frame->data[3];
> > + qsv_frame->surface = *(mfxFrameSurface1 *)qsv_frame->frame->data[3];
> > } else {
> > /* make a copy if the input is not padded as libmfx requires */
> > if (picref->height & 31 || picref->linesize[0] & 31) {
> > @@ -425,27 +398,26 @@ static QSVFrame *submit_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *p
> > qsv_frame->frame = av_frame_clone(picref);
> >
> > if (map_frame_to_surface(qsv_frame->frame,
> > - &qsv_frame->surface_internal) < 0) {
> > + &qsv_frame->surface) < 0) {
> > av_log(ctx, AV_LOG_ERROR, "Unsupported frame.\n");
> > return NULL;
> > }
> > - qsv_frame->surface = &qsv_frame->surface_internal;
> > }
> >
> > - qsv_frame->surface->Info = s->frame_infos[FF_INLINK_IDX(inlink)];
> > - qsv_frame->surface->Data.TimeStamp = av_rescale_q(qsv_frame->frame->pts,
> > + qsv_frame->surface.Info = s->frame_infos[FF_INLINK_IDX(inlink)];
> > + qsv_frame->surface.Data.TimeStamp = av_rescale_q(qsv_frame->frame->pts,
> > inlink->time_base, default_tb);
> >
> > - qsv_frame->surface->Info.PicStruct =
> > + qsv_frame->surface.Info.PicStruct =
> > !qsv_frame->frame->interlaced_frame ? MFX_PICSTRUCT_PROGRESSIVE :
> > (qsv_frame->frame->top_field_first ? MFX_PICSTRUCT_FIELD_TFF :
> > MFX_PICSTRUCT_FIELD_BFF);
> > if (qsv_frame->frame->repeat_pict == 1)
> > - qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FIELD_REPEATED;
> > + qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FIELD_REPEATED;
> > else if (qsv_frame->frame->repeat_pict == 2)
> > - qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FRAME_DOUBLING;
> > + qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FRAME_DOUBLING;
> > else if (qsv_frame->frame->repeat_pict == 4)
> > - qsv_frame->surface->Info.PicStruct |= MFX_PICSTRUCT_FRAME_TRIPLING;
> > + qsv_frame->surface.Info.PicStruct |= MFX_PICSTRUCT_FRAME_TRIPLING;
> >
> > return qsv_frame;
> > }
> > @@ -476,7 +448,7 @@ static QSVFrame *query_frame(QSVVPPContext *s, AVFilterLink *outlink)
> > return NULL;
> > }
> >
> > - out_frame->surface = (mfxFrameSurface1 *)out_frame->frame->data[3];
> > + out_frame->surface = *(mfxFrameSurface1 *)out_frame->frame->data[3];
> > } else {
> > /* Get a frame with aligned dimensions.
> > * Libmfx need system memory being 128x64 aligned */
> > @@ -490,14 +462,12 @@ static QSVFrame *query_frame(QSVVPPContext *s, AVFilterLink *outlink)
> > out_frame->frame->height = outlink->h;
> >
> > ret = map_frame_to_surface(out_frame->frame,
> > - &out_frame->surface_internal);
> > + &out_frame->surface);
> > if (ret < 0)
> > return NULL;
> > -
> > - out_frame->surface = &out_frame->surface_internal;
> > }
> >
> > - out_frame->surface->Info = s->vpp_param.vpp.Out;
> > + out_frame->surface.Info = s->vpp_param.vpp.Out;
> >
> > return out_frame;
> > }
> > @@ -666,6 +636,16 @@ static int init_vpp_session(AVFilterContext *avctx, QSVVPPContext *s)
> > return 0;
> > }
> >
> > +static unsigned int qsv_fifo_item_size(void)
> > +{
> > + return sizeof(mfxSyncPoint*) + sizeof(QSVFrame*);
> > +}
> > +
> > +static unsigned int qsv_fifo_size(const AVFifoBuffer* fifo)
> > +{
> > + return av_fifo_size(fifo)/qsv_fifo_item_size();
> > +}
> > +
> > int ff_qsvvpp_create(AVFilterContext *avctx, QSVVPPContext **vpp, QSVVPPParam *param)
> > {
> > int i;
> > @@ -738,7 +718,17 @@ int ff_qsvvpp_create(AVFilterContext *avctx, QSVVPPContext **vpp, QSVVPPParam *p
> > s->vpp_param.ExtParam = param->ext_buf;
> > }
> >
> > - s->vpp_param.AsyncDepth = 1;
> > + s->got_frame = 0;
> > +
> > + /** keep fifo size at least 1. Even when async_depth is 0, fifo is used. */
> > + s->async_fifo = av_fifo_alloc((param->async_depth + 1) * qsv_fifo_item_size());
> > + s->async_depth = param->async_depth;
> > + if (!s->async_fifo) {
> > + ret = AVERROR(ENOMEM);
> > + goto failed;
> > + }
> > +
> > + s->vpp_param.AsyncDepth = param->async_depth;
> >
> > if (IS_SYSTEM_MEMORY(s->in_mem_mode))
> > s->vpp_param.IOPattern |= MFX_IOPATTERN_IN_SYSTEM_MEMORY;
> > @@ -793,6 +783,7 @@ int ff_qsvvpp_free(QSVVPPContext **vpp)
> > av_freep(&s->surface_ptrs_out);
> > av_freep(&s->ext_buffers);
> > av_freep(&s->frame_infos);
> > + av_fifo_free(s->async_fifo);
> > av_freep(vpp);
> >
> > return 0;
> > @@ -803,9 +794,29 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *picr
> > AVFilterContext *ctx = inlink->dst;
> > AVFilterLink *outlink = ctx->outputs[0];
> > mfxSyncPoint sync;
> > - QSVFrame *in_frame, *out_frame;
> > + QSVFrame *in_frame, *out_frame, *tmp;
> > int ret, filter_ret;
> >
> > + while (s->eof && qsv_fifo_size(s->async_fifo)) {
> > + av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp), NULL);
> > + av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync), NULL);
> > + if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0)
> > + av_log(ctx, AV_LOG_WARNING, "Sync failed.\n");
> > +
> > + filter_ret = s->filter_frame(outlink, tmp->frame);
> > + if (filter_ret < 0) {
> > + av_frame_free(&tmp->frame);
> > + ret = filter_ret;
> > + break;
> > + }
> > + tmp->queued = 0;
> > + s->got_frame = 1;
> > + tmp->frame = NULL;
> > + };
> > +
> > + if (!picref)
> > + return 0;
> > +
> > in_frame = submit_frame(s, inlink, picref);
> > if (!in_frame) {
> > av_log(ctx, AV_LOG_ERROR, "Failed to submit frame on
> > input[%d]\n", @@ -821,8 +832,8 @@ int
> > ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame
> > *picr
> > }
> >
> > do {
> > - ret = MFXVideoVPP_RunFrameVPPAsync(s->session, in_frame->surface,
> > - out_frame->surface, NULL, &sync);
> > + ret = MFXVideoVPP_RunFrameVPPAsync(s->session, &in_frame->surface,
> > + &out_frame->surface, NULL, &sync);
> > if (ret == MFX_WRN_DEVICE_BUSY)
> > av_usleep(500);
> > } while (ret == MFX_WRN_DEVICE_BUSY);
> > @@ -833,20 +844,32 @@ int ff_qsvvpp_filter_frame(QSVVPPContext *s, AVFilterLink *inlink, AVFrame *picr
> > ret = AVERROR(EAGAIN);
> > break;
> > }
> > + out_frame->frame->pts = av_rescale_q(out_frame->surface.Data.TimeStamp,
> > + default_tb, outlink->time_base);
> >
> > - if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0)
> > - av_log(ctx, AV_LOG_WARNING, "Sync failed.\n");
> > + out_frame->queued = 1;
> > + av_fifo_generic_write(s->async_fifo, &out_frame, sizeof(out_frame), NULL);
> > + av_fifo_generic_write(s->async_fifo, &sync, sizeof(sync), NULL);
> >
> > - out_frame->frame->pts = av_rescale_q(out_frame->surface->Data.TimeStamp,
> > - default_tb, outlink->time_base);
> >
> > - filter_ret = s->filter_frame(outlink, out_frame->frame);
> > - if (filter_ret < 0) {
> > - av_frame_free(&out_frame->frame);
> > - ret = filter_ret;
> > - break;
> > + if (qsv_fifo_size(s->async_fifo) > s->async_depth) {
> > + av_fifo_generic_read(s->async_fifo, &tmp, sizeof(tmp), NULL);
> > + av_fifo_generic_read(s->async_fifo, &sync, sizeof(sync), NULL);
> > +
> > + if (MFXVideoCORE_SyncOperation(s->session, sync, 1000) < 0)
> > + av_log(ctx, AV_LOG_WARNING, "Sync failed.\n");
> > +
> > + filter_ret = s->filter_frame(outlink, tmp->frame);
> > + if (filter_ret < 0) {
> > + av_frame_free(&tmp->frame);
> > + ret = filter_ret;
> > + break;
> > + }
> > +
> > + tmp->queued = 0;
> > + s->got_frame = 1;
> > + tmp->frame = NULL;
> > }
> > - out_frame->frame = NULL;
> > } while(ret == MFX_ERR_MORE_SURFACE);
> >
> > return ret;
> > diff --git a/libavfilter/qsvvpp.h b/libavfilter/qsvvpp.h
> > index b4baeedf9e..48c8ffc2d2 100644
> > --- a/libavfilter/qsvvpp.h
> > +++ b/libavfilter/qsvvpp.h
> > @@ -27,6 +27,7 @@
> > #include <mfx/mfxvideo.h>
> >
> > #include "avfilter.h"
> > +#include "libavutil/fifo.h"
> >
> > #define FF_INLINK_IDX(link) ((int)((link)->dstpad - (link)->dst->input_pads))
> > #define FF_OUTLINK_IDX(link) ((int)((link)->srcpad - (link)->src->output_pads))
> > @@ -39,7 +40,44 @@
> > ((MFX_VERSION.Major > (MAJOR)) || \
> > (MFX_VERSION.Major == (MAJOR) && MFX_VERSION.Minor >= (MINOR)))
> >
> > -typedef struct QSVVPPContext QSVVPPContext;
> > +#define VPP_ASYNC_DEPTH_DEFAULT 1
> > +
> > +typedef struct QSVFrame {
> > + AVFrame *frame;
> > + mfxFrameSurface1 surface;
> > + struct QSVFrame *next;
> > + int queued;
> > + int used;
> > +} QSVFrame;
> > +
> > +typedef struct QSVVPPContext {
> > + mfxSession session;
> > + int (*filter_frame) (AVFilterLink *outlink, AVFrame *frame); /**< callback */
> > + enum AVPixelFormat out_sw_format; /**< Real output format */
> > + mfxVideoParam vpp_param;
> > + mfxFrameInfo *frame_infos; /**< frame info for each input */
> > +
> > + /** members related to the input/output surface */
> > + int in_mem_mode;
> > + int out_mem_mode;
> > + QSVFrame *in_frame_list;
> > + QSVFrame *out_frame_list;
> > + int nb_surface_ptrs_in;
> > + int nb_surface_ptrs_out;
> > + mfxFrameSurface1 **surface_ptrs_in;
> > + mfxFrameSurface1 **surface_ptrs_out;
> > +
> > + /** MFXVPP extern parameters */
> > + mfxExtOpaqueSurfaceAlloc opaque_alloc;
> > + mfxExtBuffer **ext_buffers;
> > + int nb_ext_buffers;
> > +
> > + int got_frame;
> > + int async_depth;
> > + int eof;
> > + /** order with frame_out, sync */
> > + AVFifoBuffer *async_fifo;
> > +} QSVVPPContext;
> >
> > typedef struct QSVVPPCrop {
> > int in_idx; ///< Input index
> > @@ -60,6 +98,8 @@ typedef struct QSVVPPParam {
> > /* Crop information for each input, if needed */
> > int num_crop;
> > QSVVPPCrop *crop;
> > +
> > + int async_depth;
> > } QSVVPPParam;
> >
> > /* create and initialize the QSV session */
> > diff --git a/libavfilter/vf_deinterlace_qsv.c b/libavfilter/vf_deinterlace_qsv.c
> > index 89a282f99e..a620567de2 100644
> > --- a/libavfilter/vf_deinterlace_qsv.c
> > +++ b/libavfilter/vf_deinterlace_qsv.c
> > @@ -47,14 +47,6 @@ enum {
> > QSVDEINT_MORE_INPUT,
> > };
> >
> > -typedef struct QSVFrame {
> > - AVFrame *frame;
> > - mfxFrameSurface1 surface;
> > - int used;
> > -
> > - struct QSVFrame *next;
> > -} QSVFrame;
> > -
> > typedef struct QSVDeintContext {
> > const AVClass *class;
> >
> > diff --git a/libavfilter/vf_vpp_qsv.c b/libavfilter/vf_vpp_qsv.c
> > index 5d57707455..83bdf1276c 100644
> > --- a/libavfilter/vf_vpp_qsv.c
> > +++ b/libavfilter/vf_vpp_qsv.c
> > @@ -32,6 +32,7 @@
> > #include "formats.h"
> > #include "internal.h"
> > #include "avfilter.h"
> > +#include "filters.h"
> > #include "libavcodec/avcodec.h"
> > #include "libavformat/avformat.h"
> >
> > @@ -93,6 +94,9 @@ typedef struct VPPContext{
> > char *cx, *cy, *cw, *ch;
> > char *ow, *oh;
> > char *output_format_str;
> > +
> > + int async_depth;
> > + int eof;
> > } VPPContext;
> >
> > static const AVOption options[] = {
> > @@ -128,6 +132,7 @@ static const AVOption options[] = {
> > { "h", "Output video height", OFFSET(oh),
> > AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS },
> > { "height", "Output video height", OFFSET(oh),
> > AV_OPT_TYPE_STRING, { .str="w*ch/cw" }, 0, 255, .flags = FLAGS },
> > { "format", "Output pixel format", OFFSET(output_format_str),
> > AV_OPT_TYPE_STRING, { .str = "same" }, .flags = FLAGS },
> > + { "async_depth", "Internal parallelization depth, the higher the
> > value the higher the latency.", OFFSET(async_depth), AV_OPT_TYPE_INT,
> > { .i64 = VPP_ASYNC_DEPTH_DEFAULT }, 0, INT_MAX, .flags = FLAGS },
> >
> > { NULL }
> > };
> > @@ -303,6 +308,7 @@ static int config_output(AVFilterLink *outlink)
> > param.filter_frame = NULL;
> > param.num_ext_buf = 0;
> > param.ext_buf = ext_buf;
> > + param.async_depth = vpp->async_depth;
> >
> > if (inlink->format == AV_PIX_FMT_QSV) {
> > if (!inlink->hw_frames_ctx || !inlink->hw_frames_ctx->data)
> > @@ -467,23 +473,64 @@ static int config_output(AVFilterLink *outlink)
> > return 0;
> > }
> >
> > -static int filter_frame(AVFilterLink *inlink, AVFrame *picref)
> > +static int activate(AVFilterContext *ctx)
> > {
> > - int ret = 0;
> > - AVFilterContext *ctx = inlink->dst;
> > - VPPContext *vpp = inlink->dst->priv;
> > - AVFilterLink *outlink = ctx->outputs[0];
> > -
> > - if (vpp->qsv) {
> > - ret = ff_qsvvpp_filter_frame(vpp->qsv, inlink, picref);
> > - av_frame_free(&picref);
> > + AVFilterLink *inlink = ctx->inputs[0];
> > + AVFilterLink *outlink = ctx->outputs[0];
> > + VPPContext *s =ctx->priv;
> > + QSVVPPContext *qsv = s->qsv;
> > + AVFrame *in = NULL;
> > + int ret, status;
> > + int64_t pts;
> > +
> > + FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
> > +
> > + if (!s->eof) {
> > + ret = ff_inlink_consume_frame(inlink, &in);
> > + if (ret < 0)
> > + return ret;
> > +
> > + if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
> > + if (status == AVERROR_EOF) {
> > + s->eof = 1;
> > + }
> > + }
> > + }
> > +
> > + if (qsv) {
> > + if (in || s->eof) {
> > + qsv->eof = s->eof;
> > + ret = ff_qsvvpp_filter_frame(qsv, inlink, in);
> > + av_frame_free(&in);
> > +
> > + if (s->eof) {
> > + ff_outlink_set_status(outlink, status, pts);
> > + return 0;
> > + }
> > +
> > + if (qsv->got_frame) {
> > + qsv->got_frame = 0;
> > + return ret;
> > + }
> > + }
> > } else {
> > - if (picref->pts != AV_NOPTS_VALUE)
> > - picref->pts = av_rescale_q(picref->pts, inlink->time_base, outlink->time_base);
> > - ret = ff_filter_frame(outlink, picref);
> > + if (in) {
> > + if (in->pts != AV_NOPTS_VALUE)
> > + in->pts = av_rescale_q(in->pts, inlink->time_base, outlink->time_base);
> > +
> > + ret = ff_filter_frame(outlink, in);
> > + return ret;
> > + }
> > }
> >
> > - return ret;
> > + if (s->eof) {
> > + ff_outlink_set_status(outlink, status, pts);
> > + return 0;
> > + } else {
> > + FF_FILTER_FORWARD_WANTED(outlink, inlink);
> > + }
> > +
> > + return FFERROR_NOT_READY;
> > }
> >
> > static int query_formats(AVFilterContext *ctx)
> > @@ -531,7 +578,6 @@ static const AVFilterPad vpp_inputs[] = {
> > .name = "default",
> > .type = AVMEDIA_TYPE_VIDEO,
> > .config_props = config_input,
> > - .filter_frame = filter_frame,
> > },
> > { NULL }
> > };
> > @@ -554,6 +600,7 @@ AVFilter ff_vf_vpp_qsv = {
> > .uninit = vpp_uninit,
> > .inputs = vpp_inputs,
> > .outputs = vpp_outputs,
> > + .activate = activate,
> > .priv_class = &vpp_class,
> > .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE,
> > };
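With this applied, the knob is exposed as the async_depth option of vpp_qsv and carried through QSVVPPParam. A hypothetical all-QSV pipeline could opt into deeper queuing roughly like the command below; the file names, scaling size and depth value of 4 are illustrative only, not taken from the patch or its tests:

    ffmpeg -hwaccel qsv -hwaccel_output_format qsv -c:v h264_qsv -i input.mp4 \
           -vf "vpp_qsv=w=1280:h=720:async_depth=4" \
           -c:v h264_qsv output.mp4

Per the option's own description, higher values buy better pipelining at the cost of added latency (and a few more buffered surfaces), which is where the 1:N transcode gain mentioned in the commit message comes from.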
>
> Ping, thanks
Ping for review. @Mark Thompson @lizhong1008 at gmail.com, would you be free to review this patch?
>
> Fei