[FFmpeg-devel] [PATCH] add support for ROI-based encoding

Thu Dec 6 07:57:18 EET 2018

> From: ffmpeg-devel [mailto:ffmpeg-devel-bounces at ffmpeg.org] On Behalf
> Of Guo, Yejun
> Sent: Wednesday, December 5, 2018 5:59 PM
> To: ffmpeg-devel at ffmpeg.org
> Subject: [FFmpeg-devel] [PATCH] add support for ROI-based encoding
> 
> This patch is not a request for merging; it is meant to gather feedback on the feature.
> 
> Encoders such as libx264 support different QP offsets for different MBs,
> which makes ROI-based encoding possible. It makes sense to add support
> within ffmpeg to generate/accept ROI info and pass it to the encoders.
> 
> Typical usage: After an AVFrame is decoded, an ffmpeg filter or the user's
> code generates ROI info for that frame, and the encoder finally does the
> ROI-based encoding. That is why I chose to keep the ROI info within the
> AVFrame struct.
> 
> TODO:
> - remove code in vf_scale.c, it is just an example to generate ROI info
> - use AVBufferRef instead of current implementation within AVFrame struct.
> - add other encoders support
> 
> Signed-off-by: Guo, Yejun <yejun.guo at intel.com>
> ---
>  libavcodec/libx264.c   | 35 +++++++++++++++++++++++++++++++++++
>  libavfilter/vf_scale.c |  8 ++++++++
>  libavutil/frame.c      |  9 +++++++++
>  libavutil/frame.h      | 14 ++++++++++++++
>  4 files changed, 66 insertions(+)
> 
> diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c index
> a68d0a7..d8cc327 100644
> --- a/libavcodec/libx264.c
> +++ b/libavcodec/libx264.c
> @@ -26,6 +26,7 @@
>  #include "libavutil/pixdesc.h"
>  #include "libavutil/stereo3d.h"
>  #include "libavutil/intreadwrite.h"
> +#include "libavutil/avassert.h"
>  #include "avcodec.h"
>  #include "internal.h"
> 
> @@ -345,6 +346,40 @@ static int X264_frame(AVCodecContext *ctx,
> AVPacket *pkt, const AVFrame *frame,
>                  }
>              }
>          }
> +
> +        if (frame->nb_rois > 0) {
> +            if (x4->params.rc.i_aq_mode == X264_AQ_NONE) {
> +                av_log(ctx, AV_LOG_ERROR, "Adaptive quantization
> must be enabled to use ROI encoding, skipping ROI.\n");
> +            }
> +            if (frame->interlaced_frame == 0) {
> +                const static int MBSIZE = 16;
> +                size_t mbx = (frame->width + MBSIZE - 1) / MBSIZE;
> +                size_t mby = (frame->height + MBSIZE - 1) / MBSIZE;

> +                float* qoffsets = (float*)av_malloc(sizeof(float) * mbx *
> mby);
> +                memset(qoffsets, 0, sizeof(float) * mbx * mby);
> +
> +                for (size_t roi = 0; roi < frame->nb_rois; ++roi) {
> +                    int starty = FFMIN(mby, frame->rois[roi].top /
> MBSIZE);
> +                    int endy = FFMIN(mby, (frame->rois[roi].bottom +
> MBSIZE - 1)/ MBSIZE);
> +                    int startx = FFMIN(mbx, frame->rois[roi].left /
> MBSIZE);
> +                    int endx = FFMIN(mbx, (frame->rois[roi].right +
> MBSIZE - 1)/ MBSIZE);
> +                    for (int y = starty; y < endy; ++y) {
> +                        for (int x = startx; x < endx; ++x) {
> +                            qoffsets[x + y*mbx] =
> frame->rois[roi].qoffset;
> +                        }
> +                    }
> +                }
> +
> +                x4->pic.prop.quant_offsets = qoffsets;
> +                x4->pic.prop.quant_offsets_free = av_free;
> +            } else {
> +                av_log(ctx, AV_LOG_ERROR, "interlaced_frame not
> supported for ROI encoding, skipping ROI.\n");
> +            }
> +        } else {
> +            //to be removed in the final code, it is just for debug usage
> now.
> +            printf("ooooops, frame 0x%p with rois %ld\n", frame,
> frame->nb_rois);
> +            av_assert0(!"should not reach here");
> +        }
>      }
> 
>      do {
> diff --git a/libavfilter/vf_scale.c b/libavfilter/vf_scale.c index
> f741419..71def72 100644
> --- a/libavfilter/vf_scale.c
> +++ b/libavfilter/vf_scale.c
> @@ -437,6 +437,14 @@ static int filter_frame(AVFilterLink *link, AVFrame
> *in)
>              return ret;
>      }
> 
> +    // to be removed, just for debug usage temporarily
> +    in->nb_rois = 1;
> +    in->rois[0].top = 0;
> +    in->rois[0].left = 0;
> +    in->rois[0].bottom = in->height;
> +    in->rois[0].right = in->width/2;
> +    in->rois[0].qoffset = 15.0f; // 15.0f, +-5.0f, +-25.0f
> +
>      if (!scale->sws)
>          return ff_filter_frame(outlink, in);
> 
> diff --git a/libavutil/frame.c b/libavutil/frame.c index 9b3fb13..9c38bdd
> 100644
> --- a/libavutil/frame.c
> +++ b/libavutil/frame.c
> @@ -425,6 +425,15 @@ FF_DISABLE_DEPRECATION_WARNINGS
> FF_ENABLE_DEPRECATION_WARNINGS  #endif
> 
> +    dst->nb_rois = src->nb_rois;
> +    for (int i = 0; i < dst->nb_rois; ++i) {
> +        dst->rois[i].top = src->rois[i].top;
> +        dst->rois[i].bottom = src->rois[i].bottom;
> +        dst->rois[i].left = src->rois[i].left;
> +        dst->rois[i].right = src->rois[i].right;
> +        dst->rois[i].qoffset = src->rois[i].qoffset;
> +    }
> +
>      av_buffer_unref(&dst->opaque_ref);
>      av_buffer_unref(&dst->private_ref);
>      if (src->opaque_ref) {
> diff --git a/libavutil/frame.h b/libavutil/frame.h index 66f27f4..b245a90
> 100644
> --- a/libavutil/frame.h
> +++ b/libavutil/frame.h
> @@ -193,6 +193,15 @@ typedef struct AVFrameSideData {
>      AVBufferRef *buf;
>  } AVFrameSideData;
> 
> +
> +typedef struct AVFrameROI {
> +    size_t top;
> +    size_t bottom;
> +    size_t left;
> +    size_t right;
> +    float qoffset;
> +} AVFrameROI;
> +
>  /**
>   * This structure describes decoded (raw) audio or video data.
>   *
> @@ -556,6 +565,11 @@ typedef struct AVFrame {
>      attribute_deprecated
>      AVBufferRef *qp_table_buf;
>  #endif
> +
> +    //TODO: AVBufferRef*
> +    AVFrameROI rois[2];

This means the maximum number of ROIs is 2, which makes nb_rois more or less useless.
(It will also cause a segmentation fault, since I have not seen any check that nb_rois <= 2.)

> +    size_t nb_rois;
> +
>      /**
>       * For hwaccel-format frames, this should be a reference to the
>       * AVHWFramesContext describing the frame.
> --
> 2.7.4