[FFmpeg-devel] [PATCH] add support for ROI-based encoding
Guo, Yejun
yejun.guo at intel.com
Wed Dec 5 11:58:58 EET 2018
This patch is not a request to merge; it is meant to gather feedback on the feature.
Encoders such as libx264 support a different QP offset for each macroblock (MB),
which makes ROI-based encoding possible. It makes sense to add support
within ffmpeg to generate/accept ROI info and pass it on to encoders.
Typical usage: after an AVFrame is decoded, an ffmpeg filter or the user's code
generates ROI info for that frame, and the encoder then performs the
ROI-based encoding. For this reason I chose to keep the ROI info within
the AVFrame struct.
TODO:
- remove the code in vf_scale.c; it is just an example of generating ROI info
- use AVBufferRef instead of the current implementation within the AVFrame struct.
- add support for other encoders
Signed-off-by: Guo, Yejun <yejun.guo at intel.com>
---
libavcodec/libx264.c | 35 +++++++++++++++++++++++++++++++++++
libavfilter/vf_scale.c | 8 ++++++++
libavutil/frame.c | 9 +++++++++
libavutil/frame.h | 14 ++++++++++++++
4 files changed, 66 insertions(+)
diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index a68d0a7..d8cc327 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -26,6 +26,7 @@
#include "libavutil/pixdesc.h"
#include "libavutil/stereo3d.h"
#include "libavutil/intreadwrite.h"
+#include "libavutil/avassert.h"
#include "avcodec.h"
#include "internal.h"
@@ -345,6 +346,40 @@ static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame,
}
}
}
+
+ if (frame->nb_rois > 0) {
+ if (x4->params.rc.i_aq_mode == X264_AQ_NONE) {
+ av_log(ctx, AV_LOG_ERROR, "Adaptive quantization must be enabled to use ROI encoding, skipping ROI.\n");
+ }
+ if (frame->interlaced_frame == 0) {
+ const static int MBSIZE = 16;
+ size_t mbx = (frame->width + MBSIZE - 1) / MBSIZE;
+ size_t mby = (frame->height + MBSIZE - 1) / MBSIZE;
+ float* qoffsets = (float*)av_malloc(sizeof(float) * mbx * mby);
+ memset(qoffsets, 0, sizeof(float) * mbx * mby);
+
+ for (size_t roi = 0; roi < frame->nb_rois; ++roi) {
+ int starty = FFMIN(mby, frame->rois[roi].top / MBSIZE);
+ int endy = FFMIN(mby, (frame->rois[roi].bottom + MBSIZE - 1)/ MBSIZE);
+ int startx = FFMIN(mbx, frame->rois[roi].left / MBSIZE);
+ int endx = FFMIN(mbx, (frame->rois[roi].right + MBSIZE - 1)/ MBSIZE);
+ for (int y = starty; y < endy; ++y) {
+ for (int x = startx; x < endx; ++x) {
+ qoffsets[x + y*mbx] = frame->rois[roi].qoffset;
+ }
+ }
+ }
+
+ x4->pic.prop.quant_offsets = qoffsets;
+ x4->pic.prop.quant_offsets_free = av_free;
+ } else {
+ av_log(ctx, AV_LOG_ERROR, "interlaced_frame not supported for ROI encoding, skipping ROI.\n");
+ }
+ } else {
+ //to be removed in the final code, it is just for debug usage now.
+ printf("ooooops, frame 0x%p with rois %ld\n", frame, frame->nb_rois);
+ av_assert0(!"should not reach here");
+ }
}
do {
diff --git a/libavfilter/vf_scale.c b/libavfilter/vf_scale.c
index f741419..71def72 100644
--- a/libavfilter/vf_scale.c
+++ b/libavfilter/vf_scale.c
@@ -437,6 +437,14 @@ static int filter_frame(AVFilterLink *link, AVFrame *in)
return ret;
}
+ // to be removed, just for debug usage temporarily
+ in->nb_rois = 1;
+ in->rois[0].top = 0;
+ in->rois[0].left = 0;
+ in->rois[0].bottom = in->height;
+ in->rois[0].right = in->width/2;
+ in->rois[0].qoffset = 15.0f; // 15.0f, +-5.0f, +-25.0f
+
if (!scale->sws)
return ff_filter_frame(outlink, in);
diff --git a/libavutil/frame.c b/libavutil/frame.c
index 9b3fb13..9c38bdd 100644
--- a/libavutil/frame.c
+++ b/libavutil/frame.c
@@ -425,6 +425,15 @@ FF_DISABLE_DEPRECATION_WARNINGS
FF_ENABLE_DEPRECATION_WARNINGS
#endif
+ dst->nb_rois = src->nb_rois;
+ for (int i = 0; i < dst->nb_rois; ++i) {
+ dst->rois[i].top = src->rois[i].top;
+ dst->rois[i].bottom = src->rois[i].bottom;
+ dst->rois[i].left = src->rois[i].left;
+ dst->rois[i].right = src->rois[i].right;
+ dst->rois[i].qoffset = src->rois[i].qoffset;
+ }
+
av_buffer_unref(&dst->opaque_ref);
av_buffer_unref(&dst->private_ref);
if (src->opaque_ref) {
diff --git a/libavutil/frame.h b/libavutil/frame.h
index 66f27f4..b245a90 100644
--- a/libavutil/frame.h
+++ b/libavutil/frame.h
@@ -193,6 +193,15 @@ typedef struct AVFrameSideData {
AVBufferRef *buf;
} AVFrameSideData;
+
+typedef struct AVFrameROI { /* One region of interest (ROI) of a frame, with a quantizer offset for encoders. */
+ size_t top; /* top edge in pixels; the libx264 hunk rounds it down to a 16-pixel macroblock row */
+ size_t bottom; /* bottom edge in pixels; the libx264 hunk rounds it up to a macroblock row and uses it as an exclusive bound */
+ size_t left; /* left edge in pixels; the libx264 hunk rounds it down to a macroblock column */
+ size_t right; /* right edge in pixels; the libx264 hunk rounds it up to a macroblock column and uses it as an exclusive bound */
+ float qoffset; /* value the libx264 hunk writes into x264's quant_offsets for every macroblock the region covers */
+} AVFrameROI;
+
/**
* This structure describes decoded (raw) audio or video data.
*
@@ -556,6 +565,11 @@ typedef struct AVFrame {
attribute_deprecated
AVBufferRef *qp_table_buf;
#endif
+
+ //TODO: AVBufferRef*
+ AVFrameROI rois[2];
+ size_t nb_rois;
+
/**
* For hwaccel-format frames, this should be a reference to the
* AVHWFramesContext describing the frame.
--
2.7.4
More information about the ffmpeg-devel
mailing list