[FFmpeg-devel] [PATCH V2 1/2] add support for ROI-based encoding
Guo, Yejun
yejun.guo at intel.com
Tue Dec 25 22:11:35 EET 2018
This patchset contains two patches.
- the first patch (this patch) contains the finished code and is submitted for upstream inclusion.
- the second patch is just a quick example of how to generate ROI info.
Encoders such as libx264 support a different QP offset for each macroblock
(MB), which makes ROI-based encoding possible. It makes sense to add support
within ffmpeg to generate/accept ROI info and pass it on to the encoders.
Typical usage: after an AVFrame is decoded, an ffmpeg filter or user code
generates ROI info for that frame, and the encoder then performs the
ROI-based encoding.
This patch only enables the path from ffmpeg to libx264; more encoders
can be added later.
Signed-off-by: Guo, Yejun <yejun.guo at intel.com>
---
libavcodec/libx264.c | 40 ++++++++++++++++++++++++++++++++++++++++
libavutil/frame.c | 1 +
libavutil/frame.h | 19 +++++++++++++++++++
3 files changed, 60 insertions(+)
diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index a68d0a7..a4f8677 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -40,6 +40,10 @@
#include <stdlib.h>
#include <string.h>
+// from x264.h, for quant_offsets, Macroblocks are 16x16
+// blocks of pixels (with respect to the luma plane)
+#define MB_SIZE 16
+
typedef struct X264Context {
AVClass *class;
x264_param_t params;
@@ -345,6 +349,42 @@ static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame,
}
}
}
+
+ AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_ROIS);
+ if (sd != NULL) {
+ if (x4->params.rc.i_aq_mode == X264_AQ_NONE) {
+ av_log(ctx, AV_LOG_WARNING, "Adaptive quantization must be enabled to use ROI encoding, skipping ROI.\n");
+ } else {
+ if (frame->interlaced_frame == 0) {
+ size_t mbx = (frame->width + MB_SIZE - 1) / MB_SIZE;
+ size_t mby = (frame->height + MB_SIZE - 1) / MB_SIZE;
+ float* qoffsets;
+ qoffsets = (float*)av_malloc(sizeof(*qoffsets) * mbx * mby);
+ if (qoffsets == NULL)
+ return AVERROR(ENOMEM);
+ memset(qoffsets, 0, sizeof(*qoffsets) * mbx * mby);
+
+ size_t nb_rois = sd->size / sizeof(AVROI);
+ AVROI* rois = (AVROI*)sd->data;
+ for (size_t roi = 0; roi < nb_rois; roi++) {
+ int starty = FFMIN(mby, rois[roi].top / MB_SIZE);
+ int endy = FFMIN(mby, (rois[roi].bottom + MB_SIZE - 1)/ MB_SIZE);
+ int startx = FFMIN(mbx, rois[roi].left / MB_SIZE);
+ int endx = FFMIN(mbx, (rois[roi].right + MB_SIZE - 1)/ MB_SIZE);
+ for (int y = starty; y < endy; y++) {
+ for (int x = startx; x < endx; x++) {
+ qoffsets[x + y*mbx] = rois[roi].qoffset;
+ }
+ }
+ }
+
+ x4->pic.prop.quant_offsets = qoffsets;
+ x4->pic.prop.quant_offsets_free = av_free;
+ } else {
+ av_log(ctx, AV_LOG_WARNING, "interlaced_frame not supported for ROI encoding yet, skipping ROI.\n");
+ }
+ }
+ }
}
do {
diff --git a/libavutil/frame.c b/libavutil/frame.c
index 34a6210..bebc50e 100644
--- a/libavutil/frame.c
+++ b/libavutil/frame.c
@@ -841,6 +841,7 @@ const char *av_frame_side_data_name(enum AVFrameSideDataType type)
case AV_FRAME_DATA_QP_TABLE_DATA: return "QP table data";
#endif
case AV_FRAME_DATA_DYNAMIC_HDR_PLUS: return "HDR Dynamic Metadata SMPTE2094-40 (HDR10+)";
+ case AV_FRAME_DATA_ROIS: return "Regions Of Interest";
}
return NULL;
}
diff --git a/libavutil/frame.h b/libavutil/frame.h
index 582ac47..d18d235 100644
--- a/libavutil/frame.h
+++ b/libavutil/frame.h
@@ -173,6 +173,12 @@ enum AVFrameSideDataType {
* volume transform - application 4 of SMPTE 2094-40:2016 standard.
*/
AV_FRAME_DATA_DYNAMIC_HDR_PLUS,
+
+ /**
+ * Regions Of Interest. The data is an array of AVROI; the number of
+ * regions is implied by the size of the side data.
+ */
+ AV_FRAME_DATA_ROIS,
};
enum AVActiveFormatDescription {
@@ -200,6 +206,19 @@ typedef struct AVFrameSideData {
AVBufferRef *buf;
} AVFrameSideData;
+typedef struct AVROI {
+ /* One region of interest; bounds are in frame pixel coordinates.
+ * A region is extended internally if the codec requires an alignment.
+ * If regions overlap, the last one in the list takes precedence.
+ */
+ size_t top;
+ size_t bottom;
+ size_t left;
+ size_t right;
+ // quantization offset for the region; its interpretation is encoder dependent
+ int qoffset;
+} AVROI;
+
/**
* This structure describes decoded (raw) audio or video data.
*
--
2.7.4
More information about the ffmpeg-devel
mailing list