[FFmpeg-devel] [PATCH V2 1/2] add support for ROI-based encoding

Guo, Yejun yejun.guo at intel.com
Tue Dec 25 22:11:35 EET 2018


This patchset contains two patches.
- the first patch (this patch) contains the finished code and is proposed for upstream.
- the second patch is just a quick example of how to generate ROI info.

Encoders such as libx264 support different QP offsets for different MBs,
which makes ROI-based encoding possible. It makes sense to add support
within ffmpeg to generate/accept ROI info and pass it to the encoders.

Typical usage: after an AVFrame is decoded, an ffmpeg filter or the user's
code generates ROI info for that frame, and the encoder then performs the
ROI-based encoding.

This patch only enables the path from ffmpeg to libx264; more encoders
can be added later.

Signed-off-by: Guo, Yejun <yejun.guo at intel.com>
---
 libavcodec/libx264.c | 40 ++++++++++++++++++++++++++++++++++++++++
 libavutil/frame.c    |  1 +
 libavutil/frame.h    | 19 +++++++++++++++++++
 3 files changed, 60 insertions(+)

diff --git a/libavcodec/libx264.c b/libavcodec/libx264.c
index a68d0a7..a4f8677 100644
--- a/libavcodec/libx264.c
+++ b/libavcodec/libx264.c
@@ -40,6 +40,10 @@
 #include <stdlib.h>
 #include <string.h>
 
+// from x264.h, for quant_offsets, Macroblocks are 16x16
+// blocks of pixels (with respect to the luma plane)
+#define MB_SIZE 16
+
 typedef struct X264Context {
     AVClass        *class;
     x264_param_t    params;
@@ -345,6 +349,42 @@ static int X264_frame(AVCodecContext *ctx, AVPacket *pkt, const AVFrame *frame,
                 }
             }
         }
+
+        AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_ROIS);
+        if (sd != NULL) {
+            if (x4->params.rc.i_aq_mode == X264_AQ_NONE) {
+                av_log(ctx, AV_LOG_WARNING, "Adaptive quantization must be enabled to use ROI encoding, skipping ROI.\n");
+            } else {
+                if (frame->interlaced_frame == 0) {
+                    size_t mbx = (frame->width + MB_SIZE - 1) / MB_SIZE;
+                    size_t mby = (frame->height + MB_SIZE - 1) / MB_SIZE;
+                    float* qoffsets;
+                    qoffsets = (float*)av_malloc(sizeof(*qoffsets) * mbx * mby);
+                    if (qoffsets == NULL)
+                        return AVERROR(ENOMEM);
+                    memset(qoffsets, 0, sizeof(*qoffsets) * mbx * mby);
+
+                    size_t nb_rois = sd->size / sizeof(AVROI);
+                    AVROI* rois = (AVROI*)sd->data;
+                    for (size_t roi = 0; roi < nb_rois; roi++) {
+                        int starty = FFMIN(mby, rois[roi].top / MB_SIZE);
+                        int endy = FFMIN(mby, (rois[roi].bottom + MB_SIZE - 1)/ MB_SIZE);
+                        int startx = FFMIN(mbx, rois[roi].left / MB_SIZE);
+                        int endx = FFMIN(mbx, (rois[roi].right + MB_SIZE - 1)/ MB_SIZE);
+                        for (int y = starty; y < endy; y++) {
+                            for (int x = startx; x < endx; x++) {
+                                qoffsets[x + y*mbx] = rois[roi].qoffset;
+                            }
+                        }
+                    }
+
+                    x4->pic.prop.quant_offsets = qoffsets;
+                    x4->pic.prop.quant_offsets_free = av_free;
+                } else {
+                    av_log(ctx, AV_LOG_WARNING, "interlaced_frame not supported for ROI encoding yet, skipping ROI.\n");
+                }
+            }
+        }
     }
 
     do {
diff --git a/libavutil/frame.c b/libavutil/frame.c
index 34a6210..bebc50e 100644
--- a/libavutil/frame.c
+++ b/libavutil/frame.c
@@ -841,6 +841,7 @@ const char *av_frame_side_data_name(enum AVFrameSideDataType type)
     case AV_FRAME_DATA_QP_TABLE_DATA:               return "QP table data";
 #endif
     case AV_FRAME_DATA_DYNAMIC_HDR_PLUS: return "HDR Dynamic Metadata SMPTE2094-40 (HDR10+)";
+    case AV_FRAME_DATA_ROIS: return "Regions Of Interest";
     }
     return NULL;
 }
diff --git a/libavutil/frame.h b/libavutil/frame.h
index 582ac47..d18d235 100644
--- a/libavutil/frame.h
+++ b/libavutil/frame.h
@@ -173,6 +173,12 @@ enum AVFrameSideDataType {
      * volume transform - application 4 of SMPTE 2094-40:2016 standard.
      */
     AV_FRAME_DATA_DYNAMIC_HDR_PLUS,
+
+    /**
+     * Regions Of Interest, the number of ROI area is implied
+     * in the size of buf.
+     */
+    AV_FRAME_DATA_ROIS,
 };
 
 enum AVActiveFormatDescription {
@@ -200,6 +206,19 @@ typedef struct AVFrameSideData {
     AVBufferRef *buf;
 } AVFrameSideData;
 
+typedef struct AVROI {
+    /* Bounds of the region, in frame pixel coordinates.
+     * The region is extended internally if the codec requires an alignment.
+     * If regions overlap, the last one in the list takes precedence.
+     */
+    size_t top;
+    size_t bottom;
+    size_t left;
+    size_t right;
+    // quantizer offset for this region; its interpretation is encoder dependent
+    int qoffset;
+} AVROI;
+
 /**
  * This structure describes decoded (raw) audio or video data.
  *
-- 
2.7.4



More information about the ffmpeg-devel mailing list