[FFmpeg-devel] [PATCH] Nvidia NVENC 10-bit HEVC encoding and rate control lookahead support

Tue Aug 23 20:10:21 EEST 2016

Hi all

Attached is a patch for the above.

10-bit HEVC encoding is a new feature of Nvidia’s latest Pascal GPUs, which were released in the past few months; I’ve added support for the yuv420p10le and yuv444p10le pixel formats.

Rate control lookahead is also available on pre-Pascal models, but it requires the latest SDK and the latest drivers.

As part of this I’ve bumped the required SDK version to the latest, which is 7.

Feedback welcome. This is only my second patch; I seem to average about one a year :)

Regards

Oliver

---
configure               |   4 +-
libavcodec/nvenc.c      | 120 ++++++++++++++++++++++++++++++++++++++++++++++--
libavcodec/nvenc.h      |   6 +++
libavcodec/nvenc_hevc.c |   6 ++-
4 files changed, 129 insertions(+), 7 deletions(-)

diff --git a/configure b/configure
index 9b92426..46ff144 100755
--- a/configure
+++ b/configure
@@ -5774,8 +5774,8 @@ enabled mmal && check_func_headers interface/mmal/mmal.h "MMAL_PARAMETER_VIDEO_M

enabled netcdf            && require_pkg_config netcdf netcdf.h nc_inq_libvers
enabled nvenc             && { check_header nvEncodeAPI.h || die "ERROR: nvEncodeAPI.h not found."; } &&
-                             { check_cpp_condition nvEncodeAPI.h "NVENCAPI_MAJOR_VERSION >= 6" ||
-                               die "ERROR: NVENC API version 5 or older is not supported"; } &&
+                             { check_cpp_condition nvEncodeAPI.h "NVENCAPI_MAJOR_VERSION >= 7" ||
+                               die "ERROR: NVENC API version 6 or older is not supported"; } &&
                             { [ $target_os != cygwin ] || die "ERROR: NVENC is not supported on Cygwin currently."; }
enabled openal            && { { for al_libs in "${OPENAL_LIBS}" "-lopenal" "-lOpenAL32"; do
                               check_lib 'AL/al.h' alGetError "${al_libs}" && break; done } ||
diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
index 984dd3b..685dd7d 100644
--- a/libavcodec/nvenc.c
+++ b/libavcodec/nvenc.c
@@ -75,8 +75,10 @@

const enum AVPixelFormat ff_nvenc_pix_fmts[] = {
    AV_PIX_FMT_YUV420P,
+    AV_PIX_FMT_YUV420P10LE,
    AV_PIX_FMT_NV12,
    AV_PIX_FMT_YUV444P,
+    AV_PIX_FMT_YUV444P10LE,
#if CONFIG_CUDA
    AV_PIX_FMT_CUDA,
#endif
@@ -314,6 +316,18 @@ static int nvenc_check_capabilities(AVCodecContext *avctx)
        return AVERROR(ENOSYS);
    }

+    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_10BIT_ENCODE);
+    if ((ctx->data_pix_fmt == AV_PIX_FMT_YUV420P10LE || ctx->data_pix_fmt == AV_PIX_FMT_YUV444P10LE) && ret <= 0) {
+        av_log(avctx, AV_LOG_VERBOSE, "10 bit encode not supported\n");
+        return AVERROR(ENOSYS);
+    }
+
+    ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_LOOKAHEAD);
+    if (ctx->rc_lookahead > 0 && ret <= 0) {
+        av_log(avctx, AV_LOG_VERBOSE, "RC lookahead not supported\n");
+        return AVERROR(ENOSYS);
+    }
+
    return 0;
}

@@ -673,6 +687,11 @@ static av_cold void nvenc_setup_rate_control(AVCodecContext *avctx)
    } else if (ctx->encode_config.rcParams.averageBitRate > 0) {
        ctx->encode_config.rcParams.vbvBufferSize = 2 * ctx->encode_config.rcParams.averageBitRate;
    }
+
+    if (ctx->rc_lookahead > 0) {
+        ctx->encode_config.rcParams.enableLookahead = 1;
+        ctx->encode_config.rcParams.lookaheadDepth = FFMIN(ctx->rc_lookahead, 32);
+    }
}

static av_cold int nvenc_setup_h264_config(AVCodecContext *avctx)
@@ -800,9 +819,26 @@ static av_cold int nvenc_setup_hevc_config(AVCodecContext *avctx)
        hevc->outputPictureTimingSEI   = 1;
    }

-    /* No other profile is supported in the current SDK version 5 */
-    cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
-    avctx->profile = FF_PROFILE_HEVC_MAIN;
+    switch(ctx->profile) {
+    case NV_ENC_HEVC_PROFILE_MAIN:
+        cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
+        avctx->profile = FF_PROFILE_HEVC_MAIN;
+        break;
+    case NV_ENC_HEVC_PROFILE_MAIN_10:
+        cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
+        avctx->profile = FF_PROFILE_HEVC_MAIN_10;
+        break;
+    }
+
+    // force setting profile as main10 if input is AV_PIX_FMT_YUVXXXP10LE
+    if (ctx->data_pix_fmt == AV_PIX_FMT_YUV420P10LE || ctx->data_pix_fmt == AV_PIX_FMT_YUV444P10LE) {
+        cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
+        avctx->profile = FF_PROFILE_HEVC_MAIN_10;
+    }
+
+    hevc->chromaFormatIDC = ctx->data_pix_fmt == AV_PIX_FMT_YUV444P || ctx->data_pix_fmt == AV_PIX_FMT_YUV444P10LE ? 3 : 1;
+
+    hevc->pixelBitDepthMinus8 = ctx->data_pix_fmt == AV_PIX_FMT_YUV420P10LE || ctx->data_pix_fmt == AV_PIX_FMT_YUV444P10LE ? 2 : 0;

    hevc->level = ctx->level;

@@ -954,6 +990,10 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
        ctx->surfaces[idx].format = NV_ENC_BUFFER_FORMAT_YV12_PL;
        break;

+    case AV_PIX_FMT_YUV420P10LE:
+        ctx->surfaces[idx].format = NV_ENC_BUFFER_FORMAT_YUV420_10BIT;
+        break;
+
    case AV_PIX_FMT_NV12:
        ctx->surfaces[idx].format = NV_ENC_BUFFER_FORMAT_NV12_PL;
        break;
@@ -962,6 +1002,10 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
        ctx->surfaces[idx].format = NV_ENC_BUFFER_FORMAT_YUV444_PL;
        break;

+    case AV_PIX_FMT_YUV444P10LE:
+        ctx->surfaces[idx].format = NV_ENC_BUFFER_FORMAT_YUV444_10BIT;
+        break;
+
    default:
        av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format\n");
        return AVERROR(EINVAL);
@@ -1206,6 +1250,49 @@ static NvencSurface *get_free_frame(NvencContext *ctx)
    return NULL;
}

+static void copy_single_10bit_plane(uint8_t *dst, int dst_linesize,
+                                    const uint8_t *src, int src_linesize,
+                                    int width, int height)
+{
+    if (!dst || !src)
+        return;
+    av_assert0(abs(src_linesize) >= width << 1);
+    av_assert0(abs(dst_linesize) >= width << 1);
+    for (;height > 0; height--) {
+        uint16_t* tdst = (uint16_t*)dst;
+        uint16_t* tsrc = (uint16_t*)src;
+        for (int w = width; w > 0; w--) {
+            *tdst++ = *tsrc++ << 6;
+        }
+        dst += dst_linesize;
+        src += src_linesize;
+    }
+}
+
+static void interleave_10bit_planes(uint8_t *dst, int dst_linesize,
+                                    const uint8_t *src1, int src1_linesize,
+                                    const uint8_t *src2, int src2_linesize,
+                                    int width, int height)
+{
+    if (!dst || !src1 || !src2)
+        return;
+    av_assert0(abs(src1_linesize) >= width);
+    av_assert0(abs(src2_linesize) >= width);
+    av_assert0(abs(dst_linesize) >= width << 1);
+    for (;height > 0; height--) {
+        uint16_t* tdst = (uint16_t*)dst;
+        uint16_t* tsrc1 = (uint16_t*)src1;
+        uint16_t* tsrc2 = (uint16_t*)src2;
+        for (int w = width; w > 0; w-=2) {
+            *tdst++ = *tsrc1++ << 6;
+            *tdst++ = *tsrc2++ << 6;
+        }
+        dst += dst_linesize;
+        src1 += src1_linesize;
+        src2 += src2_linesize;
+    }
+}
+
static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *inSurf,
            NV_ENC_LOCK_INPUT_BUFFER *lockBufferParams, const AVFrame *frame)
{
@@ -1228,6 +1315,17 @@ static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *inSurf,
        av_image_copy_plane(buf, lockBufferParams->pitch >> 1,
            frame->data[1], frame->linesize[1],
            avctx->width >> 1, avctx->height >> 1);
+    } else if (frame->format == AV_PIX_FMT_YUV420P10LE) {
+        copy_single_10bit_plane(buf, lockBufferParams->pitch,
+            frame->data[0], frame->linesize[0],
+            avctx->width, avctx->height);
+
+        buf += off;
+
+        interleave_10bit_planes(buf, lockBufferParams->pitch,
+            frame->data[1], frame->linesize[1],
+            frame->data[2], frame->linesize[2],
+            avctx->width, avctx->height >> 1);
    } else if (frame->format == AV_PIX_FMT_NV12) {
        av_image_copy_plane(buf, lockBufferParams->pitch,
            frame->data[0], frame->linesize[0],
@@ -1254,6 +1352,22 @@ static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *inSurf,
        av_image_copy_plane(buf, lockBufferParams->pitch,
            frame->data[2], frame->linesize[2],
            avctx->width, avctx->height);
+    } else if (frame->format == AV_PIX_FMT_YUV444P10LE) {
+        copy_single_10bit_plane(buf, lockBufferParams->pitch,
+            frame->data[0], frame->linesize[0],
+            avctx->width, avctx->height);
+
+        buf += off;
+
+        copy_single_10bit_plane(buf, lockBufferParams->pitch,
+            frame->data[1], frame->linesize[1],
+            avctx->width, avctx->height);
+
+        buf += off;
+
+        copy_single_10bit_plane(buf, lockBufferParams->pitch,
+            frame->data[2], frame->linesize[2],
+            avctx->width, avctx->height);
    } else {
        av_log(avctx, AV_LOG_FATAL, "Invalid pixel format!\n");
        return AVERROR(EINVAL);
diff --git a/libavcodec/nvenc.h b/libavcodec/nvenc.h
index 961cbc7..9366a26 100644
--- a/libavcodec/nvenc.h
+++ b/libavcodec/nvenc.h
@@ -117,6 +117,11 @@ enum {
};

enum {
+    NV_ENC_HEVC_PROFILE_MAIN,
+    NV_ENC_HEVC_PROFILE_MAIN_10,
+};
+
+enum {
    NVENC_LOWLATENCY = 1,
    NVENC_LOSSLESS   = 2,
    NVENC_ONE_PASS   = 4,
@@ -174,6 +179,7 @@ typedef struct NvencContext
    int device;
    int flags;
    int async_depth;
+    int rc_lookahead;
} NvencContext;

int ff_nvenc_encode_init(AVCodecContext *avctx);
diff --git a/libavcodec/nvenc_hevc.c b/libavcodec/nvenc_hevc.c
index 1ce7c89..04e351a 100644
--- a/libavcodec/nvenc_hevc.c
+++ b/libavcodec/nvenc_hevc.c
@@ -39,8 +39,9 @@ static const AVOption options[] = {
    { "llhp",       "low latency hp",                     0,                   AV_OPT_TYPE_CONST,  { .i64 = PRESET_LOW_LATENCY_HP }, 0, 0, VE, "preset" },
    { "lossless",   "lossless",                           0,                   AV_OPT_TYPE_CONST,  { .i64 = PRESET_LOSSLESS_DEFAULT }, 0, 0, VE, "preset" },
    { "losslesshp", "lossless hp",                        0,                   AV_OPT_TYPE_CONST,  { .i64 = PRESET_LOSSLESS_HP }, 0, 0, VE, "preset" },
-    { "profile", "Set the encoding profile",             OFFSET(profile),      AV_OPT_TYPE_INT,    { .i64 = FF_PROFILE_HEVC_MAIN }, FF_PROFILE_HEVC_MAIN, FF_PROFILE_HEVC_MAIN, VE, "profile" },
-    { "main",    "",                                     0,                    AV_OPT_TYPE_CONST,  { .i64 = FF_PROFILE_HEVC_MAIN }, 0, 0, VE, "profile" },
+    { "profile", "Set the encoding profile",             OFFSET(profile),      AV_OPT_TYPE_INT,    { .i64 = NV_ENC_HEVC_PROFILE_MAIN }, NV_ENC_HEVC_PROFILE_MAIN, FF_PROFILE_HEVC_MAIN_10, VE, "profile" },
+    { "main",    "",                                     0,                    AV_OPT_TYPE_CONST,  { .i64 = NV_ENC_HEVC_PROFILE_MAIN }, 0, 0, VE, "profile" },
+    { "main10",  "",                                     0,                    AV_OPT_TYPE_CONST,  { .i64 = NV_ENC_HEVC_PROFILE_MAIN_10 }, 0, 0, VE, "profile" },
    { "level",   "Set the encoding level restriction",   OFFSET(level),        AV_OPT_TYPE_INT,    { .i64 = NV_ENC_LEVEL_AUTOSELECT }, NV_ENC_LEVEL_AUTOSELECT, NV_ENC_LEVEL_HEVC_62, VE, "level" },
    { "auto",    "",                                     0,                    AV_OPT_TYPE_CONST,  { .i64 = NV_ENC_LEVEL_AUTOSELECT },  0, 0, VE,  "level" },
    { "1",       "",                                     0,                    AV_OPT_TYPE_CONST,  { .i64 = NV_ENC_LEVEL_HEVC_1 },  0, 0, VE,  "level" },
@@ -73,6 +74,7 @@ static const AVOption options[] = {
    { "ll_2pass_quality", "Multi-pass optimized for image quality (only for low-latency presets)",       0, AV_OPT_TYPE_CONST,  { .i64 = NV_ENC_PARAMS_RC_2_PASS_QUALITY },       0, 0, VE, "rc" },
    { "ll_2pass_size",    "Multi-pass optimized for constant frame size (only for low-latency presets)", 0, AV_OPT_TYPE_CONST,  { .i64 = NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP }, 0, 0, VE, "rc" },
    { "vbr_2pass",        "Multi-pass variable bitrate mode",                                            0, AV_OPT_TYPE_CONST,  { .i64 = NV_ENC_PARAMS_RC_2_PASS_VBR },           0, 0, VE, "rc" },
+    { "rc-lookahead",  "Number of frames to look ahead for rate-control", OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, VE },
    { "surfaces", "Number of concurrent surfaces",        OFFSET(nb_surfaces), AV_OPT_TYPE_INT,    { .i64 = 32 },                   0, INT_MAX, VE },
    { "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
    { "2pass", "Use 2pass encoding mode", OFFSET(twopass), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VE },
-- 
2.7.4