[FFmpeg-devel] [PATCH] Nvidia NVENC 10-bit HEVC encoding and rate control lookahead support
Oliver Collyer
ovcollyer at mac.com
Tue Aug 23 20:10:21 EEST 2016
Hi all
Attached is a patch for the above.
10-bit HEVC encoding is a new feature of the latest Pascal Nvidia GPUs, released in the past few months; I’ve added support for the yuv420p10le and yuv444p10le pixel formats.
Rate control lookahead is available on pre-Pascal models too, but only with the latest SDK and the latest drivers.
As part of this I’ve bumped the required SDK version to the latest, which is 7.
Feedback welcome. This is only my second patch; I seem to average about one a year :)
Regards
Oliver
---
configure | 4 +-
libavcodec/nvenc.c | 120 ++++++++++++++++++++++++++++++++++++++++++++++--
libavcodec/nvenc.h | 6 +++
libavcodec/nvenc_hevc.c | 6 ++-
4 files changed, 129 insertions(+), 7 deletions(-)
diff --git a/configure b/configure
index 9b92426..46ff144 100755
--- a/configure
+++ b/configure
@@ -5774,8 +5774,8 @@ enabled mmal && check_func_headers interface/mmal/mmal.h "MMAL_PARAMETER_VIDEO_M
enabled netcdf && require_pkg_config netcdf netcdf.h nc_inq_libvers
enabled nvenc && { check_header nvEncodeAPI.h || die "ERROR: nvEncodeAPI.h not found."; } &&
- { check_cpp_condition nvEncodeAPI.h "NVENCAPI_MAJOR_VERSION >= 6" ||
- die "ERROR: NVENC API version 5 or older is not supported"; } &&
+ { check_cpp_condition nvEncodeAPI.h "NVENCAPI_MAJOR_VERSION >= 7" ||
+ die "ERROR: NVENC API version 6 or older is not supported"; } &&
{ [ $target_os != cygwin ] || die "ERROR: NVENC is not supported on Cygwin currently."; }
enabled openal && { { for al_libs in "${OPENAL_LIBS}" "-lopenal" "-lOpenAL32"; do
check_lib 'AL/al.h' alGetError "${al_libs}" && break; done } ||
diff --git a/libavcodec/nvenc.c b/libavcodec/nvenc.c
index 984dd3b..685dd7d 100644
--- a/libavcodec/nvenc.c
+++ b/libavcodec/nvenc.c
@@ -75,8 +75,10 @@
const enum AVPixelFormat ff_nvenc_pix_fmts[] = {
AV_PIX_FMT_YUV420P,
+ AV_PIX_FMT_YUV420P10LE,
AV_PIX_FMT_NV12,
AV_PIX_FMT_YUV444P,
+ AV_PIX_FMT_YUV444P10LE,
#if CONFIG_CUDA
AV_PIX_FMT_CUDA,
#endif
@@ -314,6 +316,18 @@ static int nvenc_check_capabilities(AVCodecContext *avctx)
return AVERROR(ENOSYS);
}
+ ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_10BIT_ENCODE);
+ if ((ctx->data_pix_fmt == AV_PIX_FMT_YUV420P10LE || ctx->data_pix_fmt == AV_PIX_FMT_YUV444P10LE) && ret <= 0) {
+ av_log(avctx, AV_LOG_VERBOSE, "10 bit encode not supported\n");
+ return AVERROR(ENOSYS);
+ }
+
+ ret = nvenc_check_cap(avctx, NV_ENC_CAPS_SUPPORT_LOOKAHEAD);
+ if (ctx->rc_lookahead > 0 && ret <= 0) {
+ av_log(avctx, AV_LOG_VERBOSE, "RC lookahead not supported\n");
+ return AVERROR(ENOSYS);
+ }
+
return 0;
}
@@ -673,6 +687,11 @@ static av_cold void nvenc_setup_rate_control(AVCodecContext *avctx)
} else if (ctx->encode_config.rcParams.averageBitRate > 0) {
ctx->encode_config.rcParams.vbvBufferSize = 2 * ctx->encode_config.rcParams.averageBitRate;
}
+
+ if (ctx->rc_lookahead > 0) {
+ ctx->encode_config.rcParams.enableLookahead = 1;
+ ctx->encode_config.rcParams.lookaheadDepth = FFMIN(ctx->rc_lookahead, 32);
+ }
}
static av_cold int nvenc_setup_h264_config(AVCodecContext *avctx)
@@ -800,9 +819,26 @@ static av_cold int nvenc_setup_hevc_config(AVCodecContext *avctx)
hevc->outputPictureTimingSEI = 1;
}
- /* No other profile is supported in the current SDK version 5 */
- cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
- avctx->profile = FF_PROFILE_HEVC_MAIN;
+ switch(ctx->profile) {
+ case NV_ENC_HEVC_PROFILE_MAIN:
+ cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
+ avctx->profile = FF_PROFILE_HEVC_MAIN;
+ break;
+ case NV_ENC_HEVC_PROFILE_MAIN_10:
+ cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
+ avctx->profile = FF_PROFILE_HEVC_MAIN_10;
+ break;
+ }
+
+ // force setting profile as main10 if input is AV_PIX_FMT_YUVXXXP10LE
+ if (ctx->data_pix_fmt == AV_PIX_FMT_YUV420P10LE || ctx->data_pix_fmt == AV_PIX_FMT_YUV444P10LE) {
+ cc->profileGUID = NV_ENC_HEVC_PROFILE_MAIN10_GUID;
+ avctx->profile = FF_PROFILE_HEVC_MAIN_10;
+ }
+
+ hevc->chromaFormatIDC = ctx->data_pix_fmt == AV_PIX_FMT_YUV444P || ctx->data_pix_fmt == AV_PIX_FMT_YUV444P10LE ? 3 : 1;
+
+ hevc->pixelBitDepthMinus8 = ctx->data_pix_fmt == AV_PIX_FMT_YUV420P10LE || ctx->data_pix_fmt == AV_PIX_FMT_YUV444P10LE ? 2 : 0;
hevc->level = ctx->level;
@@ -954,6 +990,10 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
ctx->surfaces[idx].format = NV_ENC_BUFFER_FORMAT_YV12_PL;
break;
+ case AV_PIX_FMT_YUV420P10LE:
+ ctx->surfaces[idx].format = NV_ENC_BUFFER_FORMAT_YUV420_10BIT;
+ break;
+
case AV_PIX_FMT_NV12:
ctx->surfaces[idx].format = NV_ENC_BUFFER_FORMAT_NV12_PL;
break;
@@ -962,6 +1002,10 @@ static av_cold int nvenc_alloc_surface(AVCodecContext *avctx, int idx)
ctx->surfaces[idx].format = NV_ENC_BUFFER_FORMAT_YUV444_PL;
break;
+ case AV_PIX_FMT_YUV444P10LE:
+ ctx->surfaces[idx].format = NV_ENC_BUFFER_FORMAT_YUV444_10BIT;
+ break;
+
default:
av_log(avctx, AV_LOG_FATAL, "Invalid input pixel format\n");
return AVERROR(EINVAL);
@@ -1206,6 +1250,49 @@ static NvencSurface *get_free_frame(NvencContext *ctx)
return NULL;
}
+static void copy_single_10bit_plane(uint8_t *dst, int dst_linesize,
+ const uint8_t *src, int src_linesize,
+ int width, int height)
+{
+ if (!dst || !src)
+ return;
+ av_assert0(abs(src_linesize) >= width << 1);
+ av_assert0(abs(dst_linesize) >= width << 1);
+ for (;height > 0; height--) {
+ uint16_t* tdst = (uint16_t*)dst;
+ uint16_t* tsrc = (uint16_t*)src;
+ for (int w = width; w > 0; w--) {
+ *tdst++ = *tsrc++ << 6;
+ }
+ dst += dst_linesize;
+ src += src_linesize;
+ }
+}
+
+static void interleave_10bit_planes(uint8_t *dst, int dst_linesize,
+ const uint8_t *src1, int src1_linesize,
+ const uint8_t *src2, int src2_linesize,
+ int width, int height)
+{
+ if (!dst || !src1 || !src2)
+ return;
+ av_assert0(abs(src1_linesize) >= width);
+ av_assert0(abs(src2_linesize) >= width);
+ av_assert0(abs(dst_linesize) >= width << 1);
+ for (;height > 0; height--) {
+ uint16_t* tdst = (uint16_t*)dst;
+ uint16_t* tsrc1 = (uint16_t*)src1;
+ uint16_t* tsrc2 = (uint16_t*)src2;
+ for (int w = width; w > 0; w-=2) {
+ *tdst++ = *tsrc1++ << 6;
+ *tdst++ = *tsrc2++ << 6;
+ }
+ dst += dst_linesize;
+ src1 += src1_linesize;
+ src2 += src2_linesize;
+ }
+}
+
static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *inSurf,
NV_ENC_LOCK_INPUT_BUFFER *lockBufferParams, const AVFrame *frame)
{
@@ -1228,6 +1315,17 @@ static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *inSurf,
av_image_copy_plane(buf, lockBufferParams->pitch >> 1,
frame->data[1], frame->linesize[1],
avctx->width >> 1, avctx->height >> 1);
+ } else if (frame->format == AV_PIX_FMT_YUV420P10LE) {
+ copy_single_10bit_plane(buf, lockBufferParams->pitch,
+ frame->data[0], frame->linesize[0],
+ avctx->width, avctx->height);
+
+ buf += off;
+
+ interleave_10bit_planes(buf, lockBufferParams->pitch,
+ frame->data[1], frame->linesize[1],
+ frame->data[2], frame->linesize[2],
+ avctx->width, avctx->height >> 1);
} else if (frame->format == AV_PIX_FMT_NV12) {
av_image_copy_plane(buf, lockBufferParams->pitch,
frame->data[0], frame->linesize[0],
@@ -1254,6 +1352,22 @@ static int nvenc_copy_frame(AVCodecContext *avctx, NvencSurface *inSurf,
av_image_copy_plane(buf, lockBufferParams->pitch,
frame->data[2], frame->linesize[2],
avctx->width, avctx->height);
+ } else if (frame->format == AV_PIX_FMT_YUV444P10LE) {
+ copy_single_10bit_plane(buf, lockBufferParams->pitch,
+ frame->data[0], frame->linesize[0],
+ avctx->width, avctx->height);
+
+ buf += off;
+
+ copy_single_10bit_plane(buf, lockBufferParams->pitch,
+ frame->data[1], frame->linesize[1],
+ avctx->width, avctx->height);
+
+ buf += off;
+
+ copy_single_10bit_plane(buf, lockBufferParams->pitch,
+ frame->data[2], frame->linesize[2],
+ avctx->width, avctx->height);
} else {
av_log(avctx, AV_LOG_FATAL, "Invalid pixel format!\n");
return AVERROR(EINVAL);
diff --git a/libavcodec/nvenc.h b/libavcodec/nvenc.h
index 961cbc7..9366a26 100644
--- a/libavcodec/nvenc.h
+++ b/libavcodec/nvenc.h
@@ -117,6 +117,11 @@ enum {
};
enum {
+ NV_ENC_HEVC_PROFILE_MAIN,
+ NV_ENC_HEVC_PROFILE_MAIN_10,
+};
+
+enum {
NVENC_LOWLATENCY = 1,
NVENC_LOSSLESS = 2,
NVENC_ONE_PASS = 4,
@@ -174,6 +179,7 @@ typedef struct NvencContext
int device;
int flags;
int async_depth;
+ int rc_lookahead;
} NvencContext;
int ff_nvenc_encode_init(AVCodecContext *avctx);
diff --git a/libavcodec/nvenc_hevc.c b/libavcodec/nvenc_hevc.c
index 1ce7c89..04e351a 100644
--- a/libavcodec/nvenc_hevc.c
+++ b/libavcodec/nvenc_hevc.c
@@ -39,8 +39,9 @@ static const AVOption options[] = {
{ "llhp", "low latency hp", 0, AV_OPT_TYPE_CONST, { .i64 = PRESET_LOW_LATENCY_HP }, 0, 0, VE, "preset" },
{ "lossless", "lossless", 0, AV_OPT_TYPE_CONST, { .i64 = PRESET_LOSSLESS_DEFAULT }, 0, 0, VE, "preset" },
{ "losslesshp", "lossless hp", 0, AV_OPT_TYPE_CONST, { .i64 = PRESET_LOSSLESS_HP }, 0, 0, VE, "preset" },
- { "profile", "Set the encoding profile", OFFSET(profile), AV_OPT_TYPE_INT, { .i64 = FF_PROFILE_HEVC_MAIN }, FF_PROFILE_HEVC_MAIN, FF_PROFILE_HEVC_MAIN, VE, "profile" },
- { "main", "", 0, AV_OPT_TYPE_CONST, { .i64 = FF_PROFILE_HEVC_MAIN }, 0, 0, VE, "profile" },
+ { "profile", "Set the encoding profile", OFFSET(profile), AV_OPT_TYPE_INT, { .i64 = NV_ENC_HEVC_PROFILE_MAIN }, NV_ENC_HEVC_PROFILE_MAIN, FF_PROFILE_HEVC_MAIN_10, VE, "profile" },
+ { "main", "", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_HEVC_PROFILE_MAIN }, 0, 0, VE, "profile" },
+ { "main10", "", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_HEVC_PROFILE_MAIN_10 }, 0, 0, VE, "profile" },
{ "level", "Set the encoding level restriction", OFFSET(level), AV_OPT_TYPE_INT, { .i64 = NV_ENC_LEVEL_AUTOSELECT }, NV_ENC_LEVEL_AUTOSELECT, NV_ENC_LEVEL_HEVC_62, VE, "level" },
{ "auto", "", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_LEVEL_AUTOSELECT }, 0, 0, VE, "level" },
{ "1", "", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_LEVEL_HEVC_1 }, 0, 0, VE, "level" },
@@ -73,6 +74,7 @@ static const AVOption options[] = {
{ "ll_2pass_quality", "Multi-pass optimized for image quality (only for low-latency presets)", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_QUALITY }, 0, 0, VE, "rc" },
{ "ll_2pass_size", "Multi-pass optimized for constant frame size (only for low-latency presets)", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_FRAMESIZE_CAP }, 0, 0, VE, "rc" },
{ "vbr_2pass", "Multi-pass variable bitrate mode", 0, AV_OPT_TYPE_CONST, { .i64 = NV_ENC_PARAMS_RC_2_PASS_VBR }, 0, 0, VE, "rc" },
+ { "rc-lookahead", "Number of frames to look ahead for rate-control", OFFSET(rc_lookahead), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, INT_MAX, VE },
{ "surfaces", "Number of concurrent surfaces", OFFSET(nb_surfaces), AV_OPT_TYPE_INT, { .i64 = 32 }, 0, INT_MAX, VE },
{ "cbr", "Use cbr encoding mode", OFFSET(cbr), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
{ "2pass", "Use 2pass encoding mode", OFFSET(twopass), AV_OPT_TYPE_BOOL, { .i64 = -1 }, -1, 1, VE },
--
2.7.4
More information about the ffmpeg-devel
mailing list