[FFmpeg-devel] [PATCH] Support HDR dynamic metadata (HDR10+) in HEVC decoder.
James Almer
jamrial at gmail.com
Wed Dec 26 22:11:04 EET 2018
On 12/26/2018 4:40 PM, Mohammad Izadi wrote:
> Decode HDR10+ metadata from SEI message and propagate it to side data.
> ---
> libavcodec/avcodec.h | 10 +-
> libavcodec/avpacket.c | 1 +
> libavcodec/decode.c | 2 +-
> libavcodec/hevc_sei.c | 234 ++++++++++++++++++++++++++++++++++++++++--
> libavcodec/hevc_sei.h | 7 ++
> libavcodec/hevcdec.c | 79 ++++++++++++++
> 6 files changed, 322 insertions(+), 11 deletions(-)
>
> diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
> index fd7f60bf4a..044aa447ab 100644
> --- a/libavcodec/avcodec.h
> +++ b/libavcodec/avcodec.h
> @@ -1328,7 +1328,7 @@ enum AVPacketSideDataType {
> AV_PKT_DATA_METADATA_UPDATE,
>
> /**
> - * MPEGTS stream ID as uint8_t, this is required to pass the stream ID
> + * MPEGTS stream ID, this is required to pass the stream ID
This looks like an unrelated change.
> * information from the demuxer to the corresponding muxer.
> */
> AV_PKT_DATA_MPEGTS_STREAM_ID,
> @@ -1360,6 +1360,14 @@ enum AVPacketSideDataType {
> */
> AV_PKT_DATA_A53_CC,
>
> + /**
> + * HDR10+ dynamic metadata associated with a video frame. The metadata is in
> + * the form of the AVDynamicHDRPlus struct and contains
> + * information for color volume transform - application 4 of
> + * SPMTE 2094-40:2016 standard.
> + */
> + AV_PKT_DATA_HDR_DYNAMIC_HDR_PLUS,
Adding this value should be its own commit, with a minor avcodec version
bump.
> +
> /**
> * This side data is encryption initialization data.
> * The format is not part of ABI, use av_encryption_init_info_* methods to
> diff --git a/libavcodec/avpacket.c b/libavcodec/avpacket.c
> index e160ad3033..137a0489d4 100644
> --- a/libavcodec/avpacket.c
> +++ b/libavcodec/avpacket.c
> @@ -391,6 +391,7 @@ const char *av_packet_side_data_name(enum AVPacketSideDataType type)
> case AV_PKT_DATA_CONTENT_LIGHT_LEVEL: return "Content light level metadata";
> case AV_PKT_DATA_SPHERICAL: return "Spherical Mapping";
> case AV_PKT_DATA_A53_CC: return "A53 Closed Captions";
> + case AV_PKT_DATA_HDR_DYNAMIC_HDR_PLUS: return "HDR10+ Dynamic Metadata (SMPTE 2094-40)";
Vertical alignment.
> case AV_PKT_DATA_ENCRYPTION_INIT_INFO: return "Encryption initialization data";
> case AV_PKT_DATA_ENCRYPTION_INFO: return "Encryption info";
> case AV_PKT_DATA_AFD: return "Active Format Description data";
> diff --git a/libavcodec/decode.c b/libavcodec/decode.c
> index a32ff2fcd3..a2d6ec4f18 100644
> --- a/libavcodec/decode.c
> +++ b/libavcodec/decode.c
> @@ -1378,7 +1378,6 @@ int ff_get_format(AVCodecContext *avctx, const enum AVPixelFormat *fmt)
> if (i == n) {
> av_log(avctx, AV_LOG_ERROR, "Invalid return from get_format(): "
> "%s not in possible list.\n", desc->name);
> - ret = AV_PIX_FMT_NONE;
Also unrelated.
> break;
> }
>
> @@ -1706,6 +1705,7 @@ int ff_decode_frame_props(AVCodecContext *avctx, AVFrame *frame)
> { AV_PKT_DATA_MASTERING_DISPLAY_METADATA, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA },
> { AV_PKT_DATA_CONTENT_LIGHT_LEVEL, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL },
> { AV_PKT_DATA_A53_CC, AV_FRAME_DATA_A53_CC },
> + { AV_PKT_DATA_HDR_DYNAMIC_HDR_PLUS, AV_FRAME_DATA_DYNAMIC_HDR_PLUS },
Vertical alignment again.
> };
>
> if (pkt) {
> diff --git a/libavcodec/hevc_sei.c b/libavcodec/hevc_sei.c
> index c59bd4321e..e261c038c3 100644
> --- a/libavcodec/hevc_sei.c
> +++ b/libavcodec/hevc_sei.c
> @@ -206,10 +206,209 @@ static int decode_registered_user_data_closed_caption(HEVCSEIA53Caption *s, GetB
> return 0;
> }
>
> -static int decode_nal_sei_user_data_registered_itu_t_t35(HEVCSEI *s, GetBitContext *gb,
> +static int decode_registered_user_data_dynamic_hdr_plus(
> + HEVCSEIDynamicHDRPlus *s, GetBitContext *gb,
> + void *logctx, int size)
Same (vertical alignment).
> +{
> + const int luminance_den = 10000;
> + const int peak_luminance_den = 15;
> + const int rgb_den = 100000;
> + const int fraction_pixel_den = 1000;
> + const int knee_point_den = 4095;
> + const int bezier_anchor_den = 1023;
> + const int saturation_weight_den = 8;
> +
> + AVDynamicHDRPlus* info = s->info;
> + int bits_left = size * 8;
> + int w, i, j;
> +
> + if (bits_left < 2)
> + return AVERROR(EINVAL);
AVERROR_INVALIDDATA. We use EINVAL only for invalid arguments and not
for invalid bitstream data. Same for other cases below.
> +
> + info->num_windows = get_bits(gb, 2);
> + bits_left -= 2;
> + if (info->num_windows < 1 || info->num_windows > 3) {
> + av_log(logctx, AV_LOG_ERROR, "num_windows=%d, must be in [1, 3]\n",
> + info->num_windows);
> + return AVERROR_INVALIDDATA;
> + }
> +
> + if (bits_left < ((19 * 8 + 1) * (info->num_windows - 1)))
> + return AVERROR(EINVAL);
> + for (w = 1; w < info->num_windows; w++) {
> + info->params[w].window_upper_left_corner_x.num = get_bits(gb, 16);
> + info->params[w].window_upper_left_corner_y.num = get_bits(gb, 16);
> + info->params[w].window_lower_right_corner_x.num = get_bits(gb, 16);
> + info->params[w].window_lower_right_corner_y.num = get_bits(gb, 16);
> + // The corners are set to absolute coordinates here. They should be
> + // converted to the relative coordinates (in [0, 1]) in the decoder.
> + info->params[w].window_upper_left_corner_x.den = 1;
> + info->params[w].window_upper_left_corner_y.den = 1;
> + info->params[w].window_lower_right_corner_x.den = 1;
> + info->params[w].window_lower_right_corner_y.den = 1;
> +
> + info->params[w].center_of_ellipse_x = get_bits(gb, 16);
> + info->params[w].center_of_ellipse_y = get_bits(gb, 16);
> + info->params[w].rotation_angle = get_bits(gb, 8);
> + info->params[w].semimajor_axis_internal_ellipse = get_bits(gb, 16);
> + info->params[w].semimajor_axis_external_ellipse = get_bits(gb, 16);
> + info->params[w].semiminor_axis_external_ellipse = get_bits(gb, 16);
> + info->params[w].overlap_process_option = get_bits(gb, 1);
> + bits_left -= 19 * 8 + 1;
> + }
> +
> + if (bits_left < 28)
> + return AVERROR(EINVAL);
> + info->targeted_system_display_maximum_luminance.num = get_bits(gb, 27);
> + info->targeted_system_display_maximum_luminance.den = luminance_den;
> + info->targeted_system_display_actual_peak_luminance_flag = get_bits(gb, 1);
> + bits_left -= 28;
> +
> + if (info->targeted_system_display_actual_peak_luminance_flag) {
> + int rows, cols;
> + if (bits_left < 10)
> + return AVERROR(EINVAL);
> + rows = get_bits(gb, 5);
> + cols = get_bits(gb, 5);
> + if (((rows < 2) && (rows > 25)) || ((cols < 2) && (cols > 25))) {
> + av_log(logctx, AV_LOG_ERROR, "num_rows=%d, num_cols=%d, they must "
> + "be in [2, 25] for "
> + "targeted_system_display_actual_peak_luminance\n",
> + rows, cols);
> + return AVERROR_INVALIDDATA;
> + }
> + info->num_rows_targeted_system_display_actual_peak_luminance = rows;
> + info->num_cols_targeted_system_display_actual_peak_luminance = cols;
> + bits_left -= 10;
> +
> + if (bits_left < (rows * cols * 4))
> + return AVERROR(EINVAL);
> +
> + for (i = 0; i < rows; i++) {
> + for (j = 0; j < cols; j++) {
> + info->targeted_system_display_actual_peak_luminance[i][j].num =
> + get_bits(gb, 4);
> + info->targeted_system_display_actual_peak_luminance[i][j].den =
> + peak_luminance_den;
> + }
> + }
> + bits_left -= (rows * cols * 4);
> + }
> + for (w = 0; w < info->num_windows; w++) {
> + if (bits_left < (3 * 17 + 17 + 4))
> + return AVERROR(EINVAL);
> + for (i = 0; i < 3; i++) {
> + info->params[w].maxscl[i].num = get_bits(gb, 17);
> + info->params[w].maxscl[i].den = rgb_den;
> + }
> + info->params[w].average_maxrgb.num = get_bits(gb, 17);
> + info->params[w].average_maxrgb.den = rgb_den;
> + info->params[w].num_distribution_maxrgb_percentiles = get_bits(gb, 4);
> + bits_left -= (3 * 17 + 17 + 4);
> +
> + if (bits_left <
> + (info->params[w].num_distribution_maxrgb_percentiles * 24))
> + return AVERROR(EINVAL);
> + for (i = 0; i < info->params[w].num_distribution_maxrgb_percentiles; i++) {
> + info->params[w].distribution_maxrgb[i].percentage = get_bits(gb, 7);
> + info->params[w].distribution_maxrgb[i].percentile.num =
> + get_bits(gb, 17);
> + info->params[w].distribution_maxrgb[i].percentile.den = rgb_den;
> + }
> + bits_left -= (info->params[w].num_distribution_maxrgb_percentiles * 24);
> +
> + if (bits_left < 10)
> + return AVERROR(EINVAL);
> + info->params[w].fraction_bright_pixels.num = get_bits(gb, 10);
> + info->params[w].fraction_bright_pixels.den = fraction_pixel_den;
> + bits_left -= 10;
> + }
> + if (bits_left < 1)
> + return AVERROR(EINVAL);
> + info->mastering_display_actual_peak_luminance_flag = get_bits(gb, 1);
> + bits_left--;
> + if (info->mastering_display_actual_peak_luminance_flag) {
> + int rows, cols;
> + if (bits_left < 10)
> + return AVERROR(EINVAL);
> + rows = get_bits(gb, 5);
> + cols = get_bits(gb, 5);
> + if (((rows < 2) && (rows > 25)) || ((cols < 2) && (cols > 25))) {
> + av_log(logctx, AV_LOG_ERROR, "num_rows=%d, num_cols=%d, they must "
> + "be in [2, 25] for "
> + "mastering_display_actual_peak_luminance\n",
> + rows, cols);
> + return AVERROR_INVALIDDATA;
> + }
> + info->num_rows_mastering_display_actual_peak_luminance = rows;
> + info->num_cols_mastering_display_actual_peak_luminance = cols;
> + bits_left -= 10;
> +
> + if (bits_left < (rows * cols * 4))
> + return AVERROR(EINVAL);
> +
> + for (i = 0; i < rows; i++) {
> + for (j = 0; j < cols; j++) {
> + info->mastering_display_actual_peak_luminance[i][j].num =
> + get_bits(gb, 4);
> + info->mastering_display_actual_peak_luminance[i][j].den =
> + peak_luminance_den;
> + }
> + }
> + bits_left -= (rows * cols * 4);
> + }
> +
> + for (w = 0; w < info->num_windows; w++) {
> + if (bits_left < 1)
> + return AVERROR(EINVAL);
> + info->params[w].tone_mapping_flag = get_bits(gb, 1);
> + bits_left--;
> + if (info->params[w].tone_mapping_flag) {
> + if (bits_left < 28)
> + return AVERROR(EINVAL);
> + info->params[w].knee_point_x.num = get_bits(gb, 12);
> + info->params[w].knee_point_x.den = knee_point_den;
> + info->params[w].knee_point_y.num = get_bits(gb, 12);
> + info->params[w].knee_point_y.den = knee_point_den;
> + info->params[w].num_bezier_curve_anchors = get_bits(gb, 4);
> + bits_left -= 28;
> +
> + if (bits_left < (info->params[w].num_bezier_curve_anchors * 10))
> + return AVERROR(EINVAL);
> + for (i = 0; i < info->params[w].num_bezier_curve_anchors; i++) {
> + info->params[w].bezier_curve_anchors[i].num = get_bits(gb, 10);
> + info->params[w].bezier_curve_anchors[i].den = bezier_anchor_den;
> + }
> + bits_left -= (info->params[w].num_bezier_curve_anchors * 10);
> + }
> +
> + if (bits_left < 1)
> + return AVERROR(EINVAL);
> + info->params[w].color_saturation_mapping_flag = get_bits(gb, 1);
> + bits_left--;
> + if (info->params[w].color_saturation_mapping_flag) {
> + if (bits_left < 6)
> + return AVERROR(EINVAL);
> + info->params[w].color_saturation_weight.num = get_bits(gb, 6);
> + info->params[w].color_saturation_weight.den = saturation_weight_den;
> + bits_left -= 6;
> + }
> + }
> +
> + s->present = 1;
> +
> + skip_bits(gb, bits_left);
> +
> + return 0;
> +}
> +
> +static int decode_nal_sei_user_data_registered_itu_t_t35(HEVCSEI *s,
> + GetBitContext *gb,
> + void *logctx,
> int size)
> {
> - uint32_t country_code;
> + uint8_t country_code;
> + uint16_t provider_code;
> uint32_t user_identifier;
>
> if (size < 7)
> @@ -222,14 +421,31 @@ static int decode_nal_sei_user_data_registered_itu_t_t35(HEVCSEI *s, GetBitConte
> size--;
> }
>
> - skip_bits(gb, 8);
> - skip_bits(gb, 8);
> -
> + provider_code = get_bits(gb, 16);
> user_identifier = get_bits_long(gb, 32);
>
> + // Check for dynamic metadata - HDR10+(SMPTE 2094-40).
> + if ((provider_code == 0x003C) &&
> + ((user_identifier & 0xFFFFFF00) == 0x00010400)) {
> + s->dynamic_hdr_plus.info =
> + av_dynamic_hdr_plus_alloc(NULL);
No, do as is done with Mastering Metadata and other SEI messages and store the
raw bitstream values directly in HEVCSEIDynamicHDRPlus. Then copy and
derive values as required for the AVDynamicHDRPlus struct in hevcdec.c.
> + if (!s->dynamic_hdr_plus.info) {
> + return AVERROR(ENOMEM);
> + }
> +
> + s->dynamic_hdr_plus.info->itu_t_t35_country_code =
> + country_code;
> + s->dynamic_hdr_plus.info->application_version =
> + (uint8_t)((user_identifier & 0x000000FF));
> +
> + return decode_registered_user_data_dynamic_hdr_plus(
> + &s->dynamic_hdr_plus, gb, logctx, size);
> + }
> +
> switch (user_identifier) {
> case MKBETAG('G', 'A', '9', '4'):
> - return decode_registered_user_data_closed_caption(&s->a53_caption, gb, size);
> + return decode_registered_user_data_closed_caption(&s->a53_caption, gb,
> + size);
Unrelated change.
> default:
> skip_bits_long(gb, size * 8);
> break;
> @@ -292,7 +508,7 @@ static int decode_nal_sei_prefix(GetBitContext *gb, void *logctx, HEVCSEI *s,
> case HEVC_SEI_TYPE_ACTIVE_PARAMETER_SETS:
> return decode_nal_sei_active_parameter_sets(s, gb, logctx);
> case HEVC_SEI_TYPE_USER_DATA_REGISTERED_ITU_T_T35:
> - return decode_nal_sei_user_data_registered_itu_t_t35(s, gb, size);
> + return decode_nal_sei_user_data_registered_itu_t_t35(s, gb, logctx, size);
> case HEVC_SEI_TYPE_ALTERNATIVE_TRANSFER_CHARACTERISTICS:
> return decode_nal_sei_alternative_transfer(&s->alternative_transfer, gb);
> default:
> @@ -302,8 +518,7 @@ static int decode_nal_sei_prefix(GetBitContext *gb, void *logctx, HEVCSEI *s,
> }
> }
>
> -static int decode_nal_sei_suffix(GetBitContext *gb, void *logctx, HEVCSEI *s,
> - int type, int size)
> +static int decode_nal_sei_suffix(GetBitContext *gb, void *logctx, HEVCSEI *s, int type, int size)
Same as above: unrelated change.
> {
> switch (type) {
> case HEVC_SEI_TYPE_DECODED_PICTURE_HASH:
> @@ -365,4 +580,5 @@ void ff_hevc_reset_sei(HEVCSEI *s)
> {
> s->a53_caption.a53_caption_size = 0;
> av_freep(&s->a53_caption.a53_caption);
> + av_freep(&s->dynamic_hdr_plus.info);
You can remove this with the changes I asked for above.
> }
> diff --git a/libavcodec/hevc_sei.h b/libavcodec/hevc_sei.h
> index 2fec00ace0..9098b96574 100644
> --- a/libavcodec/hevc_sei.h
> +++ b/libavcodec/hevc_sei.h
> @@ -23,6 +23,7 @@
>
> #include <stdint.h>
>
> +#include "libavutil/hdr_dynamic_metadata.h"
Same: this can be removed with the changes I asked for above.
> #include "get_bits.h"
>
> /**
> @@ -94,6 +95,11 @@ typedef struct HEVCSEIMasteringDisplay {
> uint32_t min_luminance;
> } HEVCSEIMasteringDisplay;
>
> +typedef struct HEVCSEIDynamicHDRPlus{
> + int present;
> + AVDynamicHDRPlus* info;
> +} HEVCSEIDynamicHDRPlus;
> +
> typedef struct HEVCSEIContentLight {
> int present;
> uint16_t max_content_light_level;
> @@ -109,6 +115,7 @@ typedef struct HEVCSEI {
> HEVCSEIPictureHash picture_hash;
> HEVCSEIFramePacking frame_packing;
> HEVCSEIDisplayOrientation display_orientation;
> + HEVCSEIDynamicHDRPlus dynamic_hdr_plus;
> HEVCSEIPictureTiming picture_timing;
> HEVCSEIA53Caption a53_caption;
> HEVCSEIMasteringDisplay mastering_display;
> diff --git a/libavcodec/hevcdec.c b/libavcodec/hevcdec.c
> index 10bf2563c0..70ac837a1b 100644
> --- a/libavcodec/hevcdec.c
> +++ b/libavcodec/hevcdec.c
> @@ -28,6 +28,7 @@
> #include "libavutil/display.h"
> #include "libavutil/internal.h"
> #include "libavutil/mastering_display_metadata.h"
> +#include "libavutil/hdr_dynamic_metadata.h"
> #include "libavutil/md5.h"
> #include "libavutil/opt.h"
> #include "libavutil/pixdesc.h"
> @@ -2769,6 +2770,84 @@ static int set_side_data(HEVCContext *s)
> s->avctx->color_trc = out->color_trc = s->sei.alternative_transfer.preferred_transfer_characteristics;
> }
>
> + if (s->sei.dynamic_hdr_plus.present &&
> + s->sei.dynamic_hdr_plus.info) {
> + int w, i;
> + AVDynamicHDRPlus *metadata =
> + av_dynamic_hdr_plus_create_side_data(out);
> + if (!metadata) return AVERROR(ENOMEM);
> +
> + memcpy(metadata, s->sei.dynamic_hdr_plus.info,
> + sizeof(AVDynamicHDRPlus));
sizeof(AVDynamicHDRPlus) is not part of the ABI.
This memcpy can be removed once you copy/derive all the values in the
struct from the raw bitstream values as I asked above.
> + av_freep(&s->sei.dynamic_hdr_plus.info);
> + // Convert coordinates to relative coordinate in [0, 1].
> + metadata->params[0].window_upper_left_corner_x.num = 0;
> + metadata->params[0].window_upper_left_corner_y.num = 0;
> + metadata->params[0].window_lower_right_corner_x.num = out->width-1;
> + metadata->params[0].window_lower_right_corner_y.num = out->height-1;
> + for (w = 0; w < metadata->num_windows; w++) {
> + metadata->params[w].window_upper_left_corner_x.den = out->width-1;
> + metadata->params[w].window_upper_left_corner_y.den = out->height-1;
> + metadata->params[w].window_lower_right_corner_x.den = out->width-1;
> + metadata->params[w].window_lower_right_corner_y.den = out->height-1;
> + }
> +
> +
> + av_log(s->avctx, AV_LOG_DEBUG, "HDR10+(SMPTE 2094-40):{\n");
> + av_log(s->avctx, AV_LOG_DEBUG,
> + "targeted_system_display_maximum_luminance=%5.4f\n"
> + "targeted_system_display_actual_peak_luminance_flag=%d\n"
> + "mastering_display_actual_peak_luminance_flag=%d\n",
> + av_q2d(metadata->targeted_system_display_maximum_luminance),
> + metadata->targeted_system_display_actual_peak_luminance_flag,
> + metadata->mastering_display_actual_peak_luminance_flag);
> +
> + for (w = 0; w < metadata->num_windows; w++) {
> + av_log(s->avctx, AV_LOG_DEBUG,
> + "window[%d]:{\nBox(%d,%d,%d,%d) "
> + "maxscl=RGB(%5.4f,%5.4f,%5.4f) average_maxrgb=%5.4f "
> + "fraction_bright_pixels=%5.4f ", w,
> + metadata->params[w].window_upper_left_corner_x.num,
> + metadata->params[w].window_upper_left_corner_y.num,
> + metadata->params[w].window_lower_right_corner_x.num,
> + metadata->params[w].window_lower_right_corner_y.num,
> + av_q2d(metadata->params[w].maxscl[0]),
> + av_q2d(metadata->params[w].maxscl[1]),
> + av_q2d(metadata->params[w].maxscl[2]),
> + av_q2d(metadata->params[w].average_maxrgb),
> + av_q2d(metadata->params[w].fraction_bright_pixels));
> + av_log(s->avctx, AV_LOG_DEBUG, "distribution_maxrgb[");
> + for (i = 0;
> + i < metadata->params[w].num_distribution_maxrgb_percentiles;
> + i++) {
> + av_log(s->avctx, AV_LOG_DEBUG, "(%d,%5.4f)",
> + metadata->params[w].distribution_maxrgb[i].percentage,
> + av_q2d(metadata->
> + params[w].distribution_maxrgb[i].percentile));
> + }
> + av_log(s->avctx, AV_LOG_DEBUG, "] ");
> + if (metadata->params[w].tone_mapping_flag) {
> + av_log(s->avctx, AV_LOG_DEBUG, "knee_point(%5.4f,%5.4f) ",
> + av_q2d(metadata->params[w].knee_point_x),
> + av_q2d(metadata->params[w].knee_point_y));
> + }
> + av_log(s->avctx, AV_LOG_DEBUG, "bezier_curve_anchors(");
> + for (i = 0; i < metadata->params[w].num_bezier_curve_anchors; i++) {
> + av_log(s->avctx, AV_LOG_DEBUG, "%5.4f ",
> + av_q2d(metadata->params[w].bezier_curve_anchors[i]));
> + }
> + av_log(s->avctx, AV_LOG_DEBUG, ")");
> + if (metadata->params[w].color_saturation_mapping_flag) {
> + av_log(s->avctx, AV_LOG_DEBUG,
> + " color_saturation_weight=%5.4f",
> + av_q2d(metadata->params[w].color_saturation_weight));
> + }
> + av_log(s->avctx, AV_LOG_DEBUG, "}\n");
> + }
> + av_log(s->avctx, AV_LOG_DEBUG,
> + "} End of HDR10+ (SMPTE 2094-40)\n");
> + }
> +
> return 0;
> }
>
>
More information about the ffmpeg-devel
mailing list