[FFmpeg-devel] [PATCH 09/10] lavc/hevc_ps: reduce the size of ShortTermRPS.used
James Almer
jamrial at gmail.com
Wed Apr 10 16:42:51 EEST 2024
On 4/10/2024 10:31 AM, Anton Khirnov wrote:
> It is currently an array of 32 uint8_t, each storing a single flag. A
> single uint32_t is sufficient.
>
> Reduces sizeof(HEVCSPS) by 1792 bytes.
> ---
> libavcodec/hevc_ps.c | 33 +++++++++++++++++++--------------
> libavcodec/hevc_ps.h | 2 +-
> libavcodec/hevc_refs.c | 6 +++---
> libavcodec/vulkan_hevc.c | 8 ++++----
> 4 files changed, 27 insertions(+), 22 deletions(-)
>
> diff --git a/libavcodec/hevc_ps.c b/libavcodec/hevc_ps.c
> index a6b0021bc3..76fe507e7b 100644
> --- a/libavcodec/hevc_ps.c
> +++ b/libavcodec/hevc_ps.c
> @@ -107,6 +107,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
> int k = 0;
> int i;
>
> + rps->used = 0;
> rps->rps_predict = 0;
>
> if (rps != sps->st_rps && sps->nb_st_rps)
> @@ -114,6 +115,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
>
> if (rps->rps_predict) {
> const ShortTermRPS *rps_ridx;
> + uint8_t used[32] = { 0 };
> int delta_rps;
>
> if (is_slice_header) {
> @@ -139,13 +141,13 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
> }
> delta_rps = (1 - (rps->delta_rps_sign << 1)) * rps->abs_delta_rps;
> for (i = 0; i <= rps_ridx->num_delta_pocs; i++) {
> - int used = rps->used[k] = get_bits1(gb);
> + used[k] = get_bits1(gb);
>
> rps->use_delta_flag = 0;
> - if (!used)
> + if (!used[k])
> rps->use_delta_flag = get_bits1(gb);
>
> - if (used || rps->use_delta_flag) {
> + if (used[k] || rps->use_delta_flag) {
> if (i < rps_ridx->num_delta_pocs)
> delta_poc = delta_rps + rps_ridx->delta_poc[i];
> else
> @@ -157,7 +159,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
> }
> }
>
> - if (k >= FF_ARRAY_ELEMS(rps->used)) {
> + if (k >= FF_ARRAY_ELEMS(used)) {
> av_log(avctx, AV_LOG_ERROR,
> "Invalid num_delta_pocs: %d\n", k);
> return AVERROR_INVALIDDATA;
> @@ -167,35 +169,38 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
> rps->num_negative_pics = k0;
> // sort in increasing order (smallest first)
> if (rps->num_delta_pocs != 0) {
> - int used, tmp;
> + int u, tmp;
> for (i = 1; i < rps->num_delta_pocs; i++) {
> delta_poc = rps->delta_poc[i];
> - used = rps->used[i];
> + u = used[i];
> for (k = i - 1; k >= 0; k--) {
> tmp = rps->delta_poc[k];
> if (delta_poc < tmp) {
> rps->delta_poc[k + 1] = tmp;
> - rps->used[k + 1] = rps->used[k];
> + used[k + 1] = used[k];
> rps->delta_poc[k] = delta_poc;
> - rps->used[k] = used;
> + used[k] = u;
> }
> }
> }
> }
> if ((rps->num_negative_pics >> 1) != 0) {
> - int used;
> + int u;
> k = rps->num_negative_pics - 1;
> // flip the negative values to largest first
> for (i = 0; i < rps->num_negative_pics >> 1; i++) {
> delta_poc = rps->delta_poc[i];
> - used = rps->used[i];
> + u = used[i];
> rps->delta_poc[i] = rps->delta_poc[k];
> - rps->used[i] = rps->used[k];
> + used[i] = used[k];
> rps->delta_poc[k] = delta_poc;
> - rps->used[k] = used;
> + used[k] = u;
> k--;
> }
> }
> +
> + for (unsigned i = 0; i < FF_ARRAY_ELEMS(used); i++)
> + rps->used |= used[i] * (1 << i);
> } else {
> unsigned int nb_positive_pics;
>
> @@ -222,7 +227,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
> }
> prev -= delta_poc;
> rps->delta_poc[i] = prev;
> - rps->used[i] = get_bits1(gb);
> + rps->used |= get_bits1(gb) * (1 << i);
> }
> prev = 0;
> for (i = 0; i < nb_positive_pics; i++) {
> @@ -235,7 +240,7 @@ int ff_hevc_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
> }
> prev += delta_poc;
> rps->delta_poc[rps->num_negative_pics + i] = prev;
> - rps->used[rps->num_negative_pics + i] = get_bits1(gb);
> + rps->used |= get_bits1(gb) * (1 << (rps->num_negative_pics + i));
> }
> }
> }
> diff --git a/libavcodec/hevc_ps.h b/libavcodec/hevc_ps.h
> index 6ef29a8ea7..92b85115f7 100644
> --- a/libavcodec/hevc_ps.h
> +++ b/libavcodec/hevc_ps.h
> @@ -79,7 +79,7 @@ typedef struct ShortTermRPS {
> int num_delta_pocs;
> int rps_idx_num_delta_pocs;
> int32_t delta_poc[32];
> - uint8_t used[32];
> + uint32_t used;
> } ShortTermRPS;
>
> typedef struct HEVCWindow {
> diff --git a/libavcodec/hevc_refs.c b/libavcodec/hevc_refs.c
> index aed649933d..19f3fa81da 100644
> --- a/libavcodec/hevc_refs.c
> +++ b/libavcodec/hevc_refs.c
> @@ -501,7 +501,7 @@ int ff_hevc_frame_rps(HEVCContext *s)
> int poc = s->poc + short_rps->delta_poc[i];
> int list;
>
> - if (!short_rps->used[i])
> + if (!(short_rps->used & (1 << i)))
> list = ST_FOLL;
> else if (i < short_rps->num_negative_pics)
> list = ST_CURR_BEF;
> @@ -540,9 +540,9 @@ int ff_hevc_frame_nb_refs(const HEVCContext *s)
>
> if (rps) {
> for (i = 0; i < rps->num_negative_pics; i++)
> - ret += !!rps->used[i];
> + ret += !!(rps->used & (1 << i));
> for (; i < rps->num_delta_pocs; i++)
> - ret += !!rps->used[i];
> + ret += !!(rps->used & (1 << i));
> }
>
> if (long_rps) {
> diff --git a/libavcodec/vulkan_hevc.c b/libavcodec/vulkan_hevc.c
> index 5d7c6b1b64..c2b65fc201 100644
> --- a/libavcodec/vulkan_hevc.c
> +++ b/libavcodec/vulkan_hevc.c
> @@ -374,17 +374,17 @@ static void set_sps(const HEVCSPS *sps, int sps_idx,
> /* NOTE: This is the predicted, and *reordered* version.
> * Probably incorrect, but the spec doesn't say which version to use. */
> for (int j = 0; j < sps->st_rps[i].num_delta_pocs; j++)
> - str[i].used_by_curr_pic_flag |= sps->st_rps[i].used[j] << j;
> + str[i].used_by_curr_pic_flag |= st_rps->used;
>
> for (int j = 0; j < str[i].num_negative_pics; j++) {
> - str[i].delta_poc_s0_minus1[j] = st_rps->delta_poc[j] - (j ? st_rps->delta_poc[j - 1] : 0) - 1;
> - str[i].used_by_curr_pic_s0_flag |= sps->st_rps[i].used[j] << j;
> + str[i].delta_poc_s0_minus1[j] = st_rps->delta_poc[j] - (j ? st_rps->delta_poc[j - 1] : 0) - 1;
> + str[i].used_by_curr_pic_s0_flag |= st_rps->used & ((1 << str[i].num_negative_pics) - 1);
The mask on the line above could be written as av_mod_uintp2(st_rps->used, str[i].num_negative_pics) instead of the open-coded ((1 << n) - 1).
> }
>
> for (int j = 0; j < str[i].num_positive_pics; j++) {
> str[i].delta_poc_s0_minus1[j] = st_rps->delta_poc[st_rps->num_negative_pics + j] -
> (j ? st_rps->delta_poc[st_rps->num_negative_pics + j - 1] : 0) - 1;
> - str[i].used_by_curr_pic_s0_flag |= sps->st_rps[i].used[str[i].num_negative_pics + j] << j;
> + str[i].used_by_curr_pic_s0_flag |= st_rps->used >> str[i].num_negative_pics;
> }
> }
>
More information about the ffmpeg-devel
mailing list