[FFmpeg-devel] [PATCH 13/23] lavc/hevc/hevcdec: implement MV-HEVC inter-layer prediction

Anton Khirnov anton at khirnov.net
Sat Sep 14 13:45:38 EEST 2024


The per-frame reference picture set contains two more lists -
INTER_LAYER[01]. Assuming at most two layers, INTER_LAYER1 is always
empty, but is added anyway for completeness.

When inter-layer prediction is enabled, INTER_LAYER0 for the
second-layer frame will contain the base-layer frame from the same
access unit, if it exists.

The new lists are then used in per-slice reference picture list
construction as per F.8.3.4 "Decoding process for reference picture
lists construction".
---
 libavcodec/hevc/hevcdec.c |  7 ++++-
 libavcodec/hevc/hevcdec.h |  8 ++++--
 libavcodec/hevc/refs.c    | 58 ++++++++++++++++++++++++++++++---------
 libavcodec/nvdec_hevc.c   |  2 +-
 libavcodec/vdpau_hevc.c   |  2 +-
 5 files changed, 59 insertions(+), 18 deletions(-)

diff --git a/libavcodec/hevc/hevcdec.c b/libavcodec/hevc/hevcdec.c
index d3a47b65f4..cbf763b8be 100644
--- a/libavcodec/hevc/hevcdec.c
+++ b/libavcodec/hevc/hevcdec.c
@@ -784,7 +784,7 @@ static int hls_slice_header(SliceHeader *sh, const HEVCContext *s, GetBitContext
 
             sh->rpl_modification_flag[0] = 0;
             sh->rpl_modification_flag[1] = 0;
-            nb_refs = ff_hevc_frame_nb_refs(sh, pps);
+            nb_refs = ff_hevc_frame_nb_refs(sh, pps, layer_idx);
             if (!nb_refs) {
                 av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
                 return AVERROR_INVALIDDATA;
@@ -3356,6 +3356,11 @@ static int decode_nal_units(HEVCContext *s, const uint8_t *buf, int length)
     s->eos = 0;
     s->slice_initialized = 0;
 
+    for (int i = 0; i < FF_ARRAY_ELEMS(s->layers); i++) {
+        HEVCLayerContext *l = &s->layers[i];
+        l->cur_frame = NULL;
+    }
+
     /* split the input packet into NAL units, so we know the upper bound on the
      * number of slices in the frame */
     ret = ff_h2645_packet_split(&s->pkt, buf, length, s->avctx,
diff --git a/libavcodec/hevc/hevcdec.h b/libavcodec/hevc/hevcdec.h
index 16b1d1ce92..57bf5aa599 100644
--- a/libavcodec/hevc/hevcdec.h
+++ b/libavcodec/hevc/hevcdec.h
@@ -84,6 +84,8 @@ enum RPSType {
     ST_FOLL,
     LT_CURR,
     LT_FOLL,
+    INTER_LAYER0,
+    INTER_LAYER1,
     NB_RPS_TYPE,
 };
 
@@ -442,6 +444,7 @@ typedef struct HEVCLocalContext {
 
 typedef struct HEVCLayerContext {
     HEVCFrame               DPB[32];
+    HEVCFrame              *cur_frame;
 
     const HEVCSPS          *sps; // RefStruct reference
 
@@ -498,7 +501,7 @@ typedef struct HEVCContext {
     struct AVMD5 *md5_ctx;
 
     ///< candidate references for the current frame
-    RefPicList rps[5];
+    RefPicList rps[NB_RPS_TYPE];
 
     const HEVCVPS *vps; ///< RefStruct reference
     const HEVCPPS *pps; ///< RefStruct reference
@@ -615,7 +618,8 @@ int ff_hevc_res_scale_sign_flag(HEVCLocalContext *lc, int idx);
 /**
  * Get the number of candidate references for the current frame.
  */
-int ff_hevc_frame_nb_refs(const SliceHeader *sh, const HEVCPPS *pps);
+int ff_hevc_frame_nb_refs(const SliceHeader *sh, const HEVCPPS *pps,
+                          unsigned layer_idx);
 
 int ff_hevc_set_new_ref(HEVCContext *s, HEVCLayerContext *l, int poc);
 
diff --git a/libavcodec/hevc/refs.c b/libavcodec/hevc/refs.c
index 20fdbb5794..625ac68aaa 100644
--- a/libavcodec/hevc/refs.c
+++ b/libavcodec/hevc/refs.c
@@ -149,6 +149,7 @@ int ff_hevc_set_new_ref(HEVCContext *s, HEVCLayerContext *l, int poc)
         return AVERROR(ENOMEM);
 
     s->cur_frame = ref;
+    l->cur_frame = ref;
     s->collocated_ref = NULL;
 
     if (s->sh.pic_output_flag)
@@ -248,7 +249,9 @@ int ff_hevc_slice_rpl(HEVCContext *s)
         return ret;
 
     if (!(s->rps[ST_CURR_BEF].nb_refs + s->rps[ST_CURR_AFT].nb_refs +
-          s->rps[LT_CURR].nb_refs) && !s->pps->pps_curr_pic_ref_enabled_flag) {
+          s->rps[LT_CURR].nb_refs +
+          s->rps[INTER_LAYER0].nb_refs + s->rps[INTER_LAYER1].nb_refs) &&
+        !s->pps->pps_curr_pic_ref_enabled_flag) {
         av_log(s->avctx, AV_LOG_ERROR, "Zero refs in the frame RPS.\n");
         return AVERROR_INVALIDDATA;
     }
@@ -258,11 +261,14 @@ int ff_hevc_slice_rpl(HEVCContext *s)
         RefPicList *rpl     = &s->cur_frame->refPicList[list_idx];
 
         /* The order of the elements is
-         * ST_CURR_BEF - ST_CURR_AFT - LT_CURR for the L0 and
-         * ST_CURR_AFT - ST_CURR_BEF - LT_CURR for the L1 */
-        int cand_lists[3] = { list_idx ? ST_CURR_AFT : ST_CURR_BEF,
-                              list_idx ? ST_CURR_BEF : ST_CURR_AFT,
-                              LT_CURR };
+         * ST_CURR_BEF - INTER_LAYER0 - ST_CURR_AFT - LT_CURR - INTER_LAYER1 for the L0 and
+         * ST_CURR_AFT - INTER_LAYER1 - ST_CURR_BEF - LT_CURR - INTER_LAYER0 for the L1 */
+        int cand_lists[] = { list_idx ? ST_CURR_AFT : ST_CURR_BEF,
+                             list_idx ? INTER_LAYER1 : INTER_LAYER0,
+                             list_idx ? ST_CURR_BEF : ST_CURR_AFT,
+                             LT_CURR,
+                             list_idx ? INTER_LAYER0 : INTER_LAYER1
+        };
 
         /* concatenate the candidate lists for the current frame */
         while (rpl_tmp.nb_refs < sh->nb_refs[list_idx]) {
@@ -271,7 +277,11 @@ int ff_hevc_slice_rpl(HEVCContext *s)
                 for (j = 0; j < rps->nb_refs && rpl_tmp.nb_refs < HEVC_MAX_REFS; j++) {
                     rpl_tmp.list[rpl_tmp.nb_refs]       = rps->list[j];
                     rpl_tmp.ref[rpl_tmp.nb_refs]        = rps->ref[j];
-                    rpl_tmp.isLongTerm[rpl_tmp.nb_refs] = i == 2;
+                    // multiview inter-layer refs are treated as long-term here,
+                    // cf. G.8.1.3
+                    rpl_tmp.isLongTerm[rpl_tmp.nb_refs] = cand_lists[i] == LT_CURR ||
+                                                          cand_lists[i] == INTER_LAYER0 ||
+                                                          cand_lists[i] == INTER_LAYER1;
                     rpl_tmp.nb_refs++;
                 }
             }
@@ -410,11 +420,6 @@ int ff_hevc_frame_rps(HEVCContext *s, HEVCLayerContext *l)
     RefPicList               *rps = s->rps;
     int i, ret = 0;
 
-    if (!short_rps) {
-        rps[0].nb_refs = rps[1].nb_refs = 0;
-        return 0;
-    }
-
     unref_missing_refs(l);
 
     /* clear the reference flags on all frames except the current one */
@@ -430,6 +435,9 @@ int ff_hevc_frame_rps(HEVCContext *s, HEVCLayerContext *l)
     for (i = 0; i < NB_RPS_TYPE; i++)
         rps[i].nb_refs = 0;
 
+    if (!short_rps)
+        goto inter_layer;
+
     /* add the short refs */
     for (i = 0; i < short_rps->num_delta_pocs; i++) {
         int poc = s->poc + short_rps->delta_poc[i];
@@ -459,6 +467,24 @@ int ff_hevc_frame_rps(HEVCContext *s, HEVCLayerContext *l)
             goto fail;
     }
 
+inter_layer:
+    /* add inter-layer refs */
+    if (s->sh.inter_layer_pred) {
+        HEVCLayerContext *l0 = &s->layers[0];
+
+        av_assert0(l != l0);
+
+        /* Given the assumption of at most two layers, refPicSet0Flag is
+         * always 1, so only RefPicSetInterLayer0 can ever contain a frame. */
+        if (l0->cur_frame) {
+            // inter-layer refs are treated as short-term here, cf. F.8.1.6
+            ret = add_candidate_ref(s, l0, &rps[INTER_LAYER0], l0->cur_frame->poc,
+                                    HEVC_FRAME_FLAG_SHORT_REF, 1);
+            if (ret < 0)
+                goto fail;
+        }
+    }
+
 fail:
     /* release any frames that are now unused */
     for (i = 0; i < FF_ARRAY_ELEMS(l->DPB); i++)
@@ -467,7 +493,8 @@ fail:
     return ret;
 }
 
-int ff_hevc_frame_nb_refs(const SliceHeader *sh, const HEVCPPS *pps)
+int ff_hevc_frame_nb_refs(const SliceHeader *sh, const HEVCPPS *pps,
+                          unsigned layer_idx)
 {
     int ret = 0;
     int i;
@@ -486,6 +513,11 @@ int ff_hevc_frame_nb_refs(const SliceHeader *sh, const HEVCPPS *pps)
             ret += !!long_rps->used[i];
     }
 
+    if (sh->inter_layer_pred) {
+        av_assert0(pps->sps->vps->num_direct_ref_layers[layer_idx] < 2);
+        ret++;
+    }
+
     if (pps->pps_curr_pic_ref_enabled_flag)
         ret++;
 
diff --git a/libavcodec/nvdec_hevc.c b/libavcodec/nvdec_hevc.c
index 6888507535..e01ce4c782 100644
--- a/libavcodec/nvdec_hevc.c
+++ b/libavcodec/nvdec_hevc.c
@@ -188,7 +188,7 @@ static int nvdec_hevc_start_frame(AVCodecContext *avctx,
 
             .NumBitsForShortTermRPSInSlice                = s->sh.short_term_rps ? s->sh.short_term_ref_pic_set_size : 0,
             .NumDeltaPocsOfRefRpsIdx                      = s->sh.short_term_rps ? s->sh.short_term_rps->rps_idx_num_delta_pocs : 0,
-            .NumPocTotalCurr                              = ff_hevc_frame_nb_refs(&s->sh, pps),
+            .NumPocTotalCurr                              = ff_hevc_frame_nb_refs(&s->sh, pps, s->cur_layer),
             .NumPocStCurrBefore                           = s->rps[ST_CURR_BEF].nb_refs,
             .NumPocStCurrAfter                            = s->rps[ST_CURR_AFT].nb_refs,
             .NumPocLtCurr                                 = s->rps[LT_CURR].nb_refs,
diff --git a/libavcodec/vdpau_hevc.c b/libavcodec/vdpau_hevc.c
index affb7e7f5a..0ddcafd897 100644
--- a/libavcodec/vdpau_hevc.c
+++ b/libavcodec/vdpau_hevc.c
@@ -206,7 +206,7 @@ static int vdpau_hevc_start_frame(AVCodecContext *avctx,
         }
     }
     /* See section 7.4.7.2 of the specification. */
-    info->NumPocTotalCurr = ff_hevc_frame_nb_refs(&h->sh, pps);
+    info->NumPocTotalCurr = ff_hevc_frame_nb_refs(&h->sh, pps, h->cur_layer);
     if (sh->short_term_ref_pic_set_sps_flag == 0 && sh->short_term_rps) {
         /* Corresponds to specification field, NumDeltaPocs[RefRpsIdx].
            Only applicable when short_term_ref_pic_set_sps_flag == 0.
-- 
2.43.0



More information about the ffmpeg-devel mailing list